{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 500, "global_step": 11528, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00017349063150589867, "grad_norm": 4.815335750579834, "learning_rate": 1.7346053772766696e-08, "loss": 1.6353, "step": 1 }, { "epoch": 0.00034698126301179735, "grad_norm": 6.2609639167785645, "learning_rate": 3.469210754553339e-08, "loss": 1.6396, "step": 2 }, { "epoch": 0.000520471894517696, "grad_norm": 8.841588973999023, "learning_rate": 5.203816131830009e-08, "loss": 1.834, "step": 3 }, { "epoch": 0.0006939625260235947, "grad_norm": 4.740471363067627, "learning_rate": 6.938421509106678e-08, "loss": 1.5742, "step": 4 }, { "epoch": 0.0008674531575294934, "grad_norm": 6.70953369140625, "learning_rate": 8.673026886383348e-08, "loss": 1.5811, "step": 5 }, { "epoch": 0.001040943789035392, "grad_norm": 5.489251136779785, "learning_rate": 1.0407632263660019e-07, "loss": 1.6089, "step": 6 }, { "epoch": 0.0012144344205412907, "grad_norm": 6.307178020477295, "learning_rate": 1.214223764093669e-07, "loss": 1.5957, "step": 7 }, { "epoch": 0.0013879250520471894, "grad_norm": 8.668987274169922, "learning_rate": 1.3876843018213356e-07, "loss": 1.5532, "step": 8 }, { "epoch": 0.001561415683553088, "grad_norm": 6.398656368255615, "learning_rate": 1.5611448395490029e-07, "loss": 1.7119, "step": 9 }, { "epoch": 0.0017349063150589867, "grad_norm": 9.915180206298828, "learning_rate": 1.7346053772766696e-07, "loss": 1.5229, "step": 10 }, { "epoch": 0.0019083969465648854, "grad_norm": 7.884058952331543, "learning_rate": 1.9080659150043368e-07, "loss": 1.6274, "step": 11 }, { "epoch": 0.002081887578070784, "grad_norm": 5.94698429107666, "learning_rate": 2.0815264527320037e-07, "loss": 1.5659, "step": 12 }, { "epoch": 0.002255378209576683, "grad_norm": 5.980604648590088, "learning_rate": 2.2549869904596704e-07, "loss": 1.6509, "step": 13 }, { "epoch": 0.0024288688410825814, "grad_norm": 5.658161640167236, "learning_rate": 2.428447528187338e-07, "loss": 1.5566, "step": 14 }, { "epoch": 0.0026023594725884803, "grad_norm": 5.7178192138671875, "learning_rate": 2.6019080659150043e-07, "loss": 1.5186, "step": 15 }, { "epoch": 0.002775850104094379, "grad_norm": 5.625595569610596, "learning_rate": 2.7753686036426713e-07, "loss": 1.6133, "step": 16 }, { "epoch": 0.0029493407356002777, "grad_norm": 5.517483711242676, "learning_rate": 2.948829141370339e-07, "loss": 1.6411, "step": 17 }, { "epoch": 0.003122831367106176, "grad_norm": 5.840810298919678, "learning_rate": 3.1222896790980057e-07, "loss": 1.5659, "step": 18 }, { "epoch": 0.003296321998612075, "grad_norm": 5.345042705535889, "learning_rate": 3.295750216825672e-07, "loss": 1.7026, "step": 19 }, { "epoch": 0.0034698126301179735, "grad_norm": 12.300100326538086, "learning_rate": 3.469210754553339e-07, "loss": 1.5674, "step": 20 }, { "epoch": 0.0036433032616238724, "grad_norm": 5.376595973968506, "learning_rate": 3.642671292281006e-07, "loss": 1.6851, "step": 21 }, { "epoch": 0.003816793893129771, "grad_norm": 6.235954761505127, "learning_rate": 3.8161318300086735e-07, "loss": 1.5957, "step": 22 }, { "epoch": 0.003990284524635669, "grad_norm": 6.174750804901123, "learning_rate": 3.9895923677363405e-07, "loss": 1.6279, "step": 23 }, { "epoch": 0.004163775156141568, "grad_norm": 5.745070934295654, "learning_rate": 4.1630529054640075e-07, "loss": 1.6548, "step": 24 }, { "epoch": 0.004337265787647467, "grad_norm": 5.718823432922363, "learning_rate": 4.3365134431916744e-07, "loss": 1.5581, "step": 25 }, { "epoch": 0.004510756419153366, "grad_norm": 5.081756114959717, "learning_rate": 4.509973980919341e-07, "loss": 1.5991, "step": 26 }, { "epoch": 0.004684247050659264, "grad_norm": 7.115768909454346, "learning_rate": 4.683434518647008e-07, "loss": 1.8408, "step": 27 }, { "epoch": 0.004857737682165163, "grad_norm": 4.278243064880371, "learning_rate": 4.856895056374676e-07, "loss": 1.5459, "step": 28 }, { "epoch": 0.005031228313671062, "grad_norm": 5.30607271194458, "learning_rate": 5.030355594102343e-07, "loss": 1.5732, "step": 29 }, { "epoch": 0.005204718945176961, "grad_norm": 4.440147399902344, "learning_rate": 5.203816131830009e-07, "loss": 1.5776, "step": 30 }, { "epoch": 0.005378209576682859, "grad_norm": 5.735405921936035, "learning_rate": 5.377276669557676e-07, "loss": 1.6392, "step": 31 }, { "epoch": 0.005551700208188758, "grad_norm": 5.50547456741333, "learning_rate": 5.550737207285343e-07, "loss": 1.541, "step": 32 }, { "epoch": 0.0057251908396946565, "grad_norm": 5.245941638946533, "learning_rate": 5.72419774501301e-07, "loss": 1.564, "step": 33 }, { "epoch": 0.005898681471200555, "grad_norm": 5.313120365142822, "learning_rate": 5.897658282740678e-07, "loss": 1.522, "step": 34 }, { "epoch": 0.006072172102706454, "grad_norm": 5.691841125488281, "learning_rate": 6.071118820468344e-07, "loss": 1.5229, "step": 35 }, { "epoch": 0.006245662734212352, "grad_norm": 6.22916316986084, "learning_rate": 6.244579358196011e-07, "loss": 1.5361, "step": 36 }, { "epoch": 0.006419153365718251, "grad_norm": 5.261981964111328, "learning_rate": 6.418039895923677e-07, "loss": 1.6646, "step": 37 }, { "epoch": 0.00659264399722415, "grad_norm": 6.444558620452881, "learning_rate": 6.591500433651344e-07, "loss": 1.5459, "step": 38 }, { "epoch": 0.006766134628730049, "grad_norm": 6.428016662597656, "learning_rate": 6.764960971379011e-07, "loss": 1.5625, "step": 39 }, { "epoch": 0.006939625260235947, "grad_norm": 6.500727653503418, "learning_rate": 6.938421509106678e-07, "loss": 1.5977, "step": 40 }, { "epoch": 0.007113115891741846, "grad_norm": 5.337209701538086, "learning_rate": 7.111882046834345e-07, "loss": 1.54, "step": 41 }, { "epoch": 0.007286606523247745, "grad_norm": 4.817502498626709, "learning_rate": 7.285342584562012e-07, "loss": 1.5469, "step": 42 }, { "epoch": 0.007460097154753644, "grad_norm": 5.187300205230713, "learning_rate": 7.45880312228968e-07, "loss": 1.5542, "step": 43 }, { "epoch": 0.007633587786259542, "grad_norm": 5.807931900024414, "learning_rate": 7.632263660017347e-07, "loss": 1.4937, "step": 44 }, { "epoch": 0.007807078417765441, "grad_norm": 4.923823356628418, "learning_rate": 7.805724197745014e-07, "loss": 1.5527, "step": 45 }, { "epoch": 0.007980569049271339, "grad_norm": 6.542623043060303, "learning_rate": 7.979184735472681e-07, "loss": 1.584, "step": 46 }, { "epoch": 0.008154059680777238, "grad_norm": 5.157179355621338, "learning_rate": 8.152645273200348e-07, "loss": 1.4624, "step": 47 }, { "epoch": 0.008327550312283136, "grad_norm": 5.175705909729004, "learning_rate": 8.326105810928015e-07, "loss": 1.4834, "step": 48 }, { "epoch": 0.008501040943789036, "grad_norm": 4.367042541503906, "learning_rate": 8.499566348655682e-07, "loss": 1.4966, "step": 49 }, { "epoch": 0.008674531575294934, "grad_norm": 6.201111793518066, "learning_rate": 8.673026886383349e-07, "loss": 1.4912, "step": 50 }, { "epoch": 0.008848022206800832, "grad_norm": 4.258865833282471, "learning_rate": 8.846487424111015e-07, "loss": 1.5898, "step": 51 }, { "epoch": 0.009021512838306732, "grad_norm": 5.785416603088379, "learning_rate": 9.019947961838682e-07, "loss": 1.4507, "step": 52 }, { "epoch": 0.00919500346981263, "grad_norm": 5.257880687713623, "learning_rate": 9.193408499566349e-07, "loss": 1.5249, "step": 53 }, { "epoch": 0.009368494101318528, "grad_norm": 4.440859794616699, "learning_rate": 9.366869037294016e-07, "loss": 1.4727, "step": 54 }, { "epoch": 0.009541984732824428, "grad_norm": 5.027373313903809, "learning_rate": 9.540329575021685e-07, "loss": 1.5361, "step": 55 }, { "epoch": 0.009715475364330326, "grad_norm": 5.587261199951172, "learning_rate": 9.713790112749352e-07, "loss": 1.4995, "step": 56 }, { "epoch": 0.009888965995836226, "grad_norm": 5.661511421203613, "learning_rate": 9.887250650477019e-07, "loss": 1.4868, "step": 57 }, { "epoch": 0.010062456627342124, "grad_norm": 5.535928726196289, "learning_rate": 1.0060711188204686e-06, "loss": 1.47, "step": 58 }, { "epoch": 0.010235947258848022, "grad_norm": 7.044625759124756, "learning_rate": 1.0234171725932352e-06, "loss": 1.5581, "step": 59 }, { "epoch": 0.010409437890353921, "grad_norm": 5.890305519104004, "learning_rate": 1.0407632263660017e-06, "loss": 1.4434, "step": 60 }, { "epoch": 0.01058292852185982, "grad_norm": 4.891403675079346, "learning_rate": 1.0581092801387684e-06, "loss": 1.46, "step": 61 }, { "epoch": 0.010756419153365717, "grad_norm": 3.755310297012329, "learning_rate": 1.0754553339115351e-06, "loss": 1.5347, "step": 62 }, { "epoch": 0.010929909784871617, "grad_norm": 4.348376750946045, "learning_rate": 1.0928013876843018e-06, "loss": 1.4775, "step": 63 }, { "epoch": 0.011103400416377515, "grad_norm": 4.379542827606201, "learning_rate": 1.1101474414570685e-06, "loss": 1.3525, "step": 64 }, { "epoch": 0.011276891047883415, "grad_norm": 3.707447052001953, "learning_rate": 1.1274934952298352e-06, "loss": 1.5146, "step": 65 }, { "epoch": 0.011450381679389313, "grad_norm": 3.2854342460632324, "learning_rate": 1.144839549002602e-06, "loss": 1.4409, "step": 66 }, { "epoch": 0.011623872310895211, "grad_norm": 4.887171268463135, "learning_rate": 1.1621856027753688e-06, "loss": 1.29, "step": 67 }, { "epoch": 0.01179736294240111, "grad_norm": 3.731964349746704, "learning_rate": 1.1795316565481355e-06, "loss": 1.3916, "step": 68 }, { "epoch": 0.011970853573907009, "grad_norm": 4.650766372680664, "learning_rate": 1.1968777103209022e-06, "loss": 1.3745, "step": 69 }, { "epoch": 0.012144344205412909, "grad_norm": 4.351012706756592, "learning_rate": 1.214223764093669e-06, "loss": 1.2866, "step": 70 }, { "epoch": 0.012317834836918807, "grad_norm": 3.9581916332244873, "learning_rate": 1.2315698178664356e-06, "loss": 1.3257, "step": 71 }, { "epoch": 0.012491325468424705, "grad_norm": 3.449936628341675, "learning_rate": 1.2489158716392023e-06, "loss": 1.4653, "step": 72 }, { "epoch": 0.012664816099930604, "grad_norm": 3.5131375789642334, "learning_rate": 1.266261925411969e-06, "loss": 1.3169, "step": 73 }, { "epoch": 0.012838306731436502, "grad_norm": 4.998897075653076, "learning_rate": 1.2836079791847355e-06, "loss": 1.2939, "step": 74 }, { "epoch": 0.0130117973629424, "grad_norm": 3.755049705505371, "learning_rate": 1.3009540329575024e-06, "loss": 1.3618, "step": 75 }, { "epoch": 0.0131852879944483, "grad_norm": 4.029270172119141, "learning_rate": 1.3183000867302689e-06, "loss": 1.2971, "step": 76 }, { "epoch": 0.013358778625954198, "grad_norm": 3.4073169231414795, "learning_rate": 1.3356461405030358e-06, "loss": 1.3311, "step": 77 }, { "epoch": 0.013532269257460098, "grad_norm": 4.988164901733398, "learning_rate": 1.3529921942758023e-06, "loss": 1.2456, "step": 78 }, { "epoch": 0.013705759888965996, "grad_norm": 3.605191230773926, "learning_rate": 1.3703382480485692e-06, "loss": 1.3921, "step": 79 }, { "epoch": 0.013879250520471894, "grad_norm": 3.802180051803589, "learning_rate": 1.3876843018213356e-06, "loss": 1.2939, "step": 80 }, { "epoch": 0.014052741151977794, "grad_norm": 4.355827808380127, "learning_rate": 1.4050303555941025e-06, "loss": 1.1885, "step": 81 }, { "epoch": 0.014226231783483692, "grad_norm": 2.91510009765625, "learning_rate": 1.422376409366869e-06, "loss": 1.2898, "step": 82 }, { "epoch": 0.01439972241498959, "grad_norm": 3.5141520500183105, "learning_rate": 1.439722463139636e-06, "loss": 1.2383, "step": 83 }, { "epoch": 0.01457321304649549, "grad_norm": 4.397307872772217, "learning_rate": 1.4570685169124024e-06, "loss": 1.3105, "step": 84 }, { "epoch": 0.014746703678001388, "grad_norm": 5.1851983070373535, "learning_rate": 1.4744145706851693e-06, "loss": 1.1816, "step": 85 }, { "epoch": 0.014920194309507287, "grad_norm": 3.397026777267456, "learning_rate": 1.491760624457936e-06, "loss": 1.2275, "step": 86 }, { "epoch": 0.015093684941013185, "grad_norm": 3.9106953144073486, "learning_rate": 1.5091066782307025e-06, "loss": 1.1262, "step": 87 }, { "epoch": 0.015267175572519083, "grad_norm": 3.204820394515991, "learning_rate": 1.5264527320034694e-06, "loss": 1.1958, "step": 88 }, { "epoch": 0.015440666204024983, "grad_norm": 4.559689998626709, "learning_rate": 1.543798785776236e-06, "loss": 1.3069, "step": 89 }, { "epoch": 0.015614156835530881, "grad_norm": 2.379528284072876, "learning_rate": 1.5611448395490028e-06, "loss": 1.0981, "step": 90 }, { "epoch": 0.01578764746703678, "grad_norm": 6.517797946929932, "learning_rate": 1.5784908933217693e-06, "loss": 0.998, "step": 91 }, { "epoch": 0.015961138098542677, "grad_norm": 2.010162115097046, "learning_rate": 1.5958369470945362e-06, "loss": 1.0688, "step": 92 }, { "epoch": 0.01613462873004858, "grad_norm": 2.0165321826934814, "learning_rate": 1.6131830008673027e-06, "loss": 1.1516, "step": 93 }, { "epoch": 0.016308119361554477, "grad_norm": 1.6602797508239746, "learning_rate": 1.6305290546400696e-06, "loss": 1.1128, "step": 94 }, { "epoch": 0.016481609993060375, "grad_norm": 2.1719162464141846, "learning_rate": 1.647875108412836e-06, "loss": 1.0203, "step": 95 }, { "epoch": 0.016655100624566273, "grad_norm": 1.6706749200820923, "learning_rate": 1.665221162185603e-06, "loss": 1.0068, "step": 96 }, { "epoch": 0.01682859125607217, "grad_norm": 1.6050095558166504, "learning_rate": 1.6825672159583695e-06, "loss": 1.0884, "step": 97 }, { "epoch": 0.017002081887578072, "grad_norm": 1.8640276193618774, "learning_rate": 1.6999132697311364e-06, "loss": 1.0833, "step": 98 }, { "epoch": 0.01717557251908397, "grad_norm": 1.4145679473876953, "learning_rate": 1.717259323503903e-06, "loss": 0.991, "step": 99 }, { "epoch": 0.01734906315058987, "grad_norm": 1.441550850868225, "learning_rate": 1.7346053772766698e-06, "loss": 1.033, "step": 100 }, { "epoch": 0.017522553782095766, "grad_norm": 1.8363375663757324, "learning_rate": 1.7519514310494365e-06, "loss": 1.1899, "step": 101 }, { "epoch": 0.017696044413601664, "grad_norm": 1.7074156999588013, "learning_rate": 1.769297484822203e-06, "loss": 0.9524, "step": 102 }, { "epoch": 0.017869535045107566, "grad_norm": 1.4260603189468384, "learning_rate": 1.7866435385949699e-06, "loss": 1.0457, "step": 103 }, { "epoch": 0.018043025676613464, "grad_norm": 1.432376742362976, "learning_rate": 1.8039895923677363e-06, "loss": 0.9365, "step": 104 }, { "epoch": 0.018216516308119362, "grad_norm": 1.3644927740097046, "learning_rate": 1.8213356461405032e-06, "loss": 1.0605, "step": 105 }, { "epoch": 0.01839000693962526, "grad_norm": 1.4237080812454224, "learning_rate": 1.8386816999132697e-06, "loss": 1.1375, "step": 106 }, { "epoch": 0.018563497571131158, "grad_norm": 1.280415415763855, "learning_rate": 1.8560277536860366e-06, "loss": 1.0276, "step": 107 }, { "epoch": 0.018736988202637056, "grad_norm": 1.3138136863708496, "learning_rate": 1.8733738074588031e-06, "loss": 0.9885, "step": 108 }, { "epoch": 0.018910478834142957, "grad_norm": 1.2917845249176025, "learning_rate": 1.89071986123157e-06, "loss": 1.1091, "step": 109 }, { "epoch": 0.019083969465648856, "grad_norm": 1.70353102684021, "learning_rate": 1.908065915004337e-06, "loss": 1.1401, "step": 110 }, { "epoch": 0.019257460097154754, "grad_norm": 1.7633841037750244, "learning_rate": 1.925411968777103e-06, "loss": 1.0864, "step": 111 }, { "epoch": 0.01943095072866065, "grad_norm": 1.9823323488235474, "learning_rate": 1.9427580225498703e-06, "loss": 1.2375, "step": 112 }, { "epoch": 0.01960444136016655, "grad_norm": 1.3398616313934326, "learning_rate": 1.9601040763226366e-06, "loss": 1.0801, "step": 113 }, { "epoch": 0.01977793199167245, "grad_norm": 1.2672946453094482, "learning_rate": 1.9774501300954037e-06, "loss": 1.1201, "step": 114 }, { "epoch": 0.01995142262317835, "grad_norm": 1.2124594449996948, "learning_rate": 1.99479618386817e-06, "loss": 1.0732, "step": 115 }, { "epoch": 0.020124913254684247, "grad_norm": 1.215266227722168, "learning_rate": 2.012142237640937e-06, "loss": 1.075, "step": 116 }, { "epoch": 0.020298403886190145, "grad_norm": 1.3705044984817505, "learning_rate": 2.0294882914137034e-06, "loss": 1.1982, "step": 117 }, { "epoch": 0.020471894517696043, "grad_norm": 1.158579707145691, "learning_rate": 2.0468343451864705e-06, "loss": 1.0884, "step": 118 }, { "epoch": 0.020645385149201945, "grad_norm": 1.1770604848861694, "learning_rate": 2.0641803989592368e-06, "loss": 1.0688, "step": 119 }, { "epoch": 0.020818875780707843, "grad_norm": 1.2560598850250244, "learning_rate": 2.0815264527320035e-06, "loss": 0.9971, "step": 120 }, { "epoch": 0.02099236641221374, "grad_norm": 1.1297913789749146, "learning_rate": 2.09887250650477e-06, "loss": 0.9954, "step": 121 }, { "epoch": 0.02116585704371964, "grad_norm": 1.1109462976455688, "learning_rate": 2.116218560277537e-06, "loss": 0.991, "step": 122 }, { "epoch": 0.021339347675225537, "grad_norm": 2.530078411102295, "learning_rate": 2.133564614050304e-06, "loss": 1.1199, "step": 123 }, { "epoch": 0.021512838306731435, "grad_norm": 2.565696954727173, "learning_rate": 2.1509106678230702e-06, "loss": 1.0833, "step": 124 }, { "epoch": 0.021686328938237336, "grad_norm": 1.5229138135910034, "learning_rate": 2.1682567215958374e-06, "loss": 0.9124, "step": 125 }, { "epoch": 0.021859819569743234, "grad_norm": 1.0388314723968506, "learning_rate": 2.1856027753686036e-06, "loss": 0.9087, "step": 126 }, { "epoch": 0.022033310201249132, "grad_norm": 1.0649573802947998, "learning_rate": 2.2029488291413708e-06, "loss": 0.9292, "step": 127 }, { "epoch": 0.02220680083275503, "grad_norm": 1.1507086753845215, "learning_rate": 2.220294882914137e-06, "loss": 0.8608, "step": 128 }, { "epoch": 0.02238029146426093, "grad_norm": 0.8832144141197205, "learning_rate": 2.237640936686904e-06, "loss": 0.8677, "step": 129 }, { "epoch": 0.02255378209576683, "grad_norm": 1.2134603261947632, "learning_rate": 2.2549869904596704e-06, "loss": 1.1113, "step": 130 }, { "epoch": 0.022727272727272728, "grad_norm": 0.953169047832489, "learning_rate": 2.2723330442324375e-06, "loss": 0.9861, "step": 131 }, { "epoch": 0.022900763358778626, "grad_norm": 0.9815278649330139, "learning_rate": 2.289679098005204e-06, "loss": 0.9607, "step": 132 }, { "epoch": 0.023074253990284524, "grad_norm": 0.8123059868812561, "learning_rate": 2.3070251517779705e-06, "loss": 0.8423, "step": 133 }, { "epoch": 0.023247744621790422, "grad_norm": 0.8939048647880554, "learning_rate": 2.3243712055507376e-06, "loss": 0.9629, "step": 134 }, { "epoch": 0.023421235253296323, "grad_norm": 0.8204233646392822, "learning_rate": 2.341717259323504e-06, "loss": 1.0103, "step": 135 }, { "epoch": 0.02359472588480222, "grad_norm": 1.450378656387329, "learning_rate": 2.359063313096271e-06, "loss": 0.9797, "step": 136 }, { "epoch": 0.02376821651630812, "grad_norm": 0.69654381275177, "learning_rate": 2.3764093668690373e-06, "loss": 0.8779, "step": 137 }, { "epoch": 0.023941707147814018, "grad_norm": 0.876659095287323, "learning_rate": 2.3937554206418044e-06, "loss": 0.9592, "step": 138 }, { "epoch": 0.024115197779319916, "grad_norm": 0.967021644115448, "learning_rate": 2.4111014744145707e-06, "loss": 0.9807, "step": 139 }, { "epoch": 0.024288688410825817, "grad_norm": 0.9644420742988586, "learning_rate": 2.428447528187338e-06, "loss": 0.9805, "step": 140 }, { "epoch": 0.024462179042331715, "grad_norm": 0.7096670269966125, "learning_rate": 2.445793581960104e-06, "loss": 0.9441, "step": 141 }, { "epoch": 0.024635669673837613, "grad_norm": 0.8356378078460693, "learning_rate": 2.463139635732871e-06, "loss": 0.937, "step": 142 }, { "epoch": 0.02480916030534351, "grad_norm": 0.7720776796340942, "learning_rate": 2.4804856895056375e-06, "loss": 1.0525, "step": 143 }, { "epoch": 0.02498265093684941, "grad_norm": 1.4250718355178833, "learning_rate": 2.4978317432784046e-06, "loss": 0.8657, "step": 144 }, { "epoch": 0.025156141568355307, "grad_norm": 0.8530842661857605, "learning_rate": 2.515177797051171e-06, "loss": 0.7996, "step": 145 }, { "epoch": 0.02532963219986121, "grad_norm": 0.7666248679161072, "learning_rate": 2.532523850823938e-06, "loss": 1.0222, "step": 146 }, { "epoch": 0.025503122831367107, "grad_norm": 1.1478543281555176, "learning_rate": 2.5498699045967047e-06, "loss": 0.8037, "step": 147 }, { "epoch": 0.025676613462873005, "grad_norm": 0.852233350276947, "learning_rate": 2.567215958369471e-06, "loss": 0.8386, "step": 148 }, { "epoch": 0.025850104094378903, "grad_norm": 1.0843455791473389, "learning_rate": 2.5845620121422376e-06, "loss": 0.7981, "step": 149 }, { "epoch": 0.0260235947258848, "grad_norm": 0.7627226710319519, "learning_rate": 2.6019080659150048e-06, "loss": 0.938, "step": 150 }, { "epoch": 0.026197085357390702, "grad_norm": 1.3740365505218506, "learning_rate": 2.6192541196877714e-06, "loss": 1.0828, "step": 151 }, { "epoch": 0.0263705759888966, "grad_norm": 0.8279482126235962, "learning_rate": 2.6366001734605377e-06, "loss": 0.7927, "step": 152 }, { "epoch": 0.0265440666204025, "grad_norm": 0.7392839789390564, "learning_rate": 2.6539462272333044e-06, "loss": 0.9153, "step": 153 }, { "epoch": 0.026717557251908396, "grad_norm": 1.2634882926940918, "learning_rate": 2.6712922810060715e-06, "loss": 0.8733, "step": 154 }, { "epoch": 0.026891047883414294, "grad_norm": 0.8666257858276367, "learning_rate": 2.6886383347788382e-06, "loss": 0.8186, "step": 155 }, { "epoch": 0.027064538514920196, "grad_norm": 0.6611669063568115, "learning_rate": 2.7059843885516045e-06, "loss": 0.9236, "step": 156 }, { "epoch": 0.027238029146426094, "grad_norm": 0.6729313135147095, "learning_rate": 2.723330442324371e-06, "loss": 1.0305, "step": 157 }, { "epoch": 0.027411519777931992, "grad_norm": 0.9559786915779114, "learning_rate": 2.7406764960971383e-06, "loss": 0.8596, "step": 158 }, { "epoch": 0.02758501040943789, "grad_norm": 0.8773760199546814, "learning_rate": 2.758022549869905e-06, "loss": 0.8076, "step": 159 }, { "epoch": 0.027758501040943788, "grad_norm": 0.8217472434043884, "learning_rate": 2.7753686036426713e-06, "loss": 1.0337, "step": 160 }, { "epoch": 0.02793199167244969, "grad_norm": 0.8561400175094604, "learning_rate": 2.7927146574154384e-06, "loss": 0.8726, "step": 161 }, { "epoch": 0.028105482303955587, "grad_norm": 0.7257559299468994, "learning_rate": 2.810060711188205e-06, "loss": 0.8669, "step": 162 }, { "epoch": 0.028278972935461485, "grad_norm": 0.7245901823043823, "learning_rate": 2.8274067649609714e-06, "loss": 1.0156, "step": 163 }, { "epoch": 0.028452463566967384, "grad_norm": 0.8338601589202881, "learning_rate": 2.844752818733738e-06, "loss": 0.8984, "step": 164 }, { "epoch": 0.02862595419847328, "grad_norm": 1.102246880531311, "learning_rate": 2.862098872506505e-06, "loss": 0.8914, "step": 165 }, { "epoch": 0.02879944482997918, "grad_norm": 0.6566537618637085, "learning_rate": 2.879444926279272e-06, "loss": 1.0024, "step": 166 }, { "epoch": 0.02897293546148508, "grad_norm": 0.7692321538925171, "learning_rate": 2.896790980052038e-06, "loss": 0.8635, "step": 167 }, { "epoch": 0.02914642609299098, "grad_norm": 0.7698785662651062, "learning_rate": 2.914137033824805e-06, "loss": 1.071, "step": 168 }, { "epoch": 0.029319916724496877, "grad_norm": 0.6429572701454163, "learning_rate": 2.931483087597572e-06, "loss": 0.8945, "step": 169 }, { "epoch": 0.029493407356002775, "grad_norm": 0.7103275656700134, "learning_rate": 2.9488291413703387e-06, "loss": 0.884, "step": 170 }, { "epoch": 0.029666897987508673, "grad_norm": 0.7187206149101257, "learning_rate": 2.966175195143105e-06, "loss": 0.9307, "step": 171 }, { "epoch": 0.029840388619014575, "grad_norm": 0.5949711799621582, "learning_rate": 2.983521248915872e-06, "loss": 0.9526, "step": 172 }, { "epoch": 0.030013879250520473, "grad_norm": 0.7339666485786438, "learning_rate": 3.0008673026886387e-06, "loss": 0.8528, "step": 173 }, { "epoch": 0.03018736988202637, "grad_norm": 0.7466350197792053, "learning_rate": 3.018213356461405e-06, "loss": 0.8169, "step": 174 }, { "epoch": 0.03036086051353227, "grad_norm": 1.0726069211959839, "learning_rate": 3.0355594102341717e-06, "loss": 0.8491, "step": 175 }, { "epoch": 0.030534351145038167, "grad_norm": 0.8756661415100098, "learning_rate": 3.052905464006939e-06, "loss": 0.897, "step": 176 }, { "epoch": 0.030707841776544068, "grad_norm": 0.6892372369766235, "learning_rate": 3.0702515177797055e-06, "loss": 0.8511, "step": 177 }, { "epoch": 0.030881332408049966, "grad_norm": 1.579180359840393, "learning_rate": 3.087597571552472e-06, "loss": 0.7449, "step": 178 }, { "epoch": 0.031054823039555864, "grad_norm": 0.9959123134613037, "learning_rate": 3.1049436253252385e-06, "loss": 0.7668, "step": 179 }, { "epoch": 0.031228313671061762, "grad_norm": 0.8271302580833435, "learning_rate": 3.1222896790980056e-06, "loss": 0.8684, "step": 180 }, { "epoch": 0.031401804302567664, "grad_norm": 0.6397276520729065, "learning_rate": 3.1396357328707723e-06, "loss": 0.8875, "step": 181 }, { "epoch": 0.03157529493407356, "grad_norm": 0.9459241032600403, "learning_rate": 3.1569817866435386e-06, "loss": 0.8673, "step": 182 }, { "epoch": 0.03174878556557946, "grad_norm": 0.7759047150611877, "learning_rate": 3.1743278404163057e-06, "loss": 0.8711, "step": 183 }, { "epoch": 0.031922276197085354, "grad_norm": 0.7790178656578064, "learning_rate": 3.1916738941890724e-06, "loss": 0.8477, "step": 184 }, { "epoch": 0.032095766828591256, "grad_norm": 1.604477047920227, "learning_rate": 3.209019947961839e-06, "loss": 0.8777, "step": 185 }, { "epoch": 0.03226925746009716, "grad_norm": 0.676693856716156, "learning_rate": 3.2263660017346054e-06, "loss": 0.8643, "step": 186 }, { "epoch": 0.03244274809160305, "grad_norm": 0.644795298576355, "learning_rate": 3.2437120555073725e-06, "loss": 0.8586, "step": 187 }, { "epoch": 0.03261623872310895, "grad_norm": 0.6525770425796509, "learning_rate": 3.261058109280139e-06, "loss": 0.9644, "step": 188 }, { "epoch": 0.03278972935461485, "grad_norm": 0.7150494456291199, "learning_rate": 3.2784041630529055e-06, "loss": 0.9407, "step": 189 }, { "epoch": 0.03296321998612075, "grad_norm": 0.673234760761261, "learning_rate": 3.295750216825672e-06, "loss": 0.792, "step": 190 }, { "epoch": 0.03313671061762665, "grad_norm": 0.6626754403114319, "learning_rate": 3.3130962705984393e-06, "loss": 0.9353, "step": 191 }, { "epoch": 0.033310201249132546, "grad_norm": 0.7096927165985107, "learning_rate": 3.330442324371206e-06, "loss": 0.8811, "step": 192 }, { "epoch": 0.03348369188063845, "grad_norm": 0.7351794242858887, "learning_rate": 3.3477883781439722e-06, "loss": 0.6892, "step": 193 }, { "epoch": 0.03365718251214434, "grad_norm": 0.7929335832595825, "learning_rate": 3.365134431916739e-06, "loss": 0.7441, "step": 194 }, { "epoch": 0.03383067314365024, "grad_norm": 0.7624382376670837, "learning_rate": 3.382480485689506e-06, "loss": 0.8257, "step": 195 }, { "epoch": 0.034004163775156145, "grad_norm": 0.7748669385910034, "learning_rate": 3.3998265394622727e-06, "loss": 0.8481, "step": 196 }, { "epoch": 0.03417765440666204, "grad_norm": 0.814811110496521, "learning_rate": 3.417172593235039e-06, "loss": 0.8699, "step": 197 }, { "epoch": 0.03435114503816794, "grad_norm": 0.7616357803344727, "learning_rate": 3.434518647007806e-06, "loss": 0.803, "step": 198 }, { "epoch": 0.034524635669673835, "grad_norm": 0.6103156208992004, "learning_rate": 3.451864700780573e-06, "loss": 0.9177, "step": 199 }, { "epoch": 0.03469812630117974, "grad_norm": 0.6358505487442017, "learning_rate": 3.4692107545533395e-06, "loss": 0.9548, "step": 200 }, { "epoch": 0.03487161693268564, "grad_norm": 0.8462308645248413, "learning_rate": 3.486556808326106e-06, "loss": 0.876, "step": 201 }, { "epoch": 0.03504510756419153, "grad_norm": 0.6185916662216187, "learning_rate": 3.503902862098873e-06, "loss": 0.9285, "step": 202 }, { "epoch": 0.035218598195697434, "grad_norm": 0.810539722442627, "learning_rate": 3.5212489158716396e-06, "loss": 0.7559, "step": 203 }, { "epoch": 0.03539208882720333, "grad_norm": 0.7707465887069702, "learning_rate": 3.538594969644406e-06, "loss": 0.7766, "step": 204 }, { "epoch": 0.03556557945870923, "grad_norm": 0.8495586514472961, "learning_rate": 3.5559410234171726e-06, "loss": 0.8794, "step": 205 }, { "epoch": 0.03573907009021513, "grad_norm": 0.8893985748291016, "learning_rate": 3.5732870771899397e-06, "loss": 0.8918, "step": 206 }, { "epoch": 0.035912560721721026, "grad_norm": 1.4733084440231323, "learning_rate": 3.5906331309627064e-06, "loss": 0.6824, "step": 207 }, { "epoch": 0.03608605135322693, "grad_norm": 0.7809956073760986, "learning_rate": 3.6079791847354727e-06, "loss": 0.8, "step": 208 }, { "epoch": 0.03625954198473282, "grad_norm": 0.9555914998054504, "learning_rate": 3.62532523850824e-06, "loss": 0.7207, "step": 209 }, { "epoch": 0.036433032616238724, "grad_norm": 0.7463303804397583, "learning_rate": 3.6426712922810065e-06, "loss": 0.8175, "step": 210 }, { "epoch": 0.03660652324774462, "grad_norm": 0.769335150718689, "learning_rate": 3.660017346053773e-06, "loss": 0.7695, "step": 211 }, { "epoch": 0.03678001387925052, "grad_norm": 0.536637544631958, "learning_rate": 3.6773633998265395e-06, "loss": 0.9326, "step": 212 }, { "epoch": 0.03695350451075642, "grad_norm": 0.686669111251831, "learning_rate": 3.6947094535993066e-06, "loss": 0.8235, "step": 213 }, { "epoch": 0.037126995142262316, "grad_norm": 0.908027172088623, "learning_rate": 3.7120555073720733e-06, "loss": 0.802, "step": 214 }, { "epoch": 0.03730048577376822, "grad_norm": 0.89664226770401, "learning_rate": 3.72940156114484e-06, "loss": 0.7837, "step": 215 }, { "epoch": 0.03747397640527411, "grad_norm": 0.8224126100540161, "learning_rate": 3.7467476149176062e-06, "loss": 0.7007, "step": 216 }, { "epoch": 0.037647467036780013, "grad_norm": 0.8188750743865967, "learning_rate": 3.7640936686903734e-06, "loss": 0.8289, "step": 217 }, { "epoch": 0.037820957668285915, "grad_norm": 2.1264846324920654, "learning_rate": 3.78143972246314e-06, "loss": 0.7163, "step": 218 }, { "epoch": 0.03799444829979181, "grad_norm": 0.7793065309524536, "learning_rate": 3.7987857762359063e-06, "loss": 0.7161, "step": 219 }, { "epoch": 0.03816793893129771, "grad_norm": 7.660075664520264, "learning_rate": 3.816131830008674e-06, "loss": 0.7937, "step": 220 }, { "epoch": 0.038341429562803606, "grad_norm": 0.9559584856033325, "learning_rate": 3.8334778837814406e-06, "loss": 0.7888, "step": 221 }, { "epoch": 0.03851492019430951, "grad_norm": 0.8368854522705078, "learning_rate": 3.850823937554206e-06, "loss": 0.8337, "step": 222 }, { "epoch": 0.03868841082581541, "grad_norm": 0.8978548645973206, "learning_rate": 3.868169991326973e-06, "loss": 0.8235, "step": 223 }, { "epoch": 0.0388619014573213, "grad_norm": 0.7543613314628601, "learning_rate": 3.885516045099741e-06, "loss": 0.7761, "step": 224 }, { "epoch": 0.039035392088827205, "grad_norm": 0.7012503147125244, "learning_rate": 3.9028620988725065e-06, "loss": 0.8542, "step": 225 }, { "epoch": 0.0392088827203331, "grad_norm": 0.64545077085495, "learning_rate": 3.920208152645273e-06, "loss": 0.8572, "step": 226 }, { "epoch": 0.039382373351839, "grad_norm": 1.1598503589630127, "learning_rate": 3.93755420641804e-06, "loss": 0.8157, "step": 227 }, { "epoch": 0.0395558639833449, "grad_norm": 0.8046950101852417, "learning_rate": 3.9549002601908074e-06, "loss": 0.835, "step": 228 }, { "epoch": 0.0397293546148508, "grad_norm": 0.9517707824707031, "learning_rate": 3.972246313963573e-06, "loss": 0.8191, "step": 229 }, { "epoch": 0.0399028452463567, "grad_norm": 1.009547472000122, "learning_rate": 3.98959236773634e-06, "loss": 0.6919, "step": 230 }, { "epoch": 0.04007633587786259, "grad_norm": 0.7407523989677429, "learning_rate": 4.0069384215091075e-06, "loss": 0.7905, "step": 231 }, { "epoch": 0.040249826509368494, "grad_norm": 0.6676948666572571, "learning_rate": 4.024284475281874e-06, "loss": 0.832, "step": 232 }, { "epoch": 0.040423317140874396, "grad_norm": 0.9606331586837769, "learning_rate": 4.04163052905464e-06, "loss": 0.7549, "step": 233 }, { "epoch": 0.04059680777238029, "grad_norm": 0.7595999836921692, "learning_rate": 4.058976582827407e-06, "loss": 0.8694, "step": 234 }, { "epoch": 0.04077029840388619, "grad_norm": 2.211470127105713, "learning_rate": 4.076322636600174e-06, "loss": 0.844, "step": 235 }, { "epoch": 0.040943789035392086, "grad_norm": 0.6662604212760925, "learning_rate": 4.093668690372941e-06, "loss": 0.9075, "step": 236 }, { "epoch": 0.04111727966689799, "grad_norm": 0.7361257076263428, "learning_rate": 4.111014744145707e-06, "loss": 0.7053, "step": 237 }, { "epoch": 0.04129077029840389, "grad_norm": 0.8374466896057129, "learning_rate": 4.1283607979184735e-06, "loss": 0.9587, "step": 238 }, { "epoch": 0.041464260929909784, "grad_norm": 0.8268419504165649, "learning_rate": 4.145706851691241e-06, "loss": 0.7429, "step": 239 }, { "epoch": 0.041637751561415685, "grad_norm": 0.7588842511177063, "learning_rate": 4.163052905464007e-06, "loss": 0.8174, "step": 240 }, { "epoch": 0.04181124219292158, "grad_norm": 0.66733717918396, "learning_rate": 4.180398959236774e-06, "loss": 0.9172, "step": 241 }, { "epoch": 0.04198473282442748, "grad_norm": 0.7593885064125061, "learning_rate": 4.19774501300954e-06, "loss": 0.7698, "step": 242 }, { "epoch": 0.04215822345593338, "grad_norm": 1.1506482362747192, "learning_rate": 4.215091066782308e-06, "loss": 0.8696, "step": 243 }, { "epoch": 0.04233171408743928, "grad_norm": 0.8607063293457031, "learning_rate": 4.232437120555074e-06, "loss": 0.9065, "step": 244 }, { "epoch": 0.04250520471894518, "grad_norm": 0.7805898189544678, "learning_rate": 4.24978317432784e-06, "loss": 0.7156, "step": 245 }, { "epoch": 0.042678695350451074, "grad_norm": 0.9701886177062988, "learning_rate": 4.267129228100608e-06, "loss": 0.6403, "step": 246 }, { "epoch": 0.042852185981956975, "grad_norm": 0.8391703963279724, "learning_rate": 4.284475281873375e-06, "loss": 0.6885, "step": 247 }, { "epoch": 0.04302567661346287, "grad_norm": 0.7802613973617554, "learning_rate": 4.3018213356461405e-06, "loss": 0.8601, "step": 248 }, { "epoch": 0.04319916724496877, "grad_norm": 0.7520661354064941, "learning_rate": 4.319167389418907e-06, "loss": 1.0027, "step": 249 }, { "epoch": 0.04337265787647467, "grad_norm": 0.8130558729171753, "learning_rate": 4.336513443191675e-06, "loss": 0.7485, "step": 250 }, { "epoch": 0.04354614850798057, "grad_norm": 0.8154857158660889, "learning_rate": 4.353859496964441e-06, "loss": 0.8755, "step": 251 }, { "epoch": 0.04371963913948647, "grad_norm": 0.757587730884552, "learning_rate": 4.371205550737207e-06, "loss": 0.8047, "step": 252 }, { "epoch": 0.04389312977099236, "grad_norm": 0.6265802979469299, "learning_rate": 4.388551604509974e-06, "loss": 0.7207, "step": 253 }, { "epoch": 0.044066620402498265, "grad_norm": 0.6111220717430115, "learning_rate": 4.4058976582827415e-06, "loss": 0.9375, "step": 254 }, { "epoch": 0.044240111034004166, "grad_norm": 0.9138938188552856, "learning_rate": 4.423243712055507e-06, "loss": 0.8058, "step": 255 }, { "epoch": 0.04441360166551006, "grad_norm": 0.8813202381134033, "learning_rate": 4.440589765828274e-06, "loss": 0.7773, "step": 256 }, { "epoch": 0.04458709229701596, "grad_norm": 1.5841827392578125, "learning_rate": 4.457935819601042e-06, "loss": 0.9963, "step": 257 }, { "epoch": 0.04476058292852186, "grad_norm": 0.766584038734436, "learning_rate": 4.475281873373808e-06, "loss": 0.7659, "step": 258 }, { "epoch": 0.04493407356002776, "grad_norm": 0.6755748987197876, "learning_rate": 4.492627927146574e-06, "loss": 0.7212, "step": 259 }, { "epoch": 0.04510756419153366, "grad_norm": 2.221755266189575, "learning_rate": 4.509973980919341e-06, "loss": 0.7344, "step": 260 }, { "epoch": 0.045281054823039554, "grad_norm": 0.805280327796936, "learning_rate": 4.527320034692108e-06, "loss": 0.9041, "step": 261 }, { "epoch": 0.045454545454545456, "grad_norm": 3.478299617767334, "learning_rate": 4.544666088464875e-06, "loss": 0.6799, "step": 262 }, { "epoch": 0.04562803608605135, "grad_norm": 0.8958662152290344, "learning_rate": 4.562012142237641e-06, "loss": 0.7192, "step": 263 }, { "epoch": 0.04580152671755725, "grad_norm": 0.9214358925819397, "learning_rate": 4.579358196010408e-06, "loss": 0.8047, "step": 264 }, { "epoch": 0.04597501734906315, "grad_norm": 0.6886780261993408, "learning_rate": 4.596704249783175e-06, "loss": 0.8123, "step": 265 }, { "epoch": 0.04614850798056905, "grad_norm": 0.7185443043708801, "learning_rate": 4.614050303555941e-06, "loss": 0.8203, "step": 266 }, { "epoch": 0.04632199861207495, "grad_norm": 0.7269342541694641, "learning_rate": 4.631396357328708e-06, "loss": 0.7539, "step": 267 }, { "epoch": 0.046495489243580844, "grad_norm": 0.7610074877738953, "learning_rate": 4.648742411101475e-06, "loss": 0.7434, "step": 268 }, { "epoch": 0.046668979875086745, "grad_norm": 0.5354163646697998, "learning_rate": 4.666088464874242e-06, "loss": 0.8367, "step": 269 }, { "epoch": 0.04684247050659265, "grad_norm": 0.7159690260887146, "learning_rate": 4.683434518647008e-06, "loss": 0.7781, "step": 270 }, { "epoch": 0.04701596113809854, "grad_norm": 0.639028012752533, "learning_rate": 4.7007805724197745e-06, "loss": 0.8618, "step": 271 }, { "epoch": 0.04718945176960444, "grad_norm": 1.0831190347671509, "learning_rate": 4.718126626192542e-06, "loss": 0.8411, "step": 272 }, { "epoch": 0.04736294240111034, "grad_norm": 1.0538662672042847, "learning_rate": 4.735472679965309e-06, "loss": 0.7742, "step": 273 }, { "epoch": 0.04753643303261624, "grad_norm": 0.7081946134567261, "learning_rate": 4.7528187337380746e-06, "loss": 0.8293, "step": 274 }, { "epoch": 0.04770992366412214, "grad_norm": 0.7760938405990601, "learning_rate": 4.770164787510841e-06, "loss": 0.7058, "step": 275 }, { "epoch": 0.047883414295628035, "grad_norm": 0.885649561882019, "learning_rate": 4.787510841283609e-06, "loss": 0.7678, "step": 276 }, { "epoch": 0.04805690492713394, "grad_norm": 4.859396457672119, "learning_rate": 4.8048568950563755e-06, "loss": 0.7527, "step": 277 }, { "epoch": 0.04823039555863983, "grad_norm": 0.6299145817756653, "learning_rate": 4.822202948829141e-06, "loss": 0.8083, "step": 278 }, { "epoch": 0.04840388619014573, "grad_norm": 0.7858087420463562, "learning_rate": 4.839549002601908e-06, "loss": 0.8025, "step": 279 }, { "epoch": 0.048577376821651634, "grad_norm": 0.7333070039749146, "learning_rate": 4.856895056374676e-06, "loss": 0.7378, "step": 280 }, { "epoch": 0.04875086745315753, "grad_norm": 0.6566837430000305, "learning_rate": 4.8742411101474414e-06, "loss": 0.8901, "step": 281 }, { "epoch": 0.04892435808466343, "grad_norm": 0.9724940061569214, "learning_rate": 4.891587163920208e-06, "loss": 0.7305, "step": 282 }, { "epoch": 0.049097848716169325, "grad_norm": 10.982410430908203, "learning_rate": 4.908933217692976e-06, "loss": 0.7285, "step": 283 }, { "epoch": 0.049271339347675226, "grad_norm": 0.6716983318328857, "learning_rate": 4.926279271465742e-06, "loss": 0.907, "step": 284 }, { "epoch": 0.04944482997918112, "grad_norm": 0.5775556564331055, "learning_rate": 4.943625325238508e-06, "loss": 0.9473, "step": 285 }, { "epoch": 0.04961832061068702, "grad_norm": 0.6030986309051514, "learning_rate": 4.960971379011275e-06, "loss": 0.7859, "step": 286 }, { "epoch": 0.049791811242192924, "grad_norm": 0.6085160374641418, "learning_rate": 4.9783174327840425e-06, "loss": 0.8865, "step": 287 }, { "epoch": 0.04996530187369882, "grad_norm": 0.5863941311836243, "learning_rate": 4.995663486556809e-06, "loss": 0.8601, "step": 288 }, { "epoch": 0.05013879250520472, "grad_norm": 0.6125705242156982, "learning_rate": 5.013009540329575e-06, "loss": 0.8037, "step": 289 }, { "epoch": 0.050312283136710614, "grad_norm": 0.571257472038269, "learning_rate": 5.030355594102342e-06, "loss": 0.9082, "step": 290 }, { "epoch": 0.050485773768216516, "grad_norm": 0.791437566280365, "learning_rate": 5.047701647875108e-06, "loss": 0.6273, "step": 291 }, { "epoch": 0.05065926439972242, "grad_norm": 0.8369519710540771, "learning_rate": 5.065047701647876e-06, "loss": 0.8008, "step": 292 }, { "epoch": 0.05083275503122831, "grad_norm": 0.56186443567276, "learning_rate": 5.082393755420643e-06, "loss": 0.9321, "step": 293 }, { "epoch": 0.05100624566273421, "grad_norm": 0.7457464933395386, "learning_rate": 5.099739809193409e-06, "loss": 0.8228, "step": 294 }, { "epoch": 0.05117973629424011, "grad_norm": 0.528501033782959, "learning_rate": 5.117085862966176e-06, "loss": 0.8887, "step": 295 }, { "epoch": 0.05135322692574601, "grad_norm": 0.7900983691215515, "learning_rate": 5.134431916738942e-06, "loss": 0.6978, "step": 296 }, { "epoch": 0.05152671755725191, "grad_norm": 0.6286918520927429, "learning_rate": 5.1517779705117086e-06, "loss": 0.9106, "step": 297 }, { "epoch": 0.051700208188757805, "grad_norm": 0.7337638139724731, "learning_rate": 5.169124024284475e-06, "loss": 0.8862, "step": 298 }, { "epoch": 0.05187369882026371, "grad_norm": 0.5130578875541687, "learning_rate": 5.186470078057242e-06, "loss": 0.9341, "step": 299 }, { "epoch": 0.0520471894517696, "grad_norm": 0.8467931747436523, "learning_rate": 5.2038161318300095e-06, "loss": 0.8159, "step": 300 }, { "epoch": 0.0522206800832755, "grad_norm": 0.7748866081237793, "learning_rate": 5.221162185602776e-06, "loss": 0.8328, "step": 301 }, { "epoch": 0.052394170714781405, "grad_norm": 1.04865300655365, "learning_rate": 5.238508239375543e-06, "loss": 0.8247, "step": 302 }, { "epoch": 0.0525676613462873, "grad_norm": 0.8614583611488342, "learning_rate": 5.25585429314831e-06, "loss": 0.771, "step": 303 }, { "epoch": 0.0527411519777932, "grad_norm": 0.8058211207389832, "learning_rate": 5.2732003469210754e-06, "loss": 0.8032, "step": 304 }, { "epoch": 0.052914642609299095, "grad_norm": 1.5144559144973755, "learning_rate": 5.290546400693842e-06, "loss": 0.6587, "step": 305 }, { "epoch": 0.053088133240805, "grad_norm": 0.7051360011100769, "learning_rate": 5.307892454466609e-06, "loss": 0.7852, "step": 306 }, { "epoch": 0.0532616238723109, "grad_norm": 1.4300090074539185, "learning_rate": 5.325238508239376e-06, "loss": 0.6649, "step": 307 }, { "epoch": 0.05343511450381679, "grad_norm": 0.8560264110565186, "learning_rate": 5.342584562012143e-06, "loss": 0.7117, "step": 308 }, { "epoch": 0.053608605135322694, "grad_norm": 0.8068621158599854, "learning_rate": 5.35993061578491e-06, "loss": 0.782, "step": 309 }, { "epoch": 0.05378209576682859, "grad_norm": 0.831801176071167, "learning_rate": 5.3772766695576765e-06, "loss": 0.748, "step": 310 }, { "epoch": 0.05395558639833449, "grad_norm": 0.7599542140960693, "learning_rate": 5.394622723330442e-06, "loss": 0.74, "step": 311 }, { "epoch": 0.05412907702984039, "grad_norm": 0.6961326599121094, "learning_rate": 5.411968777103209e-06, "loss": 0.6924, "step": 312 }, { "epoch": 0.054302567661346286, "grad_norm": 0.8084877729415894, "learning_rate": 5.429314830875976e-06, "loss": 0.6763, "step": 313 }, { "epoch": 0.05447605829285219, "grad_norm": 0.7455685138702393, "learning_rate": 5.446660884648742e-06, "loss": 0.6255, "step": 314 }, { "epoch": 0.05464954892435808, "grad_norm": 0.5308278203010559, "learning_rate": 5.46400693842151e-06, "loss": 0.9922, "step": 315 }, { "epoch": 0.054823039555863984, "grad_norm": 0.9737206101417542, "learning_rate": 5.481352992194277e-06, "loss": 0.7515, "step": 316 }, { "epoch": 0.054996530187369885, "grad_norm": 0.7302623987197876, "learning_rate": 5.498699045967043e-06, "loss": 0.9568, "step": 317 }, { "epoch": 0.05517002081887578, "grad_norm": 1.1479965448379517, "learning_rate": 5.51604509973981e-06, "loss": 0.6812, "step": 318 }, { "epoch": 0.05534351145038168, "grad_norm": 0.7652915120124817, "learning_rate": 5.533391153512576e-06, "loss": 0.7554, "step": 319 }, { "epoch": 0.055517002081887576, "grad_norm": 0.6627578139305115, "learning_rate": 5.5507372072853426e-06, "loss": 0.8398, "step": 320 }, { "epoch": 0.05569049271339348, "grad_norm": 0.6127437949180603, "learning_rate": 5.568083261058109e-06, "loss": 0.8469, "step": 321 }, { "epoch": 0.05586398334489938, "grad_norm": 1.0261603593826294, "learning_rate": 5.585429314830877e-06, "loss": 0.6429, "step": 322 }, { "epoch": 0.05603747397640527, "grad_norm": 0.6037549376487732, "learning_rate": 5.6027753686036435e-06, "loss": 0.8923, "step": 323 }, { "epoch": 0.056210964607911175, "grad_norm": 0.7665083408355713, "learning_rate": 5.62012142237641e-06, "loss": 0.7959, "step": 324 }, { "epoch": 0.05638445523941707, "grad_norm": 0.6456735134124756, "learning_rate": 5.637467476149177e-06, "loss": 0.8284, "step": 325 }, { "epoch": 0.05655794587092297, "grad_norm": 0.6334142684936523, "learning_rate": 5.654813529921943e-06, "loss": 0.8318, "step": 326 }, { "epoch": 0.056731436502428866, "grad_norm": 0.695650041103363, "learning_rate": 5.6721595836947094e-06, "loss": 0.9285, "step": 327 }, { "epoch": 0.05690492713393477, "grad_norm": 2.0656869411468506, "learning_rate": 5.689505637467476e-06, "loss": 0.7832, "step": 328 }, { "epoch": 0.05707841776544067, "grad_norm": 0.5046230554580688, "learning_rate": 5.706851691240244e-06, "loss": 0.8992, "step": 329 }, { "epoch": 0.05725190839694656, "grad_norm": 0.5988643169403076, "learning_rate": 5.72419774501301e-06, "loss": 0.804, "step": 330 }, { "epoch": 0.057425399028452465, "grad_norm": 0.692200243473053, "learning_rate": 5.741543798785777e-06, "loss": 0.7488, "step": 331 }, { "epoch": 0.05759888965995836, "grad_norm": 0.6809567213058472, "learning_rate": 5.758889852558544e-06, "loss": 0.8037, "step": 332 }, { "epoch": 0.05777238029146426, "grad_norm": 0.638801097869873, "learning_rate": 5.7762359063313105e-06, "loss": 0.7747, "step": 333 }, { "epoch": 0.05794587092297016, "grad_norm": 0.9376187920570374, "learning_rate": 5.793581960104076e-06, "loss": 0.6567, "step": 334 }, { "epoch": 0.05811936155447606, "grad_norm": 0.89469313621521, "learning_rate": 5.810928013876843e-06, "loss": 0.9426, "step": 335 }, { "epoch": 0.05829285218598196, "grad_norm": 0.8213267922401428, "learning_rate": 5.82827406764961e-06, "loss": 0.731, "step": 336 }, { "epoch": 0.05846634281748785, "grad_norm": 1.1361161470413208, "learning_rate": 5.845620121422377e-06, "loss": 0.6674, "step": 337 }, { "epoch": 0.058639833448993754, "grad_norm": 1.0722969770431519, "learning_rate": 5.862966175195144e-06, "loss": 0.7927, "step": 338 }, { "epoch": 0.058813324080499656, "grad_norm": 0.7767350077629089, "learning_rate": 5.880312228967911e-06, "loss": 0.8269, "step": 339 }, { "epoch": 0.05898681471200555, "grad_norm": 0.7055554986000061, "learning_rate": 5.897658282740677e-06, "loss": 0.8315, "step": 340 }, { "epoch": 0.05916030534351145, "grad_norm": 0.7638046145439148, "learning_rate": 5.915004336513443e-06, "loss": 0.7021, "step": 341 }, { "epoch": 0.059333795975017346, "grad_norm": 0.7253721952438354, "learning_rate": 5.93235039028621e-06, "loss": 0.8967, "step": 342 }, { "epoch": 0.05950728660652325, "grad_norm": 0.5444093346595764, "learning_rate": 5.9496964440589766e-06, "loss": 0.7219, "step": 343 }, { "epoch": 0.05968077723802915, "grad_norm": 0.5293662548065186, "learning_rate": 5.967042497831744e-06, "loss": 0.7992, "step": 344 }, { "epoch": 0.059854267869535044, "grad_norm": 0.567878782749176, "learning_rate": 5.984388551604511e-06, "loss": 0.7272, "step": 345 }, { "epoch": 0.060027758501040945, "grad_norm": 0.7837110757827759, "learning_rate": 6.0017346053772775e-06, "loss": 0.6542, "step": 346 }, { "epoch": 0.06020124913254684, "grad_norm": 1.6302368640899658, "learning_rate": 6.019080659150044e-06, "loss": 0.8364, "step": 347 }, { "epoch": 0.06037473976405274, "grad_norm": 0.7898486852645874, "learning_rate": 6.03642671292281e-06, "loss": 0.8018, "step": 348 }, { "epoch": 0.06054823039555864, "grad_norm": 0.7227967381477356, "learning_rate": 6.053772766695577e-06, "loss": 0.8589, "step": 349 }, { "epoch": 0.06072172102706454, "grad_norm": 0.918053925037384, "learning_rate": 6.0711188204683434e-06, "loss": 0.855, "step": 350 }, { "epoch": 0.06089521165857044, "grad_norm": 0.7263798117637634, "learning_rate": 6.08846487424111e-06, "loss": 0.7437, "step": 351 }, { "epoch": 0.061068702290076333, "grad_norm": 0.6784784197807312, "learning_rate": 6.105810928013878e-06, "loss": 0.7058, "step": 352 }, { "epoch": 0.061242192921582235, "grad_norm": 0.5894147157669067, "learning_rate": 6.123156981786644e-06, "loss": 0.8665, "step": 353 }, { "epoch": 0.061415683553088136, "grad_norm": 0.5161333680152893, "learning_rate": 6.140503035559411e-06, "loss": 0.8225, "step": 354 }, { "epoch": 0.06158917418459403, "grad_norm": 0.8817012310028076, "learning_rate": 6.157849089332178e-06, "loss": 0.8091, "step": 355 }, { "epoch": 0.06176266481609993, "grad_norm": 1.0701371431350708, "learning_rate": 6.175195143104944e-06, "loss": 0.6826, "step": 356 }, { "epoch": 0.06193615544760583, "grad_norm": 0.5561087727546692, "learning_rate": 6.19254119687771e-06, "loss": 0.7378, "step": 357 }, { "epoch": 0.06210964607911173, "grad_norm": 0.6870754957199097, "learning_rate": 6.209887250650477e-06, "loss": 0.9226, "step": 358 }, { "epoch": 0.06228313671061763, "grad_norm": 0.9816715717315674, "learning_rate": 6.2272333044232445e-06, "loss": 0.7219, "step": 359 }, { "epoch": 0.062456627342123525, "grad_norm": 0.7335995435714722, "learning_rate": 6.244579358196011e-06, "loss": 0.7476, "step": 360 }, { "epoch": 0.06263011797362943, "grad_norm": 0.9058141708374023, "learning_rate": 6.261925411968778e-06, "loss": 0.9377, "step": 361 }, { "epoch": 0.06280360860513533, "grad_norm": 0.6555805802345276, "learning_rate": 6.279271465741545e-06, "loss": 0.908, "step": 362 }, { "epoch": 0.06297709923664122, "grad_norm": 0.6734438538551331, "learning_rate": 6.2966175195143105e-06, "loss": 0.7739, "step": 363 }, { "epoch": 0.06315058986814712, "grad_norm": 0.8846603035926819, "learning_rate": 6.313963573287077e-06, "loss": 0.834, "step": 364 }, { "epoch": 0.06332408049965302, "grad_norm": 0.8238341212272644, "learning_rate": 6.331309627059844e-06, "loss": 0.6595, "step": 365 }, { "epoch": 0.06349757113115892, "grad_norm": 0.8995324969291687, "learning_rate": 6.348655680832611e-06, "loss": 0.8555, "step": 366 }, { "epoch": 0.06367106176266482, "grad_norm": 0.8729636073112488, "learning_rate": 6.366001734605378e-06, "loss": 0.6772, "step": 367 }, { "epoch": 0.06384455239417071, "grad_norm": 0.8361624479293823, "learning_rate": 6.383347788378145e-06, "loss": 0.8401, "step": 368 }, { "epoch": 0.06401804302567661, "grad_norm": 0.7549445033073425, "learning_rate": 6.4006938421509115e-06, "loss": 0.7329, "step": 369 }, { "epoch": 0.06419153365718251, "grad_norm": 0.7796569466590881, "learning_rate": 6.418039895923678e-06, "loss": 0.8467, "step": 370 }, { "epoch": 0.06436502428868841, "grad_norm": 0.7562674283981323, "learning_rate": 6.435385949696444e-06, "loss": 0.6274, "step": 371 }, { "epoch": 0.06453851492019431, "grad_norm": 0.8655654191970825, "learning_rate": 6.452732003469211e-06, "loss": 0.7244, "step": 372 }, { "epoch": 0.0647120055517002, "grad_norm": 0.6695210933685303, "learning_rate": 6.4700780572419774e-06, "loss": 0.7799, "step": 373 }, { "epoch": 0.0648854961832061, "grad_norm": 1.7294037342071533, "learning_rate": 6.487424111014745e-06, "loss": 0.7656, "step": 374 }, { "epoch": 0.065058986814712, "grad_norm": 0.8687999248504639, "learning_rate": 6.504770164787512e-06, "loss": 0.7002, "step": 375 }, { "epoch": 0.0652324774462179, "grad_norm": 0.7759955525398254, "learning_rate": 6.522116218560278e-06, "loss": 0.7661, "step": 376 }, { "epoch": 0.06540596807772381, "grad_norm": 0.6977130770683289, "learning_rate": 6.539462272333045e-06, "loss": 0.8188, "step": 377 }, { "epoch": 0.0655794587092297, "grad_norm": 1.5170788764953613, "learning_rate": 6.556808326105811e-06, "loss": 0.6729, "step": 378 }, { "epoch": 0.0657529493407356, "grad_norm": 0.7143145203590393, "learning_rate": 6.574154379878578e-06, "loss": 0.8057, "step": 379 }, { "epoch": 0.0659264399722415, "grad_norm": 0.7431565523147583, "learning_rate": 6.591500433651344e-06, "loss": 0.76, "step": 380 }, { "epoch": 0.0660999306037474, "grad_norm": 0.8921603560447693, "learning_rate": 6.608846487424112e-06, "loss": 0.7434, "step": 381 }, { "epoch": 0.0662734212352533, "grad_norm": 0.791877031326294, "learning_rate": 6.6261925411968785e-06, "loss": 0.7627, "step": 382 }, { "epoch": 0.06644691186675919, "grad_norm": 0.6708717346191406, "learning_rate": 6.643538594969645e-06, "loss": 0.7662, "step": 383 }, { "epoch": 0.06662040249826509, "grad_norm": 0.8915305733680725, "learning_rate": 6.660884648742412e-06, "loss": 0.6855, "step": 384 }, { "epoch": 0.06679389312977099, "grad_norm": 1.2064861059188843, "learning_rate": 6.678230702515179e-06, "loss": 0.6848, "step": 385 }, { "epoch": 0.0669673837612769, "grad_norm": 1.4230811595916748, "learning_rate": 6.6955767562879445e-06, "loss": 0.631, "step": 386 }, { "epoch": 0.0671408743927828, "grad_norm": 0.8477129936218262, "learning_rate": 6.712922810060711e-06, "loss": 0.7196, "step": 387 }, { "epoch": 0.06731436502428868, "grad_norm": 0.794474720954895, "learning_rate": 6.730268863833478e-06, "loss": 0.7865, "step": 388 }, { "epoch": 0.06748785565579458, "grad_norm": 0.7628791928291321, "learning_rate": 6.747614917606245e-06, "loss": 0.7771, "step": 389 }, { "epoch": 0.06766134628730049, "grad_norm": 0.669377863407135, "learning_rate": 6.764960971379012e-06, "loss": 0.79, "step": 390 }, { "epoch": 0.06783483691880639, "grad_norm": 0.930276095867157, "learning_rate": 6.782307025151779e-06, "loss": 0.751, "step": 391 }, { "epoch": 0.06800832755031229, "grad_norm": 1.2251073122024536, "learning_rate": 6.7996530789245455e-06, "loss": 0.6873, "step": 392 }, { "epoch": 0.06818181818181818, "grad_norm": 0.5957617163658142, "learning_rate": 6.816999132697311e-06, "loss": 0.877, "step": 393 }, { "epoch": 0.06835530881332408, "grad_norm": 0.6212051510810852, "learning_rate": 6.834345186470078e-06, "loss": 0.8433, "step": 394 }, { "epoch": 0.06852879944482998, "grad_norm": 0.8099094033241272, "learning_rate": 6.851691240242845e-06, "loss": 0.7192, "step": 395 }, { "epoch": 0.06870229007633588, "grad_norm": 0.7998353838920593, "learning_rate": 6.869037294015612e-06, "loss": 0.8076, "step": 396 }, { "epoch": 0.06887578070784178, "grad_norm": 0.7063225507736206, "learning_rate": 6.886383347788379e-06, "loss": 0.6953, "step": 397 }, { "epoch": 0.06904927133934767, "grad_norm": 0.5355812311172485, "learning_rate": 6.903729401561146e-06, "loss": 0.8667, "step": 398 }, { "epoch": 0.06922276197085357, "grad_norm": 1.0308690071105957, "learning_rate": 6.921075455333912e-06, "loss": 0.7527, "step": 399 }, { "epoch": 0.06939625260235947, "grad_norm": 0.5810636878013611, "learning_rate": 6.938421509106679e-06, "loss": 0.7284, "step": 400 }, { "epoch": 0.06956974323386537, "grad_norm": 0.7493751049041748, "learning_rate": 6.955767562879445e-06, "loss": 0.7474, "step": 401 }, { "epoch": 0.06974323386537128, "grad_norm": 1.0948079824447632, "learning_rate": 6.973113616652212e-06, "loss": 0.7709, "step": 402 }, { "epoch": 0.06991672449687716, "grad_norm": 0.7085433006286621, "learning_rate": 6.990459670424979e-06, "loss": 0.8494, "step": 403 }, { "epoch": 0.07009021512838307, "grad_norm": 0.6970533728599548, "learning_rate": 7.007805724197746e-06, "loss": 0.7302, "step": 404 }, { "epoch": 0.07026370575988897, "grad_norm": 0.7963132858276367, "learning_rate": 7.0251517779705125e-06, "loss": 0.725, "step": 405 }, { "epoch": 0.07043719639139487, "grad_norm": 0.6666185259819031, "learning_rate": 7.042497831743279e-06, "loss": 0.7947, "step": 406 }, { "epoch": 0.07061068702290077, "grad_norm": 0.7390024065971375, "learning_rate": 7.059843885516046e-06, "loss": 0.6677, "step": 407 }, { "epoch": 0.07078417765440666, "grad_norm": 0.9136267304420471, "learning_rate": 7.077189939288812e-06, "loss": 0.6533, "step": 408 }, { "epoch": 0.07095766828591256, "grad_norm": 0.694338858127594, "learning_rate": 7.0945359930615785e-06, "loss": 0.7598, "step": 409 }, { "epoch": 0.07113115891741846, "grad_norm": 0.9569571614265442, "learning_rate": 7.111882046834345e-06, "loss": 0.7534, "step": 410 }, { "epoch": 0.07130464954892436, "grad_norm": 1.0424975156784058, "learning_rate": 7.129228100607113e-06, "loss": 0.6542, "step": 411 }, { "epoch": 0.07147814018043026, "grad_norm": 0.9410294890403748, "learning_rate": 7.146574154379879e-06, "loss": 0.6138, "step": 412 }, { "epoch": 0.07165163081193615, "grad_norm": 0.7803980708122253, "learning_rate": 7.163920208152646e-06, "loss": 0.7131, "step": 413 }, { "epoch": 0.07182512144344205, "grad_norm": 1.045674204826355, "learning_rate": 7.181266261925413e-06, "loss": 0.7546, "step": 414 }, { "epoch": 0.07199861207494795, "grad_norm": 0.8654783964157104, "learning_rate": 7.1986123156981795e-06, "loss": 0.7412, "step": 415 }, { "epoch": 0.07217210270645386, "grad_norm": 0.6371574401855469, "learning_rate": 7.215958369470945e-06, "loss": 0.8594, "step": 416 }, { "epoch": 0.07234559333795976, "grad_norm": 0.8476478457450867, "learning_rate": 7.233304423243712e-06, "loss": 0.6602, "step": 417 }, { "epoch": 0.07251908396946564, "grad_norm": 0.560556948184967, "learning_rate": 7.25065047701648e-06, "loss": 0.8, "step": 418 }, { "epoch": 0.07269257460097155, "grad_norm": 0.6393320560455322, "learning_rate": 7.267996530789246e-06, "loss": 0.8726, "step": 419 }, { "epoch": 0.07286606523247745, "grad_norm": 1.0716561079025269, "learning_rate": 7.285342584562013e-06, "loss": 0.6736, "step": 420 }, { "epoch": 0.07303955586398335, "grad_norm": 0.8586274981498718, "learning_rate": 7.30268863833478e-06, "loss": 0.7964, "step": 421 }, { "epoch": 0.07321304649548924, "grad_norm": 0.6543982028961182, "learning_rate": 7.320034692107546e-06, "loss": 0.771, "step": 422 }, { "epoch": 0.07338653712699514, "grad_norm": 0.9546947479248047, "learning_rate": 7.337380745880312e-06, "loss": 0.6929, "step": 423 }, { "epoch": 0.07356002775850104, "grad_norm": 0.7377446293830872, "learning_rate": 7.354726799653079e-06, "loss": 0.677, "step": 424 }, { "epoch": 0.07373351839000694, "grad_norm": 0.7322527170181274, "learning_rate": 7.3720728534258464e-06, "loss": 0.7683, "step": 425 }, { "epoch": 0.07390700902151284, "grad_norm": 0.7413867115974426, "learning_rate": 7.389418907198613e-06, "loss": 0.771, "step": 426 }, { "epoch": 0.07408049965301873, "grad_norm": 0.7413671612739563, "learning_rate": 7.40676496097138e-06, "loss": 0.822, "step": 427 }, { "epoch": 0.07425399028452463, "grad_norm": 0.8946639895439148, "learning_rate": 7.4241110147441465e-06, "loss": 0.854, "step": 428 }, { "epoch": 0.07442748091603053, "grad_norm": 0.8664313554763794, "learning_rate": 7.441457068516913e-06, "loss": 0.7322, "step": 429 }, { "epoch": 0.07460097154753643, "grad_norm": 0.9482120275497437, "learning_rate": 7.45880312228968e-06, "loss": 0.6526, "step": 430 }, { "epoch": 0.07477446217904234, "grad_norm": 0.5458512902259827, "learning_rate": 7.476149176062446e-06, "loss": 0.8723, "step": 431 }, { "epoch": 0.07494795281054822, "grad_norm": 0.8940516710281372, "learning_rate": 7.4934952298352125e-06, "loss": 0.7019, "step": 432 }, { "epoch": 0.07512144344205413, "grad_norm": 0.6466241478919983, "learning_rate": 7.51084128360798e-06, "loss": 0.7361, "step": 433 }, { "epoch": 0.07529493407356003, "grad_norm": 0.7802069187164307, "learning_rate": 7.528187337380747e-06, "loss": 0.6299, "step": 434 }, { "epoch": 0.07546842470506593, "grad_norm": 0.7344783544540405, "learning_rate": 7.545533391153513e-06, "loss": 0.7388, "step": 435 }, { "epoch": 0.07564191533657183, "grad_norm": 0.6571396589279175, "learning_rate": 7.56287944492628e-06, "loss": 0.7869, "step": 436 }, { "epoch": 0.07581540596807772, "grad_norm": 3.0189146995544434, "learning_rate": 7.580225498699047e-06, "loss": 0.7114, "step": 437 }, { "epoch": 0.07598889659958362, "grad_norm": 0.6549769043922424, "learning_rate": 7.597571552471813e-06, "loss": 0.7844, "step": 438 }, { "epoch": 0.07616238723108952, "grad_norm": 1.1466854810714722, "learning_rate": 7.614917606244579e-06, "loss": 0.7739, "step": 439 }, { "epoch": 0.07633587786259542, "grad_norm": 0.9282358288764954, "learning_rate": 7.632263660017348e-06, "loss": 0.845, "step": 440 }, { "epoch": 0.07650936849410132, "grad_norm": 0.9557682275772095, "learning_rate": 7.649609713790114e-06, "loss": 0.7102, "step": 441 }, { "epoch": 0.07668285912560721, "grad_norm": 0.6585580706596375, "learning_rate": 7.666955767562881e-06, "loss": 0.811, "step": 442 }, { "epoch": 0.07685634975711311, "grad_norm": 0.7832518219947815, "learning_rate": 7.684301821335646e-06, "loss": 0.6974, "step": 443 }, { "epoch": 0.07702984038861901, "grad_norm": 0.6745266318321228, "learning_rate": 7.701647875108413e-06, "loss": 0.8176, "step": 444 }, { "epoch": 0.07720333102012492, "grad_norm": 0.852424144744873, "learning_rate": 7.71899392888118e-06, "loss": 0.7551, "step": 445 }, { "epoch": 0.07737682165163082, "grad_norm": 0.8548586368560791, "learning_rate": 7.736339982653946e-06, "loss": 0.7634, "step": 446 }, { "epoch": 0.0775503122831367, "grad_norm": 1.3087246417999268, "learning_rate": 7.753686036426713e-06, "loss": 0.7422, "step": 447 }, { "epoch": 0.0777238029146426, "grad_norm": 0.9239414930343628, "learning_rate": 7.771032090199481e-06, "loss": 0.7153, "step": 448 }, { "epoch": 0.07789729354614851, "grad_norm": 0.9753161072731018, "learning_rate": 7.788378143972248e-06, "loss": 0.6571, "step": 449 }, { "epoch": 0.07807078417765441, "grad_norm": 0.8641350269317627, "learning_rate": 7.805724197745013e-06, "loss": 0.6698, "step": 450 }, { "epoch": 0.07824427480916031, "grad_norm": 0.773655354976654, "learning_rate": 7.82307025151778e-06, "loss": 0.7451, "step": 451 }, { "epoch": 0.0784177654406662, "grad_norm": 0.8763969540596008, "learning_rate": 7.840416305290546e-06, "loss": 0.5737, "step": 452 }, { "epoch": 0.0785912560721721, "grad_norm": 1.2770614624023438, "learning_rate": 7.857762359063313e-06, "loss": 0.6241, "step": 453 }, { "epoch": 0.078764746703678, "grad_norm": 0.7275820970535278, "learning_rate": 7.87510841283608e-06, "loss": 0.7694, "step": 454 }, { "epoch": 0.0789382373351839, "grad_norm": 0.8261591196060181, "learning_rate": 7.892454466608848e-06, "loss": 0.7981, "step": 455 }, { "epoch": 0.0791117279666898, "grad_norm": 0.703579843044281, "learning_rate": 7.909800520381615e-06, "loss": 0.6826, "step": 456 }, { "epoch": 0.07928521859819569, "grad_norm": 1.005600929260254, "learning_rate": 7.927146574154382e-06, "loss": 0.7288, "step": 457 }, { "epoch": 0.0794587092297016, "grad_norm": 0.7515043616294861, "learning_rate": 7.944492627927147e-06, "loss": 0.8726, "step": 458 }, { "epoch": 0.0796321998612075, "grad_norm": 1.3452553749084473, "learning_rate": 7.961838681699913e-06, "loss": 0.8608, "step": 459 }, { "epoch": 0.0798056904927134, "grad_norm": 0.7214851379394531, "learning_rate": 7.97918473547268e-06, "loss": 0.6246, "step": 460 }, { "epoch": 0.0799791811242193, "grad_norm": 0.7828449606895447, "learning_rate": 7.996530789245447e-06, "loss": 0.7883, "step": 461 }, { "epoch": 0.08015267175572519, "grad_norm": 0.7184955477714539, "learning_rate": 8.013876843018215e-06, "loss": 0.7754, "step": 462 }, { "epoch": 0.08032616238723109, "grad_norm": 1.6390373706817627, "learning_rate": 8.031222896790982e-06, "loss": 0.8508, "step": 463 }, { "epoch": 0.08049965301873699, "grad_norm": 0.7775904536247253, "learning_rate": 8.048568950563748e-06, "loss": 0.7139, "step": 464 }, { "epoch": 0.08067314365024289, "grad_norm": 0.579129695892334, "learning_rate": 8.065915004336513e-06, "loss": 0.8235, "step": 465 }, { "epoch": 0.08084663428174879, "grad_norm": 0.775214433670044, "learning_rate": 8.08326105810928e-06, "loss": 0.7097, "step": 466 }, { "epoch": 0.08102012491325468, "grad_norm": 0.7665948271751404, "learning_rate": 8.100607111882047e-06, "loss": 0.605, "step": 467 }, { "epoch": 0.08119361554476058, "grad_norm": 0.6703997254371643, "learning_rate": 8.117953165654814e-06, "loss": 0.7177, "step": 468 }, { "epoch": 0.08136710617626648, "grad_norm": 0.7012144327163696, "learning_rate": 8.13529921942758e-06, "loss": 0.6602, "step": 469 }, { "epoch": 0.08154059680777238, "grad_norm": 0.7739162445068359, "learning_rate": 8.152645273200349e-06, "loss": 0.8223, "step": 470 }, { "epoch": 0.08171408743927829, "grad_norm": 1.3429744243621826, "learning_rate": 8.169991326973115e-06, "loss": 0.8232, "step": 471 }, { "epoch": 0.08188757807078417, "grad_norm": 0.7664818167686462, "learning_rate": 8.187337380745882e-06, "loss": 0.7393, "step": 472 }, { "epoch": 0.08206106870229007, "grad_norm": 0.709116518497467, "learning_rate": 8.204683434518647e-06, "loss": 0.8679, "step": 473 }, { "epoch": 0.08223455933379598, "grad_norm": 1.02998685836792, "learning_rate": 8.222029488291414e-06, "loss": 0.728, "step": 474 }, { "epoch": 0.08240804996530188, "grad_norm": 0.8173246383666992, "learning_rate": 8.23937554206418e-06, "loss": 0.8169, "step": 475 }, { "epoch": 0.08258154059680778, "grad_norm": 0.8525285124778748, "learning_rate": 8.256721595836947e-06, "loss": 0.6626, "step": 476 }, { "epoch": 0.08275503122831367, "grad_norm": 0.8937381505966187, "learning_rate": 8.274067649609715e-06, "loss": 0.5542, "step": 477 }, { "epoch": 0.08292852185981957, "grad_norm": 0.7560815215110779, "learning_rate": 8.291413703382482e-06, "loss": 0.6812, "step": 478 }, { "epoch": 0.08310201249132547, "grad_norm": 0.8020071983337402, "learning_rate": 8.308759757155249e-06, "loss": 0.7202, "step": 479 }, { "epoch": 0.08327550312283137, "grad_norm": 0.8825773596763611, "learning_rate": 8.326105810928014e-06, "loss": 0.7566, "step": 480 }, { "epoch": 0.08344899375433727, "grad_norm": 0.5909084677696228, "learning_rate": 8.34345186470078e-06, "loss": 0.8176, "step": 481 }, { "epoch": 0.08362248438584316, "grad_norm": 0.8789088129997253, "learning_rate": 8.360797918473547e-06, "loss": 0.7727, "step": 482 }, { "epoch": 0.08379597501734906, "grad_norm": 0.9357159733772278, "learning_rate": 8.378143972246314e-06, "loss": 0.5996, "step": 483 }, { "epoch": 0.08396946564885496, "grad_norm": 1.0077883005142212, "learning_rate": 8.39549002601908e-06, "loss": 0.5942, "step": 484 }, { "epoch": 0.08414295628036086, "grad_norm": 0.6832888126373291, "learning_rate": 8.412836079791849e-06, "loss": 0.7163, "step": 485 }, { "epoch": 0.08431644691186677, "grad_norm": 0.9494338631629944, "learning_rate": 8.430182133564616e-06, "loss": 0.5977, "step": 486 }, { "epoch": 0.08448993754337265, "grad_norm": 1.0955969095230103, "learning_rate": 8.44752818733738e-06, "loss": 0.6746, "step": 487 }, { "epoch": 0.08466342817487855, "grad_norm": 1.162654995918274, "learning_rate": 8.464874241110147e-06, "loss": 0.7969, "step": 488 }, { "epoch": 0.08483691880638446, "grad_norm": 0.5996276140213013, "learning_rate": 8.482220294882914e-06, "loss": 0.8638, "step": 489 }, { "epoch": 0.08501040943789036, "grad_norm": 0.9170894622802734, "learning_rate": 8.49956634865568e-06, "loss": 0.632, "step": 490 }, { "epoch": 0.08518390006939626, "grad_norm": 0.9191092848777771, "learning_rate": 8.516912402428448e-06, "loss": 0.7981, "step": 491 }, { "epoch": 0.08535739070090215, "grad_norm": 0.7416355609893799, "learning_rate": 8.534258456201216e-06, "loss": 0.5829, "step": 492 }, { "epoch": 0.08553088133240805, "grad_norm": 0.779184103012085, "learning_rate": 8.551604509973983e-06, "loss": 0.6583, "step": 493 }, { "epoch": 0.08570437196391395, "grad_norm": 0.8102035522460938, "learning_rate": 8.56895056374675e-06, "loss": 0.6511, "step": 494 }, { "epoch": 0.08587786259541985, "grad_norm": 0.8218130469322205, "learning_rate": 8.586296617519514e-06, "loss": 0.7849, "step": 495 }, { "epoch": 0.08605135322692574, "grad_norm": 0.628180742263794, "learning_rate": 8.603642671292281e-06, "loss": 0.7087, "step": 496 }, { "epoch": 0.08622484385843164, "grad_norm": 1.953553318977356, "learning_rate": 8.620988725065048e-06, "loss": 0.7119, "step": 497 }, { "epoch": 0.08639833448993754, "grad_norm": 0.8177529573440552, "learning_rate": 8.638334778837814e-06, "loss": 0.623, "step": 498 }, { "epoch": 0.08657182512144344, "grad_norm": 0.6821890473365784, "learning_rate": 8.655680832610583e-06, "loss": 0.7053, "step": 499 }, { "epoch": 0.08674531575294935, "grad_norm": 0.885227620601654, "learning_rate": 8.67302688638335e-06, "loss": 0.6406, "step": 500 }, { "epoch": 0.08691880638445523, "grad_norm": 0.7139533758163452, "learning_rate": 8.690372940156116e-06, "loss": 0.7063, "step": 501 }, { "epoch": 0.08709229701596113, "grad_norm": 1.3664354085922241, "learning_rate": 8.707718993928881e-06, "loss": 0.6804, "step": 502 }, { "epoch": 0.08726578764746704, "grad_norm": 0.8646285533905029, "learning_rate": 8.725065047701648e-06, "loss": 0.7297, "step": 503 }, { "epoch": 0.08743927827897294, "grad_norm": 1.329119324684143, "learning_rate": 8.742411101474415e-06, "loss": 0.7087, "step": 504 }, { "epoch": 0.08761276891047884, "grad_norm": 0.7398525476455688, "learning_rate": 8.759757155247181e-06, "loss": 0.7896, "step": 505 }, { "epoch": 0.08778625954198473, "grad_norm": 0.7473292350769043, "learning_rate": 8.777103209019948e-06, "loss": 0.663, "step": 506 }, { "epoch": 0.08795975017349063, "grad_norm": 0.8798092603683472, "learning_rate": 8.794449262792716e-06, "loss": 0.6587, "step": 507 }, { "epoch": 0.08813324080499653, "grad_norm": 0.8144434094429016, "learning_rate": 8.811795316565483e-06, "loss": 0.6016, "step": 508 }, { "epoch": 0.08830673143650243, "grad_norm": 1.0975896120071411, "learning_rate": 8.82914137033825e-06, "loss": 0.6672, "step": 509 }, { "epoch": 0.08848022206800833, "grad_norm": 0.764312744140625, "learning_rate": 8.846487424111015e-06, "loss": 0.7397, "step": 510 }, { "epoch": 0.08865371269951422, "grad_norm": 0.689689040184021, "learning_rate": 8.863833477883781e-06, "loss": 0.7532, "step": 511 }, { "epoch": 0.08882720333102012, "grad_norm": 0.9319776296615601, "learning_rate": 8.881179531656548e-06, "loss": 0.7139, "step": 512 }, { "epoch": 0.08900069396252602, "grad_norm": 1.0532619953155518, "learning_rate": 8.898525585429315e-06, "loss": 0.6583, "step": 513 }, { "epoch": 0.08917418459403192, "grad_norm": 0.6886184811592102, "learning_rate": 8.915871639202083e-06, "loss": 0.7584, "step": 514 }, { "epoch": 0.08934767522553783, "grad_norm": 0.7394210696220398, "learning_rate": 8.93321769297485e-06, "loss": 0.7539, "step": 515 }, { "epoch": 0.08952116585704371, "grad_norm": 0.5901085734367371, "learning_rate": 8.950563746747617e-06, "loss": 0.7644, "step": 516 }, { "epoch": 0.08969465648854962, "grad_norm": 0.8676567077636719, "learning_rate": 8.967909800520382e-06, "loss": 0.7686, "step": 517 }, { "epoch": 0.08986814712005552, "grad_norm": 0.7285106182098389, "learning_rate": 8.985255854293148e-06, "loss": 0.6838, "step": 518 }, { "epoch": 0.09004163775156142, "grad_norm": 0.6929370760917664, "learning_rate": 9.002601908065915e-06, "loss": 0.7634, "step": 519 }, { "epoch": 0.09021512838306732, "grad_norm": 0.8217961192131042, "learning_rate": 9.019947961838682e-06, "loss": 0.7397, "step": 520 }, { "epoch": 0.09038861901457321, "grad_norm": 0.9813176393508911, "learning_rate": 9.037294015611448e-06, "loss": 0.7473, "step": 521 }, { "epoch": 0.09056210964607911, "grad_norm": 0.755930483341217, "learning_rate": 9.054640069384217e-06, "loss": 0.6104, "step": 522 }, { "epoch": 0.09073560027758501, "grad_norm": 0.8116580843925476, "learning_rate": 9.071986123156983e-06, "loss": 0.5685, "step": 523 }, { "epoch": 0.09090909090909091, "grad_norm": 0.7591921091079712, "learning_rate": 9.08933217692975e-06, "loss": 0.6926, "step": 524 }, { "epoch": 0.09108258154059681, "grad_norm": 0.7296175360679626, "learning_rate": 9.106678230702515e-06, "loss": 0.696, "step": 525 }, { "epoch": 0.0912560721721027, "grad_norm": 0.6052044034004211, "learning_rate": 9.124024284475282e-06, "loss": 0.813, "step": 526 }, { "epoch": 0.0914295628036086, "grad_norm": 0.9392281174659729, "learning_rate": 9.141370338248049e-06, "loss": 0.6506, "step": 527 }, { "epoch": 0.0916030534351145, "grad_norm": 0.7543035745620728, "learning_rate": 9.158716392020815e-06, "loss": 0.714, "step": 528 }, { "epoch": 0.0917765440666204, "grad_norm": 1.0716018676757812, "learning_rate": 9.176062445793584e-06, "loss": 0.6085, "step": 529 }, { "epoch": 0.0919500346981263, "grad_norm": 1.0834380388259888, "learning_rate": 9.19340849956635e-06, "loss": 0.6614, "step": 530 }, { "epoch": 0.0921235253296322, "grad_norm": 0.9322414398193359, "learning_rate": 9.210754553339117e-06, "loss": 0.585, "step": 531 }, { "epoch": 0.0922970159611381, "grad_norm": 1.064762830734253, "learning_rate": 9.228100607111882e-06, "loss": 0.905, "step": 532 }, { "epoch": 0.092470506592644, "grad_norm": 0.7074531316757202, "learning_rate": 9.245446660884649e-06, "loss": 0.7267, "step": 533 }, { "epoch": 0.0926439972241499, "grad_norm": 0.7268208861351013, "learning_rate": 9.262792714657415e-06, "loss": 0.7759, "step": 534 }, { "epoch": 0.0928174878556558, "grad_norm": 1.8754379749298096, "learning_rate": 9.280138768430182e-06, "loss": 0.5776, "step": 535 }, { "epoch": 0.09299097848716169, "grad_norm": 2.2080729007720947, "learning_rate": 9.29748482220295e-06, "loss": 0.7365, "step": 536 }, { "epoch": 0.09316446911866759, "grad_norm": 0.8226796984672546, "learning_rate": 9.314830875975717e-06, "loss": 0.8542, "step": 537 }, { "epoch": 0.09333795975017349, "grad_norm": 1.653834581375122, "learning_rate": 9.332176929748484e-06, "loss": 0.6907, "step": 538 }, { "epoch": 0.09351145038167939, "grad_norm": 0.6772366166114807, "learning_rate": 9.34952298352125e-06, "loss": 0.7031, "step": 539 }, { "epoch": 0.0936849410131853, "grad_norm": 0.5401658415794373, "learning_rate": 9.366869037294016e-06, "loss": 0.8594, "step": 540 }, { "epoch": 0.09385843164469118, "grad_norm": 1.2171071767807007, "learning_rate": 9.384215091066782e-06, "loss": 0.657, "step": 541 }, { "epoch": 0.09403192227619708, "grad_norm": 0.568236231803894, "learning_rate": 9.401561144839549e-06, "loss": 0.7146, "step": 542 }, { "epoch": 0.09420541290770298, "grad_norm": 0.5544914603233337, "learning_rate": 9.418907198612316e-06, "loss": 0.8723, "step": 543 }, { "epoch": 0.09437890353920889, "grad_norm": 0.7629676461219788, "learning_rate": 9.436253252385084e-06, "loss": 0.7893, "step": 544 }, { "epoch": 0.09455239417071479, "grad_norm": 0.8096640110015869, "learning_rate": 9.45359930615785e-06, "loss": 0.8293, "step": 545 }, { "epoch": 0.09472588480222068, "grad_norm": 0.9710381627082825, "learning_rate": 9.470945359930617e-06, "loss": 0.6384, "step": 546 }, { "epoch": 0.09489937543372658, "grad_norm": 0.6319253444671631, "learning_rate": 9.488291413703382e-06, "loss": 0.8062, "step": 547 }, { "epoch": 0.09507286606523248, "grad_norm": 0.7259004712104797, "learning_rate": 9.505637467476149e-06, "loss": 0.7434, "step": 548 }, { "epoch": 0.09524635669673838, "grad_norm": 0.9125682711601257, "learning_rate": 9.522983521248916e-06, "loss": 0.7483, "step": 549 }, { "epoch": 0.09541984732824428, "grad_norm": 0.7326458692550659, "learning_rate": 9.540329575021683e-06, "loss": 0.749, "step": 550 }, { "epoch": 0.09559333795975017, "grad_norm": 0.7823507785797119, "learning_rate": 9.557675628794451e-06, "loss": 0.689, "step": 551 }, { "epoch": 0.09576682859125607, "grad_norm": 0.8902698755264282, "learning_rate": 9.575021682567218e-06, "loss": 0.7332, "step": 552 }, { "epoch": 0.09594031922276197, "grad_norm": 0.7595632076263428, "learning_rate": 9.592367736339984e-06, "loss": 0.7043, "step": 553 }, { "epoch": 0.09611380985426787, "grad_norm": 0.792465090751648, "learning_rate": 9.609713790112751e-06, "loss": 0.7013, "step": 554 }, { "epoch": 0.09628730048577377, "grad_norm": 0.7701244950294495, "learning_rate": 9.627059843885516e-06, "loss": 0.791, "step": 555 }, { "epoch": 0.09646079111727966, "grad_norm": 0.8943719267845154, "learning_rate": 9.644405897658283e-06, "loss": 0.7175, "step": 556 }, { "epoch": 0.09663428174878556, "grad_norm": 1.3731937408447266, "learning_rate": 9.66175195143105e-06, "loss": 0.7277, "step": 557 }, { "epoch": 0.09680777238029147, "grad_norm": 0.8585498332977295, "learning_rate": 9.679098005203816e-06, "loss": 0.6948, "step": 558 }, { "epoch": 0.09698126301179737, "grad_norm": 0.6464988589286804, "learning_rate": 9.696444058976584e-06, "loss": 0.8181, "step": 559 }, { "epoch": 0.09715475364330327, "grad_norm": 0.8568048477172852, "learning_rate": 9.713790112749351e-06, "loss": 0.7307, "step": 560 }, { "epoch": 0.09732824427480916, "grad_norm": 0.8411333560943604, "learning_rate": 9.731136166522118e-06, "loss": 0.6648, "step": 561 }, { "epoch": 0.09750173490631506, "grad_norm": 7.223629474639893, "learning_rate": 9.748482220294883e-06, "loss": 0.6119, "step": 562 }, { "epoch": 0.09767522553782096, "grad_norm": 0.6756853461265564, "learning_rate": 9.76582827406765e-06, "loss": 0.6816, "step": 563 }, { "epoch": 0.09784871616932686, "grad_norm": 0.6368600130081177, "learning_rate": 9.783174327840416e-06, "loss": 0.7976, "step": 564 }, { "epoch": 0.09802220680083276, "grad_norm": 1.1108633279800415, "learning_rate": 9.800520381613183e-06, "loss": 0.6875, "step": 565 }, { "epoch": 0.09819569743233865, "grad_norm": 0.842835009098053, "learning_rate": 9.817866435385951e-06, "loss": 0.7048, "step": 566 }, { "epoch": 0.09836918806384455, "grad_norm": 0.6681727170944214, "learning_rate": 9.835212489158718e-06, "loss": 0.7976, "step": 567 }, { "epoch": 0.09854267869535045, "grad_norm": 0.8945640325546265, "learning_rate": 9.852558542931485e-06, "loss": 0.8228, "step": 568 }, { "epoch": 0.09871616932685635, "grad_norm": 0.6476752161979675, "learning_rate": 9.869904596704251e-06, "loss": 0.7466, "step": 569 }, { "epoch": 0.09888965995836224, "grad_norm": 0.9636639356613159, "learning_rate": 9.887250650477016e-06, "loss": 0.8408, "step": 570 }, { "epoch": 0.09906315058986814, "grad_norm": 0.7499241828918457, "learning_rate": 9.904596704249783e-06, "loss": 0.7227, "step": 571 }, { "epoch": 0.09923664122137404, "grad_norm": 0.8054186105728149, "learning_rate": 9.92194275802255e-06, "loss": 0.712, "step": 572 }, { "epoch": 0.09941013185287995, "grad_norm": 0.5592566132545471, "learning_rate": 9.939288811795318e-06, "loss": 0.8115, "step": 573 }, { "epoch": 0.09958362248438585, "grad_norm": 1.003616213798523, "learning_rate": 9.956634865568085e-06, "loss": 0.6499, "step": 574 }, { "epoch": 0.09975711311589174, "grad_norm": 0.7742766737937927, "learning_rate": 9.973980919340852e-06, "loss": 0.7188, "step": 575 }, { "epoch": 0.09993060374739764, "grad_norm": 1.2237392663955688, "learning_rate": 9.991326973113618e-06, "loss": 0.6455, "step": 576 }, { "epoch": 0.10010409437890354, "grad_norm": 0.8949840068817139, "learning_rate": 1.0008673026886383e-05, "loss": 0.5955, "step": 577 }, { "epoch": 0.10027758501040944, "grad_norm": 0.9954709410667419, "learning_rate": 1.002601908065915e-05, "loss": 0.6442, "step": 578 }, { "epoch": 0.10045107564191534, "grad_norm": 0.939723551273346, "learning_rate": 1.0043365134431917e-05, "loss": 0.6877, "step": 579 }, { "epoch": 0.10062456627342123, "grad_norm": 0.7950365543365479, "learning_rate": 1.0060711188204683e-05, "loss": 0.7377, "step": 580 }, { "epoch": 0.10079805690492713, "grad_norm": 0.8031066656112671, "learning_rate": 1.007805724197745e-05, "loss": 0.7664, "step": 581 }, { "epoch": 0.10097154753643303, "grad_norm": 0.7567896842956543, "learning_rate": 1.0095403295750217e-05, "loss": 0.7759, "step": 582 }, { "epoch": 0.10114503816793893, "grad_norm": 0.6213170886039734, "learning_rate": 1.0112749349522983e-05, "loss": 0.8481, "step": 583 }, { "epoch": 0.10131852879944483, "grad_norm": 0.6094651222229004, "learning_rate": 1.0130095403295752e-05, "loss": 0.7006, "step": 584 }, { "epoch": 0.10149201943095072, "grad_norm": 1.0541772842407227, "learning_rate": 1.0147441457068519e-05, "loss": 0.6729, "step": 585 }, { "epoch": 0.10166551006245662, "grad_norm": 0.7511570453643799, "learning_rate": 1.0164787510841285e-05, "loss": 0.7402, "step": 586 }, { "epoch": 0.10183900069396253, "grad_norm": 0.6804909110069275, "learning_rate": 1.0182133564614052e-05, "loss": 0.5862, "step": 587 }, { "epoch": 0.10201249132546843, "grad_norm": 0.9981895685195923, "learning_rate": 1.0199479618386819e-05, "loss": 0.8435, "step": 588 }, { "epoch": 0.10218598195697433, "grad_norm": 0.9453763365745544, "learning_rate": 1.0216825672159585e-05, "loss": 0.6151, "step": 589 }, { "epoch": 0.10235947258848022, "grad_norm": 0.5910636186599731, "learning_rate": 1.0234171725932352e-05, "loss": 0.7754, "step": 590 }, { "epoch": 0.10253296321998612, "grad_norm": 0.6517502069473267, "learning_rate": 1.0251517779705119e-05, "loss": 0.7522, "step": 591 }, { "epoch": 0.10270645385149202, "grad_norm": 1.0400340557098389, "learning_rate": 1.0268863833477884e-05, "loss": 0.884, "step": 592 }, { "epoch": 0.10287994448299792, "grad_norm": 0.9368115067481995, "learning_rate": 1.028620988725065e-05, "loss": 0.6853, "step": 593 }, { "epoch": 0.10305343511450382, "grad_norm": 0.6270506978034973, "learning_rate": 1.0303555941023417e-05, "loss": 0.7229, "step": 594 }, { "epoch": 0.10322692574600971, "grad_norm": 1.6493935585021973, "learning_rate": 1.0320901994796184e-05, "loss": 0.6746, "step": 595 }, { "epoch": 0.10340041637751561, "grad_norm": 0.6048089861869812, "learning_rate": 1.033824804856895e-05, "loss": 0.7174, "step": 596 }, { "epoch": 0.10357390700902151, "grad_norm": 1.015458345413208, "learning_rate": 1.0355594102341717e-05, "loss": 0.8799, "step": 597 }, { "epoch": 0.10374739764052741, "grad_norm": 0.6777968406677246, "learning_rate": 1.0372940156114484e-05, "loss": 0.8088, "step": 598 }, { "epoch": 0.10392088827203332, "grad_norm": 0.8517946600914001, "learning_rate": 1.0390286209887252e-05, "loss": 0.6487, "step": 599 }, { "epoch": 0.1040943789035392, "grad_norm": 0.880364179611206, "learning_rate": 1.0407632263660019e-05, "loss": 0.7549, "step": 600 }, { "epoch": 0.1042678695350451, "grad_norm": 0.6786611676216125, "learning_rate": 1.0424978317432786e-05, "loss": 0.7529, "step": 601 }, { "epoch": 0.104441360166551, "grad_norm": 1.0398542881011963, "learning_rate": 1.0442324371205552e-05, "loss": 0.6331, "step": 602 }, { "epoch": 0.10461485079805691, "grad_norm": 0.731816828250885, "learning_rate": 1.0459670424978319e-05, "loss": 0.9141, "step": 603 }, { "epoch": 0.10478834142956281, "grad_norm": 0.8709906935691833, "learning_rate": 1.0477016478751086e-05, "loss": 0.6921, "step": 604 }, { "epoch": 0.1049618320610687, "grad_norm": 0.9583622217178345, "learning_rate": 1.0494362532523852e-05, "loss": 0.636, "step": 605 }, { "epoch": 0.1051353226925746, "grad_norm": 1.5152076482772827, "learning_rate": 1.051170858629662e-05, "loss": 0.676, "step": 606 }, { "epoch": 0.1053088133240805, "grad_norm": 1.03229820728302, "learning_rate": 1.0529054640069384e-05, "loss": 0.5743, "step": 607 }, { "epoch": 0.1054823039555864, "grad_norm": 0.5453727841377258, "learning_rate": 1.0546400693842151e-05, "loss": 0.7979, "step": 608 }, { "epoch": 0.1056557945870923, "grad_norm": 0.625810444355011, "learning_rate": 1.0563746747614918e-05, "loss": 0.7998, "step": 609 }, { "epoch": 0.10582928521859819, "grad_norm": 1.0403211116790771, "learning_rate": 1.0581092801387684e-05, "loss": 0.582, "step": 610 }, { "epoch": 0.10600277585010409, "grad_norm": 1.1652127504348755, "learning_rate": 1.0598438855160451e-05, "loss": 0.6367, "step": 611 }, { "epoch": 0.10617626648161, "grad_norm": 0.8623735904693604, "learning_rate": 1.0615784908933218e-05, "loss": 0.7267, "step": 612 }, { "epoch": 0.1063497571131159, "grad_norm": 0.5244912505149841, "learning_rate": 1.0633130962705984e-05, "loss": 0.8396, "step": 613 }, { "epoch": 0.1065232477446218, "grad_norm": 0.7100646495819092, "learning_rate": 1.0650477016478753e-05, "loss": 0.8169, "step": 614 }, { "epoch": 0.10669673837612768, "grad_norm": 0.5875173211097717, "learning_rate": 1.066782307025152e-05, "loss": 0.6633, "step": 615 }, { "epoch": 0.10687022900763359, "grad_norm": 0.7332322597503662, "learning_rate": 1.0685169124024286e-05, "loss": 0.8625, "step": 616 }, { "epoch": 0.10704371963913949, "grad_norm": 0.6259750127792358, "learning_rate": 1.0702515177797053e-05, "loss": 0.8584, "step": 617 }, { "epoch": 0.10721721027064539, "grad_norm": 0.7651997208595276, "learning_rate": 1.071986123156982e-05, "loss": 0.7227, "step": 618 }, { "epoch": 0.10739070090215129, "grad_norm": 0.9559131264686584, "learning_rate": 1.0737207285342586e-05, "loss": 0.8584, "step": 619 }, { "epoch": 0.10756419153365718, "grad_norm": 0.8246591687202454, "learning_rate": 1.0754553339115353e-05, "loss": 0.6956, "step": 620 }, { "epoch": 0.10773768216516308, "grad_norm": 1.5528041124343872, "learning_rate": 1.077189939288812e-05, "loss": 0.6495, "step": 621 }, { "epoch": 0.10791117279666898, "grad_norm": 0.8507909774780273, "learning_rate": 1.0789245446660885e-05, "loss": 0.6914, "step": 622 }, { "epoch": 0.10808466342817488, "grad_norm": 1.2405694723129272, "learning_rate": 1.0806591500433651e-05, "loss": 0.6771, "step": 623 }, { "epoch": 0.10825815405968078, "grad_norm": 0.8569530248641968, "learning_rate": 1.0823937554206418e-05, "loss": 0.5836, "step": 624 }, { "epoch": 0.10843164469118667, "grad_norm": 0.7054854035377502, "learning_rate": 1.0841283607979185e-05, "loss": 0.7961, "step": 625 }, { "epoch": 0.10860513532269257, "grad_norm": 0.6264186501502991, "learning_rate": 1.0858629661751951e-05, "loss": 0.7217, "step": 626 }, { "epoch": 0.10877862595419847, "grad_norm": 0.7073953747749329, "learning_rate": 1.0875975715524718e-05, "loss": 0.7085, "step": 627 }, { "epoch": 0.10895211658570438, "grad_norm": 1.7915912866592407, "learning_rate": 1.0893321769297485e-05, "loss": 0.6115, "step": 628 }, { "epoch": 0.10912560721721028, "grad_norm": 1.0424374341964722, "learning_rate": 1.0910667823070253e-05, "loss": 0.7844, "step": 629 }, { "epoch": 0.10929909784871616, "grad_norm": 1.5371145009994507, "learning_rate": 1.092801387684302e-05, "loss": 0.6057, "step": 630 }, { "epoch": 0.10947258848022207, "grad_norm": 0.684110164642334, "learning_rate": 1.0945359930615787e-05, "loss": 0.7847, "step": 631 }, { "epoch": 0.10964607911172797, "grad_norm": 0.6034166812896729, "learning_rate": 1.0962705984388553e-05, "loss": 0.7946, "step": 632 }, { "epoch": 0.10981956974323387, "grad_norm": 0.9649418592453003, "learning_rate": 1.098005203816132e-05, "loss": 0.5924, "step": 633 }, { "epoch": 0.10999306037473977, "grad_norm": 1.540686011314392, "learning_rate": 1.0997398091934087e-05, "loss": 0.6996, "step": 634 }, { "epoch": 0.11016655100624566, "grad_norm": 0.6039450764656067, "learning_rate": 1.1014744145706853e-05, "loss": 0.772, "step": 635 }, { "epoch": 0.11034004163775156, "grad_norm": 0.7755488753318787, "learning_rate": 1.103209019947962e-05, "loss": 0.7344, "step": 636 }, { "epoch": 0.11051353226925746, "grad_norm": 0.6978097558021545, "learning_rate": 1.1049436253252385e-05, "loss": 0.8052, "step": 637 }, { "epoch": 0.11068702290076336, "grad_norm": 0.9445908665657043, "learning_rate": 1.1066782307025152e-05, "loss": 0.6602, "step": 638 }, { "epoch": 0.11086051353226926, "grad_norm": 3.0431437492370605, "learning_rate": 1.1084128360797918e-05, "loss": 0.7188, "step": 639 }, { "epoch": 0.11103400416377515, "grad_norm": 1.0811216831207275, "learning_rate": 1.1101474414570685e-05, "loss": 0.7437, "step": 640 }, { "epoch": 0.11120749479528105, "grad_norm": 0.763873279094696, "learning_rate": 1.1118820468343452e-05, "loss": 0.7471, "step": 641 }, { "epoch": 0.11138098542678695, "grad_norm": 0.7147685289382935, "learning_rate": 1.1136166522116219e-05, "loss": 0.6162, "step": 642 }, { "epoch": 0.11155447605829286, "grad_norm": 1.0152665376663208, "learning_rate": 1.1153512575888985e-05, "loss": 0.7397, "step": 643 }, { "epoch": 0.11172796668979876, "grad_norm": 0.8622577786445618, "learning_rate": 1.1170858629661754e-05, "loss": 0.6323, "step": 644 }, { "epoch": 0.11190145732130465, "grad_norm": 0.893168032169342, "learning_rate": 1.118820468343452e-05, "loss": 0.6046, "step": 645 }, { "epoch": 0.11207494795281055, "grad_norm": 0.9592801332473755, "learning_rate": 1.1205550737207287e-05, "loss": 0.621, "step": 646 }, { "epoch": 0.11224843858431645, "grad_norm": 0.8946881294250488, "learning_rate": 1.1222896790980054e-05, "loss": 0.6152, "step": 647 }, { "epoch": 0.11242192921582235, "grad_norm": 1.1318047046661377, "learning_rate": 1.124024284475282e-05, "loss": 0.5914, "step": 648 }, { "epoch": 0.11259541984732824, "grad_norm": 0.7299327254295349, "learning_rate": 1.1257588898525587e-05, "loss": 0.761, "step": 649 }, { "epoch": 0.11276891047883414, "grad_norm": 0.785463273525238, "learning_rate": 1.1274934952298354e-05, "loss": 0.6516, "step": 650 }, { "epoch": 0.11294240111034004, "grad_norm": 0.7696375250816345, "learning_rate": 1.129228100607112e-05, "loss": 0.7043, "step": 651 }, { "epoch": 0.11311589174184594, "grad_norm": 1.050863265991211, "learning_rate": 1.1309627059843885e-05, "loss": 0.7544, "step": 652 }, { "epoch": 0.11328938237335184, "grad_norm": 1.3764386177062988, "learning_rate": 1.1326973113616652e-05, "loss": 0.885, "step": 653 }, { "epoch": 0.11346287300485773, "grad_norm": 0.8897507786750793, "learning_rate": 1.1344319167389419e-05, "loss": 0.6536, "step": 654 }, { "epoch": 0.11363636363636363, "grad_norm": 1.1961376667022705, "learning_rate": 1.1361665221162186e-05, "loss": 0.7806, "step": 655 }, { "epoch": 0.11380985426786953, "grad_norm": 1.0161396265029907, "learning_rate": 1.1379011274934952e-05, "loss": 0.7877, "step": 656 }, { "epoch": 0.11398334489937544, "grad_norm": 0.9606118202209473, "learning_rate": 1.1396357328707719e-05, "loss": 0.6857, "step": 657 }, { "epoch": 0.11415683553088134, "grad_norm": 1.3000298738479614, "learning_rate": 1.1413703382480487e-05, "loss": 0.6887, "step": 658 }, { "epoch": 0.11433032616238722, "grad_norm": 0.9779266119003296, "learning_rate": 1.1431049436253254e-05, "loss": 0.6284, "step": 659 }, { "epoch": 0.11450381679389313, "grad_norm": 0.6240894198417664, "learning_rate": 1.144839549002602e-05, "loss": 0.7858, "step": 660 }, { "epoch": 0.11467730742539903, "grad_norm": 1.1069626808166504, "learning_rate": 1.1465741543798787e-05, "loss": 0.7329, "step": 661 }, { "epoch": 0.11485079805690493, "grad_norm": 0.8047638535499573, "learning_rate": 1.1483087597571554e-05, "loss": 0.7887, "step": 662 }, { "epoch": 0.11502428868841083, "grad_norm": 0.7948787808418274, "learning_rate": 1.150043365134432e-05, "loss": 0.6184, "step": 663 }, { "epoch": 0.11519777931991672, "grad_norm": 0.6296604871749878, "learning_rate": 1.1517779705117088e-05, "loss": 0.7542, "step": 664 }, { "epoch": 0.11537126995142262, "grad_norm": 1.9882172346115112, "learning_rate": 1.1535125758889854e-05, "loss": 0.8248, "step": 665 }, { "epoch": 0.11554476058292852, "grad_norm": 0.7945740222930908, "learning_rate": 1.1552471812662621e-05, "loss": 0.7227, "step": 666 }, { "epoch": 0.11571825121443442, "grad_norm": 1.1145966053009033, "learning_rate": 1.1569817866435386e-05, "loss": 0.7614, "step": 667 }, { "epoch": 0.11589174184594032, "grad_norm": 0.8767792582511902, "learning_rate": 1.1587163920208153e-05, "loss": 0.9128, "step": 668 }, { "epoch": 0.11606523247744621, "grad_norm": 0.6942266225814819, "learning_rate": 1.160450997398092e-05, "loss": 0.8083, "step": 669 }, { "epoch": 0.11623872310895211, "grad_norm": 0.8999573588371277, "learning_rate": 1.1621856027753686e-05, "loss": 0.5856, "step": 670 }, { "epoch": 0.11641221374045801, "grad_norm": 0.9616506099700928, "learning_rate": 1.1639202081526453e-05, "loss": 0.6599, "step": 671 }, { "epoch": 0.11658570437196392, "grad_norm": 1.0585647821426392, "learning_rate": 1.165654813529922e-05, "loss": 0.6489, "step": 672 }, { "epoch": 0.11675919500346982, "grad_norm": 0.7967655658721924, "learning_rate": 1.1673894189071988e-05, "loss": 0.5808, "step": 673 }, { "epoch": 0.1169326856349757, "grad_norm": 1.2765015363693237, "learning_rate": 1.1691240242844754e-05, "loss": 0.6726, "step": 674 }, { "epoch": 0.11710617626648161, "grad_norm": 0.666652500629425, "learning_rate": 1.1708586296617521e-05, "loss": 0.7703, "step": 675 }, { "epoch": 0.11727966689798751, "grad_norm": 0.9283226132392883, "learning_rate": 1.1725932350390288e-05, "loss": 0.6865, "step": 676 }, { "epoch": 0.11745315752949341, "grad_norm": 0.8013582229614258, "learning_rate": 1.1743278404163055e-05, "loss": 0.7915, "step": 677 }, { "epoch": 0.11762664816099931, "grad_norm": 1.0678712129592896, "learning_rate": 1.1760624457935821e-05, "loss": 0.6185, "step": 678 }, { "epoch": 0.1178001387925052, "grad_norm": 1.1896597146987915, "learning_rate": 1.1777970511708588e-05, "loss": 0.6167, "step": 679 }, { "epoch": 0.1179736294240111, "grad_norm": 0.7107986211776733, "learning_rate": 1.1795316565481355e-05, "loss": 0.7207, "step": 680 }, { "epoch": 0.118147120055517, "grad_norm": 1.224815011024475, "learning_rate": 1.181266261925412e-05, "loss": 0.7278, "step": 681 }, { "epoch": 0.1183206106870229, "grad_norm": 0.6233601570129395, "learning_rate": 1.1830008673026886e-05, "loss": 0.7137, "step": 682 }, { "epoch": 0.1184941013185288, "grad_norm": 1.0943691730499268, "learning_rate": 1.1847354726799653e-05, "loss": 0.5635, "step": 683 }, { "epoch": 0.11866759195003469, "grad_norm": 1.5748167037963867, "learning_rate": 1.186470078057242e-05, "loss": 0.8718, "step": 684 }, { "epoch": 0.1188410825815406, "grad_norm": 0.5649005174636841, "learning_rate": 1.1882046834345186e-05, "loss": 0.7893, "step": 685 }, { "epoch": 0.1190145732130465, "grad_norm": 2.1243364810943604, "learning_rate": 1.1899392888117953e-05, "loss": 0.7123, "step": 686 }, { "epoch": 0.1191880638445524, "grad_norm": 1.023429274559021, "learning_rate": 1.191673894189072e-05, "loss": 0.5942, "step": 687 }, { "epoch": 0.1193615544760583, "grad_norm": 0.6872596740722656, "learning_rate": 1.1934084995663488e-05, "loss": 0.7377, "step": 688 }, { "epoch": 0.11953504510756419, "grad_norm": 0.7104584574699402, "learning_rate": 1.1951431049436255e-05, "loss": 0.7969, "step": 689 }, { "epoch": 0.11970853573907009, "grad_norm": 0.8265088796615601, "learning_rate": 1.1968777103209022e-05, "loss": 0.6846, "step": 690 }, { "epoch": 0.11988202637057599, "grad_norm": 1.5311951637268066, "learning_rate": 1.1986123156981788e-05, "loss": 0.5569, "step": 691 }, { "epoch": 0.12005551700208189, "grad_norm": 1.1079038381576538, "learning_rate": 1.2003469210754555e-05, "loss": 0.7522, "step": 692 }, { "epoch": 0.12022900763358779, "grad_norm": 1.1485226154327393, "learning_rate": 1.2020815264527322e-05, "loss": 0.7632, "step": 693 }, { "epoch": 0.12040249826509368, "grad_norm": 0.621604323387146, "learning_rate": 1.2038161318300088e-05, "loss": 0.7302, "step": 694 }, { "epoch": 0.12057598889659958, "grad_norm": 0.8844202756881714, "learning_rate": 1.2055507372072855e-05, "loss": 0.7998, "step": 695 }, { "epoch": 0.12074947952810548, "grad_norm": 0.7744740843772888, "learning_rate": 1.207285342584562e-05, "loss": 0.6853, "step": 696 }, { "epoch": 0.12092297015961138, "grad_norm": 0.6762483716011047, "learning_rate": 1.2090199479618387e-05, "loss": 0.8545, "step": 697 }, { "epoch": 0.12109646079111729, "grad_norm": 1.5488284826278687, "learning_rate": 1.2107545533391153e-05, "loss": 0.6328, "step": 698 }, { "epoch": 0.12126995142262317, "grad_norm": 0.716681957244873, "learning_rate": 1.212489158716392e-05, "loss": 0.7561, "step": 699 }, { "epoch": 0.12144344205412907, "grad_norm": 0.8789547681808472, "learning_rate": 1.2142237640936687e-05, "loss": 0.5847, "step": 700 }, { "epoch": 0.12161693268563498, "grad_norm": 1.1625703573226929, "learning_rate": 1.2159583694709454e-05, "loss": 0.5979, "step": 701 }, { "epoch": 0.12179042331714088, "grad_norm": 0.843895435333252, "learning_rate": 1.217692974848222e-05, "loss": 0.7214, "step": 702 }, { "epoch": 0.12196391394864678, "grad_norm": 0.9099196195602417, "learning_rate": 1.2194275802254989e-05, "loss": 0.6191, "step": 703 }, { "epoch": 0.12213740458015267, "grad_norm": 1.0822001695632935, "learning_rate": 1.2211621856027755e-05, "loss": 0.6824, "step": 704 }, { "epoch": 0.12231089521165857, "grad_norm": 1.053365707397461, "learning_rate": 1.2228967909800522e-05, "loss": 0.7758, "step": 705 }, { "epoch": 0.12248438584316447, "grad_norm": 1.6115442514419556, "learning_rate": 1.2246313963573289e-05, "loss": 0.6538, "step": 706 }, { "epoch": 0.12265787647467037, "grad_norm": 0.63421231508255, "learning_rate": 1.2263660017346055e-05, "loss": 0.7159, "step": 707 }, { "epoch": 0.12283136710617627, "grad_norm": 0.7350783944129944, "learning_rate": 1.2281006071118822e-05, "loss": 0.8186, "step": 708 }, { "epoch": 0.12300485773768216, "grad_norm": 0.6793596148490906, "learning_rate": 1.2298352124891589e-05, "loss": 0.7205, "step": 709 }, { "epoch": 0.12317834836918806, "grad_norm": 0.8642478585243225, "learning_rate": 1.2315698178664356e-05, "loss": 0.7021, "step": 710 }, { "epoch": 0.12335183900069396, "grad_norm": 0.7911598086357117, "learning_rate": 1.233304423243712e-05, "loss": 0.7319, "step": 711 }, { "epoch": 0.12352532963219987, "grad_norm": 0.5477712154388428, "learning_rate": 1.2350390286209887e-05, "loss": 0.6874, "step": 712 }, { "epoch": 0.12369882026370577, "grad_norm": 0.5802223682403564, "learning_rate": 1.2367736339982654e-05, "loss": 0.7007, "step": 713 }, { "epoch": 0.12387231089521165, "grad_norm": 0.8528922200202942, "learning_rate": 1.238508239375542e-05, "loss": 0.6584, "step": 714 }, { "epoch": 0.12404580152671756, "grad_norm": 2.9769327640533447, "learning_rate": 1.2402428447528187e-05, "loss": 0.7139, "step": 715 }, { "epoch": 0.12421929215822346, "grad_norm": 1.6489852666854858, "learning_rate": 1.2419774501300954e-05, "loss": 0.6698, "step": 716 }, { "epoch": 0.12439278278972936, "grad_norm": 0.5939946174621582, "learning_rate": 1.2437120555073722e-05, "loss": 0.9434, "step": 717 }, { "epoch": 0.12456627342123526, "grad_norm": 1.066463589668274, "learning_rate": 1.2454466608846489e-05, "loss": 0.7593, "step": 718 }, { "epoch": 0.12473976405274115, "grad_norm": 0.7959454655647278, "learning_rate": 1.2471812662619256e-05, "loss": 0.7546, "step": 719 }, { "epoch": 0.12491325468424705, "grad_norm": 0.763336718082428, "learning_rate": 1.2489158716392022e-05, "loss": 0.7537, "step": 720 }, { "epoch": 0.12508674531575295, "grad_norm": 1.3524776697158813, "learning_rate": 1.250650477016479e-05, "loss": 0.6158, "step": 721 }, { "epoch": 0.12526023594725885, "grad_norm": 0.8605296611785889, "learning_rate": 1.2523850823937556e-05, "loss": 0.7356, "step": 722 }, { "epoch": 0.12543372657876475, "grad_norm": 0.9525180459022522, "learning_rate": 1.2541196877710323e-05, "loss": 0.6804, "step": 723 }, { "epoch": 0.12560721721027066, "grad_norm": 0.6528279185295105, "learning_rate": 1.255854293148309e-05, "loss": 0.8223, "step": 724 }, { "epoch": 0.12578070784177656, "grad_norm": 0.7762887477874756, "learning_rate": 1.2575888985255856e-05, "loss": 0.6045, "step": 725 }, { "epoch": 0.12595419847328243, "grad_norm": 0.918840229511261, "learning_rate": 1.2593235039028621e-05, "loss": 0.5986, "step": 726 }, { "epoch": 0.12612768910478833, "grad_norm": 0.98841392993927, "learning_rate": 1.2610581092801388e-05, "loss": 0.66, "step": 727 }, { "epoch": 0.12630117973629423, "grad_norm": 1.0014210939407349, "learning_rate": 1.2627927146574154e-05, "loss": 0.748, "step": 728 }, { "epoch": 0.12647467036780013, "grad_norm": 0.89778071641922, "learning_rate": 1.2645273200346921e-05, "loss": 0.611, "step": 729 }, { "epoch": 0.12664816099930604, "grad_norm": 1.6311657428741455, "learning_rate": 1.2662619254119688e-05, "loss": 0.6222, "step": 730 }, { "epoch": 0.12682165163081194, "grad_norm": 0.8890562057495117, "learning_rate": 1.2679965307892454e-05, "loss": 0.7314, "step": 731 }, { "epoch": 0.12699514226231784, "grad_norm": 0.7859386801719666, "learning_rate": 1.2697311361665223e-05, "loss": 0.6956, "step": 732 }, { "epoch": 0.12716863289382374, "grad_norm": 1.78225839138031, "learning_rate": 1.271465741543799e-05, "loss": 0.7323, "step": 733 }, { "epoch": 0.12734212352532964, "grad_norm": 1.3801956176757812, "learning_rate": 1.2732003469210756e-05, "loss": 0.8142, "step": 734 }, { "epoch": 0.12751561415683554, "grad_norm": 0.9562146663665771, "learning_rate": 1.2749349522983523e-05, "loss": 0.6477, "step": 735 }, { "epoch": 0.12768910478834142, "grad_norm": 2.3081870079040527, "learning_rate": 1.276669557675629e-05, "loss": 0.8076, "step": 736 }, { "epoch": 0.12786259541984732, "grad_norm": 0.7006694674491882, "learning_rate": 1.2784041630529056e-05, "loss": 0.7605, "step": 737 }, { "epoch": 0.12803608605135322, "grad_norm": 0.8642357587814331, "learning_rate": 1.2801387684301823e-05, "loss": 0.6172, "step": 738 }, { "epoch": 0.12820957668285912, "grad_norm": 1.2619918584823608, "learning_rate": 1.281873373807459e-05, "loss": 0.5737, "step": 739 }, { "epoch": 0.12838306731436502, "grad_norm": 0.7796722650527954, "learning_rate": 1.2836079791847356e-05, "loss": 0.636, "step": 740 }, { "epoch": 0.12855655794587093, "grad_norm": 0.8895028829574585, "learning_rate": 1.2853425845620121e-05, "loss": 0.6311, "step": 741 }, { "epoch": 0.12873004857737683, "grad_norm": 1.0069208145141602, "learning_rate": 1.2870771899392888e-05, "loss": 0.5934, "step": 742 }, { "epoch": 0.12890353920888273, "grad_norm": 0.7322602868080139, "learning_rate": 1.2888117953165655e-05, "loss": 0.5977, "step": 743 }, { "epoch": 0.12907702984038863, "grad_norm": 1.4024487733840942, "learning_rate": 1.2905464006938421e-05, "loss": 0.5786, "step": 744 }, { "epoch": 0.12925052047189453, "grad_norm": 0.7118971943855286, "learning_rate": 1.2922810060711188e-05, "loss": 0.8091, "step": 745 }, { "epoch": 0.1294240111034004, "grad_norm": 0.9231390357017517, "learning_rate": 1.2940156114483955e-05, "loss": 0.7593, "step": 746 }, { "epoch": 0.1295975017349063, "grad_norm": 1.3302514553070068, "learning_rate": 1.2957502168256723e-05, "loss": 0.7173, "step": 747 }, { "epoch": 0.1297709923664122, "grad_norm": 1.1704890727996826, "learning_rate": 1.297484822202949e-05, "loss": 0.5905, "step": 748 }, { "epoch": 0.1299444829979181, "grad_norm": 0.7081783413887024, "learning_rate": 1.2992194275802257e-05, "loss": 0.7327, "step": 749 }, { "epoch": 0.130117973629424, "grad_norm": 0.6808786392211914, "learning_rate": 1.3009540329575023e-05, "loss": 0.8208, "step": 750 }, { "epoch": 0.1302914642609299, "grad_norm": 0.7967669367790222, "learning_rate": 1.302688638334779e-05, "loss": 0.6287, "step": 751 }, { "epoch": 0.1304649548924358, "grad_norm": 0.907132625579834, "learning_rate": 1.3044232437120557e-05, "loss": 0.7136, "step": 752 }, { "epoch": 0.13063844552394172, "grad_norm": 0.8171525597572327, "learning_rate": 1.3061578490893323e-05, "loss": 0.6865, "step": 753 }, { "epoch": 0.13081193615544762, "grad_norm": 0.6056315302848816, "learning_rate": 1.307892454466609e-05, "loss": 0.7515, "step": 754 }, { "epoch": 0.13098542678695352, "grad_norm": 1.0905085802078247, "learning_rate": 1.3096270598438857e-05, "loss": 0.7351, "step": 755 }, { "epoch": 0.1311589174184594, "grad_norm": 1.5320167541503906, "learning_rate": 1.3113616652211622e-05, "loss": 0.7522, "step": 756 }, { "epoch": 0.1313324080499653, "grad_norm": 1.0283844470977783, "learning_rate": 1.3130962705984389e-05, "loss": 0.6381, "step": 757 }, { "epoch": 0.1315058986814712, "grad_norm": 0.7436849474906921, "learning_rate": 1.3148308759757155e-05, "loss": 0.7091, "step": 758 }, { "epoch": 0.1316793893129771, "grad_norm": 0.8111729025840759, "learning_rate": 1.3165654813529922e-05, "loss": 0.6208, "step": 759 }, { "epoch": 0.131852879944483, "grad_norm": 0.7507044076919556, "learning_rate": 1.3183000867302689e-05, "loss": 0.7162, "step": 760 }, { "epoch": 0.1320263705759889, "grad_norm": 1.0076284408569336, "learning_rate": 1.3200346921075455e-05, "loss": 0.7106, "step": 761 }, { "epoch": 0.1321998612074948, "grad_norm": 0.8929764628410339, "learning_rate": 1.3217692974848224e-05, "loss": 0.7991, "step": 762 }, { "epoch": 0.1323733518390007, "grad_norm": 1.046856164932251, "learning_rate": 1.323503902862099e-05, "loss": 0.6948, "step": 763 }, { "epoch": 0.1325468424705066, "grad_norm": 0.8030807375907898, "learning_rate": 1.3252385082393757e-05, "loss": 0.7334, "step": 764 }, { "epoch": 0.13272033310201248, "grad_norm": 0.7715807557106018, "learning_rate": 1.3269731136166524e-05, "loss": 0.6484, "step": 765 }, { "epoch": 0.13289382373351838, "grad_norm": 0.8913213014602661, "learning_rate": 1.328707718993929e-05, "loss": 0.645, "step": 766 }, { "epoch": 0.13306731436502428, "grad_norm": 0.7100571393966675, "learning_rate": 1.3304423243712057e-05, "loss": 0.5028, "step": 767 }, { "epoch": 0.13324080499653018, "grad_norm": 0.858887791633606, "learning_rate": 1.3321769297484824e-05, "loss": 0.7266, "step": 768 }, { "epoch": 0.13341429562803608, "grad_norm": 1.0888160467147827, "learning_rate": 1.333911535125759e-05, "loss": 0.7002, "step": 769 }, { "epoch": 0.13358778625954199, "grad_norm": 0.833756685256958, "learning_rate": 1.3356461405030357e-05, "loss": 0.6841, "step": 770 }, { "epoch": 0.1337612768910479, "grad_norm": 1.0680832862854004, "learning_rate": 1.3373807458803122e-05, "loss": 0.687, "step": 771 }, { "epoch": 0.1339347675225538, "grad_norm": 1.829303503036499, "learning_rate": 1.3391153512575889e-05, "loss": 0.7256, "step": 772 }, { "epoch": 0.1341082581540597, "grad_norm": 0.8159741163253784, "learning_rate": 1.3408499566348656e-05, "loss": 0.597, "step": 773 }, { "epoch": 0.1342817487855656, "grad_norm": 0.8202490210533142, "learning_rate": 1.3425845620121422e-05, "loss": 0.5811, "step": 774 }, { "epoch": 0.13445523941707146, "grad_norm": 1.8445295095443726, "learning_rate": 1.3443191673894189e-05, "loss": 0.6796, "step": 775 }, { "epoch": 0.13462873004857737, "grad_norm": 0.7824861407279968, "learning_rate": 1.3460537727666956e-05, "loss": 0.6609, "step": 776 }, { "epoch": 0.13480222068008327, "grad_norm": 0.9484027624130249, "learning_rate": 1.3477883781439724e-05, "loss": 0.5861, "step": 777 }, { "epoch": 0.13497571131158917, "grad_norm": 0.9183481335639954, "learning_rate": 1.349522983521249e-05, "loss": 0.5896, "step": 778 }, { "epoch": 0.13514920194309507, "grad_norm": 0.8156819343566895, "learning_rate": 1.3512575888985258e-05, "loss": 0.7158, "step": 779 }, { "epoch": 0.13532269257460097, "grad_norm": 0.8175688982009888, "learning_rate": 1.3529921942758024e-05, "loss": 0.7183, "step": 780 }, { "epoch": 0.13549618320610687, "grad_norm": 1.0969560146331787, "learning_rate": 1.3547267996530791e-05, "loss": 0.7327, "step": 781 }, { "epoch": 0.13566967383761278, "grad_norm": 1.166895866394043, "learning_rate": 1.3564614050303558e-05, "loss": 0.771, "step": 782 }, { "epoch": 0.13584316446911868, "grad_norm": 3.4434897899627686, "learning_rate": 1.3581960104076324e-05, "loss": 0.8831, "step": 783 }, { "epoch": 0.13601665510062458, "grad_norm": 1.7552794218063354, "learning_rate": 1.3599306157849091e-05, "loss": 0.7871, "step": 784 }, { "epoch": 0.13619014573213045, "grad_norm": 0.8987719416618347, "learning_rate": 1.3616652211621858e-05, "loss": 0.7195, "step": 785 }, { "epoch": 0.13636363636363635, "grad_norm": 0.5590589642524719, "learning_rate": 1.3633998265394623e-05, "loss": 0.7832, "step": 786 }, { "epoch": 0.13653712699514226, "grad_norm": 0.7459977865219116, "learning_rate": 1.365134431916739e-05, "loss": 0.6086, "step": 787 }, { "epoch": 0.13671061762664816, "grad_norm": 0.7150686979293823, "learning_rate": 1.3668690372940156e-05, "loss": 0.7173, "step": 788 }, { "epoch": 0.13688410825815406, "grad_norm": 1.0562443733215332, "learning_rate": 1.3686036426712923e-05, "loss": 0.7086, "step": 789 }, { "epoch": 0.13705759888965996, "grad_norm": 1.3128288984298706, "learning_rate": 1.370338248048569e-05, "loss": 0.5895, "step": 790 }, { "epoch": 0.13723108952116586, "grad_norm": 1.139470100402832, "learning_rate": 1.3720728534258458e-05, "loss": 0.635, "step": 791 }, { "epoch": 0.13740458015267176, "grad_norm": 0.9284714460372925, "learning_rate": 1.3738074588031225e-05, "loss": 0.6987, "step": 792 }, { "epoch": 0.13757807078417766, "grad_norm": 0.7392175793647766, "learning_rate": 1.3755420641803991e-05, "loss": 0.5834, "step": 793 }, { "epoch": 0.13775156141568357, "grad_norm": 0.5572483539581299, "learning_rate": 1.3772766695576758e-05, "loss": 0.8179, "step": 794 }, { "epoch": 0.13792505204718944, "grad_norm": 1.1708852052688599, "learning_rate": 1.3790112749349525e-05, "loss": 0.7037, "step": 795 }, { "epoch": 0.13809854267869534, "grad_norm": 0.7428001165390015, "learning_rate": 1.3807458803122291e-05, "loss": 0.7423, "step": 796 }, { "epoch": 0.13827203331020124, "grad_norm": 0.8794804215431213, "learning_rate": 1.3824804856895058e-05, "loss": 0.731, "step": 797 }, { "epoch": 0.13844552394170714, "grad_norm": 0.779248058795929, "learning_rate": 1.3842150910667825e-05, "loss": 0.7002, "step": 798 }, { "epoch": 0.13861901457321305, "grad_norm": 0.8916496634483337, "learning_rate": 1.3859496964440591e-05, "loss": 0.7222, "step": 799 }, { "epoch": 0.13879250520471895, "grad_norm": 0.9523424506187439, "learning_rate": 1.3876843018213358e-05, "loss": 0.6304, "step": 800 }, { "epoch": 0.13896599583622485, "grad_norm": 1.1725637912750244, "learning_rate": 1.3894189071986123e-05, "loss": 0.6189, "step": 801 }, { "epoch": 0.13913948646773075, "grad_norm": 0.5944787859916687, "learning_rate": 1.391153512575889e-05, "loss": 0.8826, "step": 802 }, { "epoch": 0.13931297709923665, "grad_norm": 0.7201195955276489, "learning_rate": 1.3928881179531657e-05, "loss": 0.7395, "step": 803 }, { "epoch": 0.13948646773074255, "grad_norm": 0.8105780482292175, "learning_rate": 1.3946227233304423e-05, "loss": 0.6179, "step": 804 }, { "epoch": 0.13965995836224843, "grad_norm": 1.070915699005127, "learning_rate": 1.396357328707719e-05, "loss": 0.6484, "step": 805 }, { "epoch": 0.13983344899375433, "grad_norm": 1.1900025606155396, "learning_rate": 1.3980919340849958e-05, "loss": 0.5752, "step": 806 }, { "epoch": 0.14000693962526023, "grad_norm": 1.0084441900253296, "learning_rate": 1.3998265394622725e-05, "loss": 0.7759, "step": 807 }, { "epoch": 0.14018043025676613, "grad_norm": 1.2829084396362305, "learning_rate": 1.4015611448395492e-05, "loss": 0.6047, "step": 808 }, { "epoch": 0.14035392088827203, "grad_norm": 0.7799201011657715, "learning_rate": 1.4032957502168258e-05, "loss": 0.7181, "step": 809 }, { "epoch": 0.14052741151977793, "grad_norm": 0.8388474583625793, "learning_rate": 1.4050303555941025e-05, "loss": 0.7703, "step": 810 }, { "epoch": 0.14070090215128384, "grad_norm": 0.7399805188179016, "learning_rate": 1.4067649609713792e-05, "loss": 0.8882, "step": 811 }, { "epoch": 0.14087439278278974, "grad_norm": 1.1823039054870605, "learning_rate": 1.4084995663486558e-05, "loss": 0.8306, "step": 812 }, { "epoch": 0.14104788341429564, "grad_norm": 0.8547143340110779, "learning_rate": 1.4102341717259325e-05, "loss": 0.8206, "step": 813 }, { "epoch": 0.14122137404580154, "grad_norm": 0.9972624778747559, "learning_rate": 1.4119687771032092e-05, "loss": 0.6167, "step": 814 }, { "epoch": 0.1413948646773074, "grad_norm": 1.1319990158081055, "learning_rate": 1.4137033824804859e-05, "loss": 0.5568, "step": 815 }, { "epoch": 0.14156835530881332, "grad_norm": 1.930017352104187, "learning_rate": 1.4154379878577624e-05, "loss": 0.6726, "step": 816 }, { "epoch": 0.14174184594031922, "grad_norm": 0.94271320104599, "learning_rate": 1.417172593235039e-05, "loss": 0.7246, "step": 817 }, { "epoch": 0.14191533657182512, "grad_norm": 1.4749341011047363, "learning_rate": 1.4189071986123157e-05, "loss": 0.7723, "step": 818 }, { "epoch": 0.14208882720333102, "grad_norm": 0.8136767745018005, "learning_rate": 1.4206418039895924e-05, "loss": 0.759, "step": 819 }, { "epoch": 0.14226231783483692, "grad_norm": 0.8389351963996887, "learning_rate": 1.422376409366869e-05, "loss": 0.5598, "step": 820 }, { "epoch": 0.14243580846634282, "grad_norm": 0.7450869679450989, "learning_rate": 1.4241110147441459e-05, "loss": 0.8494, "step": 821 }, { "epoch": 0.14260929909784872, "grad_norm": 1.1043566465377808, "learning_rate": 1.4258456201214225e-05, "loss": 0.6223, "step": 822 }, { "epoch": 0.14278278972935463, "grad_norm": 0.7570394277572632, "learning_rate": 1.4275802254986992e-05, "loss": 0.6093, "step": 823 }, { "epoch": 0.14295628036086053, "grad_norm": 0.7042250037193298, "learning_rate": 1.4293148308759759e-05, "loss": 0.5449, "step": 824 }, { "epoch": 0.1431297709923664, "grad_norm": 0.6336850523948669, "learning_rate": 1.4310494362532526e-05, "loss": 0.7617, "step": 825 }, { "epoch": 0.1433032616238723, "grad_norm": 1.6511664390563965, "learning_rate": 1.4327840416305292e-05, "loss": 0.7332, "step": 826 }, { "epoch": 0.1434767522553782, "grad_norm": 1.9736170768737793, "learning_rate": 1.4345186470078059e-05, "loss": 0.6853, "step": 827 }, { "epoch": 0.1436502428868841, "grad_norm": 0.9473209977149963, "learning_rate": 1.4362532523850826e-05, "loss": 0.6881, "step": 828 }, { "epoch": 0.14382373351839, "grad_norm": 0.806874692440033, "learning_rate": 1.4379878577623592e-05, "loss": 0.5989, "step": 829 }, { "epoch": 0.1439972241498959, "grad_norm": 0.7855055928230286, "learning_rate": 1.4397224631396359e-05, "loss": 0.709, "step": 830 }, { "epoch": 0.1441707147814018, "grad_norm": 0.9963886141777039, "learning_rate": 1.4414570685169124e-05, "loss": 0.6379, "step": 831 }, { "epoch": 0.1443442054129077, "grad_norm": 0.7364680767059326, "learning_rate": 1.443191673894189e-05, "loss": 0.8228, "step": 832 }, { "epoch": 0.1445176960444136, "grad_norm": 0.8339455127716064, "learning_rate": 1.4449262792714657e-05, "loss": 0.6504, "step": 833 }, { "epoch": 0.14469118667591951, "grad_norm": 0.9416974782943726, "learning_rate": 1.4466608846487424e-05, "loss": 0.636, "step": 834 }, { "epoch": 0.1448646773074254, "grad_norm": 0.9278693199157715, "learning_rate": 1.448395490026019e-05, "loss": 0.6721, "step": 835 }, { "epoch": 0.1450381679389313, "grad_norm": 2.3351480960845947, "learning_rate": 1.450130095403296e-05, "loss": 0.6017, "step": 836 }, { "epoch": 0.1452116585704372, "grad_norm": 0.6672064661979675, "learning_rate": 1.4518647007805726e-05, "loss": 0.7229, "step": 837 }, { "epoch": 0.1453851492019431, "grad_norm": 0.743253767490387, "learning_rate": 1.4535993061578493e-05, "loss": 0.6313, "step": 838 }, { "epoch": 0.145558639833449, "grad_norm": 1.056763768196106, "learning_rate": 1.455333911535126e-05, "loss": 0.6968, "step": 839 }, { "epoch": 0.1457321304649549, "grad_norm": 0.9053840637207031, "learning_rate": 1.4570685169124026e-05, "loss": 0.6313, "step": 840 }, { "epoch": 0.1459056210964608, "grad_norm": 1.119525671005249, "learning_rate": 1.4588031222896793e-05, "loss": 0.5525, "step": 841 }, { "epoch": 0.1460791117279667, "grad_norm": 1.0211570262908936, "learning_rate": 1.460537727666956e-05, "loss": 0.6615, "step": 842 }, { "epoch": 0.1462526023594726, "grad_norm": 0.7650402188301086, "learning_rate": 1.4622723330442326e-05, "loss": 0.7095, "step": 843 }, { "epoch": 0.14642609299097847, "grad_norm": 1.1841182708740234, "learning_rate": 1.4640069384215093e-05, "loss": 0.6704, "step": 844 }, { "epoch": 0.14659958362248438, "grad_norm": 1.1515867710113525, "learning_rate": 1.465741543798786e-05, "loss": 0.5818, "step": 845 }, { "epoch": 0.14677307425399028, "grad_norm": 0.9013440608978271, "learning_rate": 1.4674761491760624e-05, "loss": 0.6653, "step": 846 }, { "epoch": 0.14694656488549618, "grad_norm": 1.0194121599197388, "learning_rate": 1.4692107545533391e-05, "loss": 0.7689, "step": 847 }, { "epoch": 0.14712005551700208, "grad_norm": 0.7369625568389893, "learning_rate": 1.4709453599306158e-05, "loss": 0.6931, "step": 848 }, { "epoch": 0.14729354614850798, "grad_norm": 0.776012122631073, "learning_rate": 1.4726799653078925e-05, "loss": 0.7512, "step": 849 }, { "epoch": 0.14746703678001388, "grad_norm": 0.8151385188102722, "learning_rate": 1.4744145706851693e-05, "loss": 0.6995, "step": 850 }, { "epoch": 0.14764052741151978, "grad_norm": 0.7555911540985107, "learning_rate": 1.476149176062446e-05, "loss": 0.611, "step": 851 }, { "epoch": 0.14781401804302569, "grad_norm": 1.3994958400726318, "learning_rate": 1.4778837814397226e-05, "loss": 0.6509, "step": 852 }, { "epoch": 0.1479875086745316, "grad_norm": 1.2116901874542236, "learning_rate": 1.4796183868169993e-05, "loss": 0.626, "step": 853 }, { "epoch": 0.14816099930603746, "grad_norm": 1.1880247592926025, "learning_rate": 1.481352992194276e-05, "loss": 0.6135, "step": 854 }, { "epoch": 0.14833448993754336, "grad_norm": 0.6173132061958313, "learning_rate": 1.4830875975715526e-05, "loss": 0.7172, "step": 855 }, { "epoch": 0.14850798056904926, "grad_norm": 0.8529770970344543, "learning_rate": 1.4848222029488293e-05, "loss": 0.6005, "step": 856 }, { "epoch": 0.14868147120055517, "grad_norm": 0.9621935486793518, "learning_rate": 1.486556808326106e-05, "loss": 0.6149, "step": 857 }, { "epoch": 0.14885496183206107, "grad_norm": 1.5569918155670166, "learning_rate": 1.4882914137033826e-05, "loss": 0.8989, "step": 858 }, { "epoch": 0.14902845246356697, "grad_norm": 0.931231677532196, "learning_rate": 1.4900260190806593e-05, "loss": 0.5619, "step": 859 }, { "epoch": 0.14920194309507287, "grad_norm": 1.5929428339004517, "learning_rate": 1.491760624457936e-05, "loss": 0.5854, "step": 860 }, { "epoch": 0.14937543372657877, "grad_norm": 0.9305289387702942, "learning_rate": 1.4934952298352125e-05, "loss": 0.6697, "step": 861 }, { "epoch": 0.14954892435808467, "grad_norm": 0.8200428485870361, "learning_rate": 1.4952298352124892e-05, "loss": 0.7217, "step": 862 }, { "epoch": 0.14972241498959057, "grad_norm": 4.3267107009887695, "learning_rate": 1.4969644405897658e-05, "loss": 0.5573, "step": 863 }, { "epoch": 0.14989590562109645, "grad_norm": 1.2004973888397217, "learning_rate": 1.4986990459670425e-05, "loss": 0.7554, "step": 864 }, { "epoch": 0.15006939625260235, "grad_norm": 0.9642512798309326, "learning_rate": 1.5004336513443193e-05, "loss": 0.6198, "step": 865 }, { "epoch": 0.15024288688410825, "grad_norm": 0.8531646728515625, "learning_rate": 1.502168256721596e-05, "loss": 0.8345, "step": 866 }, { "epoch": 0.15041637751561415, "grad_norm": 1.055066704750061, "learning_rate": 1.5039028620988727e-05, "loss": 0.749, "step": 867 }, { "epoch": 0.15058986814712005, "grad_norm": 1.1706310510635376, "learning_rate": 1.5056374674761493e-05, "loss": 0.6992, "step": 868 }, { "epoch": 0.15076335877862596, "grad_norm": 1.0913740396499634, "learning_rate": 1.507372072853426e-05, "loss": 0.6554, "step": 869 }, { "epoch": 0.15093684941013186, "grad_norm": 0.8652170300483704, "learning_rate": 1.5091066782307027e-05, "loss": 0.6669, "step": 870 }, { "epoch": 0.15111034004163776, "grad_norm": 0.9506787061691284, "learning_rate": 1.5108412836079793e-05, "loss": 0.7295, "step": 871 }, { "epoch": 0.15128383067314366, "grad_norm": 1.150251865386963, "learning_rate": 1.512575888985256e-05, "loss": 0.733, "step": 872 }, { "epoch": 0.15145732130464956, "grad_norm": 0.8466367721557617, "learning_rate": 1.5143104943625327e-05, "loss": 0.6395, "step": 873 }, { "epoch": 0.15163081193615544, "grad_norm": 2.574615240097046, "learning_rate": 1.5160450997398094e-05, "loss": 0.7278, "step": 874 }, { "epoch": 0.15180430256766134, "grad_norm": 1.0064374208450317, "learning_rate": 1.517779705117086e-05, "loss": 0.7107, "step": 875 }, { "epoch": 0.15197779319916724, "grad_norm": 0.8361401557922363, "learning_rate": 1.5195143104943625e-05, "loss": 0.6068, "step": 876 }, { "epoch": 0.15215128383067314, "grad_norm": 0.8473800420761108, "learning_rate": 1.5212489158716392e-05, "loss": 0.7333, "step": 877 }, { "epoch": 0.15232477446217904, "grad_norm": 1.097095012664795, "learning_rate": 1.5229835212489159e-05, "loss": 0.7101, "step": 878 }, { "epoch": 0.15249826509368494, "grad_norm": 0.9678106904029846, "learning_rate": 1.5247181266261925e-05, "loss": 0.6997, "step": 879 }, { "epoch": 0.15267175572519084, "grad_norm": 1.245478630065918, "learning_rate": 1.5264527320034695e-05, "loss": 0.7833, "step": 880 }, { "epoch": 0.15284524635669675, "grad_norm": 1.0961253643035889, "learning_rate": 1.528187337380746e-05, "loss": 0.7347, "step": 881 }, { "epoch": 0.15301873698820265, "grad_norm": 2.126734733581543, "learning_rate": 1.529921942758023e-05, "loss": 0.7664, "step": 882 }, { "epoch": 0.15319222761970855, "grad_norm": 0.8503320813179016, "learning_rate": 1.5316565481352994e-05, "loss": 0.6718, "step": 883 }, { "epoch": 0.15336571825121442, "grad_norm": 0.6812099814414978, "learning_rate": 1.5333911535125762e-05, "loss": 0.7156, "step": 884 }, { "epoch": 0.15353920888272032, "grad_norm": 0.7333630919456482, "learning_rate": 1.5351257588898527e-05, "loss": 0.6864, "step": 885 }, { "epoch": 0.15371269951422623, "grad_norm": 0.8481491208076477, "learning_rate": 1.5368603642671292e-05, "loss": 0.7781, "step": 886 }, { "epoch": 0.15388619014573213, "grad_norm": 1.4921408891677856, "learning_rate": 1.538594969644406e-05, "loss": 0.6251, "step": 887 }, { "epoch": 0.15405968077723803, "grad_norm": 0.9560161232948303, "learning_rate": 1.5403295750216826e-05, "loss": 0.6892, "step": 888 }, { "epoch": 0.15423317140874393, "grad_norm": 1.2779227495193481, "learning_rate": 1.5420641803989594e-05, "loss": 0.5886, "step": 889 }, { "epoch": 0.15440666204024983, "grad_norm": 0.7587497234344482, "learning_rate": 1.543798785776236e-05, "loss": 0.7178, "step": 890 }, { "epoch": 0.15458015267175573, "grad_norm": 0.8463174104690552, "learning_rate": 1.5455333911535127e-05, "loss": 0.7814, "step": 891 }, { "epoch": 0.15475364330326163, "grad_norm": 0.8361932039260864, "learning_rate": 1.5472679965307892e-05, "loss": 0.5793, "step": 892 }, { "epoch": 0.15492713393476754, "grad_norm": 0.8943549990653992, "learning_rate": 1.549002601908066e-05, "loss": 0.7914, "step": 893 }, { "epoch": 0.1551006245662734, "grad_norm": 0.8401117324829102, "learning_rate": 1.5507372072853426e-05, "loss": 0.6434, "step": 894 }, { "epoch": 0.1552741151977793, "grad_norm": 0.961362898349762, "learning_rate": 1.5524718126626194e-05, "loss": 0.7277, "step": 895 }, { "epoch": 0.1554476058292852, "grad_norm": 0.9641268849372864, "learning_rate": 1.5542064180398963e-05, "loss": 0.7878, "step": 896 }, { "epoch": 0.15562109646079111, "grad_norm": 0.8773832321166992, "learning_rate": 1.5559410234171728e-05, "loss": 0.6924, "step": 897 }, { "epoch": 0.15579458709229702, "grad_norm": 0.7039158940315247, "learning_rate": 1.5576756287944496e-05, "loss": 0.7548, "step": 898 }, { "epoch": 0.15596807772380292, "grad_norm": 0.7688771486282349, "learning_rate": 1.559410234171726e-05, "loss": 0.6881, "step": 899 }, { "epoch": 0.15614156835530882, "grad_norm": 0.8563050627708435, "learning_rate": 1.5611448395490026e-05, "loss": 0.7627, "step": 900 }, { "epoch": 0.15631505898681472, "grad_norm": 0.8748807907104492, "learning_rate": 1.5628794449262794e-05, "loss": 0.6493, "step": 901 }, { "epoch": 0.15648854961832062, "grad_norm": 0.7920365929603577, "learning_rate": 1.564614050303556e-05, "loss": 0.6841, "step": 902 }, { "epoch": 0.15666204024982652, "grad_norm": 0.6366187930107117, "learning_rate": 1.5663486556808328e-05, "loss": 0.6548, "step": 903 }, { "epoch": 0.1568355308813324, "grad_norm": 1.085413932800293, "learning_rate": 1.5680832610581093e-05, "loss": 0.6558, "step": 904 }, { "epoch": 0.1570090215128383, "grad_norm": 0.7230006456375122, "learning_rate": 1.569817866435386e-05, "loss": 0.7896, "step": 905 }, { "epoch": 0.1571825121443442, "grad_norm": 0.7782901525497437, "learning_rate": 1.5715524718126626e-05, "loss": 0.6873, "step": 906 }, { "epoch": 0.1573560027758501, "grad_norm": 0.542499303817749, "learning_rate": 1.5732870771899395e-05, "loss": 0.8535, "step": 907 }, { "epoch": 0.157529493407356, "grad_norm": 0.6911293268203735, "learning_rate": 1.575021682567216e-05, "loss": 0.6926, "step": 908 }, { "epoch": 0.1577029840388619, "grad_norm": 0.7596789002418518, "learning_rate": 1.5767562879444928e-05, "loss": 0.6484, "step": 909 }, { "epoch": 0.1578764746703678, "grad_norm": 0.6950908899307251, "learning_rate": 1.5784908933217696e-05, "loss": 0.6914, "step": 910 }, { "epoch": 0.1580499653018737, "grad_norm": 0.8597424030303955, "learning_rate": 1.580225498699046e-05, "loss": 0.6887, "step": 911 }, { "epoch": 0.1582234559333796, "grad_norm": 1.0358662605285645, "learning_rate": 1.581960104076323e-05, "loss": 0.7153, "step": 912 }, { "epoch": 0.15839694656488548, "grad_norm": 1.1343300342559814, "learning_rate": 1.5836947094535995e-05, "loss": 0.6909, "step": 913 }, { "epoch": 0.15857043719639138, "grad_norm": 0.7074477076530457, "learning_rate": 1.5854293148308763e-05, "loss": 0.6642, "step": 914 }, { "epoch": 0.15874392782789729, "grad_norm": 0.7301273345947266, "learning_rate": 1.5871639202081528e-05, "loss": 0.6434, "step": 915 }, { "epoch": 0.1589174184594032, "grad_norm": 1.0247890949249268, "learning_rate": 1.5888985255854293e-05, "loss": 0.6675, "step": 916 }, { "epoch": 0.1590909090909091, "grad_norm": 1.233616828918457, "learning_rate": 1.590633130962706e-05, "loss": 0.7847, "step": 917 }, { "epoch": 0.159264399722415, "grad_norm": 0.9381399154663086, "learning_rate": 1.5923677363399826e-05, "loss": 0.6826, "step": 918 }, { "epoch": 0.1594378903539209, "grad_norm": 0.8014280200004578, "learning_rate": 1.5941023417172595e-05, "loss": 0.6625, "step": 919 }, { "epoch": 0.1596113809854268, "grad_norm": 1.027308702468872, "learning_rate": 1.595836947094536e-05, "loss": 0.6035, "step": 920 }, { "epoch": 0.1597848716169327, "grad_norm": 0.686129629611969, "learning_rate": 1.5975715524718128e-05, "loss": 0.8022, "step": 921 }, { "epoch": 0.1599583622484386, "grad_norm": 0.7895461916923523, "learning_rate": 1.5993061578490893e-05, "loss": 0.6404, "step": 922 }, { "epoch": 0.16013185287994447, "grad_norm": 0.9063572883605957, "learning_rate": 1.601040763226366e-05, "loss": 0.6422, "step": 923 }, { "epoch": 0.16030534351145037, "grad_norm": 0.8540890216827393, "learning_rate": 1.602775368603643e-05, "loss": 0.6224, "step": 924 }, { "epoch": 0.16047883414295627, "grad_norm": 0.9980672001838684, "learning_rate": 1.6045099739809195e-05, "loss": 0.5835, "step": 925 }, { "epoch": 0.16065232477446217, "grad_norm": 0.8262863159179688, "learning_rate": 1.6062445793581963e-05, "loss": 0.7963, "step": 926 }, { "epoch": 0.16082581540596808, "grad_norm": 0.7476772665977478, "learning_rate": 1.607979184735473e-05, "loss": 0.7102, "step": 927 }, { "epoch": 0.16099930603747398, "grad_norm": 0.8359287977218628, "learning_rate": 1.6097137901127497e-05, "loss": 0.6128, "step": 928 }, { "epoch": 0.16117279666897988, "grad_norm": 0.7169504165649414, "learning_rate": 1.6114483954900262e-05, "loss": 0.7908, "step": 929 }, { "epoch": 0.16134628730048578, "grad_norm": 1.0917155742645264, "learning_rate": 1.6131830008673027e-05, "loss": 0.7098, "step": 930 }, { "epoch": 0.16151977793199168, "grad_norm": 0.9570122957229614, "learning_rate": 1.6149176062445795e-05, "loss": 0.6041, "step": 931 }, { "epoch": 0.16169326856349758, "grad_norm": 0.683272123336792, "learning_rate": 1.616652211621856e-05, "loss": 0.6094, "step": 932 }, { "epoch": 0.16186675919500346, "grad_norm": 0.9358447194099426, "learning_rate": 1.618386816999133e-05, "loss": 0.7856, "step": 933 }, { "epoch": 0.16204024982650936, "grad_norm": 0.6288849711418152, "learning_rate": 1.6201214223764094e-05, "loss": 0.6729, "step": 934 }, { "epoch": 0.16221374045801526, "grad_norm": 0.6991991996765137, "learning_rate": 1.6218560277536862e-05, "loss": 0.723, "step": 935 }, { "epoch": 0.16238723108952116, "grad_norm": 0.9126247763633728, "learning_rate": 1.6235906331309627e-05, "loss": 0.673, "step": 936 }, { "epoch": 0.16256072172102706, "grad_norm": 1.2215293645858765, "learning_rate": 1.6253252385082395e-05, "loss": 0.7214, "step": 937 }, { "epoch": 0.16273421235253296, "grad_norm": 0.8412079811096191, "learning_rate": 1.627059843885516e-05, "loss": 0.7268, "step": 938 }, { "epoch": 0.16290770298403887, "grad_norm": 0.8135339021682739, "learning_rate": 1.628794449262793e-05, "loss": 0.6954, "step": 939 }, { "epoch": 0.16308119361554477, "grad_norm": 1.5377135276794434, "learning_rate": 1.6305290546400697e-05, "loss": 0.6376, "step": 940 }, { "epoch": 0.16325468424705067, "grad_norm": 0.8019591569900513, "learning_rate": 1.6322636600173462e-05, "loss": 0.7319, "step": 941 }, { "epoch": 0.16342817487855657, "grad_norm": 0.8551116585731506, "learning_rate": 1.633998265394623e-05, "loss": 0.6748, "step": 942 }, { "epoch": 0.16360166551006244, "grad_norm": 0.7336849570274353, "learning_rate": 1.6357328707718996e-05, "loss": 0.684, "step": 943 }, { "epoch": 0.16377515614156835, "grad_norm": 0.756514310836792, "learning_rate": 1.6374674761491764e-05, "loss": 0.7261, "step": 944 }, { "epoch": 0.16394864677307425, "grad_norm": 0.8664772510528564, "learning_rate": 1.639202081526453e-05, "loss": 0.6232, "step": 945 }, { "epoch": 0.16412213740458015, "grad_norm": 0.9706249833106995, "learning_rate": 1.6409366869037294e-05, "loss": 0.7896, "step": 946 }, { "epoch": 0.16429562803608605, "grad_norm": 1.3999273777008057, "learning_rate": 1.6426712922810062e-05, "loss": 0.6323, "step": 947 }, { "epoch": 0.16446911866759195, "grad_norm": 0.9607291221618652, "learning_rate": 1.6444058976582827e-05, "loss": 0.6218, "step": 948 }, { "epoch": 0.16464260929909785, "grad_norm": 0.9746975302696228, "learning_rate": 1.6461405030355596e-05, "loss": 0.6968, "step": 949 }, { "epoch": 0.16481609993060375, "grad_norm": 0.8557673096656799, "learning_rate": 1.647875108412836e-05, "loss": 0.6807, "step": 950 }, { "epoch": 0.16498959056210966, "grad_norm": 1.3538912534713745, "learning_rate": 1.649609713790113e-05, "loss": 0.6714, "step": 951 }, { "epoch": 0.16516308119361556, "grad_norm": 0.7352486848831177, "learning_rate": 1.6513443191673894e-05, "loss": 0.679, "step": 952 }, { "epoch": 0.16533657182512143, "grad_norm": 1.0318317413330078, "learning_rate": 1.6530789245446663e-05, "loss": 0.8328, "step": 953 }, { "epoch": 0.16551006245662733, "grad_norm": 0.9433748126029968, "learning_rate": 1.654813529921943e-05, "loss": 0.6721, "step": 954 }, { "epoch": 0.16568355308813323, "grad_norm": 1.44105863571167, "learning_rate": 1.6565481352992196e-05, "loss": 0.7727, "step": 955 }, { "epoch": 0.16585704371963914, "grad_norm": 1.178808569908142, "learning_rate": 1.6582827406764964e-05, "loss": 0.6615, "step": 956 }, { "epoch": 0.16603053435114504, "grad_norm": 0.7474299073219299, "learning_rate": 1.660017346053773e-05, "loss": 0.6886, "step": 957 }, { "epoch": 0.16620402498265094, "grad_norm": 1.0822902917861938, "learning_rate": 1.6617519514310498e-05, "loss": 0.636, "step": 958 }, { "epoch": 0.16637751561415684, "grad_norm": 0.7172223329544067, "learning_rate": 1.6634865568083263e-05, "loss": 0.7212, "step": 959 }, { "epoch": 0.16655100624566274, "grad_norm": 0.789437472820282, "learning_rate": 1.6652211621856028e-05, "loss": 0.666, "step": 960 }, { "epoch": 0.16672449687716864, "grad_norm": 1.3900312185287476, "learning_rate": 1.6669557675628796e-05, "loss": 0.6277, "step": 961 }, { "epoch": 0.16689798750867454, "grad_norm": 0.7792858481407166, "learning_rate": 1.668690372940156e-05, "loss": 0.7878, "step": 962 }, { "epoch": 0.16707147814018042, "grad_norm": 0.6243866086006165, "learning_rate": 1.670424978317433e-05, "loss": 0.6951, "step": 963 }, { "epoch": 0.16724496877168632, "grad_norm": 0.7946894764900208, "learning_rate": 1.6721595836947094e-05, "loss": 0.621, "step": 964 }, { "epoch": 0.16741845940319222, "grad_norm": 1.2947770357131958, "learning_rate": 1.6738941890719863e-05, "loss": 0.676, "step": 965 }, { "epoch": 0.16759195003469812, "grad_norm": 1.1111828088760376, "learning_rate": 1.6756287944492628e-05, "loss": 0.7052, "step": 966 }, { "epoch": 0.16776544066620402, "grad_norm": 0.7376157641410828, "learning_rate": 1.6773633998265396e-05, "loss": 0.6995, "step": 967 }, { "epoch": 0.16793893129770993, "grad_norm": 1.1629353761672974, "learning_rate": 1.679098005203816e-05, "loss": 0.7086, "step": 968 }, { "epoch": 0.16811242192921583, "grad_norm": 0.9028804302215576, "learning_rate": 1.680832610581093e-05, "loss": 0.5719, "step": 969 }, { "epoch": 0.16828591256072173, "grad_norm": 1.0768271684646606, "learning_rate": 1.6825672159583698e-05, "loss": 0.6647, "step": 970 }, { "epoch": 0.16845940319222763, "grad_norm": 1.0081478357315063, "learning_rate": 1.6843018213356463e-05, "loss": 0.6356, "step": 971 }, { "epoch": 0.16863289382373353, "grad_norm": 1.476508378982544, "learning_rate": 1.686036426712923e-05, "loss": 0.8376, "step": 972 }, { "epoch": 0.1688063844552394, "grad_norm": 0.6738322377204895, "learning_rate": 1.6877710320901996e-05, "loss": 0.8074, "step": 973 }, { "epoch": 0.1689798750867453, "grad_norm": 0.8401079773902893, "learning_rate": 1.689505637467476e-05, "loss": 0.77, "step": 974 }, { "epoch": 0.1691533657182512, "grad_norm": 2.7095425128936768, "learning_rate": 1.691240242844753e-05, "loss": 0.6422, "step": 975 }, { "epoch": 0.1693268563497571, "grad_norm": 1.0661002397537231, "learning_rate": 1.6929748482220295e-05, "loss": 0.7129, "step": 976 }, { "epoch": 0.169500346981263, "grad_norm": 0.741974949836731, "learning_rate": 1.6947094535993063e-05, "loss": 0.7639, "step": 977 }, { "epoch": 0.1696738376127689, "grad_norm": 0.8547816276550293, "learning_rate": 1.6964440589765828e-05, "loss": 0.7159, "step": 978 }, { "epoch": 0.16984732824427481, "grad_norm": 0.731971025466919, "learning_rate": 1.6981786643538597e-05, "loss": 0.7456, "step": 979 }, { "epoch": 0.17002081887578072, "grad_norm": 1.1164857149124146, "learning_rate": 1.699913269731136e-05, "loss": 0.7031, "step": 980 }, { "epoch": 0.17019430950728662, "grad_norm": 0.8163184523582458, "learning_rate": 1.701647875108413e-05, "loss": 0.6238, "step": 981 }, { "epoch": 0.17036780013879252, "grad_norm": 0.7814517021179199, "learning_rate": 1.7033824804856895e-05, "loss": 0.6144, "step": 982 }, { "epoch": 0.1705412907702984, "grad_norm": 1.004395842552185, "learning_rate": 1.7051170858629663e-05, "loss": 0.5923, "step": 983 }, { "epoch": 0.1707147814018043, "grad_norm": 1.3236013650894165, "learning_rate": 1.7068516912402432e-05, "loss": 0.7759, "step": 984 }, { "epoch": 0.1708882720333102, "grad_norm": 0.7804445028305054, "learning_rate": 1.7085862966175197e-05, "loss": 0.7361, "step": 985 }, { "epoch": 0.1710617626648161, "grad_norm": 1.4555665254592896, "learning_rate": 1.7103209019947965e-05, "loss": 0.6619, "step": 986 }, { "epoch": 0.171235253296322, "grad_norm": 0.7560045123100281, "learning_rate": 1.712055507372073e-05, "loss": 0.728, "step": 987 }, { "epoch": 0.1714087439278279, "grad_norm": 1.0049445629119873, "learning_rate": 1.71379011274935e-05, "loss": 0.5458, "step": 988 }, { "epoch": 0.1715822345593338, "grad_norm": 1.0618914365768433, "learning_rate": 1.7155247181266264e-05, "loss": 0.6783, "step": 989 }, { "epoch": 0.1717557251908397, "grad_norm": 1.0983893871307373, "learning_rate": 1.717259323503903e-05, "loss": 0.6158, "step": 990 }, { "epoch": 0.1719292158223456, "grad_norm": 0.742469847202301, "learning_rate": 1.7189939288811797e-05, "loss": 0.6021, "step": 991 }, { "epoch": 0.17210270645385148, "grad_norm": 0.7174831628799438, "learning_rate": 1.7207285342584562e-05, "loss": 0.782, "step": 992 }, { "epoch": 0.17227619708535738, "grad_norm": 1.3333544731140137, "learning_rate": 1.722463139635733e-05, "loss": 0.5986, "step": 993 }, { "epoch": 0.17244968771686328, "grad_norm": 2.129117727279663, "learning_rate": 1.7241977450130095e-05, "loss": 0.7273, "step": 994 }, { "epoch": 0.17262317834836918, "grad_norm": 0.9279270172119141, "learning_rate": 1.7259323503902864e-05, "loss": 0.7134, "step": 995 }, { "epoch": 0.17279666897987508, "grad_norm": 0.8577216267585754, "learning_rate": 1.727666955767563e-05, "loss": 0.6736, "step": 996 }, { "epoch": 0.17297015961138099, "grad_norm": 0.8197943568229675, "learning_rate": 1.7294015611448397e-05, "loss": 0.7085, "step": 997 }, { "epoch": 0.1731436502428869, "grad_norm": 1.0312492847442627, "learning_rate": 1.7311361665221166e-05, "loss": 0.6168, "step": 998 }, { "epoch": 0.1733171408743928, "grad_norm": 1.2503159046173096, "learning_rate": 1.732870771899393e-05, "loss": 0.6034, "step": 999 }, { "epoch": 0.1734906315058987, "grad_norm": 1.1078702211380005, "learning_rate": 1.73460537727667e-05, "loss": 0.6769, "step": 1000 }, { "epoch": 0.1736641221374046, "grad_norm": 0.7119059562683105, "learning_rate": 1.7363399826539464e-05, "loss": 0.6415, "step": 1001 }, { "epoch": 0.17383761276891047, "grad_norm": 0.8156967163085938, "learning_rate": 1.7380745880312232e-05, "loss": 0.589, "step": 1002 }, { "epoch": 0.17401110340041637, "grad_norm": 1.3735185861587524, "learning_rate": 1.7398091934084997e-05, "loss": 0.7031, "step": 1003 }, { "epoch": 0.17418459403192227, "grad_norm": 0.8018829822540283, "learning_rate": 1.7415437987857762e-05, "loss": 0.6443, "step": 1004 }, { "epoch": 0.17435808466342817, "grad_norm": 0.7476117014884949, "learning_rate": 1.743278404163053e-05, "loss": 0.7019, "step": 1005 }, { "epoch": 0.17453157529493407, "grad_norm": 3.358261823654175, "learning_rate": 1.7450130095403296e-05, "loss": 0.7725, "step": 1006 }, { "epoch": 0.17470506592643997, "grad_norm": 0.6919204592704773, "learning_rate": 1.7467476149176064e-05, "loss": 0.7163, "step": 1007 }, { "epoch": 0.17487855655794587, "grad_norm": 1.4837769269943237, "learning_rate": 1.748482220294883e-05, "loss": 0.5562, "step": 1008 }, { "epoch": 0.17505204718945178, "grad_norm": 0.7044925689697266, "learning_rate": 1.7502168256721597e-05, "loss": 0.7113, "step": 1009 }, { "epoch": 0.17522553782095768, "grad_norm": 1.0583440065383911, "learning_rate": 1.7519514310494362e-05, "loss": 0.6238, "step": 1010 }, { "epoch": 0.17539902845246358, "grad_norm": 0.7085881233215332, "learning_rate": 1.753686036426713e-05, "loss": 0.7446, "step": 1011 }, { "epoch": 0.17557251908396945, "grad_norm": 1.2622864246368408, "learning_rate": 1.7554206418039896e-05, "loss": 0.6267, "step": 1012 }, { "epoch": 0.17574600971547535, "grad_norm": 0.6951441168785095, "learning_rate": 1.7571552471812664e-05, "loss": 0.7543, "step": 1013 }, { "epoch": 0.17591950034698126, "grad_norm": 0.9578819274902344, "learning_rate": 1.7588898525585433e-05, "loss": 0.7092, "step": 1014 }, { "epoch": 0.17609299097848716, "grad_norm": 0.8530898690223694, "learning_rate": 1.7606244579358198e-05, "loss": 0.7383, "step": 1015 }, { "epoch": 0.17626648160999306, "grad_norm": 0.7052402496337891, "learning_rate": 1.7623590633130966e-05, "loss": 0.7693, "step": 1016 }, { "epoch": 0.17643997224149896, "grad_norm": 1.1654605865478516, "learning_rate": 1.764093668690373e-05, "loss": 0.6918, "step": 1017 }, { "epoch": 0.17661346287300486, "grad_norm": 0.8184143900871277, "learning_rate": 1.76582827406765e-05, "loss": 0.6852, "step": 1018 }, { "epoch": 0.17678695350451076, "grad_norm": 1.003745675086975, "learning_rate": 1.7675628794449264e-05, "loss": 0.6572, "step": 1019 }, { "epoch": 0.17696044413601666, "grad_norm": 1.0442581176757812, "learning_rate": 1.769297484822203e-05, "loss": 0.5624, "step": 1020 }, { "epoch": 0.17713393476752257, "grad_norm": 2.1000277996063232, "learning_rate": 1.7710320901994798e-05, "loss": 0.724, "step": 1021 }, { "epoch": 0.17730742539902844, "grad_norm": 1.0788401365280151, "learning_rate": 1.7727666955767563e-05, "loss": 0.7444, "step": 1022 }, { "epoch": 0.17748091603053434, "grad_norm": 1.9666985273361206, "learning_rate": 1.774501300954033e-05, "loss": 0.707, "step": 1023 }, { "epoch": 0.17765440666204024, "grad_norm": 1.935426950454712, "learning_rate": 1.7762359063313096e-05, "loss": 0.7725, "step": 1024 }, { "epoch": 0.17782789729354614, "grad_norm": 0.8484199047088623, "learning_rate": 1.7779705117085865e-05, "loss": 0.6472, "step": 1025 }, { "epoch": 0.17800138792505205, "grad_norm": 0.953031599521637, "learning_rate": 1.779705117085863e-05, "loss": 0.6996, "step": 1026 }, { "epoch": 0.17817487855655795, "grad_norm": 0.6190481185913086, "learning_rate": 1.7814397224631398e-05, "loss": 0.7246, "step": 1027 }, { "epoch": 0.17834836918806385, "grad_norm": 0.7665278315544128, "learning_rate": 1.7831743278404166e-05, "loss": 0.7291, "step": 1028 }, { "epoch": 0.17852185981956975, "grad_norm": 1.5020960569381714, "learning_rate": 1.784908933217693e-05, "loss": 0.6891, "step": 1029 }, { "epoch": 0.17869535045107565, "grad_norm": 0.690614640712738, "learning_rate": 1.78664353859497e-05, "loss": 0.6453, "step": 1030 }, { "epoch": 0.17886884108258155, "grad_norm": 1.2899292707443237, "learning_rate": 1.7883781439722465e-05, "loss": 0.6635, "step": 1031 }, { "epoch": 0.17904233171408743, "grad_norm": 1.8824074268341064, "learning_rate": 1.7901127493495233e-05, "loss": 0.6235, "step": 1032 }, { "epoch": 0.17921582234559333, "grad_norm": 1.0645564794540405, "learning_rate": 1.7918473547267998e-05, "loss": 0.7957, "step": 1033 }, { "epoch": 0.17938931297709923, "grad_norm": 0.8005107641220093, "learning_rate": 1.7935819601040763e-05, "loss": 0.6934, "step": 1034 }, { "epoch": 0.17956280360860513, "grad_norm": 0.8759954571723938, "learning_rate": 1.795316565481353e-05, "loss": 0.6537, "step": 1035 }, { "epoch": 0.17973629424011103, "grad_norm": 1.047008752822876, "learning_rate": 1.7970511708586297e-05, "loss": 0.6056, "step": 1036 }, { "epoch": 0.17990978487161693, "grad_norm": 0.831057608127594, "learning_rate": 1.7987857762359065e-05, "loss": 0.6173, "step": 1037 }, { "epoch": 0.18008327550312284, "grad_norm": 1.0177525281906128, "learning_rate": 1.800520381613183e-05, "loss": 0.6202, "step": 1038 }, { "epoch": 0.18025676613462874, "grad_norm": 0.9185387492179871, "learning_rate": 1.80225498699046e-05, "loss": 0.7454, "step": 1039 }, { "epoch": 0.18043025676613464, "grad_norm": 0.8477778434753418, "learning_rate": 1.8039895923677363e-05, "loss": 0.6642, "step": 1040 }, { "epoch": 0.18060374739764054, "grad_norm": 1.2702401876449585, "learning_rate": 1.8057241977450132e-05, "loss": 0.7075, "step": 1041 }, { "epoch": 0.18077723802914641, "grad_norm": 0.902235209941864, "learning_rate": 1.8074588031222897e-05, "loss": 0.6028, "step": 1042 }, { "epoch": 0.18095072866065232, "grad_norm": 1.9682577848434448, "learning_rate": 1.8091934084995665e-05, "loss": 0.5509, "step": 1043 }, { "epoch": 0.18112421929215822, "grad_norm": 0.7393232583999634, "learning_rate": 1.8109280138768434e-05, "loss": 0.6578, "step": 1044 }, { "epoch": 0.18129770992366412, "grad_norm": 0.9041082262992859, "learning_rate": 1.81266261925412e-05, "loss": 0.5804, "step": 1045 }, { "epoch": 0.18147120055517002, "grad_norm": 0.6570352911949158, "learning_rate": 1.8143972246313967e-05, "loss": 0.7332, "step": 1046 }, { "epoch": 0.18164469118667592, "grad_norm": 1.3145637512207031, "learning_rate": 1.8161318300086732e-05, "loss": 0.5594, "step": 1047 }, { "epoch": 0.18181818181818182, "grad_norm": 0.9379259943962097, "learning_rate": 1.81786643538595e-05, "loss": 0.6917, "step": 1048 }, { "epoch": 0.18199167244968772, "grad_norm": 1.066662311553955, "learning_rate": 1.8196010407632265e-05, "loss": 0.676, "step": 1049 }, { "epoch": 0.18216516308119363, "grad_norm": 1.6131765842437744, "learning_rate": 1.821335646140503e-05, "loss": 0.7084, "step": 1050 }, { "epoch": 0.18233865371269953, "grad_norm": 0.7636017203330994, "learning_rate": 1.82307025151778e-05, "loss": 0.7501, "step": 1051 }, { "epoch": 0.1825121443442054, "grad_norm": 1.3714878559112549, "learning_rate": 1.8248048568950564e-05, "loss": 0.6624, "step": 1052 }, { "epoch": 0.1826856349757113, "grad_norm": 0.9227768778800964, "learning_rate": 1.8265394622723332e-05, "loss": 0.6272, "step": 1053 }, { "epoch": 0.1828591256072172, "grad_norm": 2.468353271484375, "learning_rate": 1.8282740676496097e-05, "loss": 0.5273, "step": 1054 }, { "epoch": 0.1830326162387231, "grad_norm": 0.7945556640625, "learning_rate": 1.8300086730268865e-05, "loss": 0.6641, "step": 1055 }, { "epoch": 0.183206106870229, "grad_norm": 1.8361812829971313, "learning_rate": 1.831743278404163e-05, "loss": 0.5623, "step": 1056 }, { "epoch": 0.1833795975017349, "grad_norm": 0.9092496037483215, "learning_rate": 1.83347788378144e-05, "loss": 0.6288, "step": 1057 }, { "epoch": 0.1835530881332408, "grad_norm": 1.0778285264968872, "learning_rate": 1.8352124891587167e-05, "loss": 0.6891, "step": 1058 }, { "epoch": 0.1837265787647467, "grad_norm": 0.9915594458580017, "learning_rate": 1.8369470945359932e-05, "loss": 0.7334, "step": 1059 }, { "epoch": 0.1839000693962526, "grad_norm": 0.8176564574241638, "learning_rate": 1.83868169991327e-05, "loss": 0.6724, "step": 1060 }, { "epoch": 0.18407356002775851, "grad_norm": 0.9000877737998962, "learning_rate": 1.8404163052905466e-05, "loss": 0.74, "step": 1061 }, { "epoch": 0.1842470506592644, "grad_norm": 0.617867112159729, "learning_rate": 1.8421509106678234e-05, "loss": 0.7555, "step": 1062 }, { "epoch": 0.1844205412907703, "grad_norm": 0.8626068234443665, "learning_rate": 1.8438855160451e-05, "loss": 0.6428, "step": 1063 }, { "epoch": 0.1845940319222762, "grad_norm": 0.8504796028137207, "learning_rate": 1.8456201214223764e-05, "loss": 0.7659, "step": 1064 }, { "epoch": 0.1847675225537821, "grad_norm": 0.8839169144630432, "learning_rate": 1.8473547267996532e-05, "loss": 0.7334, "step": 1065 }, { "epoch": 0.184941013185288, "grad_norm": 0.7100916504859924, "learning_rate": 1.8490893321769297e-05, "loss": 0.7568, "step": 1066 }, { "epoch": 0.1851145038167939, "grad_norm": 1.005721926689148, "learning_rate": 1.8508239375542066e-05, "loss": 0.5739, "step": 1067 }, { "epoch": 0.1852879944482998, "grad_norm": 1.0025429725646973, "learning_rate": 1.852558542931483e-05, "loss": 0.5717, "step": 1068 }, { "epoch": 0.1854614850798057, "grad_norm": 0.7993957996368408, "learning_rate": 1.85429314830876e-05, "loss": 0.6868, "step": 1069 }, { "epoch": 0.1856349757113116, "grad_norm": 1.636637806892395, "learning_rate": 1.8560277536860364e-05, "loss": 0.7036, "step": 1070 }, { "epoch": 0.18580846634281747, "grad_norm": 0.7456915378570557, "learning_rate": 1.8577623590633133e-05, "loss": 0.6378, "step": 1071 }, { "epoch": 0.18598195697432338, "grad_norm": 2.336636543273926, "learning_rate": 1.85949696444059e-05, "loss": 0.7042, "step": 1072 }, { "epoch": 0.18615544760582928, "grad_norm": 0.7203032970428467, "learning_rate": 1.8612315698178666e-05, "loss": 0.6406, "step": 1073 }, { "epoch": 0.18632893823733518, "grad_norm": 1.2361599206924438, "learning_rate": 1.8629661751951434e-05, "loss": 0.7024, "step": 1074 }, { "epoch": 0.18650242886884108, "grad_norm": 0.71657794713974, "learning_rate": 1.86470078057242e-05, "loss": 0.6204, "step": 1075 }, { "epoch": 0.18667591950034698, "grad_norm": 1.2553610801696777, "learning_rate": 1.8664353859496968e-05, "loss": 0.6677, "step": 1076 }, { "epoch": 0.18684941013185288, "grad_norm": 0.9711937308311462, "learning_rate": 1.8681699913269733e-05, "loss": 0.614, "step": 1077 }, { "epoch": 0.18702290076335878, "grad_norm": 0.6472023129463196, "learning_rate": 1.86990459670425e-05, "loss": 0.7666, "step": 1078 }, { "epoch": 0.1871963913948647, "grad_norm": 1.3309922218322754, "learning_rate": 1.8716392020815266e-05, "loss": 0.6407, "step": 1079 }, { "epoch": 0.1873698820263706, "grad_norm": 0.9143368601799011, "learning_rate": 1.873373807458803e-05, "loss": 0.6594, "step": 1080 }, { "epoch": 0.18754337265787646, "grad_norm": 1.003936529159546, "learning_rate": 1.87510841283608e-05, "loss": 0.6857, "step": 1081 }, { "epoch": 0.18771686328938236, "grad_norm": 0.7457190752029419, "learning_rate": 1.8768430182133565e-05, "loss": 0.7072, "step": 1082 }, { "epoch": 0.18789035392088826, "grad_norm": 0.8663756251335144, "learning_rate": 1.8785776235906333e-05, "loss": 0.6554, "step": 1083 }, { "epoch": 0.18806384455239417, "grad_norm": 0.7410752177238464, "learning_rate": 1.8803122289679098e-05, "loss": 0.7334, "step": 1084 }, { "epoch": 0.18823733518390007, "grad_norm": 0.8176028728485107, "learning_rate": 1.8820468343451866e-05, "loss": 0.7578, "step": 1085 }, { "epoch": 0.18841082581540597, "grad_norm": 1.0696377754211426, "learning_rate": 1.883781439722463e-05, "loss": 0.7449, "step": 1086 }, { "epoch": 0.18858431644691187, "grad_norm": 0.7168681621551514, "learning_rate": 1.88551604509974e-05, "loss": 0.7106, "step": 1087 }, { "epoch": 0.18875780707841777, "grad_norm": 0.7650038599967957, "learning_rate": 1.8872506504770168e-05, "loss": 0.6597, "step": 1088 }, { "epoch": 0.18893129770992367, "grad_norm": 1.2690316438674927, "learning_rate": 1.8889852558542933e-05, "loss": 0.6167, "step": 1089 }, { "epoch": 0.18910478834142957, "grad_norm": 1.2952897548675537, "learning_rate": 1.89071986123157e-05, "loss": 0.611, "step": 1090 }, { "epoch": 0.18927827897293545, "grad_norm": 2.8345625400543213, "learning_rate": 1.8924544666088467e-05, "loss": 0.7052, "step": 1091 }, { "epoch": 0.18945176960444135, "grad_norm": 0.8807550668716431, "learning_rate": 1.8941890719861235e-05, "loss": 0.598, "step": 1092 }, { "epoch": 0.18962526023594725, "grad_norm": 0.739676296710968, "learning_rate": 1.8959236773634e-05, "loss": 0.6504, "step": 1093 }, { "epoch": 0.18979875086745315, "grad_norm": 0.7994574904441833, "learning_rate": 1.8976582827406765e-05, "loss": 0.7261, "step": 1094 }, { "epoch": 0.18997224149895905, "grad_norm": 0.6968166828155518, "learning_rate": 1.8993928881179533e-05, "loss": 0.698, "step": 1095 }, { "epoch": 0.19014573213046496, "grad_norm": 0.8572284579277039, "learning_rate": 1.9011274934952298e-05, "loss": 0.6481, "step": 1096 }, { "epoch": 0.19031922276197086, "grad_norm": 0.714260995388031, "learning_rate": 1.9028620988725067e-05, "loss": 0.7607, "step": 1097 }, { "epoch": 0.19049271339347676, "grad_norm": 0.815679132938385, "learning_rate": 1.904596704249783e-05, "loss": 0.5797, "step": 1098 }, { "epoch": 0.19066620402498266, "grad_norm": 0.9675291180610657, "learning_rate": 1.90633130962706e-05, "loss": 0.6279, "step": 1099 }, { "epoch": 0.19083969465648856, "grad_norm": 2.4207892417907715, "learning_rate": 1.9080659150043365e-05, "loss": 0.5724, "step": 1100 }, { "epoch": 0.19101318528799444, "grad_norm": 1.2507901191711426, "learning_rate": 1.9098005203816133e-05, "loss": 0.8313, "step": 1101 }, { "epoch": 0.19118667591950034, "grad_norm": 0.9661257266998291, "learning_rate": 1.9115351257588902e-05, "loss": 0.6674, "step": 1102 }, { "epoch": 0.19136016655100624, "grad_norm": 1.0192809104919434, "learning_rate": 1.9132697311361667e-05, "loss": 0.5961, "step": 1103 }, { "epoch": 0.19153365718251214, "grad_norm": 0.9285260438919067, "learning_rate": 1.9150043365134435e-05, "loss": 0.5983, "step": 1104 }, { "epoch": 0.19170714781401804, "grad_norm": 0.9810802340507507, "learning_rate": 1.91673894189072e-05, "loss": 0.6393, "step": 1105 }, { "epoch": 0.19188063844552394, "grad_norm": 1.281665563583374, "learning_rate": 1.918473547267997e-05, "loss": 0.7402, "step": 1106 }, { "epoch": 0.19205412907702984, "grad_norm": 1.100449562072754, "learning_rate": 1.9202081526452734e-05, "loss": 0.6868, "step": 1107 }, { "epoch": 0.19222761970853575, "grad_norm": 1.5075404644012451, "learning_rate": 1.9219427580225502e-05, "loss": 0.615, "step": 1108 }, { "epoch": 0.19240111034004165, "grad_norm": 1.5390740633010864, "learning_rate": 1.9236773633998267e-05, "loss": 0.577, "step": 1109 }, { "epoch": 0.19257460097154755, "grad_norm": 0.8646528124809265, "learning_rate": 1.9254119687771032e-05, "loss": 0.6506, "step": 1110 }, { "epoch": 0.19274809160305342, "grad_norm": 0.8771539926528931, "learning_rate": 1.92714657415438e-05, "loss": 0.6888, "step": 1111 }, { "epoch": 0.19292158223455932, "grad_norm": 0.824918270111084, "learning_rate": 1.9288811795316565e-05, "loss": 0.6956, "step": 1112 }, { "epoch": 0.19309507286606523, "grad_norm": 0.8597473502159119, "learning_rate": 1.9306157849089334e-05, "loss": 0.6765, "step": 1113 }, { "epoch": 0.19326856349757113, "grad_norm": 1.0198392868041992, "learning_rate": 1.93235039028621e-05, "loss": 0.7966, "step": 1114 }, { "epoch": 0.19344205412907703, "grad_norm": 1.0613431930541992, "learning_rate": 1.9340849956634867e-05, "loss": 0.6799, "step": 1115 }, { "epoch": 0.19361554476058293, "grad_norm": 1.0472047328948975, "learning_rate": 1.9358196010407632e-05, "loss": 0.5466, "step": 1116 }, { "epoch": 0.19378903539208883, "grad_norm": 1.0182578563690186, "learning_rate": 1.93755420641804e-05, "loss": 0.6404, "step": 1117 }, { "epoch": 0.19396252602359473, "grad_norm": 1.1602972745895386, "learning_rate": 1.939288811795317e-05, "loss": 0.7067, "step": 1118 }, { "epoch": 0.19413601665510063, "grad_norm": 0.7067492008209229, "learning_rate": 1.9410234171725934e-05, "loss": 0.6321, "step": 1119 }, { "epoch": 0.19430950728660654, "grad_norm": 0.6538707613945007, "learning_rate": 1.9427580225498702e-05, "loss": 0.6466, "step": 1120 }, { "epoch": 0.1944829979181124, "grad_norm": 0.7539204955101013, "learning_rate": 1.9444926279271467e-05, "loss": 0.6584, "step": 1121 }, { "epoch": 0.1946564885496183, "grad_norm": 0.9606083631515503, "learning_rate": 1.9462272333044236e-05, "loss": 0.7148, "step": 1122 }, { "epoch": 0.1948299791811242, "grad_norm": 1.1239814758300781, "learning_rate": 1.9479618386817e-05, "loss": 0.7046, "step": 1123 }, { "epoch": 0.19500346981263011, "grad_norm": 0.8692232370376587, "learning_rate": 1.9496964440589766e-05, "loss": 0.5984, "step": 1124 }, { "epoch": 0.19517696044413602, "grad_norm": 0.9197940826416016, "learning_rate": 1.9514310494362534e-05, "loss": 0.7756, "step": 1125 }, { "epoch": 0.19535045107564192, "grad_norm": 0.8177411556243896, "learning_rate": 1.95316565481353e-05, "loss": 0.7275, "step": 1126 }, { "epoch": 0.19552394170714782, "grad_norm": 1.1024653911590576, "learning_rate": 1.9549002601908068e-05, "loss": 0.7601, "step": 1127 }, { "epoch": 0.19569743233865372, "grad_norm": 2.704585552215576, "learning_rate": 1.9566348655680833e-05, "loss": 0.7224, "step": 1128 }, { "epoch": 0.19587092297015962, "grad_norm": 0.9784897565841675, "learning_rate": 1.95836947094536e-05, "loss": 0.6036, "step": 1129 }, { "epoch": 0.19604441360166552, "grad_norm": 1.8710535764694214, "learning_rate": 1.9601040763226366e-05, "loss": 0.7267, "step": 1130 }, { "epoch": 0.1962179042331714, "grad_norm": 1.8200170993804932, "learning_rate": 1.9618386816999134e-05, "loss": 0.6038, "step": 1131 }, { "epoch": 0.1963913948646773, "grad_norm": 1.568758249282837, "learning_rate": 1.9635732870771903e-05, "loss": 0.5441, "step": 1132 }, { "epoch": 0.1965648854961832, "grad_norm": 2.1508874893188477, "learning_rate": 1.9653078924544668e-05, "loss": 0.6168, "step": 1133 }, { "epoch": 0.1967383761276891, "grad_norm": 1.4580485820770264, "learning_rate": 1.9670424978317436e-05, "loss": 0.6018, "step": 1134 }, { "epoch": 0.196911866759195, "grad_norm": 0.9399105310440063, "learning_rate": 1.96877710320902e-05, "loss": 0.6045, "step": 1135 }, { "epoch": 0.1970853573907009, "grad_norm": 1.007959008216858, "learning_rate": 1.970511708586297e-05, "loss": 0.6597, "step": 1136 }, { "epoch": 0.1972588480222068, "grad_norm": 1.3925546407699585, "learning_rate": 1.9722463139635735e-05, "loss": 0.658, "step": 1137 }, { "epoch": 0.1974323386537127, "grad_norm": 1.083231806755066, "learning_rate": 1.9739809193408503e-05, "loss": 0.5747, "step": 1138 }, { "epoch": 0.1976058292852186, "grad_norm": 1.03508460521698, "learning_rate": 1.9757155247181268e-05, "loss": 0.5938, "step": 1139 }, { "epoch": 0.19777931991672448, "grad_norm": 0.9703415632247925, "learning_rate": 1.9774501300954033e-05, "loss": 0.6207, "step": 1140 }, { "epoch": 0.19795281054823038, "grad_norm": 0.9999934434890747, "learning_rate": 1.97918473547268e-05, "loss": 0.6732, "step": 1141 }, { "epoch": 0.19812630117973629, "grad_norm": 1.4494614601135254, "learning_rate": 1.9809193408499566e-05, "loss": 0.5573, "step": 1142 }, { "epoch": 0.1982997918112422, "grad_norm": 2.6835134029388428, "learning_rate": 1.9826539462272335e-05, "loss": 0.6957, "step": 1143 }, { "epoch": 0.1984732824427481, "grad_norm": 1.1297166347503662, "learning_rate": 1.98438855160451e-05, "loss": 0.6608, "step": 1144 }, { "epoch": 0.198646773074254, "grad_norm": 0.9641552567481995, "learning_rate": 1.9861231569817868e-05, "loss": 0.7238, "step": 1145 }, { "epoch": 0.1988202637057599, "grad_norm": 0.6486564874649048, "learning_rate": 1.9878577623590636e-05, "loss": 0.7588, "step": 1146 }, { "epoch": 0.1989937543372658, "grad_norm": 1.3994876146316528, "learning_rate": 1.98959236773634e-05, "loss": 0.5844, "step": 1147 }, { "epoch": 0.1991672449687717, "grad_norm": 1.1556226015090942, "learning_rate": 1.991326973113617e-05, "loss": 0.6755, "step": 1148 }, { "epoch": 0.1993407356002776, "grad_norm": 0.6605205535888672, "learning_rate": 1.9930615784908935e-05, "loss": 0.7324, "step": 1149 }, { "epoch": 0.19951422623178347, "grad_norm": 0.8294914960861206, "learning_rate": 1.9947961838681703e-05, "loss": 0.6222, "step": 1150 }, { "epoch": 0.19968771686328937, "grad_norm": 0.6706499457359314, "learning_rate": 1.9965307892454468e-05, "loss": 0.7544, "step": 1151 }, { "epoch": 0.19986120749479527, "grad_norm": 1.0484025478363037, "learning_rate": 1.9982653946227237e-05, "loss": 0.6194, "step": 1152 }, { "epoch": 0.20003469812630117, "grad_norm": 0.8263928294181824, "learning_rate": 2e-05, "loss": 0.6472, "step": 1153 }, { "epoch": 0.20020818875780708, "grad_norm": 1.6001183986663818, "learning_rate": 2.0017346053772767e-05, "loss": 0.646, "step": 1154 }, { "epoch": 0.20038167938931298, "grad_norm": 1.1579716205596924, "learning_rate": 2.0034692107545535e-05, "loss": 0.6827, "step": 1155 }, { "epoch": 0.20055517002081888, "grad_norm": 1.0516129732131958, "learning_rate": 2.00520381613183e-05, "loss": 0.6022, "step": 1156 }, { "epoch": 0.20072866065232478, "grad_norm": 0.8496077656745911, "learning_rate": 2.006938421509107e-05, "loss": 0.5554, "step": 1157 }, { "epoch": 0.20090215128383068, "grad_norm": 1.4365261793136597, "learning_rate": 2.0086730268863833e-05, "loss": 0.7332, "step": 1158 }, { "epoch": 0.20107564191533658, "grad_norm": 0.7282311320304871, "learning_rate": 2.0104076322636602e-05, "loss": 0.7644, "step": 1159 }, { "epoch": 0.20124913254684246, "grad_norm": 0.7782039642333984, "learning_rate": 2.0121422376409367e-05, "loss": 0.6063, "step": 1160 }, { "epoch": 0.20142262317834836, "grad_norm": 0.5990754961967468, "learning_rate": 2.0138768430182135e-05, "loss": 0.7354, "step": 1161 }, { "epoch": 0.20159611380985426, "grad_norm": 0.8651938438415527, "learning_rate": 2.01561144839549e-05, "loss": 0.5518, "step": 1162 }, { "epoch": 0.20176960444136016, "grad_norm": 1.5074180364608765, "learning_rate": 2.017346053772767e-05, "loss": 0.7057, "step": 1163 }, { "epoch": 0.20194309507286606, "grad_norm": 0.8941850662231445, "learning_rate": 2.0190806591500434e-05, "loss": 0.7015, "step": 1164 }, { "epoch": 0.20211658570437196, "grad_norm": 0.8068917989730835, "learning_rate": 2.02081526452732e-05, "loss": 0.649, "step": 1165 }, { "epoch": 0.20229007633587787, "grad_norm": 1.2736928462982178, "learning_rate": 2.0225498699045967e-05, "loss": 0.6069, "step": 1166 }, { "epoch": 0.20246356696738377, "grad_norm": 0.8033802509307861, "learning_rate": 2.0242844752818732e-05, "loss": 0.6732, "step": 1167 }, { "epoch": 0.20263705759888967, "grad_norm": 0.9226799607276917, "learning_rate": 2.0260190806591504e-05, "loss": 0.5758, "step": 1168 }, { "epoch": 0.20281054823039557, "grad_norm": 0.889927089214325, "learning_rate": 2.0277536860364272e-05, "loss": 0.635, "step": 1169 }, { "epoch": 0.20298403886190144, "grad_norm": 1.000593662261963, "learning_rate": 2.0294882914137037e-05, "loss": 0.6479, "step": 1170 }, { "epoch": 0.20315752949340735, "grad_norm": 0.9482126235961914, "learning_rate": 2.0312228967909806e-05, "loss": 0.7021, "step": 1171 }, { "epoch": 0.20333102012491325, "grad_norm": 0.7132568955421448, "learning_rate": 2.032957502168257e-05, "loss": 0.6385, "step": 1172 }, { "epoch": 0.20350451075641915, "grad_norm": 0.9106391072273254, "learning_rate": 2.0346921075455336e-05, "loss": 0.6313, "step": 1173 }, { "epoch": 0.20367800138792505, "grad_norm": 0.8777891993522644, "learning_rate": 2.0364267129228104e-05, "loss": 0.6902, "step": 1174 }, { "epoch": 0.20385149201943095, "grad_norm": 0.7946134209632874, "learning_rate": 2.038161318300087e-05, "loss": 0.5692, "step": 1175 }, { "epoch": 0.20402498265093685, "grad_norm": 0.7996445298194885, "learning_rate": 2.0398959236773637e-05, "loss": 0.6609, "step": 1176 }, { "epoch": 0.20419847328244276, "grad_norm": 0.9031609892845154, "learning_rate": 2.0416305290546402e-05, "loss": 0.6771, "step": 1177 }, { "epoch": 0.20437196391394866, "grad_norm": 1.042405605316162, "learning_rate": 2.043365134431917e-05, "loss": 0.6077, "step": 1178 }, { "epoch": 0.20454545454545456, "grad_norm": 0.8185474276542664, "learning_rate": 2.0450997398091936e-05, "loss": 0.6809, "step": 1179 }, { "epoch": 0.20471894517696043, "grad_norm": 0.7025969624519348, "learning_rate": 2.0468343451864704e-05, "loss": 0.6758, "step": 1180 }, { "epoch": 0.20489243580846633, "grad_norm": 0.7403766512870789, "learning_rate": 2.048568950563747e-05, "loss": 0.635, "step": 1181 }, { "epoch": 0.20506592643997223, "grad_norm": 1.3111224174499512, "learning_rate": 2.0503035559410238e-05, "loss": 0.6948, "step": 1182 }, { "epoch": 0.20523941707147814, "grad_norm": 0.9487437009811401, "learning_rate": 2.0520381613183003e-05, "loss": 0.6877, "step": 1183 }, { "epoch": 0.20541290770298404, "grad_norm": 0.8493991494178772, "learning_rate": 2.0537727666955768e-05, "loss": 0.6831, "step": 1184 }, { "epoch": 0.20558639833448994, "grad_norm": 0.9466814398765564, "learning_rate": 2.0555073720728536e-05, "loss": 0.824, "step": 1185 }, { "epoch": 0.20575988896599584, "grad_norm": 1.1064136028289795, "learning_rate": 2.05724197745013e-05, "loss": 0.6874, "step": 1186 }, { "epoch": 0.20593337959750174, "grad_norm": 1.2074017524719238, "learning_rate": 2.058976582827407e-05, "loss": 0.8259, "step": 1187 }, { "epoch": 0.20610687022900764, "grad_norm": 0.7420201301574707, "learning_rate": 2.0607111882046834e-05, "loss": 0.7096, "step": 1188 }, { "epoch": 0.20628036086051355, "grad_norm": 1.0953930616378784, "learning_rate": 2.0624457935819603e-05, "loss": 0.6317, "step": 1189 }, { "epoch": 0.20645385149201942, "grad_norm": 0.8365890979766846, "learning_rate": 2.0641803989592368e-05, "loss": 0.7136, "step": 1190 }, { "epoch": 0.20662734212352532, "grad_norm": 0.860747218132019, "learning_rate": 2.0659150043365136e-05, "loss": 0.6864, "step": 1191 }, { "epoch": 0.20680083275503122, "grad_norm": 0.7875193357467651, "learning_rate": 2.06764960971379e-05, "loss": 0.7091, "step": 1192 }, { "epoch": 0.20697432338653712, "grad_norm": 0.8169761896133423, "learning_rate": 2.069384215091067e-05, "loss": 0.5524, "step": 1193 }, { "epoch": 0.20714781401804302, "grad_norm": 0.8388202786445618, "learning_rate": 2.0711188204683434e-05, "loss": 0.6893, "step": 1194 }, { "epoch": 0.20732130464954893, "grad_norm": 1.2917083501815796, "learning_rate": 2.07285342584562e-05, "loss": 0.5945, "step": 1195 }, { "epoch": 0.20749479528105483, "grad_norm": 0.9487801194190979, "learning_rate": 2.0745880312228968e-05, "loss": 0.6343, "step": 1196 }, { "epoch": 0.20766828591256073, "grad_norm": 0.9186866283416748, "learning_rate": 2.0763226366001733e-05, "loss": 0.6746, "step": 1197 }, { "epoch": 0.20784177654406663, "grad_norm": 1.1883459091186523, "learning_rate": 2.0780572419774505e-05, "loss": 0.6559, "step": 1198 }, { "epoch": 0.20801526717557253, "grad_norm": 0.8712085485458374, "learning_rate": 2.0797918473547273e-05, "loss": 0.6472, "step": 1199 }, { "epoch": 0.2081887578070784, "grad_norm": 0.7856881618499756, "learning_rate": 2.0815264527320038e-05, "loss": 0.8369, "step": 1200 }, { "epoch": 0.2083622484385843, "grad_norm": 1.4384877681732178, "learning_rate": 2.0832610581092806e-05, "loss": 0.6841, "step": 1201 }, { "epoch": 0.2085357390700902, "grad_norm": 1.0486500263214111, "learning_rate": 2.084995663486557e-05, "loss": 0.6429, "step": 1202 }, { "epoch": 0.2087092297015961, "grad_norm": 0.9659835696220398, "learning_rate": 2.0867302688638336e-05, "loss": 0.634, "step": 1203 }, { "epoch": 0.208882720333102, "grad_norm": 0.7448798418045044, "learning_rate": 2.0884648742411105e-05, "loss": 0.6191, "step": 1204 }, { "epoch": 0.2090562109646079, "grad_norm": 0.9722678065299988, "learning_rate": 2.090199479618387e-05, "loss": 0.7395, "step": 1205 }, { "epoch": 0.20922970159611382, "grad_norm": 0.8635239005088806, "learning_rate": 2.0919340849956638e-05, "loss": 0.7175, "step": 1206 }, { "epoch": 0.20940319222761972, "grad_norm": 0.7446389198303223, "learning_rate": 2.0936686903729403e-05, "loss": 0.8132, "step": 1207 }, { "epoch": 0.20957668285912562, "grad_norm": 0.9571833610534668, "learning_rate": 2.095403295750217e-05, "loss": 0.783, "step": 1208 }, { "epoch": 0.20975017349063152, "grad_norm": 1.470550775527954, "learning_rate": 2.0971379011274937e-05, "loss": 0.5852, "step": 1209 }, { "epoch": 0.2099236641221374, "grad_norm": 0.820504903793335, "learning_rate": 2.0988725065047705e-05, "loss": 0.6759, "step": 1210 }, { "epoch": 0.2100971547536433, "grad_norm": 1.0833055973052979, "learning_rate": 2.100607111882047e-05, "loss": 0.6273, "step": 1211 }, { "epoch": 0.2102706453851492, "grad_norm": 0.8269476890563965, "learning_rate": 2.102341717259324e-05, "loss": 0.7256, "step": 1212 }, { "epoch": 0.2104441360166551, "grad_norm": 0.9470596313476562, "learning_rate": 2.1040763226366003e-05, "loss": 0.6292, "step": 1213 }, { "epoch": 0.210617626648161, "grad_norm": 2.144624710083008, "learning_rate": 2.105810928013877e-05, "loss": 0.5927, "step": 1214 }, { "epoch": 0.2107911172796669, "grad_norm": 0.7160753607749939, "learning_rate": 2.1075455333911537e-05, "loss": 0.6057, "step": 1215 }, { "epoch": 0.2109646079111728, "grad_norm": 0.8870556354522705, "learning_rate": 2.1092801387684302e-05, "loss": 0.6117, "step": 1216 }, { "epoch": 0.2111380985426787, "grad_norm": 1.2170469760894775, "learning_rate": 2.111014744145707e-05, "loss": 0.6293, "step": 1217 }, { "epoch": 0.2113115891741846, "grad_norm": 1.373672604560852, "learning_rate": 2.1127493495229835e-05, "loss": 0.5717, "step": 1218 }, { "epoch": 0.21148507980569048, "grad_norm": 1.0951582193374634, "learning_rate": 2.1144839549002604e-05, "loss": 0.6418, "step": 1219 }, { "epoch": 0.21165857043719638, "grad_norm": 0.8446418642997742, "learning_rate": 2.116218560277537e-05, "loss": 0.5923, "step": 1220 }, { "epoch": 0.21183206106870228, "grad_norm": 0.7682600617408752, "learning_rate": 2.1179531656548137e-05, "loss": 0.7159, "step": 1221 }, { "epoch": 0.21200555170020818, "grad_norm": 0.8873063921928406, "learning_rate": 2.1196877710320902e-05, "loss": 0.7673, "step": 1222 }, { "epoch": 0.21217904233171409, "grad_norm": 0.7964252829551697, "learning_rate": 2.121422376409367e-05, "loss": 0.692, "step": 1223 }, { "epoch": 0.21235253296322, "grad_norm": 0.831866443157196, "learning_rate": 2.1231569817866435e-05, "loss": 0.73, "step": 1224 }, { "epoch": 0.2125260235947259, "grad_norm": 1.1679537296295166, "learning_rate": 2.12489158716392e-05, "loss": 0.7026, "step": 1225 }, { "epoch": 0.2126995142262318, "grad_norm": 1.1698815822601318, "learning_rate": 2.126626192541197e-05, "loss": 0.5995, "step": 1226 }, { "epoch": 0.2128730048577377, "grad_norm": 1.1372910737991333, "learning_rate": 2.128360797918474e-05, "loss": 0.6755, "step": 1227 }, { "epoch": 0.2130464954892436, "grad_norm": 0.8028494119644165, "learning_rate": 2.1300954032957505e-05, "loss": 0.5986, "step": 1228 }, { "epoch": 0.21321998612074947, "grad_norm": 0.8080164790153503, "learning_rate": 2.1318300086730274e-05, "loss": 0.5869, "step": 1229 }, { "epoch": 0.21339347675225537, "grad_norm": 1.5431324243545532, "learning_rate": 2.133564614050304e-05, "loss": 0.6377, "step": 1230 }, { "epoch": 0.21356696738376127, "grad_norm": 0.7453404068946838, "learning_rate": 2.1352992194275807e-05, "loss": 0.7156, "step": 1231 }, { "epoch": 0.21374045801526717, "grad_norm": 1.216294765472412, "learning_rate": 2.1370338248048572e-05, "loss": 0.5951, "step": 1232 }, { "epoch": 0.21391394864677307, "grad_norm": 1.3197087049484253, "learning_rate": 2.1387684301821337e-05, "loss": 0.533, "step": 1233 }, { "epoch": 0.21408743927827897, "grad_norm": 0.753345787525177, "learning_rate": 2.1405030355594106e-05, "loss": 0.5773, "step": 1234 }, { "epoch": 0.21426092990978488, "grad_norm": 1.1952768564224243, "learning_rate": 2.142237640936687e-05, "loss": 0.6125, "step": 1235 }, { "epoch": 0.21443442054129078, "grad_norm": 3.2357378005981445, "learning_rate": 2.143972246313964e-05, "loss": 0.6302, "step": 1236 }, { "epoch": 0.21460791117279668, "grad_norm": 1.2979215383529663, "learning_rate": 2.1457068516912404e-05, "loss": 0.6006, "step": 1237 }, { "epoch": 0.21478140180430258, "grad_norm": 0.8040156364440918, "learning_rate": 2.1474414570685172e-05, "loss": 0.771, "step": 1238 }, { "epoch": 0.21495489243580845, "grad_norm": 0.8429499268531799, "learning_rate": 2.1491760624457937e-05, "loss": 0.7969, "step": 1239 }, { "epoch": 0.21512838306731435, "grad_norm": 1.141281247138977, "learning_rate": 2.1509106678230706e-05, "loss": 0.6262, "step": 1240 }, { "epoch": 0.21530187369882026, "grad_norm": 0.8168627023696899, "learning_rate": 2.152645273200347e-05, "loss": 0.7534, "step": 1241 }, { "epoch": 0.21547536433032616, "grad_norm": 0.8681612610816956, "learning_rate": 2.154379878577624e-05, "loss": 0.7527, "step": 1242 }, { "epoch": 0.21564885496183206, "grad_norm": 1.500241756439209, "learning_rate": 2.1561144839549004e-05, "loss": 0.6274, "step": 1243 }, { "epoch": 0.21582234559333796, "grad_norm": 2.348919630050659, "learning_rate": 2.157849089332177e-05, "loss": 0.65, "step": 1244 }, { "epoch": 0.21599583622484386, "grad_norm": 0.8949500918388367, "learning_rate": 2.1595836947094538e-05, "loss": 0.5729, "step": 1245 }, { "epoch": 0.21616932685634976, "grad_norm": 0.706988513469696, "learning_rate": 2.1613183000867303e-05, "loss": 0.7266, "step": 1246 }, { "epoch": 0.21634281748785567, "grad_norm": 0.9260134696960449, "learning_rate": 2.163052905464007e-05, "loss": 0.6777, "step": 1247 }, { "epoch": 0.21651630811936157, "grad_norm": 1.1210914850234985, "learning_rate": 2.1647875108412836e-05, "loss": 0.5737, "step": 1248 }, { "epoch": 0.21668979875086744, "grad_norm": 0.857086718082428, "learning_rate": 2.1665221162185604e-05, "loss": 0.7446, "step": 1249 }, { "epoch": 0.21686328938237334, "grad_norm": 0.6255559325218201, "learning_rate": 2.168256721595837e-05, "loss": 0.7583, "step": 1250 }, { "epoch": 0.21703678001387924, "grad_norm": 0.9600679874420166, "learning_rate": 2.1699913269731138e-05, "loss": 0.7446, "step": 1251 }, { "epoch": 0.21721027064538515, "grad_norm": 0.932635486125946, "learning_rate": 2.1717259323503903e-05, "loss": 0.6096, "step": 1252 }, { "epoch": 0.21738376127689105, "grad_norm": 1.0339999198913574, "learning_rate": 2.173460537727667e-05, "loss": 0.6318, "step": 1253 }, { "epoch": 0.21755725190839695, "grad_norm": 1.0141346454620361, "learning_rate": 2.1751951431049436e-05, "loss": 0.5837, "step": 1254 }, { "epoch": 0.21773074253990285, "grad_norm": 1.782895565032959, "learning_rate": 2.17692974848222e-05, "loss": 0.5902, "step": 1255 }, { "epoch": 0.21790423317140875, "grad_norm": 0.7666664719581604, "learning_rate": 2.178664353859497e-05, "loss": 0.6473, "step": 1256 }, { "epoch": 0.21807772380291465, "grad_norm": 0.7407053112983704, "learning_rate": 2.180398959236774e-05, "loss": 0.5646, "step": 1257 }, { "epoch": 0.21825121443442055, "grad_norm": 0.742052435874939, "learning_rate": 2.1821335646140506e-05, "loss": 0.6991, "step": 1258 }, { "epoch": 0.21842470506592643, "grad_norm": 0.9027974009513855, "learning_rate": 2.1838681699913275e-05, "loss": 0.7266, "step": 1259 }, { "epoch": 0.21859819569743233, "grad_norm": 0.6909552216529846, "learning_rate": 2.185602775368604e-05, "loss": 0.7407, "step": 1260 }, { "epoch": 0.21877168632893823, "grad_norm": 0.9069677591323853, "learning_rate": 2.1873373807458808e-05, "loss": 0.6567, "step": 1261 }, { "epoch": 0.21894517696044413, "grad_norm": 0.8825785517692566, "learning_rate": 2.1890719861231573e-05, "loss": 0.6696, "step": 1262 }, { "epoch": 0.21911866759195003, "grad_norm": 0.6216393709182739, "learning_rate": 2.1908065915004338e-05, "loss": 0.6754, "step": 1263 }, { "epoch": 0.21929215822345594, "grad_norm": 0.7830954790115356, "learning_rate": 2.1925411968777107e-05, "loss": 0.7622, "step": 1264 }, { "epoch": 0.21946564885496184, "grad_norm": 0.897305428981781, "learning_rate": 2.194275802254987e-05, "loss": 0.7532, "step": 1265 }, { "epoch": 0.21963913948646774, "grad_norm": 0.8858644366264343, "learning_rate": 2.196010407632264e-05, "loss": 0.6171, "step": 1266 }, { "epoch": 0.21981263011797364, "grad_norm": 1.420778512954712, "learning_rate": 2.1977450130095405e-05, "loss": 0.5931, "step": 1267 }, { "epoch": 0.21998612074947954, "grad_norm": 0.7846259474754333, "learning_rate": 2.1994796183868173e-05, "loss": 0.7, "step": 1268 }, { "epoch": 0.22015961138098541, "grad_norm": 0.9707682132720947, "learning_rate": 2.201214223764094e-05, "loss": 0.7571, "step": 1269 }, { "epoch": 0.22033310201249132, "grad_norm": 0.917709469795227, "learning_rate": 2.2029488291413707e-05, "loss": 0.7003, "step": 1270 }, { "epoch": 0.22050659264399722, "grad_norm": 0.8586825728416443, "learning_rate": 2.2046834345186472e-05, "loss": 0.697, "step": 1271 }, { "epoch": 0.22068008327550312, "grad_norm": 0.9106934666633606, "learning_rate": 2.206418039895924e-05, "loss": 0.6313, "step": 1272 }, { "epoch": 0.22085357390700902, "grad_norm": 0.8944715857505798, "learning_rate": 2.2081526452732005e-05, "loss": 0.6823, "step": 1273 }, { "epoch": 0.22102706453851492, "grad_norm": 0.9109546542167664, "learning_rate": 2.209887250650477e-05, "loss": 0.5646, "step": 1274 }, { "epoch": 0.22120055517002082, "grad_norm": 0.8423030376434326, "learning_rate": 2.211621856027754e-05, "loss": 0.6578, "step": 1275 }, { "epoch": 0.22137404580152673, "grad_norm": 0.8873112201690674, "learning_rate": 2.2133564614050303e-05, "loss": 0.6309, "step": 1276 }, { "epoch": 0.22154753643303263, "grad_norm": 1.1744393110275269, "learning_rate": 2.2150910667823072e-05, "loss": 0.7184, "step": 1277 }, { "epoch": 0.22172102706453853, "grad_norm": 0.6773825883865356, "learning_rate": 2.2168256721595837e-05, "loss": 0.7881, "step": 1278 }, { "epoch": 0.2218945176960444, "grad_norm": 0.8595862984657288, "learning_rate": 2.2185602775368605e-05, "loss": 0.6566, "step": 1279 }, { "epoch": 0.2220680083275503, "grad_norm": 0.8502959609031677, "learning_rate": 2.220294882914137e-05, "loss": 0.5854, "step": 1280 }, { "epoch": 0.2222414989590562, "grad_norm": 0.8511287569999695, "learning_rate": 2.222029488291414e-05, "loss": 0.6636, "step": 1281 }, { "epoch": 0.2224149895905621, "grad_norm": 0.9587035775184631, "learning_rate": 2.2237640936686904e-05, "loss": 0.6709, "step": 1282 }, { "epoch": 0.222588480222068, "grad_norm": 0.8576658964157104, "learning_rate": 2.2254986990459672e-05, "loss": 0.5322, "step": 1283 }, { "epoch": 0.2227619708535739, "grad_norm": 1.0752559900283813, "learning_rate": 2.2272333044232437e-05, "loss": 0.705, "step": 1284 }, { "epoch": 0.2229354614850798, "grad_norm": 1.1957930326461792, "learning_rate": 2.2289679098005202e-05, "loss": 0.5844, "step": 1285 }, { "epoch": 0.2231089521165857, "grad_norm": 1.0012633800506592, "learning_rate": 2.230702515177797e-05, "loss": 0.6102, "step": 1286 }, { "epoch": 0.22328244274809161, "grad_norm": 1.117314100265503, "learning_rate": 2.2324371205550742e-05, "loss": 0.6372, "step": 1287 }, { "epoch": 0.22345593337959752, "grad_norm": 0.8368109464645386, "learning_rate": 2.2341717259323507e-05, "loss": 0.5454, "step": 1288 }, { "epoch": 0.2236294240111034, "grad_norm": 1.178607702255249, "learning_rate": 2.2359063313096276e-05, "loss": 0.6875, "step": 1289 }, { "epoch": 0.2238029146426093, "grad_norm": 0.7383229732513428, "learning_rate": 2.237640936686904e-05, "loss": 0.6957, "step": 1290 }, { "epoch": 0.2239764052741152, "grad_norm": 1.0060182809829712, "learning_rate": 2.2393755420641806e-05, "loss": 0.6543, "step": 1291 }, { "epoch": 0.2241498959056211, "grad_norm": 0.8270978927612305, "learning_rate": 2.2411101474414574e-05, "loss": 0.6302, "step": 1292 }, { "epoch": 0.224323386537127, "grad_norm": 1.0428016185760498, "learning_rate": 2.242844752818734e-05, "loss": 0.741, "step": 1293 }, { "epoch": 0.2244968771686329, "grad_norm": 0.7089052796363831, "learning_rate": 2.2445793581960107e-05, "loss": 0.7869, "step": 1294 }, { "epoch": 0.2246703678001388, "grad_norm": 1.201980710029602, "learning_rate": 2.2463139635732872e-05, "loss": 0.6082, "step": 1295 }, { "epoch": 0.2248438584316447, "grad_norm": 1.013190507888794, "learning_rate": 2.248048568950564e-05, "loss": 0.6641, "step": 1296 }, { "epoch": 0.2250173490631506, "grad_norm": 0.9957582950592041, "learning_rate": 2.2497831743278406e-05, "loss": 0.6294, "step": 1297 }, { "epoch": 0.22519083969465647, "grad_norm": 0.8996625542640686, "learning_rate": 2.2515177797051174e-05, "loss": 0.6589, "step": 1298 }, { "epoch": 0.22536433032616238, "grad_norm": 1.442975640296936, "learning_rate": 2.253252385082394e-05, "loss": 0.6422, "step": 1299 }, { "epoch": 0.22553782095766828, "grad_norm": 0.7491903305053711, "learning_rate": 2.2549869904596708e-05, "loss": 0.6428, "step": 1300 }, { "epoch": 0.22571131158917418, "grad_norm": 0.7685233950614929, "learning_rate": 2.2567215958369473e-05, "loss": 0.6646, "step": 1301 }, { "epoch": 0.22588480222068008, "grad_norm": 1.3932480812072754, "learning_rate": 2.258456201214224e-05, "loss": 0.6388, "step": 1302 }, { "epoch": 0.22605829285218598, "grad_norm": 0.8122157454490662, "learning_rate": 2.2601908065915006e-05, "loss": 0.6692, "step": 1303 }, { "epoch": 0.22623178348369188, "grad_norm": 0.857111930847168, "learning_rate": 2.261925411968777e-05, "loss": 0.8053, "step": 1304 }, { "epoch": 0.22640527411519779, "grad_norm": 1.4582233428955078, "learning_rate": 2.263660017346054e-05, "loss": 0.6735, "step": 1305 }, { "epoch": 0.2265787647467037, "grad_norm": 1.2034339904785156, "learning_rate": 2.2653946227233304e-05, "loss": 0.7019, "step": 1306 }, { "epoch": 0.2267522553782096, "grad_norm": 0.9479273557662964, "learning_rate": 2.2671292281006073e-05, "loss": 0.6255, "step": 1307 }, { "epoch": 0.22692574600971546, "grad_norm": 0.9462427496910095, "learning_rate": 2.2688638334778838e-05, "loss": 0.6827, "step": 1308 }, { "epoch": 0.22709923664122136, "grad_norm": 1.025986909866333, "learning_rate": 2.2705984388551606e-05, "loss": 0.5671, "step": 1309 }, { "epoch": 0.22727272727272727, "grad_norm": 1.1107748746871948, "learning_rate": 2.272333044232437e-05, "loss": 0.7192, "step": 1310 }, { "epoch": 0.22744621790423317, "grad_norm": 1.1183154582977295, "learning_rate": 2.274067649609714e-05, "loss": 0.741, "step": 1311 }, { "epoch": 0.22761970853573907, "grad_norm": 0.8567702174186707, "learning_rate": 2.2758022549869905e-05, "loss": 0.769, "step": 1312 }, { "epoch": 0.22779319916724497, "grad_norm": 0.747605562210083, "learning_rate": 2.2775368603642673e-05, "loss": 0.6871, "step": 1313 }, { "epoch": 0.22796668979875087, "grad_norm": 1.1219127178192139, "learning_rate": 2.2792714657415438e-05, "loss": 0.6958, "step": 1314 }, { "epoch": 0.22814018043025677, "grad_norm": 0.9096964597702026, "learning_rate": 2.2810060711188203e-05, "loss": 0.5948, "step": 1315 }, { "epoch": 0.22831367106176267, "grad_norm": 0.7826958298683167, "learning_rate": 2.2827406764960975e-05, "loss": 0.6185, "step": 1316 }, { "epoch": 0.22848716169326858, "grad_norm": 1.2576274871826172, "learning_rate": 2.2844752818733743e-05, "loss": 0.6299, "step": 1317 }, { "epoch": 0.22866065232477445, "grad_norm": 1.082729458808899, "learning_rate": 2.2862098872506508e-05, "loss": 0.6459, "step": 1318 }, { "epoch": 0.22883414295628035, "grad_norm": 0.7407817840576172, "learning_rate": 2.2879444926279276e-05, "loss": 0.6615, "step": 1319 }, { "epoch": 0.22900763358778625, "grad_norm": 0.8147691488265991, "learning_rate": 2.289679098005204e-05, "loss": 0.671, "step": 1320 }, { "epoch": 0.22918112421929215, "grad_norm": 0.7441374063491821, "learning_rate": 2.2914137033824806e-05, "loss": 0.6202, "step": 1321 }, { "epoch": 0.22935461485079806, "grad_norm": 2.1673049926757812, "learning_rate": 2.2931483087597575e-05, "loss": 0.6427, "step": 1322 }, { "epoch": 0.22952810548230396, "grad_norm": 0.8096586465835571, "learning_rate": 2.294882914137034e-05, "loss": 0.5872, "step": 1323 }, { "epoch": 0.22970159611380986, "grad_norm": 0.8028786778450012, "learning_rate": 2.2966175195143108e-05, "loss": 0.6263, "step": 1324 }, { "epoch": 0.22987508674531576, "grad_norm": 0.6502306461334229, "learning_rate": 2.2983521248915873e-05, "loss": 0.6902, "step": 1325 }, { "epoch": 0.23004857737682166, "grad_norm": 0.8782081007957458, "learning_rate": 2.300086730268864e-05, "loss": 0.6886, "step": 1326 }, { "epoch": 0.23022206800832756, "grad_norm": 1.0046528577804565, "learning_rate": 2.3018213356461407e-05, "loss": 0.6178, "step": 1327 }, { "epoch": 0.23039555863983344, "grad_norm": 1.015367031097412, "learning_rate": 2.3035559410234175e-05, "loss": 0.559, "step": 1328 }, { "epoch": 0.23056904927133934, "grad_norm": 0.9212409853935242, "learning_rate": 2.305290546400694e-05, "loss": 0.6744, "step": 1329 }, { "epoch": 0.23074253990284524, "grad_norm": 0.669114351272583, "learning_rate": 2.307025151777971e-05, "loss": 0.7456, "step": 1330 }, { "epoch": 0.23091603053435114, "grad_norm": 0.755664587020874, "learning_rate": 2.3087597571552473e-05, "loss": 0.7069, "step": 1331 }, { "epoch": 0.23108952116585704, "grad_norm": 0.6906589269638062, "learning_rate": 2.3104943625325242e-05, "loss": 0.7749, "step": 1332 }, { "epoch": 0.23126301179736294, "grad_norm": 0.6546650528907776, "learning_rate": 2.3122289679098007e-05, "loss": 0.7385, "step": 1333 }, { "epoch": 0.23143650242886885, "grad_norm": 1.0618844032287598, "learning_rate": 2.3139635732870772e-05, "loss": 0.7479, "step": 1334 }, { "epoch": 0.23160999306037475, "grad_norm": 0.8666930198669434, "learning_rate": 2.315698178664354e-05, "loss": 0.6582, "step": 1335 }, { "epoch": 0.23178348369188065, "grad_norm": 0.6178314685821533, "learning_rate": 2.3174327840416305e-05, "loss": 0.6498, "step": 1336 }, { "epoch": 0.23195697432338655, "grad_norm": 0.9197214245796204, "learning_rate": 2.3191673894189074e-05, "loss": 0.5626, "step": 1337 }, { "epoch": 0.23213046495489242, "grad_norm": 1.4937176704406738, "learning_rate": 2.320901994796184e-05, "loss": 0.7056, "step": 1338 }, { "epoch": 0.23230395558639833, "grad_norm": 0.8406773209571838, "learning_rate": 2.3226366001734607e-05, "loss": 0.7026, "step": 1339 }, { "epoch": 0.23247744621790423, "grad_norm": 0.6967365145683289, "learning_rate": 2.3243712055507372e-05, "loss": 0.6952, "step": 1340 }, { "epoch": 0.23265093684941013, "grad_norm": 1.1950923204421997, "learning_rate": 2.326105810928014e-05, "loss": 0.7312, "step": 1341 }, { "epoch": 0.23282442748091603, "grad_norm": 0.819696843624115, "learning_rate": 2.3278404163052905e-05, "loss": 0.5697, "step": 1342 }, { "epoch": 0.23299791811242193, "grad_norm": 1.7255115509033203, "learning_rate": 2.3295750216825674e-05, "loss": 0.5286, "step": 1343 }, { "epoch": 0.23317140874392783, "grad_norm": 1.2118375301361084, "learning_rate": 2.331309627059844e-05, "loss": 0.7142, "step": 1344 }, { "epoch": 0.23334489937543373, "grad_norm": 1.0448431968688965, "learning_rate": 2.3330442324371204e-05, "loss": 0.6359, "step": 1345 }, { "epoch": 0.23351839000693964, "grad_norm": 1.05678129196167, "learning_rate": 2.3347788378143976e-05, "loss": 0.5945, "step": 1346 }, { "epoch": 0.23369188063844554, "grad_norm": 0.7752077579498291, "learning_rate": 2.3365134431916744e-05, "loss": 0.7537, "step": 1347 }, { "epoch": 0.2338653712699514, "grad_norm": 0.6281952261924744, "learning_rate": 2.338248048568951e-05, "loss": 0.627, "step": 1348 }, { "epoch": 0.2340388619014573, "grad_norm": 0.9324553608894348, "learning_rate": 2.3399826539462277e-05, "loss": 0.6323, "step": 1349 }, { "epoch": 0.23421235253296321, "grad_norm": 0.7048712372779846, "learning_rate": 2.3417172593235042e-05, "loss": 0.7174, "step": 1350 }, { "epoch": 0.23438584316446912, "grad_norm": 1.0396593809127808, "learning_rate": 2.3434518647007807e-05, "loss": 0.5864, "step": 1351 }, { "epoch": 0.23455933379597502, "grad_norm": 0.9440403580665588, "learning_rate": 2.3451864700780576e-05, "loss": 0.6034, "step": 1352 }, { "epoch": 0.23473282442748092, "grad_norm": 0.8695996403694153, "learning_rate": 2.346921075455334e-05, "loss": 0.587, "step": 1353 }, { "epoch": 0.23490631505898682, "grad_norm": 0.8083711862564087, "learning_rate": 2.348655680832611e-05, "loss": 0.7283, "step": 1354 }, { "epoch": 0.23507980569049272, "grad_norm": 0.8822265863418579, "learning_rate": 2.3503902862098874e-05, "loss": 0.6411, "step": 1355 }, { "epoch": 0.23525329632199862, "grad_norm": 0.7099165320396423, "learning_rate": 2.3521248915871643e-05, "loss": 0.6519, "step": 1356 }, { "epoch": 0.23542678695350452, "grad_norm": 0.6208922863006592, "learning_rate": 2.3538594969644408e-05, "loss": 0.6401, "step": 1357 }, { "epoch": 0.2356002775850104, "grad_norm": 0.8681457042694092, "learning_rate": 2.3555941023417176e-05, "loss": 0.674, "step": 1358 }, { "epoch": 0.2357737682165163, "grad_norm": 0.9446276426315308, "learning_rate": 2.357328707718994e-05, "loss": 0.6056, "step": 1359 }, { "epoch": 0.2359472588480222, "grad_norm": 3.0077075958251953, "learning_rate": 2.359063313096271e-05, "loss": 0.5176, "step": 1360 }, { "epoch": 0.2361207494795281, "grad_norm": 1.011677861213684, "learning_rate": 2.3607979184735474e-05, "loss": 0.5721, "step": 1361 }, { "epoch": 0.236294240111034, "grad_norm": 1.2111024856567383, "learning_rate": 2.362532523850824e-05, "loss": 0.6101, "step": 1362 }, { "epoch": 0.2364677307425399, "grad_norm": 0.6885407567024231, "learning_rate": 2.3642671292281008e-05, "loss": 0.7671, "step": 1363 }, { "epoch": 0.2366412213740458, "grad_norm": 0.8285648226737976, "learning_rate": 2.3660017346053773e-05, "loss": 0.7073, "step": 1364 }, { "epoch": 0.2368147120055517, "grad_norm": 0.75030916929245, "learning_rate": 2.367736339982654e-05, "loss": 0.6289, "step": 1365 }, { "epoch": 0.2369882026370576, "grad_norm": 0.5976499915122986, "learning_rate": 2.3694709453599306e-05, "loss": 0.8052, "step": 1366 }, { "epoch": 0.23716169326856348, "grad_norm": 0.5793814063072205, "learning_rate": 2.3712055507372074e-05, "loss": 0.7625, "step": 1367 }, { "epoch": 0.23733518390006939, "grad_norm": 0.7182748317718506, "learning_rate": 2.372940156114484e-05, "loss": 0.6562, "step": 1368 }, { "epoch": 0.2375086745315753, "grad_norm": 0.8480749726295471, "learning_rate": 2.3746747614917608e-05, "loss": 0.6787, "step": 1369 }, { "epoch": 0.2376821651630812, "grad_norm": 1.211319088935852, "learning_rate": 2.3764093668690373e-05, "loss": 0.6803, "step": 1370 }, { "epoch": 0.2378556557945871, "grad_norm": 0.9534586668014526, "learning_rate": 2.378143972246314e-05, "loss": 0.7072, "step": 1371 }, { "epoch": 0.238029146426093, "grad_norm": 1.0700056552886963, "learning_rate": 2.3798785776235906e-05, "loss": 0.7094, "step": 1372 }, { "epoch": 0.2382026370575989, "grad_norm": 0.8894718885421753, "learning_rate": 2.3816131830008675e-05, "loss": 0.5952, "step": 1373 }, { "epoch": 0.2383761276891048, "grad_norm": 0.8361707925796509, "learning_rate": 2.383347788378144e-05, "loss": 0.6184, "step": 1374 }, { "epoch": 0.2385496183206107, "grad_norm": 0.9295462369918823, "learning_rate": 2.385082393755421e-05, "loss": 0.6335, "step": 1375 }, { "epoch": 0.2387231089521166, "grad_norm": 0.7952740788459778, "learning_rate": 2.3868169991326976e-05, "loss": 0.6921, "step": 1376 }, { "epoch": 0.23889659958362247, "grad_norm": 0.7641652226448059, "learning_rate": 2.3885516045099745e-05, "loss": 0.6609, "step": 1377 }, { "epoch": 0.23907009021512837, "grad_norm": 1.317805290222168, "learning_rate": 2.390286209887251e-05, "loss": 0.6741, "step": 1378 }, { "epoch": 0.23924358084663427, "grad_norm": 0.9185047149658203, "learning_rate": 2.3920208152645278e-05, "loss": 0.5428, "step": 1379 }, { "epoch": 0.23941707147814018, "grad_norm": 0.9052122235298157, "learning_rate": 2.3937554206418043e-05, "loss": 0.6556, "step": 1380 }, { "epoch": 0.23959056210964608, "grad_norm": 0.8003539443016052, "learning_rate": 2.3954900260190808e-05, "loss": 0.7117, "step": 1381 }, { "epoch": 0.23976405274115198, "grad_norm": 1.0138602256774902, "learning_rate": 2.3972246313963577e-05, "loss": 0.6655, "step": 1382 }, { "epoch": 0.23993754337265788, "grad_norm": 0.8630676865577698, "learning_rate": 2.398959236773634e-05, "loss": 0.7196, "step": 1383 }, { "epoch": 0.24011103400416378, "grad_norm": 0.9065120220184326, "learning_rate": 2.400693842150911e-05, "loss": 0.8337, "step": 1384 }, { "epoch": 0.24028452463566968, "grad_norm": 0.9683220982551575, "learning_rate": 2.4024284475281875e-05, "loss": 0.6088, "step": 1385 }, { "epoch": 0.24045801526717558, "grad_norm": 1.0982496738433838, "learning_rate": 2.4041630529054643e-05, "loss": 0.5911, "step": 1386 }, { "epoch": 0.24063150589868146, "grad_norm": 1.0864150524139404, "learning_rate": 2.405897658282741e-05, "loss": 0.6309, "step": 1387 }, { "epoch": 0.24080499653018736, "grad_norm": 1.4859693050384521, "learning_rate": 2.4076322636600177e-05, "loss": 0.6058, "step": 1388 }, { "epoch": 0.24097848716169326, "grad_norm": 0.7649808526039124, "learning_rate": 2.4093668690372942e-05, "loss": 0.6556, "step": 1389 }, { "epoch": 0.24115197779319916, "grad_norm": 0.8709794282913208, "learning_rate": 2.411101474414571e-05, "loss": 0.6486, "step": 1390 }, { "epoch": 0.24132546842470506, "grad_norm": 0.8978895545005798, "learning_rate": 2.4128360797918475e-05, "loss": 0.5717, "step": 1391 }, { "epoch": 0.24149895905621097, "grad_norm": 0.870811939239502, "learning_rate": 2.414570685169124e-05, "loss": 0.7159, "step": 1392 }, { "epoch": 0.24167244968771687, "grad_norm": 0.8962591290473938, "learning_rate": 2.416305290546401e-05, "loss": 0.6539, "step": 1393 }, { "epoch": 0.24184594031922277, "grad_norm": 0.8146924376487732, "learning_rate": 2.4180398959236774e-05, "loss": 0.7087, "step": 1394 }, { "epoch": 0.24201943095072867, "grad_norm": 1.1942592859268188, "learning_rate": 2.4197745013009542e-05, "loss": 0.5968, "step": 1395 }, { "epoch": 0.24219292158223457, "grad_norm": 0.867985188961029, "learning_rate": 2.4215091066782307e-05, "loss": 0.6763, "step": 1396 }, { "epoch": 0.24236641221374045, "grad_norm": 0.7588414549827576, "learning_rate": 2.4232437120555075e-05, "loss": 0.5974, "step": 1397 }, { "epoch": 0.24253990284524635, "grad_norm": 1.1507147550582886, "learning_rate": 2.424978317432784e-05, "loss": 0.7734, "step": 1398 }, { "epoch": 0.24271339347675225, "grad_norm": 0.8985394835472107, "learning_rate": 2.426712922810061e-05, "loss": 0.6003, "step": 1399 }, { "epoch": 0.24288688410825815, "grad_norm": 0.8671207427978516, "learning_rate": 2.4284475281873374e-05, "loss": 0.6472, "step": 1400 }, { "epoch": 0.24306037473976405, "grad_norm": 0.7705470323562622, "learning_rate": 2.4301821335646142e-05, "loss": 0.7676, "step": 1401 }, { "epoch": 0.24323386537126995, "grad_norm": 1.5794962644577026, "learning_rate": 2.4319167389418907e-05, "loss": 0.7045, "step": 1402 }, { "epoch": 0.24340735600277585, "grad_norm": 0.8116719126701355, "learning_rate": 2.4336513443191676e-05, "loss": 0.6333, "step": 1403 }, { "epoch": 0.24358084663428176, "grad_norm": 0.8300854563713074, "learning_rate": 2.435385949696444e-05, "loss": 0.6934, "step": 1404 }, { "epoch": 0.24375433726578766, "grad_norm": 0.7705616354942322, "learning_rate": 2.4371205550737212e-05, "loss": 0.6648, "step": 1405 }, { "epoch": 0.24392782789729356, "grad_norm": 0.9937134385108948, "learning_rate": 2.4388551604509977e-05, "loss": 0.6571, "step": 1406 }, { "epoch": 0.24410131852879943, "grad_norm": 1.039591908454895, "learning_rate": 2.4405897658282746e-05, "loss": 0.6, "step": 1407 }, { "epoch": 0.24427480916030533, "grad_norm": 1.7142187356948853, "learning_rate": 2.442324371205551e-05, "loss": 0.7238, "step": 1408 }, { "epoch": 0.24444829979181124, "grad_norm": 1.2775105237960815, "learning_rate": 2.444058976582828e-05, "loss": 0.6331, "step": 1409 }, { "epoch": 0.24462179042331714, "grad_norm": 2.677325487136841, "learning_rate": 2.4457935819601044e-05, "loss": 0.6667, "step": 1410 }, { "epoch": 0.24479528105482304, "grad_norm": 0.825202226638794, "learning_rate": 2.447528187337381e-05, "loss": 0.7372, "step": 1411 }, { "epoch": 0.24496877168632894, "grad_norm": 1.1789332628250122, "learning_rate": 2.4492627927146577e-05, "loss": 0.5078, "step": 1412 }, { "epoch": 0.24514226231783484, "grad_norm": 0.6857295036315918, "learning_rate": 2.4509973980919342e-05, "loss": 0.5812, "step": 1413 }, { "epoch": 0.24531575294934074, "grad_norm": 0.7828952074050903, "learning_rate": 2.452732003469211e-05, "loss": 0.7065, "step": 1414 }, { "epoch": 0.24548924358084664, "grad_norm": 0.6869457364082336, "learning_rate": 2.4544666088464876e-05, "loss": 0.8044, "step": 1415 }, { "epoch": 0.24566273421235255, "grad_norm": 0.7021481990814209, "learning_rate": 2.4562012142237644e-05, "loss": 0.6599, "step": 1416 }, { "epoch": 0.24583622484385842, "grad_norm": 0.9084320068359375, "learning_rate": 2.457935819601041e-05, "loss": 0.6476, "step": 1417 }, { "epoch": 0.24600971547536432, "grad_norm": 1.0445537567138672, "learning_rate": 2.4596704249783178e-05, "loss": 0.7562, "step": 1418 }, { "epoch": 0.24618320610687022, "grad_norm": 0.7493770122528076, "learning_rate": 2.4614050303555943e-05, "loss": 0.631, "step": 1419 }, { "epoch": 0.24635669673837612, "grad_norm": 0.9136198163032532, "learning_rate": 2.463139635732871e-05, "loss": 0.5819, "step": 1420 }, { "epoch": 0.24653018736988203, "grad_norm": 1.6267791986465454, "learning_rate": 2.4648742411101476e-05, "loss": 0.5837, "step": 1421 }, { "epoch": 0.24670367800138793, "grad_norm": 1.2293862104415894, "learning_rate": 2.466608846487424e-05, "loss": 0.7241, "step": 1422 }, { "epoch": 0.24687716863289383, "grad_norm": 0.7609236836433411, "learning_rate": 2.468343451864701e-05, "loss": 0.6497, "step": 1423 }, { "epoch": 0.24705065926439973, "grad_norm": 0.693767786026001, "learning_rate": 2.4700780572419774e-05, "loss": 0.6656, "step": 1424 }, { "epoch": 0.24722414989590563, "grad_norm": 0.6940293908119202, "learning_rate": 2.4718126626192543e-05, "loss": 0.7141, "step": 1425 }, { "epoch": 0.24739764052741153, "grad_norm": 0.6686576008796692, "learning_rate": 2.4735472679965308e-05, "loss": 0.6167, "step": 1426 }, { "epoch": 0.2475711311589174, "grad_norm": 0.7655903697013855, "learning_rate": 2.4752818733738076e-05, "loss": 0.6821, "step": 1427 }, { "epoch": 0.2477446217904233, "grad_norm": 1.346714735031128, "learning_rate": 2.477016478751084e-05, "loss": 0.5706, "step": 1428 }, { "epoch": 0.2479181124219292, "grad_norm": 1.171101450920105, "learning_rate": 2.478751084128361e-05, "loss": 0.4973, "step": 1429 }, { "epoch": 0.2480916030534351, "grad_norm": 0.6083464622497559, "learning_rate": 2.4804856895056375e-05, "loss": 0.7275, "step": 1430 }, { "epoch": 0.248265093684941, "grad_norm": 1.08219575881958, "learning_rate": 2.4822202948829143e-05, "loss": 0.7886, "step": 1431 }, { "epoch": 0.24843858431644691, "grad_norm": 0.7164523601531982, "learning_rate": 2.4839549002601908e-05, "loss": 0.7415, "step": 1432 }, { "epoch": 0.24861207494795282, "grad_norm": 0.8163508772850037, "learning_rate": 2.4856895056374676e-05, "loss": 0.7021, "step": 1433 }, { "epoch": 0.24878556557945872, "grad_norm": 0.6706337928771973, "learning_rate": 2.4874241110147445e-05, "loss": 0.6196, "step": 1434 }, { "epoch": 0.24895905621096462, "grad_norm": 1.0622358322143555, "learning_rate": 2.4891587163920213e-05, "loss": 0.5898, "step": 1435 }, { "epoch": 0.24913254684247052, "grad_norm": 1.08125901222229, "learning_rate": 2.4908933217692978e-05, "loss": 0.6, "step": 1436 }, { "epoch": 0.2493060374739764, "grad_norm": 0.7177915573120117, "learning_rate": 2.4926279271465747e-05, "loss": 0.6243, "step": 1437 }, { "epoch": 0.2494795281054823, "grad_norm": 0.9025813937187195, "learning_rate": 2.494362532523851e-05, "loss": 0.5082, "step": 1438 }, { "epoch": 0.2496530187369882, "grad_norm": 2.0147836208343506, "learning_rate": 2.496097137901128e-05, "loss": 0.6139, "step": 1439 }, { "epoch": 0.2498265093684941, "grad_norm": 0.5875595211982727, "learning_rate": 2.4978317432784045e-05, "loss": 0.7036, "step": 1440 }, { "epoch": 0.25, "grad_norm": 1.0961546897888184, "learning_rate": 2.499566348655681e-05, "loss": 0.5535, "step": 1441 }, { "epoch": 0.2501734906315059, "grad_norm": 0.606073796749115, "learning_rate": 2.501300954032958e-05, "loss": 0.6997, "step": 1442 }, { "epoch": 0.2503469812630118, "grad_norm": 0.9950074553489685, "learning_rate": 2.5030355594102343e-05, "loss": 0.606, "step": 1443 }, { "epoch": 0.2505204718945177, "grad_norm": 1.1742510795593262, "learning_rate": 2.5047701647875112e-05, "loss": 0.616, "step": 1444 }, { "epoch": 0.2506939625260236, "grad_norm": 0.967836856842041, "learning_rate": 2.5065047701647877e-05, "loss": 0.7161, "step": 1445 }, { "epoch": 0.2508674531575295, "grad_norm": 1.021001935005188, "learning_rate": 2.5082393755420645e-05, "loss": 0.6312, "step": 1446 }, { "epoch": 0.2510409437890354, "grad_norm": 1.0591120719909668, "learning_rate": 2.509973980919341e-05, "loss": 0.594, "step": 1447 }, { "epoch": 0.2512144344205413, "grad_norm": 0.6812381744384766, "learning_rate": 2.511708586296618e-05, "loss": 0.6257, "step": 1448 }, { "epoch": 0.2513879250520472, "grad_norm": 1.3215664625167847, "learning_rate": 2.5134431916738944e-05, "loss": 0.6381, "step": 1449 }, { "epoch": 0.2515614156835531, "grad_norm": 0.818316638469696, "learning_rate": 2.5151777970511712e-05, "loss": 0.6444, "step": 1450 }, { "epoch": 0.25173490631505896, "grad_norm": 0.6277799010276794, "learning_rate": 2.5169124024284477e-05, "loss": 0.6661, "step": 1451 }, { "epoch": 0.25190839694656486, "grad_norm": 1.578797459602356, "learning_rate": 2.5186470078057242e-05, "loss": 0.8, "step": 1452 }, { "epoch": 0.25208188757807076, "grad_norm": 1.7333303689956665, "learning_rate": 2.520381613183001e-05, "loss": 0.8259, "step": 1453 }, { "epoch": 0.25225537820957666, "grad_norm": 1.0634512901306152, "learning_rate": 2.5221162185602775e-05, "loss": 0.7368, "step": 1454 }, { "epoch": 0.25242886884108257, "grad_norm": 0.7911814451217651, "learning_rate": 2.5238508239375544e-05, "loss": 0.5845, "step": 1455 }, { "epoch": 0.25260235947258847, "grad_norm": 1.1727575063705444, "learning_rate": 2.525585429314831e-05, "loss": 0.5659, "step": 1456 }, { "epoch": 0.25277585010409437, "grad_norm": 1.3862674236297607, "learning_rate": 2.5273200346921077e-05, "loss": 0.8508, "step": 1457 }, { "epoch": 0.25294934073560027, "grad_norm": 2.0431294441223145, "learning_rate": 2.5290546400693842e-05, "loss": 0.7278, "step": 1458 }, { "epoch": 0.25312283136710617, "grad_norm": 0.8106757998466492, "learning_rate": 2.530789245446661e-05, "loss": 0.5602, "step": 1459 }, { "epoch": 0.2532963219986121, "grad_norm": 0.6260269284248352, "learning_rate": 2.5325238508239375e-05, "loss": 0.7405, "step": 1460 }, { "epoch": 0.253469812630118, "grad_norm": 0.846102237701416, "learning_rate": 2.5342584562012144e-05, "loss": 0.6794, "step": 1461 }, { "epoch": 0.2536433032616239, "grad_norm": 0.7476555705070496, "learning_rate": 2.535993061578491e-05, "loss": 0.6829, "step": 1462 }, { "epoch": 0.2538167938931298, "grad_norm": 1.4022223949432373, "learning_rate": 2.5377276669557674e-05, "loss": 0.5559, "step": 1463 }, { "epoch": 0.2539902845246357, "grad_norm": 0.8950586915016174, "learning_rate": 2.5394622723330446e-05, "loss": 0.6235, "step": 1464 }, { "epoch": 0.2541637751561416, "grad_norm": 0.9371039271354675, "learning_rate": 2.5411968777103214e-05, "loss": 0.5811, "step": 1465 }, { "epoch": 0.2543372657876475, "grad_norm": 0.819125771522522, "learning_rate": 2.542931483087598e-05, "loss": 0.6378, "step": 1466 }, { "epoch": 0.2545107564191534, "grad_norm": 0.9547324180603027, "learning_rate": 2.5446660884648747e-05, "loss": 0.6903, "step": 1467 }, { "epoch": 0.2546842470506593, "grad_norm": 0.8518248796463013, "learning_rate": 2.5464006938421512e-05, "loss": 0.584, "step": 1468 }, { "epoch": 0.2548577376821652, "grad_norm": 0.8062921762466431, "learning_rate": 2.548135299219428e-05, "loss": 0.6942, "step": 1469 }, { "epoch": 0.2550312283136711, "grad_norm": 1.0598679780960083, "learning_rate": 2.5498699045967046e-05, "loss": 0.5164, "step": 1470 }, { "epoch": 0.25520471894517693, "grad_norm": 1.1887106895446777, "learning_rate": 2.551604509973981e-05, "loss": 0.6323, "step": 1471 }, { "epoch": 0.25537820957668284, "grad_norm": 1.7033594846725464, "learning_rate": 2.553339115351258e-05, "loss": 0.6974, "step": 1472 }, { "epoch": 0.25555170020818874, "grad_norm": 0.6564008593559265, "learning_rate": 2.5550737207285344e-05, "loss": 0.6829, "step": 1473 }, { "epoch": 0.25572519083969464, "grad_norm": 1.0994482040405273, "learning_rate": 2.5568083261058113e-05, "loss": 0.5802, "step": 1474 }, { "epoch": 0.25589868147120054, "grad_norm": 0.7509127259254456, "learning_rate": 2.5585429314830878e-05, "loss": 0.5813, "step": 1475 }, { "epoch": 0.25607217210270644, "grad_norm": 1.0179251432418823, "learning_rate": 2.5602775368603646e-05, "loss": 0.6151, "step": 1476 }, { "epoch": 0.25624566273421234, "grad_norm": 0.833226203918457, "learning_rate": 2.562012142237641e-05, "loss": 0.6606, "step": 1477 }, { "epoch": 0.25641915336571824, "grad_norm": 0.8429149985313416, "learning_rate": 2.563746747614918e-05, "loss": 0.5847, "step": 1478 }, { "epoch": 0.25659264399722415, "grad_norm": 0.82933509349823, "learning_rate": 2.5654813529921944e-05, "loss": 0.5924, "step": 1479 }, { "epoch": 0.25676613462873005, "grad_norm": 0.7633499503135681, "learning_rate": 2.5672159583694713e-05, "loss": 0.606, "step": 1480 }, { "epoch": 0.25693962526023595, "grad_norm": 0.8920152187347412, "learning_rate": 2.5689505637467478e-05, "loss": 0.6642, "step": 1481 }, { "epoch": 0.25711311589174185, "grad_norm": 1.372825026512146, "learning_rate": 2.5706851691240243e-05, "loss": 0.7399, "step": 1482 }, { "epoch": 0.25728660652324775, "grad_norm": 0.8788719177246094, "learning_rate": 2.572419774501301e-05, "loss": 0.6349, "step": 1483 }, { "epoch": 0.25746009715475365, "grad_norm": 0.9693309664726257, "learning_rate": 2.5741543798785776e-05, "loss": 0.6376, "step": 1484 }, { "epoch": 0.25763358778625955, "grad_norm": 0.7878218293190002, "learning_rate": 2.5758889852558545e-05, "loss": 0.5402, "step": 1485 }, { "epoch": 0.25780707841776546, "grad_norm": 0.8361150026321411, "learning_rate": 2.577623590633131e-05, "loss": 0.598, "step": 1486 }, { "epoch": 0.25798056904927136, "grad_norm": 0.7228373289108276, "learning_rate": 2.5793581960104078e-05, "loss": 0.7197, "step": 1487 }, { "epoch": 0.25815405968077726, "grad_norm": 4.787299156188965, "learning_rate": 2.5810928013876843e-05, "loss": 0.6613, "step": 1488 }, { "epoch": 0.25832755031228316, "grad_norm": 0.8377718329429626, "learning_rate": 2.582827406764961e-05, "loss": 0.712, "step": 1489 }, { "epoch": 0.25850104094378906, "grad_norm": 1.3003144264221191, "learning_rate": 2.5845620121422376e-05, "loss": 0.6018, "step": 1490 }, { "epoch": 0.2586745315752949, "grad_norm": 0.7731533646583557, "learning_rate": 2.5862966175195145e-05, "loss": 0.6294, "step": 1491 }, { "epoch": 0.2588480222068008, "grad_norm": 0.8601460456848145, "learning_rate": 2.588031222896791e-05, "loss": 0.6941, "step": 1492 }, { "epoch": 0.2590215128383067, "grad_norm": 0.8807269334793091, "learning_rate": 2.589765828274068e-05, "loss": 0.5585, "step": 1493 }, { "epoch": 0.2591950034698126, "grad_norm": 0.9132180213928223, "learning_rate": 2.5915004336513447e-05, "loss": 0.5447, "step": 1494 }, { "epoch": 0.2593684941013185, "grad_norm": 1.032853364944458, "learning_rate": 2.5932350390286215e-05, "loss": 0.6312, "step": 1495 }, { "epoch": 0.2595419847328244, "grad_norm": 0.8208354711532593, "learning_rate": 2.594969644405898e-05, "loss": 0.5564, "step": 1496 }, { "epoch": 0.2597154753643303, "grad_norm": 0.7401357889175415, "learning_rate": 2.5967042497831748e-05, "loss": 0.6831, "step": 1497 }, { "epoch": 0.2598889659958362, "grad_norm": 0.6182374358177185, "learning_rate": 2.5984388551604513e-05, "loss": 0.7275, "step": 1498 }, { "epoch": 0.2600624566273421, "grad_norm": 0.8194847702980042, "learning_rate": 2.600173460537728e-05, "loss": 0.5455, "step": 1499 }, { "epoch": 0.260235947258848, "grad_norm": 0.8735288381576538, "learning_rate": 2.6019080659150047e-05, "loss": 0.5653, "step": 1500 }, { "epoch": 0.2604094378903539, "grad_norm": 0.9586334824562073, "learning_rate": 2.603642671292281e-05, "loss": 0.6289, "step": 1501 }, { "epoch": 0.2605829285218598, "grad_norm": 2.6775732040405273, "learning_rate": 2.605377276669558e-05, "loss": 0.5901, "step": 1502 }, { "epoch": 0.2607564191533657, "grad_norm": 0.7645121216773987, "learning_rate": 2.6071118820468345e-05, "loss": 0.7407, "step": 1503 }, { "epoch": 0.2609299097848716, "grad_norm": 0.8603814840316772, "learning_rate": 2.6088464874241113e-05, "loss": 0.6304, "step": 1504 }, { "epoch": 0.26110340041637753, "grad_norm": 0.8805418014526367, "learning_rate": 2.610581092801388e-05, "loss": 0.6538, "step": 1505 }, { "epoch": 0.26127689104788343, "grad_norm": 0.7971379160881042, "learning_rate": 2.6123156981786647e-05, "loss": 0.6194, "step": 1506 }, { "epoch": 0.26145038167938933, "grad_norm": 1.210202932357788, "learning_rate": 2.6140503035559412e-05, "loss": 0.6835, "step": 1507 }, { "epoch": 0.26162387231089523, "grad_norm": 0.8352237939834595, "learning_rate": 2.615784908933218e-05, "loss": 0.6198, "step": 1508 }, { "epoch": 0.26179736294240114, "grad_norm": 1.833086371421814, "learning_rate": 2.6175195143104945e-05, "loss": 0.7163, "step": 1509 }, { "epoch": 0.26197085357390704, "grad_norm": 0.8619792461395264, "learning_rate": 2.6192541196877714e-05, "loss": 0.6056, "step": 1510 }, { "epoch": 0.2621443442054129, "grad_norm": 1.010326862335205, "learning_rate": 2.620988725065048e-05, "loss": 0.74, "step": 1511 }, { "epoch": 0.2623178348369188, "grad_norm": 0.6972298622131348, "learning_rate": 2.6227233304423244e-05, "loss": 0.7306, "step": 1512 }, { "epoch": 0.2624913254684247, "grad_norm": 0.8568160533905029, "learning_rate": 2.6244579358196012e-05, "loss": 0.6146, "step": 1513 }, { "epoch": 0.2626648160999306, "grad_norm": 0.7707713842391968, "learning_rate": 2.6261925411968777e-05, "loss": 0.6111, "step": 1514 }, { "epoch": 0.2628383067314365, "grad_norm": 0.9823936820030212, "learning_rate": 2.6279271465741545e-05, "loss": 0.6719, "step": 1515 }, { "epoch": 0.2630117973629424, "grad_norm": 0.9398236274719238, "learning_rate": 2.629661751951431e-05, "loss": 0.5513, "step": 1516 }, { "epoch": 0.2631852879944483, "grad_norm": 0.8186742067337036, "learning_rate": 2.631396357328708e-05, "loss": 0.5397, "step": 1517 }, { "epoch": 0.2633587786259542, "grad_norm": 0.7485017776489258, "learning_rate": 2.6331309627059844e-05, "loss": 0.637, "step": 1518 }, { "epoch": 0.2635322692574601, "grad_norm": 1.1243284940719604, "learning_rate": 2.6348655680832612e-05, "loss": 0.5631, "step": 1519 }, { "epoch": 0.263705759888966, "grad_norm": 0.7137185335159302, "learning_rate": 2.6366001734605377e-05, "loss": 0.5881, "step": 1520 }, { "epoch": 0.2638792505204719, "grad_norm": 4.920660972595215, "learning_rate": 2.6383347788378146e-05, "loss": 0.5968, "step": 1521 }, { "epoch": 0.2640527411519778, "grad_norm": 0.6850613355636597, "learning_rate": 2.640069384215091e-05, "loss": 0.6979, "step": 1522 }, { "epoch": 0.2642262317834837, "grad_norm": 1.2354705333709717, "learning_rate": 2.6418039895923682e-05, "loss": 0.5844, "step": 1523 }, { "epoch": 0.2643997224149896, "grad_norm": 3.75435471534729, "learning_rate": 2.6435385949696447e-05, "loss": 0.5521, "step": 1524 }, { "epoch": 0.2645732130464955, "grad_norm": 0.9748126268386841, "learning_rate": 2.6452732003469216e-05, "loss": 0.6317, "step": 1525 }, { "epoch": 0.2647467036780014, "grad_norm": 0.8047106862068176, "learning_rate": 2.647007805724198e-05, "loss": 0.6383, "step": 1526 }, { "epoch": 0.2649201943095073, "grad_norm": 0.9678272008895874, "learning_rate": 2.648742411101475e-05, "loss": 0.5825, "step": 1527 }, { "epoch": 0.2650936849410132, "grad_norm": 0.713713526725769, "learning_rate": 2.6504770164787514e-05, "loss": 0.6475, "step": 1528 }, { "epoch": 0.2652671755725191, "grad_norm": 0.851690411567688, "learning_rate": 2.6522116218560283e-05, "loss": 0.7598, "step": 1529 }, { "epoch": 0.26544066620402496, "grad_norm": 1.8548156023025513, "learning_rate": 2.6539462272333048e-05, "loss": 0.618, "step": 1530 }, { "epoch": 0.26561415683553086, "grad_norm": 0.7332980036735535, "learning_rate": 2.6556808326105813e-05, "loss": 0.791, "step": 1531 }, { "epoch": 0.26578764746703676, "grad_norm": 3.4511547088623047, "learning_rate": 2.657415437987858e-05, "loss": 0.6028, "step": 1532 }, { "epoch": 0.26596113809854266, "grad_norm": 0.759696364402771, "learning_rate": 2.6591500433651346e-05, "loss": 0.614, "step": 1533 }, { "epoch": 0.26613462873004856, "grad_norm": 0.8297407031059265, "learning_rate": 2.6608846487424114e-05, "loss": 0.626, "step": 1534 }, { "epoch": 0.26630811936155446, "grad_norm": 0.936953067779541, "learning_rate": 2.662619254119688e-05, "loss": 0.6895, "step": 1535 }, { "epoch": 0.26648160999306036, "grad_norm": 1.5277049541473389, "learning_rate": 2.6643538594969648e-05, "loss": 0.623, "step": 1536 }, { "epoch": 0.26665510062456627, "grad_norm": 1.0148879289627075, "learning_rate": 2.6660884648742413e-05, "loss": 0.6768, "step": 1537 }, { "epoch": 0.26682859125607217, "grad_norm": 0.8687107563018799, "learning_rate": 2.667823070251518e-05, "loss": 0.5505, "step": 1538 }, { "epoch": 0.26700208188757807, "grad_norm": 2.721482038497925, "learning_rate": 2.6695576756287946e-05, "loss": 0.6248, "step": 1539 }, { "epoch": 0.26717557251908397, "grad_norm": 1.6190732717514038, "learning_rate": 2.6712922810060714e-05, "loss": 0.5505, "step": 1540 }, { "epoch": 0.26734906315058987, "grad_norm": 0.9620177745819092, "learning_rate": 2.673026886383348e-05, "loss": 0.5226, "step": 1541 }, { "epoch": 0.2675225537820958, "grad_norm": 0.9856328368186951, "learning_rate": 2.6747614917606245e-05, "loss": 0.5323, "step": 1542 }, { "epoch": 0.2676960444136017, "grad_norm": 1.3991506099700928, "learning_rate": 2.6764960971379013e-05, "loss": 0.5851, "step": 1543 }, { "epoch": 0.2678695350451076, "grad_norm": 0.6915868520736694, "learning_rate": 2.6782307025151778e-05, "loss": 0.7461, "step": 1544 }, { "epoch": 0.2680430256766135, "grad_norm": 0.7301471829414368, "learning_rate": 2.6799653078924546e-05, "loss": 0.6425, "step": 1545 }, { "epoch": 0.2682165163081194, "grad_norm": 0.7267754077911377, "learning_rate": 2.681699913269731e-05, "loss": 0.6333, "step": 1546 }, { "epoch": 0.2683900069396253, "grad_norm": 1.0410354137420654, "learning_rate": 2.683434518647008e-05, "loss": 0.6542, "step": 1547 }, { "epoch": 0.2685634975711312, "grad_norm": 1.1848565340042114, "learning_rate": 2.6851691240242845e-05, "loss": 0.6288, "step": 1548 }, { "epoch": 0.2687369882026371, "grad_norm": 0.6985346078872681, "learning_rate": 2.6869037294015613e-05, "loss": 0.7563, "step": 1549 }, { "epoch": 0.26891047883414293, "grad_norm": 0.7319064140319824, "learning_rate": 2.6886383347788378e-05, "loss": 0.7356, "step": 1550 }, { "epoch": 0.26908396946564883, "grad_norm": 0.8546984195709229, "learning_rate": 2.6903729401561146e-05, "loss": 0.6451, "step": 1551 }, { "epoch": 0.26925746009715473, "grad_norm": 0.7902388572692871, "learning_rate": 2.692107545533391e-05, "loss": 0.7434, "step": 1552 }, { "epoch": 0.26943095072866063, "grad_norm": 0.9439723491668701, "learning_rate": 2.6938421509106683e-05, "loss": 0.6918, "step": 1553 }, { "epoch": 0.26960444136016654, "grad_norm": 1.1431201696395874, "learning_rate": 2.6955767562879448e-05, "loss": 0.5726, "step": 1554 }, { "epoch": 0.26977793199167244, "grad_norm": 0.9502336978912354, "learning_rate": 2.6973113616652217e-05, "loss": 0.6831, "step": 1555 }, { "epoch": 0.26995142262317834, "grad_norm": 0.6996731162071228, "learning_rate": 2.699045967042498e-05, "loss": 0.5747, "step": 1556 }, { "epoch": 0.27012491325468424, "grad_norm": 0.8544343113899231, "learning_rate": 2.700780572419775e-05, "loss": 0.6241, "step": 1557 }, { "epoch": 0.27029840388619014, "grad_norm": 1.1212594509124756, "learning_rate": 2.7025151777970515e-05, "loss": 0.6632, "step": 1558 }, { "epoch": 0.27047189451769604, "grad_norm": 0.8834282159805298, "learning_rate": 2.7042497831743283e-05, "loss": 0.6011, "step": 1559 }, { "epoch": 0.27064538514920194, "grad_norm": 0.7834562063217163, "learning_rate": 2.705984388551605e-05, "loss": 0.6708, "step": 1560 }, { "epoch": 0.27081887578070785, "grad_norm": 0.6939826607704163, "learning_rate": 2.7077189939288813e-05, "loss": 0.6743, "step": 1561 }, { "epoch": 0.27099236641221375, "grad_norm": 0.9736182689666748, "learning_rate": 2.7094535993061582e-05, "loss": 0.5938, "step": 1562 }, { "epoch": 0.27116585704371965, "grad_norm": 1.8810274600982666, "learning_rate": 2.7111882046834347e-05, "loss": 0.7124, "step": 1563 }, { "epoch": 0.27133934767522555, "grad_norm": 1.272519588470459, "learning_rate": 2.7129228100607115e-05, "loss": 0.6328, "step": 1564 }, { "epoch": 0.27151283830673145, "grad_norm": 0.8820292949676514, "learning_rate": 2.714657415437988e-05, "loss": 0.5922, "step": 1565 }, { "epoch": 0.27168632893823735, "grad_norm": 0.808553159236908, "learning_rate": 2.716392020815265e-05, "loss": 0.5687, "step": 1566 }, { "epoch": 0.27185981956974326, "grad_norm": 0.79135662317276, "learning_rate": 2.7181266261925414e-05, "loss": 0.5488, "step": 1567 }, { "epoch": 0.27203331020124916, "grad_norm": 0.8797126412391663, "learning_rate": 2.7198612315698182e-05, "loss": 0.5465, "step": 1568 }, { "epoch": 0.27220680083275506, "grad_norm": 1.229600191116333, "learning_rate": 2.7215958369470947e-05, "loss": 0.6514, "step": 1569 }, { "epoch": 0.2723802914642609, "grad_norm": 0.7109715342521667, "learning_rate": 2.7233304423243715e-05, "loss": 0.6831, "step": 1570 }, { "epoch": 0.2725537820957668, "grad_norm": 1.7438849210739136, "learning_rate": 2.725065047701648e-05, "loss": 0.548, "step": 1571 }, { "epoch": 0.2727272727272727, "grad_norm": 1.8204153776168823, "learning_rate": 2.7267996530789245e-05, "loss": 0.6622, "step": 1572 }, { "epoch": 0.2729007633587786, "grad_norm": 0.8696473240852356, "learning_rate": 2.7285342584562014e-05, "loss": 0.6555, "step": 1573 }, { "epoch": 0.2730742539902845, "grad_norm": 0.8713580965995789, "learning_rate": 2.730268863833478e-05, "loss": 0.6252, "step": 1574 }, { "epoch": 0.2732477446217904, "grad_norm": 0.9230552911758423, "learning_rate": 2.7320034692107547e-05, "loss": 0.7051, "step": 1575 }, { "epoch": 0.2734212352532963, "grad_norm": 0.9991665482521057, "learning_rate": 2.7337380745880312e-05, "loss": 0.6189, "step": 1576 }, { "epoch": 0.2735947258848022, "grad_norm": 1.474080204963684, "learning_rate": 2.735472679965308e-05, "loss": 0.6189, "step": 1577 }, { "epoch": 0.2737682165163081, "grad_norm": 0.8902952671051025, "learning_rate": 2.7372072853425846e-05, "loss": 0.6364, "step": 1578 }, { "epoch": 0.273941707147814, "grad_norm": 1.0150178670883179, "learning_rate": 2.7389418907198614e-05, "loss": 0.7532, "step": 1579 }, { "epoch": 0.2741151977793199, "grad_norm": 3.1479642391204834, "learning_rate": 2.740676496097138e-05, "loss": 0.6188, "step": 1580 }, { "epoch": 0.2742886884108258, "grad_norm": 0.7297648191452026, "learning_rate": 2.7424111014744147e-05, "loss": 0.7668, "step": 1581 }, { "epoch": 0.2744621790423317, "grad_norm": 0.6790329217910767, "learning_rate": 2.7441457068516916e-05, "loss": 0.6566, "step": 1582 }, { "epoch": 0.2746356696738376, "grad_norm": 0.7647008299827576, "learning_rate": 2.7458803122289684e-05, "loss": 0.6169, "step": 1583 }, { "epoch": 0.2748091603053435, "grad_norm": 0.9886143207550049, "learning_rate": 2.747614917606245e-05, "loss": 0.6399, "step": 1584 }, { "epoch": 0.2749826509368494, "grad_norm": 0.8156219720840454, "learning_rate": 2.7493495229835217e-05, "loss": 0.6313, "step": 1585 }, { "epoch": 0.27515614156835533, "grad_norm": 0.7117592096328735, "learning_rate": 2.7510841283607982e-05, "loss": 0.6704, "step": 1586 }, { "epoch": 0.27532963219986123, "grad_norm": 0.8192128539085388, "learning_rate": 2.752818733738075e-05, "loss": 0.5906, "step": 1587 }, { "epoch": 0.27550312283136713, "grad_norm": 0.9606592655181885, "learning_rate": 2.7545533391153516e-05, "loss": 0.5538, "step": 1588 }, { "epoch": 0.27567661346287303, "grad_norm": 1.0025795698165894, "learning_rate": 2.7562879444926284e-05, "loss": 0.567, "step": 1589 }, { "epoch": 0.2758501040943789, "grad_norm": 0.6096360087394714, "learning_rate": 2.758022549869905e-05, "loss": 0.708, "step": 1590 }, { "epoch": 0.2760235947258848, "grad_norm": 0.6717957854270935, "learning_rate": 2.7597571552471814e-05, "loss": 0.6978, "step": 1591 }, { "epoch": 0.2761970853573907, "grad_norm": 0.9949507713317871, "learning_rate": 2.7614917606244583e-05, "loss": 0.5021, "step": 1592 }, { "epoch": 0.2763705759888966, "grad_norm": 0.8604462742805481, "learning_rate": 2.7632263660017348e-05, "loss": 0.5776, "step": 1593 }, { "epoch": 0.2765440666204025, "grad_norm": 1.1553502082824707, "learning_rate": 2.7649609713790116e-05, "loss": 0.5986, "step": 1594 }, { "epoch": 0.2767175572519084, "grad_norm": 0.768334686756134, "learning_rate": 2.766695576756288e-05, "loss": 0.6689, "step": 1595 }, { "epoch": 0.2768910478834143, "grad_norm": 0.8372496366500854, "learning_rate": 2.768430182133565e-05, "loss": 0.6992, "step": 1596 }, { "epoch": 0.2770645385149202, "grad_norm": 1.0399575233459473, "learning_rate": 2.7701647875108414e-05, "loss": 0.6202, "step": 1597 }, { "epoch": 0.2772380291464261, "grad_norm": 1.0247970819473267, "learning_rate": 2.7718993928881183e-05, "loss": 0.76, "step": 1598 }, { "epoch": 0.277411519777932, "grad_norm": 0.810613214969635, "learning_rate": 2.7736339982653948e-05, "loss": 0.6338, "step": 1599 }, { "epoch": 0.2775850104094379, "grad_norm": 1.4586679935455322, "learning_rate": 2.7753686036426716e-05, "loss": 0.6869, "step": 1600 }, { "epoch": 0.2777585010409438, "grad_norm": 0.9183869957923889, "learning_rate": 2.777103209019948e-05, "loss": 0.6377, "step": 1601 }, { "epoch": 0.2779319916724497, "grad_norm": 0.9354574084281921, "learning_rate": 2.7788378143972246e-05, "loss": 0.6223, "step": 1602 }, { "epoch": 0.2781054823039556, "grad_norm": 0.6997314095497131, "learning_rate": 2.7805724197745015e-05, "loss": 0.6864, "step": 1603 }, { "epoch": 0.2782789729354615, "grad_norm": 0.981356680393219, "learning_rate": 2.782307025151778e-05, "loss": 0.5828, "step": 1604 }, { "epoch": 0.2784524635669674, "grad_norm": 0.7175416350364685, "learning_rate": 2.7840416305290548e-05, "loss": 0.5935, "step": 1605 }, { "epoch": 0.2786259541984733, "grad_norm": 0.7980823516845703, "learning_rate": 2.7857762359063313e-05, "loss": 0.5989, "step": 1606 }, { "epoch": 0.2787994448299792, "grad_norm": 0.7140958905220032, "learning_rate": 2.787510841283608e-05, "loss": 0.6205, "step": 1607 }, { "epoch": 0.2789729354614851, "grad_norm": 0.7325379252433777, "learning_rate": 2.7892454466608846e-05, "loss": 0.6599, "step": 1608 }, { "epoch": 0.27914642609299095, "grad_norm": 1.1121330261230469, "learning_rate": 2.7909800520381615e-05, "loss": 0.7002, "step": 1609 }, { "epoch": 0.27931991672449685, "grad_norm": 0.8870050311088562, "learning_rate": 2.792714657415438e-05, "loss": 0.5525, "step": 1610 }, { "epoch": 0.27949340735600275, "grad_norm": 1.183931827545166, "learning_rate": 2.7944492627927148e-05, "loss": 0.6285, "step": 1611 }, { "epoch": 0.27966689798750866, "grad_norm": 1.1623479127883911, "learning_rate": 2.7961838681699917e-05, "loss": 0.5328, "step": 1612 }, { "epoch": 0.27984038861901456, "grad_norm": 1.0510090589523315, "learning_rate": 2.7979184735472685e-05, "loss": 0.5796, "step": 1613 }, { "epoch": 0.28001387925052046, "grad_norm": 0.9020524024963379, "learning_rate": 2.799653078924545e-05, "loss": 0.5897, "step": 1614 }, { "epoch": 0.28018736988202636, "grad_norm": 2.9960711002349854, "learning_rate": 2.801387684301822e-05, "loss": 0.6235, "step": 1615 }, { "epoch": 0.28036086051353226, "grad_norm": 0.8520289063453674, "learning_rate": 2.8031222896790983e-05, "loss": 0.6537, "step": 1616 }, { "epoch": 0.28053435114503816, "grad_norm": 1.2885127067565918, "learning_rate": 2.8048568950563752e-05, "loss": 0.5812, "step": 1617 }, { "epoch": 0.28070784177654406, "grad_norm": 0.9299231171607971, "learning_rate": 2.8065915004336517e-05, "loss": 0.5502, "step": 1618 }, { "epoch": 0.28088133240804997, "grad_norm": 0.761440098285675, "learning_rate": 2.8083261058109285e-05, "loss": 0.6263, "step": 1619 }, { "epoch": 0.28105482303955587, "grad_norm": 0.8906003832817078, "learning_rate": 2.810060711188205e-05, "loss": 0.6716, "step": 1620 }, { "epoch": 0.28122831367106177, "grad_norm": 0.9329630732536316, "learning_rate": 2.8117953165654815e-05, "loss": 0.712, "step": 1621 }, { "epoch": 0.28140180430256767, "grad_norm": 0.9584671854972839, "learning_rate": 2.8135299219427584e-05, "loss": 0.5972, "step": 1622 }, { "epoch": 0.28157529493407357, "grad_norm": 2.436326026916504, "learning_rate": 2.815264527320035e-05, "loss": 0.5856, "step": 1623 }, { "epoch": 0.2817487855655795, "grad_norm": 0.8243262767791748, "learning_rate": 2.8169991326973117e-05, "loss": 0.5275, "step": 1624 }, { "epoch": 0.2819222761970854, "grad_norm": 0.7558211088180542, "learning_rate": 2.8187337380745882e-05, "loss": 0.7168, "step": 1625 }, { "epoch": 0.2820957668285913, "grad_norm": 0.6931042075157166, "learning_rate": 2.820468343451865e-05, "loss": 0.6819, "step": 1626 }, { "epoch": 0.2822692574600972, "grad_norm": 1.038571834564209, "learning_rate": 2.8222029488291415e-05, "loss": 0.5641, "step": 1627 }, { "epoch": 0.2824427480916031, "grad_norm": 1.068108320236206, "learning_rate": 2.8239375542064184e-05, "loss": 0.537, "step": 1628 }, { "epoch": 0.2826162387231089, "grad_norm": 1.0364161729812622, "learning_rate": 2.825672159583695e-05, "loss": 0.676, "step": 1629 }, { "epoch": 0.2827897293546148, "grad_norm": 1.1429134607315063, "learning_rate": 2.8274067649609717e-05, "loss": 0.5065, "step": 1630 }, { "epoch": 0.28296321998612073, "grad_norm": 0.7850894331932068, "learning_rate": 2.8291413703382482e-05, "loss": 0.6429, "step": 1631 }, { "epoch": 0.28313671061762663, "grad_norm": 0.8497446775436401, "learning_rate": 2.8308759757155247e-05, "loss": 0.5441, "step": 1632 }, { "epoch": 0.28331020124913253, "grad_norm": 1.1594936847686768, "learning_rate": 2.8326105810928015e-05, "loss": 0.5991, "step": 1633 }, { "epoch": 0.28348369188063843, "grad_norm": 0.780320405960083, "learning_rate": 2.834345186470078e-05, "loss": 0.6357, "step": 1634 }, { "epoch": 0.28365718251214433, "grad_norm": 0.816405177116394, "learning_rate": 2.836079791847355e-05, "loss": 0.5743, "step": 1635 }, { "epoch": 0.28383067314365024, "grad_norm": 1.3144505023956299, "learning_rate": 2.8378143972246314e-05, "loss": 0.5634, "step": 1636 }, { "epoch": 0.28400416377515614, "grad_norm": 0.9545460343360901, "learning_rate": 2.8395490026019082e-05, "loss": 0.5889, "step": 1637 }, { "epoch": 0.28417765440666204, "grad_norm": 1.016923189163208, "learning_rate": 2.8412836079791847e-05, "loss": 0.6056, "step": 1638 }, { "epoch": 0.28435114503816794, "grad_norm": 1.6665234565734863, "learning_rate": 2.8430182133564616e-05, "loss": 0.6687, "step": 1639 }, { "epoch": 0.28452463566967384, "grad_norm": 1.1342241764068604, "learning_rate": 2.844752818733738e-05, "loss": 0.5743, "step": 1640 }, { "epoch": 0.28469812630117974, "grad_norm": 0.7941092252731323, "learning_rate": 2.8464874241110152e-05, "loss": 0.6633, "step": 1641 }, { "epoch": 0.28487161693268565, "grad_norm": 0.8998805284500122, "learning_rate": 2.8482220294882917e-05, "loss": 0.6926, "step": 1642 }, { "epoch": 0.28504510756419155, "grad_norm": 1.1339398622512817, "learning_rate": 2.8499566348655686e-05, "loss": 0.7589, "step": 1643 }, { "epoch": 0.28521859819569745, "grad_norm": 0.6638227701187134, "learning_rate": 2.851691240242845e-05, "loss": 0.7004, "step": 1644 }, { "epoch": 0.28539208882720335, "grad_norm": 1.128553867340088, "learning_rate": 2.853425845620122e-05, "loss": 0.5492, "step": 1645 }, { "epoch": 0.28556557945870925, "grad_norm": 0.9050806164741516, "learning_rate": 2.8551604509973984e-05, "loss": 0.627, "step": 1646 }, { "epoch": 0.28573907009021515, "grad_norm": 0.7931371331214905, "learning_rate": 2.8568950563746753e-05, "loss": 0.6644, "step": 1647 }, { "epoch": 0.28591256072172105, "grad_norm": 0.6553512811660767, "learning_rate": 2.8586296617519518e-05, "loss": 0.6649, "step": 1648 }, { "epoch": 0.2860860513532269, "grad_norm": 0.7781357765197754, "learning_rate": 2.8603642671292286e-05, "loss": 0.7988, "step": 1649 }, { "epoch": 0.2862595419847328, "grad_norm": 0.7760520577430725, "learning_rate": 2.862098872506505e-05, "loss": 0.6641, "step": 1650 }, { "epoch": 0.2864330326162387, "grad_norm": 1.9323515892028809, "learning_rate": 2.8638334778837816e-05, "loss": 0.6757, "step": 1651 }, { "epoch": 0.2866065232477446, "grad_norm": 0.767174243927002, "learning_rate": 2.8655680832610584e-05, "loss": 0.6904, "step": 1652 }, { "epoch": 0.2867800138792505, "grad_norm": 0.9943397641181946, "learning_rate": 2.867302688638335e-05, "loss": 0.7026, "step": 1653 }, { "epoch": 0.2869535045107564, "grad_norm": 0.8809281587600708, "learning_rate": 2.8690372940156118e-05, "loss": 0.4868, "step": 1654 }, { "epoch": 0.2871269951422623, "grad_norm": 0.758124053478241, "learning_rate": 2.8707718993928883e-05, "loss": 0.5481, "step": 1655 }, { "epoch": 0.2873004857737682, "grad_norm": 0.978379487991333, "learning_rate": 2.872506504770165e-05, "loss": 0.7194, "step": 1656 }, { "epoch": 0.2874739764052741, "grad_norm": 0.686726450920105, "learning_rate": 2.8742411101474416e-05, "loss": 0.7852, "step": 1657 }, { "epoch": 0.28764746703678, "grad_norm": 0.8651068210601807, "learning_rate": 2.8759757155247185e-05, "loss": 0.582, "step": 1658 }, { "epoch": 0.2878209576682859, "grad_norm": 1.214903712272644, "learning_rate": 2.877710320901995e-05, "loss": 0.5227, "step": 1659 }, { "epoch": 0.2879944482997918, "grad_norm": 0.8349252939224243, "learning_rate": 2.8794449262792718e-05, "loss": 0.5859, "step": 1660 }, { "epoch": 0.2881679389312977, "grad_norm": 1.026668906211853, "learning_rate": 2.8811795316565483e-05, "loss": 0.52, "step": 1661 }, { "epoch": 0.2883414295628036, "grad_norm": 2.8020482063293457, "learning_rate": 2.8829141370338248e-05, "loss": 0.7031, "step": 1662 }, { "epoch": 0.2885149201943095, "grad_norm": 1.0019539594650269, "learning_rate": 2.8846487424111016e-05, "loss": 0.641, "step": 1663 }, { "epoch": 0.2886884108258154, "grad_norm": 0.8635918498039246, "learning_rate": 2.886383347788378e-05, "loss": 0.572, "step": 1664 }, { "epoch": 0.2888619014573213, "grad_norm": 1.5796480178833008, "learning_rate": 2.888117953165655e-05, "loss": 0.5725, "step": 1665 }, { "epoch": 0.2890353920888272, "grad_norm": 0.8226332068443298, "learning_rate": 2.8898525585429315e-05, "loss": 0.6166, "step": 1666 }, { "epoch": 0.2892088827203331, "grad_norm": 1.6667702198028564, "learning_rate": 2.8915871639202083e-05, "loss": 0.5413, "step": 1667 }, { "epoch": 0.28938237335183903, "grad_norm": 0.9546117186546326, "learning_rate": 2.8933217692974848e-05, "loss": 0.5764, "step": 1668 }, { "epoch": 0.2895558639833449, "grad_norm": 0.8718425631523132, "learning_rate": 2.8950563746747617e-05, "loss": 0.554, "step": 1669 }, { "epoch": 0.2897293546148508, "grad_norm": 1.003496527671814, "learning_rate": 2.896790980052038e-05, "loss": 0.547, "step": 1670 }, { "epoch": 0.2899028452463567, "grad_norm": 0.9424927234649658, "learning_rate": 2.8985255854293153e-05, "loss": 0.6836, "step": 1671 }, { "epoch": 0.2900763358778626, "grad_norm": 0.6520414352416992, "learning_rate": 2.900260190806592e-05, "loss": 0.6129, "step": 1672 }, { "epoch": 0.2902498265093685, "grad_norm": 0.6672245860099792, "learning_rate": 2.9019947961838687e-05, "loss": 0.6251, "step": 1673 }, { "epoch": 0.2904233171408744, "grad_norm": 0.8402858376502991, "learning_rate": 2.9037294015611452e-05, "loss": 0.7013, "step": 1674 }, { "epoch": 0.2905968077723803, "grad_norm": 0.6859344244003296, "learning_rate": 2.905464006938422e-05, "loss": 0.6455, "step": 1675 }, { "epoch": 0.2907702984038862, "grad_norm": 0.8473714590072632, "learning_rate": 2.9071986123156985e-05, "loss": 0.6072, "step": 1676 }, { "epoch": 0.2909437890353921, "grad_norm": 0.8517741560935974, "learning_rate": 2.9089332176929753e-05, "loss": 0.6248, "step": 1677 }, { "epoch": 0.291117279666898, "grad_norm": 1.1224453449249268, "learning_rate": 2.910667823070252e-05, "loss": 0.6846, "step": 1678 }, { "epoch": 0.2912907702984039, "grad_norm": 0.8093835115432739, "learning_rate": 2.9124024284475287e-05, "loss": 0.7305, "step": 1679 }, { "epoch": 0.2914642609299098, "grad_norm": 1.2922654151916504, "learning_rate": 2.9141370338248052e-05, "loss": 0.7435, "step": 1680 }, { "epoch": 0.2916377515614157, "grad_norm": 0.858190655708313, "learning_rate": 2.9158716392020817e-05, "loss": 0.6708, "step": 1681 }, { "epoch": 0.2918112421929216, "grad_norm": 0.7695908546447754, "learning_rate": 2.9176062445793585e-05, "loss": 0.7864, "step": 1682 }, { "epoch": 0.2919847328244275, "grad_norm": 0.981799840927124, "learning_rate": 2.919340849956635e-05, "loss": 0.673, "step": 1683 }, { "epoch": 0.2921582234559334, "grad_norm": 0.9595302939414978, "learning_rate": 2.921075455333912e-05, "loss": 0.7332, "step": 1684 }, { "epoch": 0.2923317140874393, "grad_norm": 0.9652135372161865, "learning_rate": 2.9228100607111884e-05, "loss": 0.5652, "step": 1685 }, { "epoch": 0.2925052047189452, "grad_norm": 1.3736512660980225, "learning_rate": 2.9245446660884652e-05, "loss": 0.6237, "step": 1686 }, { "epoch": 0.2926786953504511, "grad_norm": 0.8677798509597778, "learning_rate": 2.9262792714657417e-05, "loss": 0.632, "step": 1687 }, { "epoch": 0.29285218598195695, "grad_norm": 1.0175774097442627, "learning_rate": 2.9280138768430185e-05, "loss": 0.5555, "step": 1688 }, { "epoch": 0.29302567661346285, "grad_norm": 0.856152355670929, "learning_rate": 2.929748482220295e-05, "loss": 0.7053, "step": 1689 }, { "epoch": 0.29319916724496875, "grad_norm": 0.8478633165359497, "learning_rate": 2.931483087597572e-05, "loss": 0.6868, "step": 1690 }, { "epoch": 0.29337265787647465, "grad_norm": 0.8868247866630554, "learning_rate": 2.9332176929748484e-05, "loss": 0.6127, "step": 1691 }, { "epoch": 0.29354614850798055, "grad_norm": 0.8794786334037781, "learning_rate": 2.934952298352125e-05, "loss": 0.6006, "step": 1692 }, { "epoch": 0.29371963913948645, "grad_norm": 0.72084641456604, "learning_rate": 2.9366869037294017e-05, "loss": 0.7935, "step": 1693 }, { "epoch": 0.29389312977099236, "grad_norm": 0.9005531668663025, "learning_rate": 2.9384215091066782e-05, "loss": 0.6073, "step": 1694 }, { "epoch": 0.29406662040249826, "grad_norm": 0.8129706978797913, "learning_rate": 2.940156114483955e-05, "loss": 0.6124, "step": 1695 }, { "epoch": 0.29424011103400416, "grad_norm": 1.0862834453582764, "learning_rate": 2.9418907198612316e-05, "loss": 0.6075, "step": 1696 }, { "epoch": 0.29441360166551006, "grad_norm": 1.0599271059036255, "learning_rate": 2.9436253252385084e-05, "loss": 0.5315, "step": 1697 }, { "epoch": 0.29458709229701596, "grad_norm": 1.2996435165405273, "learning_rate": 2.945359930615785e-05, "loss": 0.5306, "step": 1698 }, { "epoch": 0.29476058292852186, "grad_norm": 0.9041792750358582, "learning_rate": 2.9470945359930617e-05, "loss": 0.536, "step": 1699 }, { "epoch": 0.29493407356002777, "grad_norm": 0.914151132106781, "learning_rate": 2.9488291413703386e-05, "loss": 0.5747, "step": 1700 }, { "epoch": 0.29510756419153367, "grad_norm": 0.828359067440033, "learning_rate": 2.9505637467476154e-05, "loss": 0.6954, "step": 1701 }, { "epoch": 0.29528105482303957, "grad_norm": 1.7115951776504517, "learning_rate": 2.952298352124892e-05, "loss": 0.5409, "step": 1702 }, { "epoch": 0.29545454545454547, "grad_norm": 0.9483741521835327, "learning_rate": 2.9540329575021688e-05, "loss": 0.6135, "step": 1703 }, { "epoch": 0.29562803608605137, "grad_norm": 0.8337392210960388, "learning_rate": 2.9557675628794453e-05, "loss": 0.6631, "step": 1704 }, { "epoch": 0.2958015267175573, "grad_norm": 0.7385498881340027, "learning_rate": 2.957502168256722e-05, "loss": 0.7437, "step": 1705 }, { "epoch": 0.2959750173490632, "grad_norm": 0.9643473625183105, "learning_rate": 2.9592367736339986e-05, "loss": 0.5907, "step": 1706 }, { "epoch": 0.2961485079805691, "grad_norm": 0.8231221437454224, "learning_rate": 2.9609713790112754e-05, "loss": 0.6471, "step": 1707 }, { "epoch": 0.2963219986120749, "grad_norm": 0.6505579352378845, "learning_rate": 2.962705984388552e-05, "loss": 0.6716, "step": 1708 }, { "epoch": 0.2964954892435808, "grad_norm": 0.8700175881385803, "learning_rate": 2.9644405897658284e-05, "loss": 0.6149, "step": 1709 }, { "epoch": 0.2966689798750867, "grad_norm": 0.7063069939613342, "learning_rate": 2.9661751951431053e-05, "loss": 0.6666, "step": 1710 }, { "epoch": 0.2968424705065926, "grad_norm": 0.8879273533821106, "learning_rate": 2.9679098005203818e-05, "loss": 0.6432, "step": 1711 }, { "epoch": 0.2970159611380985, "grad_norm": 0.7333043813705444, "learning_rate": 2.9696444058976586e-05, "loss": 0.5787, "step": 1712 }, { "epoch": 0.29718945176960443, "grad_norm": 0.6548595428466797, "learning_rate": 2.971379011274935e-05, "loss": 0.7612, "step": 1713 }, { "epoch": 0.29736294240111033, "grad_norm": 1.1210254430770874, "learning_rate": 2.973113616652212e-05, "loss": 0.6273, "step": 1714 }, { "epoch": 0.29753643303261623, "grad_norm": 0.7752652168273926, "learning_rate": 2.9748482220294885e-05, "loss": 0.781, "step": 1715 }, { "epoch": 0.29770992366412213, "grad_norm": 0.9227986931800842, "learning_rate": 2.9765828274067653e-05, "loss": 0.5767, "step": 1716 }, { "epoch": 0.29788341429562804, "grad_norm": 1.5093612670898438, "learning_rate": 2.9783174327840418e-05, "loss": 0.5686, "step": 1717 }, { "epoch": 0.29805690492713394, "grad_norm": 0.694484293460846, "learning_rate": 2.9800520381613186e-05, "loss": 0.7233, "step": 1718 }, { "epoch": 0.29823039555863984, "grad_norm": 0.8226703405380249, "learning_rate": 2.981786643538595e-05, "loss": 0.6245, "step": 1719 }, { "epoch": 0.29840388619014574, "grad_norm": 0.7781656384468079, "learning_rate": 2.983521248915872e-05, "loss": 0.6234, "step": 1720 }, { "epoch": 0.29857737682165164, "grad_norm": 0.7376680374145508, "learning_rate": 2.9852558542931485e-05, "loss": 0.632, "step": 1721 }, { "epoch": 0.29875086745315754, "grad_norm": 1.330215573310852, "learning_rate": 2.986990459670425e-05, "loss": 0.5302, "step": 1722 }, { "epoch": 0.29892435808466344, "grad_norm": 0.9246023297309875, "learning_rate": 2.9887250650477018e-05, "loss": 0.5, "step": 1723 }, { "epoch": 0.29909784871616935, "grad_norm": 0.6971983909606934, "learning_rate": 2.9904596704249783e-05, "loss": 0.5924, "step": 1724 }, { "epoch": 0.29927133934767525, "grad_norm": 1.3495919704437256, "learning_rate": 2.992194275802255e-05, "loss": 0.6742, "step": 1725 }, { "epoch": 0.29944482997918115, "grad_norm": 0.8896782994270325, "learning_rate": 2.9939288811795316e-05, "loss": 0.5295, "step": 1726 }, { "epoch": 0.29961832061068705, "grad_norm": 1.149600625038147, "learning_rate": 2.9956634865568085e-05, "loss": 0.6323, "step": 1727 }, { "epoch": 0.2997918112421929, "grad_norm": 0.6024688482284546, "learning_rate": 2.997398091934085e-05, "loss": 0.745, "step": 1728 }, { "epoch": 0.2999653018736988, "grad_norm": 0.7137205600738525, "learning_rate": 2.9991326973113618e-05, "loss": 0.5896, "step": 1729 }, { "epoch": 0.3001387925052047, "grad_norm": 0.5844210982322693, "learning_rate": 3.0008673026886387e-05, "loss": 0.72, "step": 1730 }, { "epoch": 0.3003122831367106, "grad_norm": 0.9469254612922668, "learning_rate": 3.0026019080659155e-05, "loss": 0.7002, "step": 1731 }, { "epoch": 0.3004857737682165, "grad_norm": 1.4320896863937378, "learning_rate": 3.004336513443192e-05, "loss": 0.5857, "step": 1732 }, { "epoch": 0.3006592643997224, "grad_norm": 0.819429337978363, "learning_rate": 3.006071118820469e-05, "loss": 0.6317, "step": 1733 }, { "epoch": 0.3008327550312283, "grad_norm": 0.8208953738212585, "learning_rate": 3.0078057241977453e-05, "loss": 0.5531, "step": 1734 }, { "epoch": 0.3010062456627342, "grad_norm": 1.2594075202941895, "learning_rate": 3.0095403295750222e-05, "loss": 0.6279, "step": 1735 }, { "epoch": 0.3011797362942401, "grad_norm": 0.8851017355918884, "learning_rate": 3.0112749349522987e-05, "loss": 0.5364, "step": 1736 }, { "epoch": 0.301353226925746, "grad_norm": 0.8470969796180725, "learning_rate": 3.0130095403295755e-05, "loss": 0.6034, "step": 1737 }, { "epoch": 0.3015267175572519, "grad_norm": 0.8872317671775818, "learning_rate": 3.014744145706852e-05, "loss": 0.7142, "step": 1738 }, { "epoch": 0.3017002081887578, "grad_norm": 0.6019777059555054, "learning_rate": 3.0164787510841285e-05, "loss": 0.7383, "step": 1739 }, { "epoch": 0.3018736988202637, "grad_norm": 0.7203370332717896, "learning_rate": 3.0182133564614054e-05, "loss": 0.6733, "step": 1740 }, { "epoch": 0.3020471894517696, "grad_norm": 0.9027160406112671, "learning_rate": 3.019947961838682e-05, "loss": 0.5337, "step": 1741 }, { "epoch": 0.3022206800832755, "grad_norm": 1.074172019958496, "learning_rate": 3.0216825672159587e-05, "loss": 0.6599, "step": 1742 }, { "epoch": 0.3023941707147814, "grad_norm": 0.9253829121589661, "learning_rate": 3.0234171725932352e-05, "loss": 0.6835, "step": 1743 }, { "epoch": 0.3025676613462873, "grad_norm": 0.9681320786476135, "learning_rate": 3.025151777970512e-05, "loss": 0.5883, "step": 1744 }, { "epoch": 0.3027411519777932, "grad_norm": 0.7276582717895508, "learning_rate": 3.0268863833477885e-05, "loss": 0.6689, "step": 1745 }, { "epoch": 0.3029146426092991, "grad_norm": 0.7069199681282043, "learning_rate": 3.0286209887250654e-05, "loss": 0.6892, "step": 1746 }, { "epoch": 0.30308813324080497, "grad_norm": 0.8422447443008423, "learning_rate": 3.030355594102342e-05, "loss": 0.6062, "step": 1747 }, { "epoch": 0.30326162387231087, "grad_norm": 2.7167630195617676, "learning_rate": 3.0320901994796187e-05, "loss": 0.7922, "step": 1748 }, { "epoch": 0.30343511450381677, "grad_norm": 0.8047178983688354, "learning_rate": 3.0338248048568952e-05, "loss": 0.7277, "step": 1749 }, { "epoch": 0.3036086051353227, "grad_norm": 0.9170768857002258, "learning_rate": 3.035559410234172e-05, "loss": 0.6207, "step": 1750 }, { "epoch": 0.3037820957668286, "grad_norm": 0.6820874810218811, "learning_rate": 3.0372940156114486e-05, "loss": 0.6847, "step": 1751 }, { "epoch": 0.3039555863983345, "grad_norm": 1.0593931674957275, "learning_rate": 3.039028620988725e-05, "loss": 0.6033, "step": 1752 }, { "epoch": 0.3041290770298404, "grad_norm": 1.0743542909622192, "learning_rate": 3.040763226366002e-05, "loss": 0.6537, "step": 1753 }, { "epoch": 0.3043025676613463, "grad_norm": 0.7283003330230713, "learning_rate": 3.0424978317432784e-05, "loss": 0.5768, "step": 1754 }, { "epoch": 0.3044760582928522, "grad_norm": 0.8777532577514648, "learning_rate": 3.0442324371205552e-05, "loss": 0.559, "step": 1755 }, { "epoch": 0.3046495489243581, "grad_norm": 0.8460490107536316, "learning_rate": 3.0459670424978317e-05, "loss": 0.6449, "step": 1756 }, { "epoch": 0.304823039555864, "grad_norm": 0.7011381983757019, "learning_rate": 3.0477016478751086e-05, "loss": 0.6785, "step": 1757 }, { "epoch": 0.3049965301873699, "grad_norm": 0.7739598751068115, "learning_rate": 3.049436253252385e-05, "loss": 0.6609, "step": 1758 }, { "epoch": 0.3051700208188758, "grad_norm": 0.6341903209686279, "learning_rate": 3.051170858629662e-05, "loss": 0.7423, "step": 1759 }, { "epoch": 0.3053435114503817, "grad_norm": 0.8954048752784729, "learning_rate": 3.052905464006939e-05, "loss": 0.5604, "step": 1760 }, { "epoch": 0.3055170020818876, "grad_norm": 0.7283293008804321, "learning_rate": 3.0546400693842156e-05, "loss": 0.6965, "step": 1761 }, { "epoch": 0.3056904927133935, "grad_norm": 0.7408492565155029, "learning_rate": 3.056374674761492e-05, "loss": 0.6975, "step": 1762 }, { "epoch": 0.3058639833448994, "grad_norm": 0.70777428150177, "learning_rate": 3.0581092801387686e-05, "loss": 0.6914, "step": 1763 }, { "epoch": 0.3060374739764053, "grad_norm": 0.6914678812026978, "learning_rate": 3.059843885516046e-05, "loss": 0.6989, "step": 1764 }, { "epoch": 0.3062109646079112, "grad_norm": 1.1912055015563965, "learning_rate": 3.061578490893322e-05, "loss": 0.5037, "step": 1765 }, { "epoch": 0.3063844552394171, "grad_norm": 0.9246727824211121, "learning_rate": 3.063313096270599e-05, "loss": 0.6804, "step": 1766 }, { "epoch": 0.30655794587092294, "grad_norm": 0.6906810402870178, "learning_rate": 3.065047701647875e-05, "loss": 0.7336, "step": 1767 }, { "epoch": 0.30673143650242884, "grad_norm": 0.787849485874176, "learning_rate": 3.0667823070251524e-05, "loss": 0.6996, "step": 1768 }, { "epoch": 0.30690492713393475, "grad_norm": 1.0751699209213257, "learning_rate": 3.068516912402429e-05, "loss": 0.7373, "step": 1769 }, { "epoch": 0.30707841776544065, "grad_norm": 1.01177978515625, "learning_rate": 3.0702515177797054e-05, "loss": 0.5951, "step": 1770 }, { "epoch": 0.30725190839694655, "grad_norm": 0.9446077346801758, "learning_rate": 3.071986123156982e-05, "loss": 0.5613, "step": 1771 }, { "epoch": 0.30742539902845245, "grad_norm": 0.9041101336479187, "learning_rate": 3.0737207285342584e-05, "loss": 0.6337, "step": 1772 }, { "epoch": 0.30759888965995835, "grad_norm": 2.196101188659668, "learning_rate": 3.0754553339115356e-05, "loss": 0.5615, "step": 1773 }, { "epoch": 0.30777238029146425, "grad_norm": 1.4874036312103271, "learning_rate": 3.077189939288812e-05, "loss": 0.5897, "step": 1774 }, { "epoch": 0.30794587092297016, "grad_norm": 0.8694006204605103, "learning_rate": 3.0789245446660886e-05, "loss": 0.6401, "step": 1775 }, { "epoch": 0.30811936155447606, "grad_norm": 0.5794796943664551, "learning_rate": 3.080659150043365e-05, "loss": 0.7235, "step": 1776 }, { "epoch": 0.30829285218598196, "grad_norm": 0.9236095547676086, "learning_rate": 3.082393755420642e-05, "loss": 0.7251, "step": 1777 }, { "epoch": 0.30846634281748786, "grad_norm": 0.7490460872650146, "learning_rate": 3.084128360797919e-05, "loss": 0.718, "step": 1778 }, { "epoch": 0.30863983344899376, "grad_norm": 0.9465798139572144, "learning_rate": 3.085862966175195e-05, "loss": 0.691, "step": 1779 }, { "epoch": 0.30881332408049966, "grad_norm": 0.7532344460487366, "learning_rate": 3.087597571552472e-05, "loss": 0.6631, "step": 1780 }, { "epoch": 0.30898681471200556, "grad_norm": 1.433074712753296, "learning_rate": 3.089332176929748e-05, "loss": 0.6438, "step": 1781 }, { "epoch": 0.30916030534351147, "grad_norm": 1.390010952949524, "learning_rate": 3.0910667823070255e-05, "loss": 0.7295, "step": 1782 }, { "epoch": 0.30933379597501737, "grad_norm": 0.8118870258331299, "learning_rate": 3.092801387684302e-05, "loss": 0.572, "step": 1783 }, { "epoch": 0.30950728660652327, "grad_norm": 1.1158040761947632, "learning_rate": 3.0945359930615785e-05, "loss": 0.627, "step": 1784 }, { "epoch": 0.30968077723802917, "grad_norm": 0.7021401524543762, "learning_rate": 3.096270598438855e-05, "loss": 0.6309, "step": 1785 }, { "epoch": 0.30985426786953507, "grad_norm": 0.9394262433052063, "learning_rate": 3.098005203816132e-05, "loss": 0.5513, "step": 1786 }, { "epoch": 0.3100277585010409, "grad_norm": 0.9389492869377136, "learning_rate": 3.0997398091934087e-05, "loss": 0.6484, "step": 1787 }, { "epoch": 0.3102012491325468, "grad_norm": 0.6000140309333801, "learning_rate": 3.101474414570685e-05, "loss": 0.6553, "step": 1788 }, { "epoch": 0.3103747397640527, "grad_norm": 0.8362985849380493, "learning_rate": 3.103209019947962e-05, "loss": 0.6587, "step": 1789 }, { "epoch": 0.3105482303955586, "grad_norm": 0.7522281408309937, "learning_rate": 3.104943625325239e-05, "loss": 0.708, "step": 1790 }, { "epoch": 0.3107217210270645, "grad_norm": 0.6182814836502075, "learning_rate": 3.106678230702515e-05, "loss": 0.765, "step": 1791 }, { "epoch": 0.3108952116585704, "grad_norm": 0.9737197160720825, "learning_rate": 3.1084128360797925e-05, "loss": 0.7123, "step": 1792 }, { "epoch": 0.3110687022900763, "grad_norm": 2.1213455200195312, "learning_rate": 3.110147441457069e-05, "loss": 0.5731, "step": 1793 }, { "epoch": 0.31124219292158223, "grad_norm": 1.0937367677688599, "learning_rate": 3.1118820468343455e-05, "loss": 0.5756, "step": 1794 }, { "epoch": 0.31141568355308813, "grad_norm": 0.7878339290618896, "learning_rate": 3.113616652211622e-05, "loss": 0.6467, "step": 1795 }, { "epoch": 0.31158917418459403, "grad_norm": 1.0251662731170654, "learning_rate": 3.115351257588899e-05, "loss": 0.6653, "step": 1796 }, { "epoch": 0.31176266481609993, "grad_norm": 0.618768572807312, "learning_rate": 3.117085862966176e-05, "loss": 0.7207, "step": 1797 }, { "epoch": 0.31193615544760583, "grad_norm": 0.9752748012542725, "learning_rate": 3.118820468343452e-05, "loss": 0.5526, "step": 1798 }, { "epoch": 0.31210964607911174, "grad_norm": 1.442103385925293, "learning_rate": 3.120555073720729e-05, "loss": 0.5773, "step": 1799 }, { "epoch": 0.31228313671061764, "grad_norm": 1.0630028247833252, "learning_rate": 3.122289679098005e-05, "loss": 0.6183, "step": 1800 }, { "epoch": 0.31245662734212354, "grad_norm": 0.7093740105628967, "learning_rate": 3.1240242844752824e-05, "loss": 0.6361, "step": 1801 }, { "epoch": 0.31263011797362944, "grad_norm": 0.9507588744163513, "learning_rate": 3.125758889852559e-05, "loss": 0.5803, "step": 1802 }, { "epoch": 0.31280360860513534, "grad_norm": 1.4544535875320435, "learning_rate": 3.1274934952298354e-05, "loss": 0.5353, "step": 1803 }, { "epoch": 0.31297709923664124, "grad_norm": 0.9902476668357849, "learning_rate": 3.129228100607112e-05, "loss": 0.6809, "step": 1804 }, { "epoch": 0.31315058986814714, "grad_norm": 0.9561774134635925, "learning_rate": 3.130962705984389e-05, "loss": 0.5793, "step": 1805 }, { "epoch": 0.31332408049965305, "grad_norm": 0.9205996990203857, "learning_rate": 3.1326973113616656e-05, "loss": 0.6047, "step": 1806 }, { "epoch": 0.3134975711311589, "grad_norm": 0.9781830310821533, "learning_rate": 3.134431916738942e-05, "loss": 0.5886, "step": 1807 }, { "epoch": 0.3136710617626648, "grad_norm": 0.8477849364280701, "learning_rate": 3.1361665221162186e-05, "loss": 0.6198, "step": 1808 }, { "epoch": 0.3138445523941707, "grad_norm": 0.8550189137458801, "learning_rate": 3.137901127493496e-05, "loss": 0.5726, "step": 1809 }, { "epoch": 0.3140180430256766, "grad_norm": 0.8213677406311035, "learning_rate": 3.139635732870772e-05, "loss": 0.6034, "step": 1810 }, { "epoch": 0.3141915336571825, "grad_norm": 0.7088372707366943, "learning_rate": 3.141370338248049e-05, "loss": 0.6888, "step": 1811 }, { "epoch": 0.3143650242886884, "grad_norm": 0.7192822694778442, "learning_rate": 3.143104943625325e-05, "loss": 0.7292, "step": 1812 }, { "epoch": 0.3145385149201943, "grad_norm": 0.9309755563735962, "learning_rate": 3.144839549002602e-05, "loss": 0.5566, "step": 1813 }, { "epoch": 0.3147120055517002, "grad_norm": 0.8870457410812378, "learning_rate": 3.146574154379879e-05, "loss": 0.556, "step": 1814 }, { "epoch": 0.3148854961832061, "grad_norm": 0.7572647333145142, "learning_rate": 3.1483087597571554e-05, "loss": 0.5516, "step": 1815 }, { "epoch": 0.315058986814712, "grad_norm": 0.8539776802062988, "learning_rate": 3.150043365134432e-05, "loss": 0.6786, "step": 1816 }, { "epoch": 0.3152324774462179, "grad_norm": 2.1104016304016113, "learning_rate": 3.1517779705117084e-05, "loss": 0.6483, "step": 1817 }, { "epoch": 0.3154059680777238, "grad_norm": 0.6436271071434021, "learning_rate": 3.1535125758889856e-05, "loss": 0.7158, "step": 1818 }, { "epoch": 0.3155794587092297, "grad_norm": 0.956113874912262, "learning_rate": 3.155247181266262e-05, "loss": 0.5658, "step": 1819 }, { "epoch": 0.3157529493407356, "grad_norm": 0.8388386368751526, "learning_rate": 3.156981786643539e-05, "loss": 0.67, "step": 1820 }, { "epoch": 0.3159264399722415, "grad_norm": 0.764722466468811, "learning_rate": 3.158716392020816e-05, "loss": 0.5817, "step": 1821 }, { "epoch": 0.3160999306037474, "grad_norm": 0.8697858452796936, "learning_rate": 3.160450997398092e-05, "loss": 0.6027, "step": 1822 }, { "epoch": 0.3162734212352533, "grad_norm": 0.9201172590255737, "learning_rate": 3.162185602775369e-05, "loss": 0.6139, "step": 1823 }, { "epoch": 0.3164469118667592, "grad_norm": 1.1344634294509888, "learning_rate": 3.163920208152646e-05, "loss": 0.7426, "step": 1824 }, { "epoch": 0.3166204024982651, "grad_norm": 1.032973051071167, "learning_rate": 3.1656548135299224e-05, "loss": 0.6969, "step": 1825 }, { "epoch": 0.31679389312977096, "grad_norm": 1.1614660024642944, "learning_rate": 3.167389418907199e-05, "loss": 0.6847, "step": 1826 }, { "epoch": 0.31696738376127687, "grad_norm": 0.8547880053520203, "learning_rate": 3.1691240242844754e-05, "loss": 0.7056, "step": 1827 }, { "epoch": 0.31714087439278277, "grad_norm": 0.7039558291435242, "learning_rate": 3.1708586296617526e-05, "loss": 0.8013, "step": 1828 }, { "epoch": 0.31731436502428867, "grad_norm": 0.7111900448799133, "learning_rate": 3.172593235039029e-05, "loss": 0.7852, "step": 1829 }, { "epoch": 0.31748785565579457, "grad_norm": 1.033314824104309, "learning_rate": 3.1743278404163056e-05, "loss": 0.7156, "step": 1830 }, { "epoch": 0.3176613462873005, "grad_norm": 0.6807761788368225, "learning_rate": 3.176062445793582e-05, "loss": 0.6707, "step": 1831 }, { "epoch": 0.3178348369188064, "grad_norm": 0.9980964660644531, "learning_rate": 3.1777970511708586e-05, "loss": 0.5842, "step": 1832 }, { "epoch": 0.3180083275503123, "grad_norm": 0.7964962720870972, "learning_rate": 3.179531656548136e-05, "loss": 0.6732, "step": 1833 }, { "epoch": 0.3181818181818182, "grad_norm": 1.0412522554397583, "learning_rate": 3.181266261925412e-05, "loss": 0.606, "step": 1834 }, { "epoch": 0.3183553088133241, "grad_norm": 0.8972187638282776, "learning_rate": 3.183000867302689e-05, "loss": 0.6263, "step": 1835 }, { "epoch": 0.31852879944483, "grad_norm": 0.7858732342720032, "learning_rate": 3.184735472679965e-05, "loss": 0.5792, "step": 1836 }, { "epoch": 0.3187022900763359, "grad_norm": 0.7865777015686035, "learning_rate": 3.1864700780572425e-05, "loss": 0.5364, "step": 1837 }, { "epoch": 0.3188757807078418, "grad_norm": 0.9523298144340515, "learning_rate": 3.188204683434519e-05, "loss": 0.683, "step": 1838 }, { "epoch": 0.3190492713393477, "grad_norm": 1.0407299995422363, "learning_rate": 3.1899392888117955e-05, "loss": 0.5481, "step": 1839 }, { "epoch": 0.3192227619708536, "grad_norm": 1.987041711807251, "learning_rate": 3.191673894189072e-05, "loss": 0.5403, "step": 1840 }, { "epoch": 0.3193962526023595, "grad_norm": 0.820220410823822, "learning_rate": 3.1934084995663485e-05, "loss": 0.6174, "step": 1841 }, { "epoch": 0.3195697432338654, "grad_norm": 0.73539137840271, "learning_rate": 3.1951431049436257e-05, "loss": 0.6836, "step": 1842 }, { "epoch": 0.3197432338653713, "grad_norm": 1.3756539821624756, "learning_rate": 3.196877710320902e-05, "loss": 0.7025, "step": 1843 }, { "epoch": 0.3199167244968772, "grad_norm": 0.6816329956054688, "learning_rate": 3.1986123156981787e-05, "loss": 0.6943, "step": 1844 }, { "epoch": 0.3200902151283831, "grad_norm": 0.7202216982841492, "learning_rate": 3.200346921075455e-05, "loss": 0.7222, "step": 1845 }, { "epoch": 0.32026370575988894, "grad_norm": 0.676460325717926, "learning_rate": 3.202081526452732e-05, "loss": 0.6368, "step": 1846 }, { "epoch": 0.32043719639139484, "grad_norm": 0.8167012333869934, "learning_rate": 3.203816131830009e-05, "loss": 0.7476, "step": 1847 }, { "epoch": 0.32061068702290074, "grad_norm": 0.8015779256820679, "learning_rate": 3.205550737207286e-05, "loss": 0.6105, "step": 1848 }, { "epoch": 0.32078417765440664, "grad_norm": 0.6521061658859253, "learning_rate": 3.2072853425845625e-05, "loss": 0.7544, "step": 1849 }, { "epoch": 0.32095766828591255, "grad_norm": 0.9332058429718018, "learning_rate": 3.209019947961839e-05, "loss": 0.5671, "step": 1850 }, { "epoch": 0.32113115891741845, "grad_norm": 0.8357752561569214, "learning_rate": 3.2107545533391155e-05, "loss": 0.6456, "step": 1851 }, { "epoch": 0.32130464954892435, "grad_norm": 0.7136110067367554, "learning_rate": 3.212489158716393e-05, "loss": 0.626, "step": 1852 }, { "epoch": 0.32147814018043025, "grad_norm": 0.8159222602844238, "learning_rate": 3.214223764093669e-05, "loss": 0.6195, "step": 1853 }, { "epoch": 0.32165163081193615, "grad_norm": 0.8828167915344238, "learning_rate": 3.215958369470946e-05, "loss": 0.5812, "step": 1854 }, { "epoch": 0.32182512144344205, "grad_norm": 0.844054102897644, "learning_rate": 3.217692974848222e-05, "loss": 0.7297, "step": 1855 }, { "epoch": 0.32199861207494795, "grad_norm": 0.9942353963851929, "learning_rate": 3.2194275802254994e-05, "loss": 0.6737, "step": 1856 }, { "epoch": 0.32217210270645386, "grad_norm": 0.8586241006851196, "learning_rate": 3.221162185602776e-05, "loss": 0.5699, "step": 1857 }, { "epoch": 0.32234559333795976, "grad_norm": 1.068901777267456, "learning_rate": 3.2228967909800524e-05, "loss": 0.6454, "step": 1858 }, { "epoch": 0.32251908396946566, "grad_norm": 0.94094318151474, "learning_rate": 3.224631396357329e-05, "loss": 0.6882, "step": 1859 }, { "epoch": 0.32269257460097156, "grad_norm": 1.031432032585144, "learning_rate": 3.2263660017346054e-05, "loss": 0.5964, "step": 1860 }, { "epoch": 0.32286606523247746, "grad_norm": 1.1422418355941772, "learning_rate": 3.2281006071118825e-05, "loss": 0.7263, "step": 1861 }, { "epoch": 0.32303955586398336, "grad_norm": 1.1631206274032593, "learning_rate": 3.229835212489159e-05, "loss": 0.665, "step": 1862 }, { "epoch": 0.32321304649548926, "grad_norm": 0.7793134450912476, "learning_rate": 3.2315698178664355e-05, "loss": 0.6418, "step": 1863 }, { "epoch": 0.32338653712699517, "grad_norm": 0.861473560333252, "learning_rate": 3.233304423243712e-05, "loss": 0.6372, "step": 1864 }, { "epoch": 0.32356002775850107, "grad_norm": 0.8410269618034363, "learning_rate": 3.235039028620989e-05, "loss": 0.5958, "step": 1865 }, { "epoch": 0.3237335183900069, "grad_norm": 0.71745365858078, "learning_rate": 3.236773633998266e-05, "loss": 0.5424, "step": 1866 }, { "epoch": 0.3239070090215128, "grad_norm": 0.6829073429107666, "learning_rate": 3.238508239375542e-05, "loss": 0.7207, "step": 1867 }, { "epoch": 0.3240804996530187, "grad_norm": 0.9286072254180908, "learning_rate": 3.240242844752819e-05, "loss": 0.6857, "step": 1868 }, { "epoch": 0.3242539902845246, "grad_norm": 0.9504925608634949, "learning_rate": 3.241977450130096e-05, "loss": 0.5566, "step": 1869 }, { "epoch": 0.3244274809160305, "grad_norm": 0.8263635635375977, "learning_rate": 3.2437120555073724e-05, "loss": 0.7421, "step": 1870 }, { "epoch": 0.3246009715475364, "grad_norm": 1.0866979360580444, "learning_rate": 3.245446660884649e-05, "loss": 0.5236, "step": 1871 }, { "epoch": 0.3247744621790423, "grad_norm": 1.151369571685791, "learning_rate": 3.2471812662619254e-05, "loss": 0.6591, "step": 1872 }, { "epoch": 0.3249479528105482, "grad_norm": 1.141158938407898, "learning_rate": 3.248915871639202e-05, "loss": 0.7085, "step": 1873 }, { "epoch": 0.3251214434420541, "grad_norm": 0.7256845235824585, "learning_rate": 3.250650477016479e-05, "loss": 0.5247, "step": 1874 }, { "epoch": 0.32529493407356, "grad_norm": 0.6779228448867798, "learning_rate": 3.2523850823937556e-05, "loss": 0.686, "step": 1875 }, { "epoch": 0.32546842470506593, "grad_norm": 0.6428731679916382, "learning_rate": 3.254119687771032e-05, "loss": 0.6531, "step": 1876 }, { "epoch": 0.32564191533657183, "grad_norm": 0.7823044061660767, "learning_rate": 3.2558542931483086e-05, "loss": 0.621, "step": 1877 }, { "epoch": 0.32581540596807773, "grad_norm": 1.2496860027313232, "learning_rate": 3.257588898525586e-05, "loss": 0.6849, "step": 1878 }, { "epoch": 0.32598889659958363, "grad_norm": 0.825401246547699, "learning_rate": 3.259323503902862e-05, "loss": 0.5262, "step": 1879 }, { "epoch": 0.32616238723108953, "grad_norm": 0.8637552261352539, "learning_rate": 3.2610581092801394e-05, "loss": 0.6127, "step": 1880 }, { "epoch": 0.32633587786259544, "grad_norm": 0.5828275084495544, "learning_rate": 3.262792714657416e-05, "loss": 0.6902, "step": 1881 }, { "epoch": 0.32650936849410134, "grad_norm": 1.9262351989746094, "learning_rate": 3.2645273200346924e-05, "loss": 0.6202, "step": 1882 }, { "epoch": 0.32668285912560724, "grad_norm": 0.9045750498771667, "learning_rate": 3.266261925411969e-05, "loss": 0.6622, "step": 1883 }, { "epoch": 0.32685634975711314, "grad_norm": 0.6373817920684814, "learning_rate": 3.267996530789246e-05, "loss": 0.7363, "step": 1884 }, { "epoch": 0.32702984038861904, "grad_norm": 0.6378402709960938, "learning_rate": 3.2697311361665226e-05, "loss": 0.6084, "step": 1885 }, { "epoch": 0.3272033310201249, "grad_norm": 0.9899991750717163, "learning_rate": 3.271465741543799e-05, "loss": 0.6056, "step": 1886 }, { "epoch": 0.3273768216516308, "grad_norm": 0.7649429440498352, "learning_rate": 3.2732003469210756e-05, "loss": 0.5872, "step": 1887 }, { "epoch": 0.3275503122831367, "grad_norm": 0.9158859252929688, "learning_rate": 3.274934952298353e-05, "loss": 0.5853, "step": 1888 }, { "epoch": 0.3277238029146426, "grad_norm": 0.8275113701820374, "learning_rate": 3.276669557675629e-05, "loss": 0.6613, "step": 1889 }, { "epoch": 0.3278972935461485, "grad_norm": 0.880486011505127, "learning_rate": 3.278404163052906e-05, "loss": 0.561, "step": 1890 }, { "epoch": 0.3280707841776544, "grad_norm": 0.7612427473068237, "learning_rate": 3.280138768430182e-05, "loss": 0.6125, "step": 1891 }, { "epoch": 0.3282442748091603, "grad_norm": 0.7106688022613525, "learning_rate": 3.281873373807459e-05, "loss": 0.743, "step": 1892 }, { "epoch": 0.3284177654406662, "grad_norm": 0.831951379776001, "learning_rate": 3.283607979184736e-05, "loss": 0.5771, "step": 1893 }, { "epoch": 0.3285912560721721, "grad_norm": 1.737910270690918, "learning_rate": 3.2853425845620125e-05, "loss": 0.6971, "step": 1894 }, { "epoch": 0.328764746703678, "grad_norm": 0.9117143750190735, "learning_rate": 3.287077189939289e-05, "loss": 0.5634, "step": 1895 }, { "epoch": 0.3289382373351839, "grad_norm": 0.7156542539596558, "learning_rate": 3.2888117953165655e-05, "loss": 0.6713, "step": 1896 }, { "epoch": 0.3291117279666898, "grad_norm": 0.9128335118293762, "learning_rate": 3.2905464006938426e-05, "loss": 0.5865, "step": 1897 }, { "epoch": 0.3292852185981957, "grad_norm": 0.9140109419822693, "learning_rate": 3.292281006071119e-05, "loss": 0.5771, "step": 1898 }, { "epoch": 0.3294587092297016, "grad_norm": 0.8033643364906311, "learning_rate": 3.2940156114483956e-05, "loss": 0.528, "step": 1899 }, { "epoch": 0.3296321998612075, "grad_norm": 1.2055332660675049, "learning_rate": 3.295750216825672e-05, "loss": 0.52, "step": 1900 }, { "epoch": 0.3298056904927134, "grad_norm": 0.8967594504356384, "learning_rate": 3.2974848222029487e-05, "loss": 0.637, "step": 1901 }, { "epoch": 0.3299791811242193, "grad_norm": 0.7392427325248718, "learning_rate": 3.299219427580226e-05, "loss": 0.5469, "step": 1902 }, { "epoch": 0.3301526717557252, "grad_norm": 0.6878560185432434, "learning_rate": 3.300954032957502e-05, "loss": 0.676, "step": 1903 }, { "epoch": 0.3303261623872311, "grad_norm": 0.8250359296798706, "learning_rate": 3.302688638334779e-05, "loss": 0.6309, "step": 1904 }, { "epoch": 0.33049965301873696, "grad_norm": 0.9844671487808228, "learning_rate": 3.304423243712055e-05, "loss": 0.541, "step": 1905 }, { "epoch": 0.33067314365024286, "grad_norm": 1.5574495792388916, "learning_rate": 3.3061578490893325e-05, "loss": 0.7218, "step": 1906 }, { "epoch": 0.33084663428174876, "grad_norm": 0.6480480432510376, "learning_rate": 3.307892454466609e-05, "loss": 0.8379, "step": 1907 }, { "epoch": 0.33102012491325467, "grad_norm": 0.8829315304756165, "learning_rate": 3.309627059843886e-05, "loss": 0.6604, "step": 1908 }, { "epoch": 0.33119361554476057, "grad_norm": 0.7221475839614868, "learning_rate": 3.311361665221163e-05, "loss": 0.6022, "step": 1909 }, { "epoch": 0.33136710617626647, "grad_norm": 0.7198967933654785, "learning_rate": 3.313096270598439e-05, "loss": 0.5106, "step": 1910 }, { "epoch": 0.33154059680777237, "grad_norm": 1.0678820610046387, "learning_rate": 3.314830875975716e-05, "loss": 0.5536, "step": 1911 }, { "epoch": 0.33171408743927827, "grad_norm": 0.940387487411499, "learning_rate": 3.316565481352993e-05, "loss": 0.6127, "step": 1912 }, { "epoch": 0.3318875780707842, "grad_norm": 1.3829164505004883, "learning_rate": 3.3183000867302694e-05, "loss": 0.6375, "step": 1913 }, { "epoch": 0.3320610687022901, "grad_norm": 0.8988991975784302, "learning_rate": 3.320034692107546e-05, "loss": 0.6748, "step": 1914 }, { "epoch": 0.332234559333796, "grad_norm": 0.6385945677757263, "learning_rate": 3.3217692974848224e-05, "loss": 0.6494, "step": 1915 }, { "epoch": 0.3324080499653019, "grad_norm": 0.7207217812538147, "learning_rate": 3.3235039028620995e-05, "loss": 0.6686, "step": 1916 }, { "epoch": 0.3325815405968078, "grad_norm": 0.9134792685508728, "learning_rate": 3.325238508239376e-05, "loss": 0.6448, "step": 1917 }, { "epoch": 0.3327550312283137, "grad_norm": 0.9056723713874817, "learning_rate": 3.3269731136166525e-05, "loss": 0.7886, "step": 1918 }, { "epoch": 0.3329285218598196, "grad_norm": 0.916094183921814, "learning_rate": 3.328707718993929e-05, "loss": 0.7588, "step": 1919 }, { "epoch": 0.3331020124913255, "grad_norm": 0.7988206744194031, "learning_rate": 3.3304423243712055e-05, "loss": 0.6619, "step": 1920 }, { "epoch": 0.3332755031228314, "grad_norm": 0.7587366104125977, "learning_rate": 3.332176929748483e-05, "loss": 0.6594, "step": 1921 }, { "epoch": 0.3334489937543373, "grad_norm": 0.9271337985992432, "learning_rate": 3.333911535125759e-05, "loss": 0.5983, "step": 1922 }, { "epoch": 0.3336224843858432, "grad_norm": 0.8258846998214722, "learning_rate": 3.335646140503036e-05, "loss": 0.6704, "step": 1923 }, { "epoch": 0.3337959750173491, "grad_norm": 0.9814427495002747, "learning_rate": 3.337380745880312e-05, "loss": 0.5504, "step": 1924 }, { "epoch": 0.33396946564885494, "grad_norm": 0.7927559614181519, "learning_rate": 3.3391153512575894e-05, "loss": 0.5997, "step": 1925 }, { "epoch": 0.33414295628036084, "grad_norm": 1.139303207397461, "learning_rate": 3.340849956634866e-05, "loss": 0.7278, "step": 1926 }, { "epoch": 0.33431644691186674, "grad_norm": 0.993246853351593, "learning_rate": 3.3425845620121424e-05, "loss": 0.6042, "step": 1927 }, { "epoch": 0.33448993754337264, "grad_norm": 1.020550012588501, "learning_rate": 3.344319167389419e-05, "loss": 0.5493, "step": 1928 }, { "epoch": 0.33466342817487854, "grad_norm": 0.8157418370246887, "learning_rate": 3.346053772766696e-05, "loss": 0.6833, "step": 1929 }, { "epoch": 0.33483691880638444, "grad_norm": 0.8034759759902954, "learning_rate": 3.3477883781439726e-05, "loss": 0.707, "step": 1930 }, { "epoch": 0.33501040943789034, "grad_norm": 0.6415484547615051, "learning_rate": 3.349522983521249e-05, "loss": 0.7014, "step": 1931 }, { "epoch": 0.33518390006939625, "grad_norm": 0.9110195636749268, "learning_rate": 3.3512575888985256e-05, "loss": 0.663, "step": 1932 }, { "epoch": 0.33535739070090215, "grad_norm": 3.435911178588867, "learning_rate": 3.352992194275802e-05, "loss": 0.5925, "step": 1933 }, { "epoch": 0.33553088133240805, "grad_norm": 0.6798315644264221, "learning_rate": 3.354726799653079e-05, "loss": 0.7164, "step": 1934 }, { "epoch": 0.33570437196391395, "grad_norm": 1.6037698984146118, "learning_rate": 3.356461405030356e-05, "loss": 0.6107, "step": 1935 }, { "epoch": 0.33587786259541985, "grad_norm": 0.7971057295799255, "learning_rate": 3.358196010407632e-05, "loss": 0.7469, "step": 1936 }, { "epoch": 0.33605135322692575, "grad_norm": 1.0822875499725342, "learning_rate": 3.3599306157849094e-05, "loss": 0.5383, "step": 1937 }, { "epoch": 0.33622484385843165, "grad_norm": 0.7857166528701782, "learning_rate": 3.361665221162186e-05, "loss": 0.7449, "step": 1938 }, { "epoch": 0.33639833448993756, "grad_norm": 0.6329569816589355, "learning_rate": 3.3633998265394624e-05, "loss": 0.7579, "step": 1939 }, { "epoch": 0.33657182512144346, "grad_norm": 0.7199830412864685, "learning_rate": 3.3651344319167396e-05, "loss": 0.7336, "step": 1940 }, { "epoch": 0.33674531575294936, "grad_norm": 0.8670171499252319, "learning_rate": 3.366869037294016e-05, "loss": 0.6079, "step": 1941 }, { "epoch": 0.33691880638445526, "grad_norm": 0.7961511611938477, "learning_rate": 3.3686036426712926e-05, "loss": 0.5454, "step": 1942 }, { "epoch": 0.33709229701596116, "grad_norm": 0.8177798986434937, "learning_rate": 3.370338248048569e-05, "loss": 0.6361, "step": 1943 }, { "epoch": 0.33726578764746706, "grad_norm": 0.8449214696884155, "learning_rate": 3.372072853425846e-05, "loss": 0.7, "step": 1944 }, { "epoch": 0.3374392782789729, "grad_norm": 1.0306763648986816, "learning_rate": 3.373807458803123e-05, "loss": 0.6237, "step": 1945 }, { "epoch": 0.3376127689104788, "grad_norm": 0.7724658250808716, "learning_rate": 3.375542064180399e-05, "loss": 0.6016, "step": 1946 }, { "epoch": 0.3377862595419847, "grad_norm": 0.8948357701301575, "learning_rate": 3.377276669557676e-05, "loss": 0.7991, "step": 1947 }, { "epoch": 0.3379597501734906, "grad_norm": 0.7655358910560608, "learning_rate": 3.379011274934952e-05, "loss": 0.6937, "step": 1948 }, { "epoch": 0.3381332408049965, "grad_norm": 0.8109301328659058, "learning_rate": 3.3807458803122295e-05, "loss": 0.6616, "step": 1949 }, { "epoch": 0.3383067314365024, "grad_norm": 0.8285800218582153, "learning_rate": 3.382480485689506e-05, "loss": 0.7268, "step": 1950 }, { "epoch": 0.3384802220680083, "grad_norm": 0.7333600521087646, "learning_rate": 3.3842150910667825e-05, "loss": 0.6385, "step": 1951 }, { "epoch": 0.3386537126995142, "grad_norm": 0.5980129241943359, "learning_rate": 3.385949696444059e-05, "loss": 0.7239, "step": 1952 }, { "epoch": 0.3388272033310201, "grad_norm": 2.1083834171295166, "learning_rate": 3.387684301821336e-05, "loss": 0.6746, "step": 1953 }, { "epoch": 0.339000693962526, "grad_norm": 0.774154543876648, "learning_rate": 3.3894189071986126e-05, "loss": 0.671, "step": 1954 }, { "epoch": 0.3391741845940319, "grad_norm": 0.8086047172546387, "learning_rate": 3.391153512575889e-05, "loss": 0.8333, "step": 1955 }, { "epoch": 0.3393476752255378, "grad_norm": 0.9128818511962891, "learning_rate": 3.3928881179531656e-05, "loss": 0.6677, "step": 1956 }, { "epoch": 0.3395211658570437, "grad_norm": 0.8383493423461914, "learning_rate": 3.394622723330443e-05, "loss": 0.6716, "step": 1957 }, { "epoch": 0.33969465648854963, "grad_norm": 2.182744264602661, "learning_rate": 3.396357328707719e-05, "loss": 0.6908, "step": 1958 }, { "epoch": 0.33986814712005553, "grad_norm": 0.8761880397796631, "learning_rate": 3.398091934084996e-05, "loss": 0.6628, "step": 1959 }, { "epoch": 0.34004163775156143, "grad_norm": 1.249531865119934, "learning_rate": 3.399826539462272e-05, "loss": 0.6277, "step": 1960 }, { "epoch": 0.34021512838306733, "grad_norm": 1.0625807046890259, "learning_rate": 3.401561144839549e-05, "loss": 0.7263, "step": 1961 }, { "epoch": 0.34038861901457323, "grad_norm": 0.9543647766113281, "learning_rate": 3.403295750216826e-05, "loss": 0.7351, "step": 1962 }, { "epoch": 0.34056210964607914, "grad_norm": 0.7973102927207947, "learning_rate": 3.4050303555941025e-05, "loss": 0.5649, "step": 1963 }, { "epoch": 0.34073560027758504, "grad_norm": 1.3753342628479004, "learning_rate": 3.406764960971379e-05, "loss": 0.5652, "step": 1964 }, { "epoch": 0.3409090909090909, "grad_norm": 1.9109654426574707, "learning_rate": 3.4084995663486555e-05, "loss": 0.5186, "step": 1965 }, { "epoch": 0.3410825815405968, "grad_norm": 1.8508704900741577, "learning_rate": 3.410234171725933e-05, "loss": 0.5887, "step": 1966 }, { "epoch": 0.3412560721721027, "grad_norm": 0.8658906817436218, "learning_rate": 3.411968777103209e-05, "loss": 0.7981, "step": 1967 }, { "epoch": 0.3414295628036086, "grad_norm": 1.0544072389602661, "learning_rate": 3.4137033824804864e-05, "loss": 0.5813, "step": 1968 }, { "epoch": 0.3416030534351145, "grad_norm": 1.6280932426452637, "learning_rate": 3.415437987857763e-05, "loss": 0.7124, "step": 1969 }, { "epoch": 0.3417765440666204, "grad_norm": 1.027541995048523, "learning_rate": 3.4171725932350394e-05, "loss": 0.6947, "step": 1970 }, { "epoch": 0.3419500346981263, "grad_norm": 1.1156625747680664, "learning_rate": 3.418907198612316e-05, "loss": 0.6039, "step": 1971 }, { "epoch": 0.3421235253296322, "grad_norm": 1.0394290685653687, "learning_rate": 3.420641803989593e-05, "loss": 0.7434, "step": 1972 }, { "epoch": 0.3422970159611381, "grad_norm": 0.9105010628700256, "learning_rate": 3.4223764093668695e-05, "loss": 0.6143, "step": 1973 }, { "epoch": 0.342470506592644, "grad_norm": 0.7156486511230469, "learning_rate": 3.424111014744146e-05, "loss": 0.6757, "step": 1974 }, { "epoch": 0.3426439972241499, "grad_norm": 0.7882575392723083, "learning_rate": 3.4258456201214225e-05, "loss": 0.5669, "step": 1975 }, { "epoch": 0.3428174878556558, "grad_norm": 0.7309373617172241, "learning_rate": 3.4275802254987e-05, "loss": 0.6183, "step": 1976 }, { "epoch": 0.3429909784871617, "grad_norm": 1.8437594175338745, "learning_rate": 3.429314830875976e-05, "loss": 0.6377, "step": 1977 }, { "epoch": 0.3431644691186676, "grad_norm": 1.0819367170333862, "learning_rate": 3.431049436253253e-05, "loss": 0.6041, "step": 1978 }, { "epoch": 0.3433379597501735, "grad_norm": 0.8812955617904663, "learning_rate": 3.432784041630529e-05, "loss": 0.4973, "step": 1979 }, { "epoch": 0.3435114503816794, "grad_norm": 0.6564153432846069, "learning_rate": 3.434518647007806e-05, "loss": 0.7019, "step": 1980 }, { "epoch": 0.3436849410131853, "grad_norm": 1.0282037258148193, "learning_rate": 3.436253252385083e-05, "loss": 0.658, "step": 1981 }, { "epoch": 0.3438584316446912, "grad_norm": 0.7225372195243835, "learning_rate": 3.4379878577623594e-05, "loss": 0.6141, "step": 1982 }, { "epoch": 0.3440319222761971, "grad_norm": 0.6336444616317749, "learning_rate": 3.439722463139636e-05, "loss": 0.7566, "step": 1983 }, { "epoch": 0.34420541290770296, "grad_norm": 0.9767608046531677, "learning_rate": 3.4414570685169124e-05, "loss": 0.6304, "step": 1984 }, { "epoch": 0.34437890353920886, "grad_norm": 1.0240918397903442, "learning_rate": 3.4431916738941896e-05, "loss": 0.6415, "step": 1985 }, { "epoch": 0.34455239417071476, "grad_norm": 0.7765363454818726, "learning_rate": 3.444926279271466e-05, "loss": 0.6307, "step": 1986 }, { "epoch": 0.34472588480222066, "grad_norm": 0.6897956728935242, "learning_rate": 3.4466608846487426e-05, "loss": 0.6045, "step": 1987 }, { "epoch": 0.34489937543372656, "grad_norm": 0.7995160222053528, "learning_rate": 3.448395490026019e-05, "loss": 0.6552, "step": 1988 }, { "epoch": 0.34507286606523246, "grad_norm": 0.7897160649299622, "learning_rate": 3.4501300954032956e-05, "loss": 0.6372, "step": 1989 }, { "epoch": 0.34524635669673837, "grad_norm": 0.9942298531532288, "learning_rate": 3.451864700780573e-05, "loss": 0.6865, "step": 1990 }, { "epoch": 0.34541984732824427, "grad_norm": 0.7617659568786621, "learning_rate": 3.453599306157849e-05, "loss": 0.7029, "step": 1991 }, { "epoch": 0.34559333795975017, "grad_norm": 0.9708228707313538, "learning_rate": 3.455333911535126e-05, "loss": 0.6744, "step": 1992 }, { "epoch": 0.34576682859125607, "grad_norm": 0.9564141035079956, "learning_rate": 3.457068516912402e-05, "loss": 0.6765, "step": 1993 }, { "epoch": 0.34594031922276197, "grad_norm": 1.037091851234436, "learning_rate": 3.4588031222896794e-05, "loss": 0.702, "step": 1994 }, { "epoch": 0.3461138098542679, "grad_norm": 0.8068506717681885, "learning_rate": 3.460537727666956e-05, "loss": 0.6803, "step": 1995 }, { "epoch": 0.3462873004857738, "grad_norm": 0.7901633381843567, "learning_rate": 3.462272333044233e-05, "loss": 0.6143, "step": 1996 }, { "epoch": 0.3464607911172797, "grad_norm": 2.00464129447937, "learning_rate": 3.4640069384215096e-05, "loss": 0.5975, "step": 1997 }, { "epoch": 0.3466342817487856, "grad_norm": 0.9880556464195251, "learning_rate": 3.465741543798786e-05, "loss": 0.5653, "step": 1998 }, { "epoch": 0.3468077723802915, "grad_norm": 0.989421010017395, "learning_rate": 3.4674761491760626e-05, "loss": 0.6676, "step": 1999 }, { "epoch": 0.3469812630117974, "grad_norm": 0.8350646495819092, "learning_rate": 3.46921075455334e-05, "loss": 0.5137, "step": 2000 }, { "epoch": 0.3471547536433033, "grad_norm": 1.1387163400650024, "learning_rate": 3.470945359930616e-05, "loss": 0.5483, "step": 2001 }, { "epoch": 0.3473282442748092, "grad_norm": 2.10577392578125, "learning_rate": 3.472679965307893e-05, "loss": 0.6202, "step": 2002 }, { "epoch": 0.3475017349063151, "grad_norm": 0.8258737921714783, "learning_rate": 3.474414570685169e-05, "loss": 0.6392, "step": 2003 }, { "epoch": 0.34767522553782093, "grad_norm": 1.2150181531906128, "learning_rate": 3.4761491760624465e-05, "loss": 0.7468, "step": 2004 }, { "epoch": 0.34784871616932683, "grad_norm": 0.7214828729629517, "learning_rate": 3.477883781439723e-05, "loss": 0.6019, "step": 2005 }, { "epoch": 0.34802220680083273, "grad_norm": 0.6550180315971375, "learning_rate": 3.4796183868169995e-05, "loss": 0.7052, "step": 2006 }, { "epoch": 0.34819569743233864, "grad_norm": 2.2414355278015137, "learning_rate": 3.481352992194276e-05, "loss": 0.6088, "step": 2007 }, { "epoch": 0.34836918806384454, "grad_norm": 0.8592984676361084, "learning_rate": 3.4830875975715525e-05, "loss": 0.6058, "step": 2008 }, { "epoch": 0.34854267869535044, "grad_norm": 0.7922565937042236, "learning_rate": 3.4848222029488296e-05, "loss": 0.7596, "step": 2009 }, { "epoch": 0.34871616932685634, "grad_norm": 0.7546785473823547, "learning_rate": 3.486556808326106e-05, "loss": 0.6936, "step": 2010 }, { "epoch": 0.34888965995836224, "grad_norm": 0.831555187702179, "learning_rate": 3.4882914137033826e-05, "loss": 0.5795, "step": 2011 }, { "epoch": 0.34906315058986814, "grad_norm": 0.9311597943305969, "learning_rate": 3.490026019080659e-05, "loss": 0.6919, "step": 2012 }, { "epoch": 0.34923664122137404, "grad_norm": 0.9510216116905212, "learning_rate": 3.491760624457936e-05, "loss": 0.5664, "step": 2013 }, { "epoch": 0.34941013185287995, "grad_norm": 0.7857622504234314, "learning_rate": 3.493495229835213e-05, "loss": 0.6205, "step": 2014 }, { "epoch": 0.34958362248438585, "grad_norm": 1.2958338260650635, "learning_rate": 3.495229835212489e-05, "loss": 0.5803, "step": 2015 }, { "epoch": 0.34975711311589175, "grad_norm": 0.7746779918670654, "learning_rate": 3.496964440589766e-05, "loss": 0.6233, "step": 2016 }, { "epoch": 0.34993060374739765, "grad_norm": 0.6474652886390686, "learning_rate": 3.498699045967043e-05, "loss": 0.739, "step": 2017 }, { "epoch": 0.35010409437890355, "grad_norm": 0.7284134030342102, "learning_rate": 3.5004336513443195e-05, "loss": 0.8052, "step": 2018 }, { "epoch": 0.35027758501040945, "grad_norm": 0.6557030081748962, "learning_rate": 3.502168256721596e-05, "loss": 0.6912, "step": 2019 }, { "epoch": 0.35045107564191535, "grad_norm": 0.8244677782058716, "learning_rate": 3.5039028620988725e-05, "loss": 0.6118, "step": 2020 }, { "epoch": 0.35062456627342126, "grad_norm": 1.0263489484786987, "learning_rate": 3.505637467476149e-05, "loss": 0.5574, "step": 2021 }, { "epoch": 0.35079805690492716, "grad_norm": 1.0217803716659546, "learning_rate": 3.507372072853426e-05, "loss": 0.5685, "step": 2022 }, { "epoch": 0.35097154753643306, "grad_norm": 3.0454044342041016, "learning_rate": 3.509106678230703e-05, "loss": 0.5991, "step": 2023 }, { "epoch": 0.3511450381679389, "grad_norm": 0.8422743082046509, "learning_rate": 3.510841283607979e-05, "loss": 0.6758, "step": 2024 }, { "epoch": 0.3513185287994448, "grad_norm": 0.6514055728912354, "learning_rate": 3.512575888985256e-05, "loss": 0.7083, "step": 2025 }, { "epoch": 0.3514920194309507, "grad_norm": 0.6344184875488281, "learning_rate": 3.514310494362533e-05, "loss": 0.7272, "step": 2026 }, { "epoch": 0.3516655100624566, "grad_norm": 0.5242217183113098, "learning_rate": 3.5160450997398094e-05, "loss": 0.7678, "step": 2027 }, { "epoch": 0.3518390006939625, "grad_norm": 1.2077232599258423, "learning_rate": 3.5177797051170865e-05, "loss": 0.595, "step": 2028 }, { "epoch": 0.3520124913254684, "grad_norm": 0.8761990666389465, "learning_rate": 3.519514310494363e-05, "loss": 0.5796, "step": 2029 }, { "epoch": 0.3521859819569743, "grad_norm": 1.412538766860962, "learning_rate": 3.5212489158716395e-05, "loss": 0.6929, "step": 2030 }, { "epoch": 0.3523594725884802, "grad_norm": 0.9181896448135376, "learning_rate": 3.522983521248916e-05, "loss": 0.5657, "step": 2031 }, { "epoch": 0.3525329632199861, "grad_norm": 0.7443928718566895, "learning_rate": 3.524718126626193e-05, "loss": 0.6014, "step": 2032 }, { "epoch": 0.352706453851492, "grad_norm": 0.6138537526130676, "learning_rate": 3.52645273200347e-05, "loss": 0.7174, "step": 2033 }, { "epoch": 0.3528799444829979, "grad_norm": 1.006217360496521, "learning_rate": 3.528187337380746e-05, "loss": 0.549, "step": 2034 }, { "epoch": 0.3530534351145038, "grad_norm": 0.8398961424827576, "learning_rate": 3.529921942758023e-05, "loss": 0.6268, "step": 2035 }, { "epoch": 0.3532269257460097, "grad_norm": 0.6724855303764343, "learning_rate": 3.5316565481353e-05, "loss": 0.6564, "step": 2036 }, { "epoch": 0.3534004163775156, "grad_norm": 0.8726453185081482, "learning_rate": 3.5333911535125764e-05, "loss": 0.6663, "step": 2037 }, { "epoch": 0.3535739070090215, "grad_norm": 1.2279284000396729, "learning_rate": 3.535125758889853e-05, "loss": 0.6401, "step": 2038 }, { "epoch": 0.35374739764052743, "grad_norm": 0.6089798808097839, "learning_rate": 3.5368603642671294e-05, "loss": 0.6617, "step": 2039 }, { "epoch": 0.35392088827203333, "grad_norm": 0.7994186878204346, "learning_rate": 3.538594969644406e-05, "loss": 0.632, "step": 2040 }, { "epoch": 0.35409437890353923, "grad_norm": 0.7627372145652771, "learning_rate": 3.540329575021683e-05, "loss": 0.5959, "step": 2041 }, { "epoch": 0.35426786953504513, "grad_norm": 0.6806637644767761, "learning_rate": 3.5420641803989596e-05, "loss": 0.6321, "step": 2042 }, { "epoch": 0.35444136016655103, "grad_norm": 0.8860881924629211, "learning_rate": 3.543798785776236e-05, "loss": 0.6539, "step": 2043 }, { "epoch": 0.3546148507980569, "grad_norm": 1.116827368736267, "learning_rate": 3.5455333911535126e-05, "loss": 0.694, "step": 2044 }, { "epoch": 0.3547883414295628, "grad_norm": 1.0090057849884033, "learning_rate": 3.54726799653079e-05, "loss": 0.6637, "step": 2045 }, { "epoch": 0.3549618320610687, "grad_norm": 0.9741466641426086, "learning_rate": 3.549002601908066e-05, "loss": 0.6384, "step": 2046 }, { "epoch": 0.3551353226925746, "grad_norm": 0.7068755030632019, "learning_rate": 3.550737207285343e-05, "loss": 0.6006, "step": 2047 }, { "epoch": 0.3553088133240805, "grad_norm": 1.0879136323928833, "learning_rate": 3.552471812662619e-05, "loss": 0.6957, "step": 2048 }, { "epoch": 0.3554823039555864, "grad_norm": 0.9993870854377747, "learning_rate": 3.554206418039896e-05, "loss": 0.6057, "step": 2049 }, { "epoch": 0.3556557945870923, "grad_norm": 0.7793830037117004, "learning_rate": 3.555941023417173e-05, "loss": 0.6934, "step": 2050 }, { "epoch": 0.3558292852185982, "grad_norm": 1.8161706924438477, "learning_rate": 3.5576756287944494e-05, "loss": 0.7069, "step": 2051 }, { "epoch": 0.3560027758501041, "grad_norm": 0.8136652112007141, "learning_rate": 3.559410234171726e-05, "loss": 0.5745, "step": 2052 }, { "epoch": 0.35617626648161, "grad_norm": 0.7486060857772827, "learning_rate": 3.5611448395490024e-05, "loss": 0.5691, "step": 2053 }, { "epoch": 0.3563497571131159, "grad_norm": 1.0805221796035767, "learning_rate": 3.5628794449262796e-05, "loss": 0.595, "step": 2054 }, { "epoch": 0.3565232477446218, "grad_norm": 0.8663496971130371, "learning_rate": 3.564614050303557e-05, "loss": 0.5597, "step": 2055 }, { "epoch": 0.3566967383761277, "grad_norm": 1.3997595310211182, "learning_rate": 3.566348655680833e-05, "loss": 0.5537, "step": 2056 }, { "epoch": 0.3568702290076336, "grad_norm": 1.1377601623535156, "learning_rate": 3.56808326105811e-05, "loss": 0.5688, "step": 2057 }, { "epoch": 0.3570437196391395, "grad_norm": 0.7313696146011353, "learning_rate": 3.569817866435386e-05, "loss": 0.7212, "step": 2058 }, { "epoch": 0.3572172102706454, "grad_norm": 0.9031012654304504, "learning_rate": 3.571552471812663e-05, "loss": 0.5355, "step": 2059 }, { "epoch": 0.3573907009021513, "grad_norm": 1.2563116550445557, "learning_rate": 3.57328707718994e-05, "loss": 0.5861, "step": 2060 }, { "epoch": 0.3575641915336572, "grad_norm": 0.6538774371147156, "learning_rate": 3.5750216825672165e-05, "loss": 0.7454, "step": 2061 }, { "epoch": 0.3577376821651631, "grad_norm": 0.9398738145828247, "learning_rate": 3.576756287944493e-05, "loss": 0.5988, "step": 2062 }, { "epoch": 0.35791117279666895, "grad_norm": 0.8730028867721558, "learning_rate": 3.5784908933217695e-05, "loss": 0.511, "step": 2063 }, { "epoch": 0.35808466342817485, "grad_norm": 1.0857669115066528, "learning_rate": 3.5802254986990466e-05, "loss": 0.5911, "step": 2064 }, { "epoch": 0.35825815405968076, "grad_norm": 0.6124600172042847, "learning_rate": 3.581960104076323e-05, "loss": 0.6644, "step": 2065 }, { "epoch": 0.35843164469118666, "grad_norm": 0.7109978795051575, "learning_rate": 3.5836947094535996e-05, "loss": 0.6721, "step": 2066 }, { "epoch": 0.35860513532269256, "grad_norm": 0.7086520195007324, "learning_rate": 3.585429314830876e-05, "loss": 0.647, "step": 2067 }, { "epoch": 0.35877862595419846, "grad_norm": 0.7438195943832397, "learning_rate": 3.5871639202081526e-05, "loss": 0.5869, "step": 2068 }, { "epoch": 0.35895211658570436, "grad_norm": 0.965003490447998, "learning_rate": 3.58889852558543e-05, "loss": 0.5374, "step": 2069 }, { "epoch": 0.35912560721721026, "grad_norm": 0.8170627355575562, "learning_rate": 3.590633130962706e-05, "loss": 0.5886, "step": 2070 }, { "epoch": 0.35929909784871616, "grad_norm": 2.7678844928741455, "learning_rate": 3.592367736339983e-05, "loss": 0.6692, "step": 2071 }, { "epoch": 0.35947258848022207, "grad_norm": 0.6812294125556946, "learning_rate": 3.594102341717259e-05, "loss": 0.6429, "step": 2072 }, { "epoch": 0.35964607911172797, "grad_norm": 0.8648760318756104, "learning_rate": 3.5958369470945365e-05, "loss": 0.551, "step": 2073 }, { "epoch": 0.35981956974323387, "grad_norm": 0.8667991161346436, "learning_rate": 3.597571552471813e-05, "loss": 0.5897, "step": 2074 }, { "epoch": 0.35999306037473977, "grad_norm": 2.108598232269287, "learning_rate": 3.5993061578490895e-05, "loss": 0.5443, "step": 2075 }, { "epoch": 0.36016655100624567, "grad_norm": 0.8901035189628601, "learning_rate": 3.601040763226366e-05, "loss": 0.6406, "step": 2076 }, { "epoch": 0.3603400416377516, "grad_norm": 0.6625940203666687, "learning_rate": 3.602775368603643e-05, "loss": 0.6876, "step": 2077 }, { "epoch": 0.3605135322692575, "grad_norm": 1.0242830514907837, "learning_rate": 3.60450997398092e-05, "loss": 0.6406, "step": 2078 }, { "epoch": 0.3606870229007634, "grad_norm": 0.9576663374900818, "learning_rate": 3.606244579358196e-05, "loss": 0.613, "step": 2079 }, { "epoch": 0.3608605135322693, "grad_norm": 1.330987811088562, "learning_rate": 3.607979184735473e-05, "loss": 0.5693, "step": 2080 }, { "epoch": 0.3610340041637752, "grad_norm": 0.9956892132759094, "learning_rate": 3.609713790112749e-05, "loss": 0.6139, "step": 2081 }, { "epoch": 0.3612074947952811, "grad_norm": 0.786950945854187, "learning_rate": 3.6114483954900263e-05, "loss": 0.6703, "step": 2082 }, { "epoch": 0.3613809854267869, "grad_norm": 1.808635950088501, "learning_rate": 3.613183000867303e-05, "loss": 0.5145, "step": 2083 }, { "epoch": 0.36155447605829283, "grad_norm": 0.9782841801643372, "learning_rate": 3.6149176062445793e-05, "loss": 0.5297, "step": 2084 }, { "epoch": 0.36172796668979873, "grad_norm": 0.9573488235473633, "learning_rate": 3.6166522116218565e-05, "loss": 0.6165, "step": 2085 }, { "epoch": 0.36190145732130463, "grad_norm": 0.8779812455177307, "learning_rate": 3.618386816999133e-05, "loss": 0.5905, "step": 2086 }, { "epoch": 0.36207494795281053, "grad_norm": 0.8574431538581848, "learning_rate": 3.6201214223764095e-05, "loss": 0.5747, "step": 2087 }, { "epoch": 0.36224843858431643, "grad_norm": 0.9366618990898132, "learning_rate": 3.621856027753687e-05, "loss": 0.6127, "step": 2088 }, { "epoch": 0.36242192921582234, "grad_norm": 1.0383944511413574, "learning_rate": 3.623590633130963e-05, "loss": 0.5952, "step": 2089 }, { "epoch": 0.36259541984732824, "grad_norm": 0.8221400380134583, "learning_rate": 3.62532523850824e-05, "loss": 0.5695, "step": 2090 }, { "epoch": 0.36276891047883414, "grad_norm": 0.8370509743690491, "learning_rate": 3.627059843885516e-05, "loss": 0.5514, "step": 2091 }, { "epoch": 0.36294240111034004, "grad_norm": 0.7895044684410095, "learning_rate": 3.6287944492627934e-05, "loss": 0.645, "step": 2092 }, { "epoch": 0.36311589174184594, "grad_norm": 0.8655766248703003, "learning_rate": 3.63052905464007e-05, "loss": 0.7417, "step": 2093 }, { "epoch": 0.36328938237335184, "grad_norm": 0.60938960313797, "learning_rate": 3.6322636600173464e-05, "loss": 0.7515, "step": 2094 }, { "epoch": 0.36346287300485774, "grad_norm": 0.906223714351654, "learning_rate": 3.633998265394623e-05, "loss": 0.7554, "step": 2095 }, { "epoch": 0.36363636363636365, "grad_norm": 1.096692681312561, "learning_rate": 3.6357328707719e-05, "loss": 0.6246, "step": 2096 }, { "epoch": 0.36380985426786955, "grad_norm": 0.6672376394271851, "learning_rate": 3.6374674761491766e-05, "loss": 0.7445, "step": 2097 }, { "epoch": 0.36398334489937545, "grad_norm": 0.7774509191513062, "learning_rate": 3.639202081526453e-05, "loss": 0.5376, "step": 2098 }, { "epoch": 0.36415683553088135, "grad_norm": 1.0610190629959106, "learning_rate": 3.6409366869037296e-05, "loss": 0.62, "step": 2099 }, { "epoch": 0.36433032616238725, "grad_norm": 0.8590747714042664, "learning_rate": 3.642671292281006e-05, "loss": 0.5973, "step": 2100 }, { "epoch": 0.36450381679389315, "grad_norm": 0.8628801703453064, "learning_rate": 3.644405897658283e-05, "loss": 0.6003, "step": 2101 }, { "epoch": 0.36467730742539906, "grad_norm": 0.56028813123703, "learning_rate": 3.64614050303556e-05, "loss": 0.7511, "step": 2102 }, { "epoch": 0.3648507980569049, "grad_norm": 0.7692236304283142, "learning_rate": 3.647875108412836e-05, "loss": 0.7776, "step": 2103 }, { "epoch": 0.3650242886884108, "grad_norm": 0.5963106155395508, "learning_rate": 3.649609713790113e-05, "loss": 0.7537, "step": 2104 }, { "epoch": 0.3651977793199167, "grad_norm": 0.9407646656036377, "learning_rate": 3.65134431916739e-05, "loss": 0.577, "step": 2105 }, { "epoch": 0.3653712699514226, "grad_norm": 0.6260008215904236, "learning_rate": 3.6530789245446664e-05, "loss": 0.6802, "step": 2106 }, { "epoch": 0.3655447605829285, "grad_norm": 0.7514500617980957, "learning_rate": 3.654813529921943e-05, "loss": 0.6919, "step": 2107 }, { "epoch": 0.3657182512144344, "grad_norm": 0.6525545120239258, "learning_rate": 3.6565481352992194e-05, "loss": 0.7079, "step": 2108 }, { "epoch": 0.3658917418459403, "grad_norm": 1.0511095523834229, "learning_rate": 3.658282740676496e-05, "loss": 0.663, "step": 2109 }, { "epoch": 0.3660652324774462, "grad_norm": 0.7916147708892822, "learning_rate": 3.660017346053773e-05, "loss": 0.5444, "step": 2110 }, { "epoch": 0.3662387231089521, "grad_norm": 0.689697265625, "learning_rate": 3.6617519514310496e-05, "loss": 0.6318, "step": 2111 }, { "epoch": 0.366412213740458, "grad_norm": 0.7486228346824646, "learning_rate": 3.663486556808326e-05, "loss": 0.5714, "step": 2112 }, { "epoch": 0.3665857043719639, "grad_norm": 0.8919068574905396, "learning_rate": 3.6652211621856026e-05, "loss": 0.6603, "step": 2113 }, { "epoch": 0.3667591950034698, "grad_norm": 2.1213366985321045, "learning_rate": 3.66695576756288e-05, "loss": 0.5385, "step": 2114 }, { "epoch": 0.3669326856349757, "grad_norm": 1.0311821699142456, "learning_rate": 3.668690372940157e-05, "loss": 0.5289, "step": 2115 }, { "epoch": 0.3671061762664816, "grad_norm": 0.9412194490432739, "learning_rate": 3.6704249783174335e-05, "loss": 0.546, "step": 2116 }, { "epoch": 0.3672796668979875, "grad_norm": 0.9072990417480469, "learning_rate": 3.67215958369471e-05, "loss": 0.545, "step": 2117 }, { "epoch": 0.3674531575294934, "grad_norm": 0.994905948638916, "learning_rate": 3.6738941890719865e-05, "loss": 0.6029, "step": 2118 }, { "epoch": 0.3676266481609993, "grad_norm": 1.0085077285766602, "learning_rate": 3.675628794449263e-05, "loss": 0.6748, "step": 2119 }, { "epoch": 0.3678001387925052, "grad_norm": 0.7583150267601013, "learning_rate": 3.67736339982654e-05, "loss": 0.6609, "step": 2120 }, { "epoch": 0.36797362942401113, "grad_norm": 0.9201546907424927, "learning_rate": 3.6790980052038166e-05, "loss": 0.4989, "step": 2121 }, { "epoch": 0.36814712005551703, "grad_norm": 0.6300575137138367, "learning_rate": 3.680832610581093e-05, "loss": 0.5645, "step": 2122 }, { "epoch": 0.3683206106870229, "grad_norm": 1.0931706428527832, "learning_rate": 3.6825672159583696e-05, "loss": 0.6112, "step": 2123 }, { "epoch": 0.3684941013185288, "grad_norm": 0.850147008895874, "learning_rate": 3.684301821335647e-05, "loss": 0.5879, "step": 2124 }, { "epoch": 0.3686675919500347, "grad_norm": 0.8981357216835022, "learning_rate": 3.686036426712923e-05, "loss": 0.6051, "step": 2125 }, { "epoch": 0.3688410825815406, "grad_norm": 0.7931495308876038, "learning_rate": 3.6877710320902e-05, "loss": 0.7314, "step": 2126 }, { "epoch": 0.3690145732130465, "grad_norm": 0.832497775554657, "learning_rate": 3.689505637467476e-05, "loss": 0.7456, "step": 2127 }, { "epoch": 0.3691880638445524, "grad_norm": 0.9802011251449585, "learning_rate": 3.691240242844753e-05, "loss": 0.6581, "step": 2128 }, { "epoch": 0.3693615544760583, "grad_norm": 0.8732492327690125, "learning_rate": 3.69297484822203e-05, "loss": 0.6853, "step": 2129 }, { "epoch": 0.3695350451075642, "grad_norm": 2.1102023124694824, "learning_rate": 3.6947094535993065e-05, "loss": 0.7095, "step": 2130 }, { "epoch": 0.3697085357390701, "grad_norm": 0.8344833254814148, "learning_rate": 3.696444058976583e-05, "loss": 0.5624, "step": 2131 }, { "epoch": 0.369882026370576, "grad_norm": 0.7335735559463501, "learning_rate": 3.6981786643538595e-05, "loss": 0.6384, "step": 2132 }, { "epoch": 0.3700555170020819, "grad_norm": 0.808387815952301, "learning_rate": 3.699913269731137e-05, "loss": 0.5967, "step": 2133 }, { "epoch": 0.3702290076335878, "grad_norm": 0.8561879992485046, "learning_rate": 3.701647875108413e-05, "loss": 0.6221, "step": 2134 }, { "epoch": 0.3704024982650937, "grad_norm": 1.3011711835861206, "learning_rate": 3.70338248048569e-05, "loss": 0.6879, "step": 2135 }, { "epoch": 0.3705759888965996, "grad_norm": 0.8561848998069763, "learning_rate": 3.705117085862966e-05, "loss": 0.5869, "step": 2136 }, { "epoch": 0.3707494795281055, "grad_norm": 3.0262610912323, "learning_rate": 3.7068516912402433e-05, "loss": 0.6707, "step": 2137 }, { "epoch": 0.3709229701596114, "grad_norm": 0.6116523742675781, "learning_rate": 3.70858629661752e-05, "loss": 0.6661, "step": 2138 }, { "epoch": 0.3710964607911173, "grad_norm": 0.6808441281318665, "learning_rate": 3.7103209019947963e-05, "loss": 0.6449, "step": 2139 }, { "epoch": 0.3712699514226232, "grad_norm": 0.9481117129325867, "learning_rate": 3.712055507372073e-05, "loss": 0.5366, "step": 2140 }, { "epoch": 0.3714434420541291, "grad_norm": 3.1375207901000977, "learning_rate": 3.7137901127493493e-05, "loss": 0.6429, "step": 2141 }, { "epoch": 0.37161693268563495, "grad_norm": 0.6729617714881897, "learning_rate": 3.7155247181266265e-05, "loss": 0.6078, "step": 2142 }, { "epoch": 0.37179042331714085, "grad_norm": 0.6703296899795532, "learning_rate": 3.717259323503903e-05, "loss": 0.6831, "step": 2143 }, { "epoch": 0.37196391394864675, "grad_norm": 0.8146363496780396, "learning_rate": 3.71899392888118e-05, "loss": 0.6257, "step": 2144 }, { "epoch": 0.37213740458015265, "grad_norm": 0.7584531903266907, "learning_rate": 3.720728534258457e-05, "loss": 0.6805, "step": 2145 }, { "epoch": 0.37231089521165855, "grad_norm": 0.8820028901100159, "learning_rate": 3.722463139635733e-05, "loss": 0.5426, "step": 2146 }, { "epoch": 0.37248438584316446, "grad_norm": 0.6640870571136475, "learning_rate": 3.72419774501301e-05, "loss": 0.6062, "step": 2147 }, { "epoch": 0.37265787647467036, "grad_norm": 0.8160288333892822, "learning_rate": 3.725932350390287e-05, "loss": 0.653, "step": 2148 }, { "epoch": 0.37283136710617626, "grad_norm": 0.9772170782089233, "learning_rate": 3.7276669557675634e-05, "loss": 0.5541, "step": 2149 }, { "epoch": 0.37300485773768216, "grad_norm": 0.8377646207809448, "learning_rate": 3.72940156114484e-05, "loss": 0.569, "step": 2150 }, { "epoch": 0.37317834836918806, "grad_norm": 0.7429654002189636, "learning_rate": 3.7311361665221164e-05, "loss": 0.545, "step": 2151 }, { "epoch": 0.37335183900069396, "grad_norm": 0.965164065361023, "learning_rate": 3.7328707718993936e-05, "loss": 0.5671, "step": 2152 }, { "epoch": 0.37352532963219987, "grad_norm": 0.7786089181900024, "learning_rate": 3.73460537727667e-05, "loss": 0.7173, "step": 2153 }, { "epoch": 0.37369882026370577, "grad_norm": 1.0826573371887207, "learning_rate": 3.7363399826539466e-05, "loss": 0.6929, "step": 2154 }, { "epoch": 0.37387231089521167, "grad_norm": 1.2865610122680664, "learning_rate": 3.738074588031223e-05, "loss": 0.5865, "step": 2155 }, { "epoch": 0.37404580152671757, "grad_norm": 0.9463958144187927, "learning_rate": 3.7398091934085e-05, "loss": 0.5681, "step": 2156 }, { "epoch": 0.37421929215822347, "grad_norm": 0.966681182384491, "learning_rate": 3.741543798785777e-05, "loss": 0.6877, "step": 2157 }, { "epoch": 0.3743927827897294, "grad_norm": 0.8081557154655457, "learning_rate": 3.743278404163053e-05, "loss": 0.7155, "step": 2158 }, { "epoch": 0.3745662734212353, "grad_norm": 0.5800716280937195, "learning_rate": 3.74501300954033e-05, "loss": 0.687, "step": 2159 }, { "epoch": 0.3747397640527412, "grad_norm": 0.863257646560669, "learning_rate": 3.746747614917606e-05, "loss": 0.5291, "step": 2160 }, { "epoch": 0.3749132546842471, "grad_norm": 0.7452552914619446, "learning_rate": 3.7484822202948834e-05, "loss": 0.637, "step": 2161 }, { "epoch": 0.3750867453157529, "grad_norm": 0.9935975074768066, "learning_rate": 3.75021682567216e-05, "loss": 0.5657, "step": 2162 }, { "epoch": 0.3752602359472588, "grad_norm": 0.7122426629066467, "learning_rate": 3.7519514310494364e-05, "loss": 0.58, "step": 2163 }, { "epoch": 0.3754337265787647, "grad_norm": 0.6864352226257324, "learning_rate": 3.753686036426713e-05, "loss": 0.666, "step": 2164 }, { "epoch": 0.3756072172102706, "grad_norm": 0.7913309335708618, "learning_rate": 3.75542064180399e-05, "loss": 0.5684, "step": 2165 }, { "epoch": 0.37578070784177653, "grad_norm": 0.819497287273407, "learning_rate": 3.7571552471812666e-05, "loss": 0.8098, "step": 2166 }, { "epoch": 0.37595419847328243, "grad_norm": 0.7337230443954468, "learning_rate": 3.758889852558543e-05, "loss": 0.6859, "step": 2167 }, { "epoch": 0.37612768910478833, "grad_norm": 1.0212225914001465, "learning_rate": 3.7606244579358196e-05, "loss": 0.5035, "step": 2168 }, { "epoch": 0.37630117973629423, "grad_norm": 0.9265641570091248, "learning_rate": 3.762359063313096e-05, "loss": 0.5811, "step": 2169 }, { "epoch": 0.37647467036780013, "grad_norm": 0.9080912470817566, "learning_rate": 3.764093668690373e-05, "loss": 0.7583, "step": 2170 }, { "epoch": 0.37664816099930604, "grad_norm": 1.1696559190750122, "learning_rate": 3.76582827406765e-05, "loss": 0.5392, "step": 2171 }, { "epoch": 0.37682165163081194, "grad_norm": 0.7206262946128845, "learning_rate": 3.767562879444926e-05, "loss": 0.6002, "step": 2172 }, { "epoch": 0.37699514226231784, "grad_norm": 0.7880340814590454, "learning_rate": 3.7692974848222034e-05, "loss": 0.6541, "step": 2173 }, { "epoch": 0.37716863289382374, "grad_norm": 0.7043308019638062, "learning_rate": 3.77103209019948e-05, "loss": 0.6379, "step": 2174 }, { "epoch": 0.37734212352532964, "grad_norm": 1.0066626071929932, "learning_rate": 3.772766695576757e-05, "loss": 0.6377, "step": 2175 }, { "epoch": 0.37751561415683554, "grad_norm": 0.7290460467338562, "learning_rate": 3.7745013009540336e-05, "loss": 0.6566, "step": 2176 }, { "epoch": 0.37768910478834145, "grad_norm": 0.8432542681694031, "learning_rate": 3.77623590633131e-05, "loss": 0.5568, "step": 2177 }, { "epoch": 0.37786259541984735, "grad_norm": 1.1949617862701416, "learning_rate": 3.7779705117085866e-05, "loss": 0.6653, "step": 2178 }, { "epoch": 0.37803608605135325, "grad_norm": 0.9642347097396851, "learning_rate": 3.779705117085863e-05, "loss": 0.6412, "step": 2179 }, { "epoch": 0.37820957668285915, "grad_norm": 0.8353285193443298, "learning_rate": 3.78143972246314e-05, "loss": 0.6458, "step": 2180 }, { "epoch": 0.37838306731436505, "grad_norm": 0.7169858813285828, "learning_rate": 3.783174327840417e-05, "loss": 0.7122, "step": 2181 }, { "epoch": 0.3785565579458709, "grad_norm": 0.6582884788513184, "learning_rate": 3.784908933217693e-05, "loss": 0.6321, "step": 2182 }, { "epoch": 0.3787300485773768, "grad_norm": 0.9503910541534424, "learning_rate": 3.78664353859497e-05, "loss": 0.5455, "step": 2183 }, { "epoch": 0.3789035392088827, "grad_norm": 0.8722729682922363, "learning_rate": 3.788378143972247e-05, "loss": 0.7169, "step": 2184 }, { "epoch": 0.3790770298403886, "grad_norm": 0.6877573728561401, "learning_rate": 3.7901127493495235e-05, "loss": 0.6674, "step": 2185 }, { "epoch": 0.3792505204718945, "grad_norm": 0.8797959685325623, "learning_rate": 3.7918473547268e-05, "loss": 0.6737, "step": 2186 }, { "epoch": 0.3794240111034004, "grad_norm": 0.9650540351867676, "learning_rate": 3.7935819601040765e-05, "loss": 0.7108, "step": 2187 }, { "epoch": 0.3795975017349063, "grad_norm": 0.8198643326759338, "learning_rate": 3.795316565481353e-05, "loss": 0.6161, "step": 2188 }, { "epoch": 0.3797709923664122, "grad_norm": 1.009889841079712, "learning_rate": 3.79705117085863e-05, "loss": 0.601, "step": 2189 }, { "epoch": 0.3799444829979181, "grad_norm": 0.7123552560806274, "learning_rate": 3.7987857762359067e-05, "loss": 0.6511, "step": 2190 }, { "epoch": 0.380117973629424, "grad_norm": 0.8433806896209717, "learning_rate": 3.800520381613183e-05, "loss": 0.5269, "step": 2191 }, { "epoch": 0.3802914642609299, "grad_norm": 0.9886137247085571, "learning_rate": 3.8022549869904597e-05, "loss": 0.5284, "step": 2192 }, { "epoch": 0.3804649548924358, "grad_norm": 1.609928011894226, "learning_rate": 3.803989592367737e-05, "loss": 0.6855, "step": 2193 }, { "epoch": 0.3806384455239417, "grad_norm": 0.5896164178848267, "learning_rate": 3.805724197745013e-05, "loss": 0.7257, "step": 2194 }, { "epoch": 0.3808119361554476, "grad_norm": 0.7190706729888916, "learning_rate": 3.80745880312229e-05, "loss": 0.6545, "step": 2195 }, { "epoch": 0.3809854267869535, "grad_norm": 0.7872779369354248, "learning_rate": 3.809193408499566e-05, "loss": 0.5746, "step": 2196 }, { "epoch": 0.3811589174184594, "grad_norm": 0.9195467233657837, "learning_rate": 3.8109280138768435e-05, "loss": 0.6157, "step": 2197 }, { "epoch": 0.3813324080499653, "grad_norm": 1.1707857847213745, "learning_rate": 3.81266261925412e-05, "loss": 0.6997, "step": 2198 }, { "epoch": 0.3815058986814712, "grad_norm": 0.7408986687660217, "learning_rate": 3.8143972246313965e-05, "loss": 0.618, "step": 2199 }, { "epoch": 0.3816793893129771, "grad_norm": 0.7133910059928894, "learning_rate": 3.816131830008673e-05, "loss": 0.7285, "step": 2200 }, { "epoch": 0.38185287994448297, "grad_norm": 0.6581571698188782, "learning_rate": 3.8178664353859495e-05, "loss": 0.6235, "step": 2201 }, { "epoch": 0.38202637057598887, "grad_norm": 0.792733907699585, "learning_rate": 3.819601040763227e-05, "loss": 0.5786, "step": 2202 }, { "epoch": 0.3821998612074948, "grad_norm": 0.9168846607208252, "learning_rate": 3.821335646140504e-05, "loss": 0.573, "step": 2203 }, { "epoch": 0.3823733518390007, "grad_norm": 0.6650002002716064, "learning_rate": 3.8230702515177804e-05, "loss": 0.6843, "step": 2204 }, { "epoch": 0.3825468424705066, "grad_norm": 0.8428970575332642, "learning_rate": 3.824804856895057e-05, "loss": 0.5229, "step": 2205 }, { "epoch": 0.3827203331020125, "grad_norm": 0.8378449082374573, "learning_rate": 3.8265394622723334e-05, "loss": 0.5596, "step": 2206 }, { "epoch": 0.3828938237335184, "grad_norm": 0.6730502843856812, "learning_rate": 3.82827406764961e-05, "loss": 0.5841, "step": 2207 }, { "epoch": 0.3830673143650243, "grad_norm": 0.9363775849342346, "learning_rate": 3.830008673026887e-05, "loss": 0.6696, "step": 2208 }, { "epoch": 0.3832408049965302, "grad_norm": 0.9516682624816895, "learning_rate": 3.8317432784041636e-05, "loss": 0.6219, "step": 2209 }, { "epoch": 0.3834142956280361, "grad_norm": 0.9467013478279114, "learning_rate": 3.83347788378144e-05, "loss": 0.5372, "step": 2210 }, { "epoch": 0.383587786259542, "grad_norm": 1.103040099143982, "learning_rate": 3.8352124891587166e-05, "loss": 0.5616, "step": 2211 }, { "epoch": 0.3837612768910479, "grad_norm": 0.8702182769775391, "learning_rate": 3.836947094535994e-05, "loss": 0.5455, "step": 2212 }, { "epoch": 0.3839347675225538, "grad_norm": 1.0493888854980469, "learning_rate": 3.83868169991327e-05, "loss": 0.6152, "step": 2213 }, { "epoch": 0.3841082581540597, "grad_norm": 0.7445110082626343, "learning_rate": 3.840416305290547e-05, "loss": 0.5646, "step": 2214 }, { "epoch": 0.3842817487855656, "grad_norm": 0.9141197204589844, "learning_rate": 3.842150910667823e-05, "loss": 0.5713, "step": 2215 }, { "epoch": 0.3844552394170715, "grad_norm": 0.7753279209136963, "learning_rate": 3.8438855160451004e-05, "loss": 0.7032, "step": 2216 }, { "epoch": 0.3846287300485774, "grad_norm": 0.8179315328598022, "learning_rate": 3.845620121422377e-05, "loss": 0.6094, "step": 2217 }, { "epoch": 0.3848022206800833, "grad_norm": 1.1109416484832764, "learning_rate": 3.8473547267996534e-05, "loss": 0.5817, "step": 2218 }, { "epoch": 0.3849757113115892, "grad_norm": 0.778189480304718, "learning_rate": 3.84908933217693e-05, "loss": 0.6007, "step": 2219 }, { "epoch": 0.3851492019430951, "grad_norm": 0.5885578393936157, "learning_rate": 3.8508239375542064e-05, "loss": 0.7057, "step": 2220 }, { "epoch": 0.38532269257460094, "grad_norm": 0.9650107026100159, "learning_rate": 3.8525585429314836e-05, "loss": 0.6461, "step": 2221 }, { "epoch": 0.38549618320610685, "grad_norm": 0.7100480198860168, "learning_rate": 3.85429314830876e-05, "loss": 0.6464, "step": 2222 }, { "epoch": 0.38566967383761275, "grad_norm": 0.7356059551239014, "learning_rate": 3.8560277536860366e-05, "loss": 0.7244, "step": 2223 }, { "epoch": 0.38584316446911865, "grad_norm": 0.8487452268600464, "learning_rate": 3.857762359063313e-05, "loss": 0.6389, "step": 2224 }, { "epoch": 0.38601665510062455, "grad_norm": 2.1440274715423584, "learning_rate": 3.85949696444059e-05, "loss": 0.5741, "step": 2225 }, { "epoch": 0.38619014573213045, "grad_norm": 1.0137948989868164, "learning_rate": 3.861231569817867e-05, "loss": 0.5248, "step": 2226 }, { "epoch": 0.38636363636363635, "grad_norm": 2.05366849899292, "learning_rate": 3.862966175195143e-05, "loss": 0.6107, "step": 2227 }, { "epoch": 0.38653712699514226, "grad_norm": 0.663736879825592, "learning_rate": 3.86470078057242e-05, "loss": 0.6602, "step": 2228 }, { "epoch": 0.38671061762664816, "grad_norm": 0.797764778137207, "learning_rate": 3.866435385949696e-05, "loss": 0.6271, "step": 2229 }, { "epoch": 0.38688410825815406, "grad_norm": 1.0539405345916748, "learning_rate": 3.8681699913269734e-05, "loss": 0.7142, "step": 2230 }, { "epoch": 0.38705759888965996, "grad_norm": 0.8324370384216309, "learning_rate": 3.86990459670425e-05, "loss": 0.6418, "step": 2231 }, { "epoch": 0.38723108952116586, "grad_norm": 1.3873273134231567, "learning_rate": 3.8716392020815264e-05, "loss": 0.5942, "step": 2232 }, { "epoch": 0.38740458015267176, "grad_norm": 1.3949419260025024, "learning_rate": 3.8733738074588036e-05, "loss": 0.6537, "step": 2233 }, { "epoch": 0.38757807078417766, "grad_norm": 0.9070783257484436, "learning_rate": 3.87510841283608e-05, "loss": 0.6234, "step": 2234 }, { "epoch": 0.38775156141568357, "grad_norm": 0.8216388821601868, "learning_rate": 3.8768430182133566e-05, "loss": 0.6975, "step": 2235 }, { "epoch": 0.38792505204718947, "grad_norm": 0.7318658828735352, "learning_rate": 3.878577623590634e-05, "loss": 0.7463, "step": 2236 }, { "epoch": 0.38809854267869537, "grad_norm": 1.1542892456054688, "learning_rate": 3.88031222896791e-05, "loss": 0.5834, "step": 2237 }, { "epoch": 0.38827203331020127, "grad_norm": 0.7708222270011902, "learning_rate": 3.882046834345187e-05, "loss": 0.6576, "step": 2238 }, { "epoch": 0.38844552394170717, "grad_norm": 0.8864904642105103, "learning_rate": 3.883781439722463e-05, "loss": 0.6471, "step": 2239 }, { "epoch": 0.3886190145732131, "grad_norm": 0.9263260960578918, "learning_rate": 3.8855160450997405e-05, "loss": 0.6013, "step": 2240 }, { "epoch": 0.3887925052047189, "grad_norm": 0.9893810153007507, "learning_rate": 3.887250650477017e-05, "loss": 0.6096, "step": 2241 }, { "epoch": 0.3889659958362248, "grad_norm": 1.1212811470031738, "learning_rate": 3.8889852558542935e-05, "loss": 0.5563, "step": 2242 }, { "epoch": 0.3891394864677307, "grad_norm": 0.6433964371681213, "learning_rate": 3.89071986123157e-05, "loss": 0.7335, "step": 2243 }, { "epoch": 0.3893129770992366, "grad_norm": 1.5604666471481323, "learning_rate": 3.892454466608847e-05, "loss": 0.5704, "step": 2244 }, { "epoch": 0.3894864677307425, "grad_norm": 2.013279438018799, "learning_rate": 3.8941890719861237e-05, "loss": 0.6403, "step": 2245 }, { "epoch": 0.3896599583622484, "grad_norm": 0.7819218039512634, "learning_rate": 3.8959236773634e-05, "loss": 0.5585, "step": 2246 }, { "epoch": 0.38983344899375433, "grad_norm": 1.1008758544921875, "learning_rate": 3.8976582827406767e-05, "loss": 0.5536, "step": 2247 }, { "epoch": 0.39000693962526023, "grad_norm": 1.0997666120529175, "learning_rate": 3.899392888117953e-05, "loss": 0.6185, "step": 2248 }, { "epoch": 0.39018043025676613, "grad_norm": 1.2280099391937256, "learning_rate": 3.90112749349523e-05, "loss": 0.6738, "step": 2249 }, { "epoch": 0.39035392088827203, "grad_norm": 0.8849902749061584, "learning_rate": 3.902862098872507e-05, "loss": 0.6309, "step": 2250 }, { "epoch": 0.39052741151977793, "grad_norm": 0.9604009389877319, "learning_rate": 3.904596704249783e-05, "loss": 0.5986, "step": 2251 }, { "epoch": 0.39070090215128384, "grad_norm": 0.6952579617500305, "learning_rate": 3.90633130962706e-05, "loss": 0.6101, "step": 2252 }, { "epoch": 0.39087439278278974, "grad_norm": 1.2840449810028076, "learning_rate": 3.908065915004337e-05, "loss": 0.6625, "step": 2253 }, { "epoch": 0.39104788341429564, "grad_norm": 0.8691678047180176, "learning_rate": 3.9098005203816135e-05, "loss": 0.5641, "step": 2254 }, { "epoch": 0.39122137404580154, "grad_norm": 1.1951429843902588, "learning_rate": 3.91153512575889e-05, "loss": 0.5901, "step": 2255 }, { "epoch": 0.39139486467730744, "grad_norm": 1.7035776376724243, "learning_rate": 3.9132697311361665e-05, "loss": 0.5787, "step": 2256 }, { "epoch": 0.39156835530881334, "grad_norm": 0.8311005234718323, "learning_rate": 3.915004336513444e-05, "loss": 0.6257, "step": 2257 }, { "epoch": 0.39174184594031924, "grad_norm": 0.9469131827354431, "learning_rate": 3.91673894189072e-05, "loss": 0.5909, "step": 2258 }, { "epoch": 0.39191533657182515, "grad_norm": 1.100370168685913, "learning_rate": 3.918473547267997e-05, "loss": 0.6981, "step": 2259 }, { "epoch": 0.39208882720333105, "grad_norm": 0.7783090472221375, "learning_rate": 3.920208152645273e-05, "loss": 0.6653, "step": 2260 }, { "epoch": 0.3922623178348369, "grad_norm": 1.446513295173645, "learning_rate": 3.92194275802255e-05, "loss": 0.6958, "step": 2261 }, { "epoch": 0.3924358084663428, "grad_norm": 0.6650233864784241, "learning_rate": 3.923677363399827e-05, "loss": 0.7476, "step": 2262 }, { "epoch": 0.3926092990978487, "grad_norm": 0.9387511014938354, "learning_rate": 3.925411968777104e-05, "loss": 0.533, "step": 2263 }, { "epoch": 0.3927827897293546, "grad_norm": 0.9248531460762024, "learning_rate": 3.9271465741543805e-05, "loss": 0.6064, "step": 2264 }, { "epoch": 0.3929562803608605, "grad_norm": 0.8621205687522888, "learning_rate": 3.928881179531657e-05, "loss": 0.639, "step": 2265 }, { "epoch": 0.3931297709923664, "grad_norm": 0.9346652030944824, "learning_rate": 3.9306157849089335e-05, "loss": 0.6278, "step": 2266 }, { "epoch": 0.3933032616238723, "grad_norm": 0.9083680510520935, "learning_rate": 3.93235039028621e-05, "loss": 0.5778, "step": 2267 }, { "epoch": 0.3934767522553782, "grad_norm": 1.2940402030944824, "learning_rate": 3.934084995663487e-05, "loss": 0.6317, "step": 2268 }, { "epoch": 0.3936502428868841, "grad_norm": 1.2326910495758057, "learning_rate": 3.935819601040764e-05, "loss": 0.5909, "step": 2269 }, { "epoch": 0.39382373351839, "grad_norm": 0.8138448596000671, "learning_rate": 3.93755420641804e-05, "loss": 0.6462, "step": 2270 }, { "epoch": 0.3939972241498959, "grad_norm": 0.7268115878105164, "learning_rate": 3.939288811795317e-05, "loss": 0.6428, "step": 2271 }, { "epoch": 0.3941707147814018, "grad_norm": 0.7852060198783875, "learning_rate": 3.941023417172594e-05, "loss": 0.6348, "step": 2272 }, { "epoch": 0.3943442054129077, "grad_norm": 0.7785980105400085, "learning_rate": 3.9427580225498704e-05, "loss": 0.5223, "step": 2273 }, { "epoch": 0.3945176960444136, "grad_norm": 0.8455443978309631, "learning_rate": 3.944492627927147e-05, "loss": 0.5814, "step": 2274 }, { "epoch": 0.3946911866759195, "grad_norm": 0.972460150718689, "learning_rate": 3.9462272333044234e-05, "loss": 0.6086, "step": 2275 }, { "epoch": 0.3948646773074254, "grad_norm": 1.0304902791976929, "learning_rate": 3.9479618386817006e-05, "loss": 0.7295, "step": 2276 }, { "epoch": 0.3950381679389313, "grad_norm": 0.7572159767150879, "learning_rate": 3.949696444058977e-05, "loss": 0.7764, "step": 2277 }, { "epoch": 0.3952116585704372, "grad_norm": 1.0979856252670288, "learning_rate": 3.9514310494362536e-05, "loss": 0.6222, "step": 2278 }, { "epoch": 0.3953851492019431, "grad_norm": 0.7576919198036194, "learning_rate": 3.95316565481353e-05, "loss": 0.6149, "step": 2279 }, { "epoch": 0.39555863983344897, "grad_norm": 0.682855486869812, "learning_rate": 3.9549002601908066e-05, "loss": 0.6792, "step": 2280 }, { "epoch": 0.39573213046495487, "grad_norm": 0.8346561193466187, "learning_rate": 3.956634865568084e-05, "loss": 0.6987, "step": 2281 }, { "epoch": 0.39590562109646077, "grad_norm": 0.7672377824783325, "learning_rate": 3.95836947094536e-05, "loss": 0.7587, "step": 2282 }, { "epoch": 0.39607911172796667, "grad_norm": 0.830708920955658, "learning_rate": 3.960104076322637e-05, "loss": 0.5814, "step": 2283 }, { "epoch": 0.39625260235947257, "grad_norm": 0.7514001131057739, "learning_rate": 3.961838681699913e-05, "loss": 0.5084, "step": 2284 }, { "epoch": 0.3964260929909785, "grad_norm": 0.5804221630096436, "learning_rate": 3.9635732870771904e-05, "loss": 0.6553, "step": 2285 }, { "epoch": 0.3965995836224844, "grad_norm": 1.231408953666687, "learning_rate": 3.965307892454467e-05, "loss": 0.5291, "step": 2286 }, { "epoch": 0.3967730742539903, "grad_norm": 0.8249265551567078, "learning_rate": 3.9670424978317434e-05, "loss": 0.5966, "step": 2287 }, { "epoch": 0.3969465648854962, "grad_norm": 0.8786765336990356, "learning_rate": 3.96877710320902e-05, "loss": 0.6144, "step": 2288 }, { "epoch": 0.3971200555170021, "grad_norm": 0.9649869203567505, "learning_rate": 3.9705117085862964e-05, "loss": 0.5176, "step": 2289 }, { "epoch": 0.397293546148508, "grad_norm": 0.6982461214065552, "learning_rate": 3.9722463139635736e-05, "loss": 0.5854, "step": 2290 }, { "epoch": 0.3974670367800139, "grad_norm": 0.9511545896530151, "learning_rate": 3.97398091934085e-05, "loss": 0.6732, "step": 2291 }, { "epoch": 0.3976405274115198, "grad_norm": 0.6593926548957825, "learning_rate": 3.975715524718127e-05, "loss": 0.8062, "step": 2292 }, { "epoch": 0.3978140180430257, "grad_norm": 0.7266292572021484, "learning_rate": 3.977450130095404e-05, "loss": 0.6166, "step": 2293 }, { "epoch": 0.3979875086745316, "grad_norm": 1.044750452041626, "learning_rate": 3.97918473547268e-05, "loss": 0.7644, "step": 2294 }, { "epoch": 0.3981609993060375, "grad_norm": 0.8857741355895996, "learning_rate": 3.980919340849957e-05, "loss": 0.5856, "step": 2295 }, { "epoch": 0.3983344899375434, "grad_norm": 0.6424846649169922, "learning_rate": 3.982653946227234e-05, "loss": 0.6316, "step": 2296 }, { "epoch": 0.3985079805690493, "grad_norm": 1.01796555519104, "learning_rate": 3.9843885516045105e-05, "loss": 0.5682, "step": 2297 }, { "epoch": 0.3986814712005552, "grad_norm": 0.8512812256813049, "learning_rate": 3.986123156981787e-05, "loss": 0.5811, "step": 2298 }, { "epoch": 0.3988549618320611, "grad_norm": 1.0336955785751343, "learning_rate": 3.9878577623590635e-05, "loss": 0.5056, "step": 2299 }, { "epoch": 0.39902845246356694, "grad_norm": 1.2619831562042236, "learning_rate": 3.9895923677363406e-05, "loss": 0.5651, "step": 2300 }, { "epoch": 0.39920194309507284, "grad_norm": 0.985720694065094, "learning_rate": 3.991326973113617e-05, "loss": 0.5935, "step": 2301 }, { "epoch": 0.39937543372657874, "grad_norm": 1.572470784187317, "learning_rate": 3.9930615784908936e-05, "loss": 0.7886, "step": 2302 }, { "epoch": 0.39954892435808465, "grad_norm": 1.0250521898269653, "learning_rate": 3.99479618386817e-05, "loss": 0.6128, "step": 2303 }, { "epoch": 0.39972241498959055, "grad_norm": 0.7787879705429077, "learning_rate": 3.996530789245447e-05, "loss": 0.6128, "step": 2304 }, { "epoch": 0.39989590562109645, "grad_norm": 0.8346136212348938, "learning_rate": 3.998265394622724e-05, "loss": 0.5773, "step": 2305 }, { "epoch": 0.40006939625260235, "grad_norm": 0.7871057391166687, "learning_rate": 4e-05, "loss": 0.6619, "step": 2306 }, { "epoch": 0.40024288688410825, "grad_norm": 0.909366250038147, "learning_rate": 3.9999998839488355e-05, "loss": 0.658, "step": 2307 }, { "epoch": 0.40041637751561415, "grad_norm": 0.6434295177459717, "learning_rate": 3.999999535795353e-05, "loss": 0.6929, "step": 2308 }, { "epoch": 0.40058986814712005, "grad_norm": 0.7157797813415527, "learning_rate": 3.999998955539594e-05, "loss": 0.6593, "step": 2309 }, { "epoch": 0.40076335877862596, "grad_norm": 0.8653424382209778, "learning_rate": 3.9999981431816256e-05, "loss": 0.7112, "step": 2310 }, { "epoch": 0.40093684941013186, "grad_norm": 0.9613097310066223, "learning_rate": 3.999997098721543e-05, "loss": 0.5836, "step": 2311 }, { "epoch": 0.40111034004163776, "grad_norm": 1.0514494180679321, "learning_rate": 3.999995822159466e-05, "loss": 0.762, "step": 2312 }, { "epoch": 0.40128383067314366, "grad_norm": 0.7570793032646179, "learning_rate": 3.9999943134955436e-05, "loss": 0.6372, "step": 2313 }, { "epoch": 0.40145732130464956, "grad_norm": 0.8194951415061951, "learning_rate": 3.9999925727299505e-05, "loss": 0.6851, "step": 2314 }, { "epoch": 0.40163081193615546, "grad_norm": 0.7198679447174072, "learning_rate": 3.999990599862889e-05, "loss": 0.5604, "step": 2315 }, { "epoch": 0.40180430256766136, "grad_norm": 0.9790434837341309, "learning_rate": 3.999988394894588e-05, "loss": 0.6573, "step": 2316 }, { "epoch": 0.40197779319916727, "grad_norm": 0.7335717082023621, "learning_rate": 3.999985957825303e-05, "loss": 0.6274, "step": 2317 }, { "epoch": 0.40215128383067317, "grad_norm": 0.8409496545791626, "learning_rate": 3.999983288655318e-05, "loss": 0.6637, "step": 2318 }, { "epoch": 0.40232477446217907, "grad_norm": 1.3137797117233276, "learning_rate": 3.999980387384941e-05, "loss": 0.6411, "step": 2319 }, { "epoch": 0.4024982650936849, "grad_norm": 0.6379951238632202, "learning_rate": 3.9999772540145104e-05, "loss": 0.6807, "step": 2320 }, { "epoch": 0.4026717557251908, "grad_norm": 0.7253673672676086, "learning_rate": 3.9999738885443885e-05, "loss": 0.6781, "step": 2321 }, { "epoch": 0.4028452463566967, "grad_norm": 0.7153136134147644, "learning_rate": 3.999970290974967e-05, "loss": 0.6554, "step": 2322 }, { "epoch": 0.4030187369882026, "grad_norm": 2.572360038757324, "learning_rate": 3.9999664613066615e-05, "loss": 0.6593, "step": 2323 }, { "epoch": 0.4031922276197085, "grad_norm": 0.7657721042633057, "learning_rate": 3.999962399539919e-05, "loss": 0.6445, "step": 2324 }, { "epoch": 0.4033657182512144, "grad_norm": 2.1976494789123535, "learning_rate": 3.9999581056752085e-05, "loss": 0.5967, "step": 2325 }, { "epoch": 0.4035392088827203, "grad_norm": 0.9684531092643738, "learning_rate": 3.9999535797130304e-05, "loss": 0.6548, "step": 2326 }, { "epoch": 0.4037126995142262, "grad_norm": 0.7884881496429443, "learning_rate": 3.999948821653908e-05, "loss": 0.6665, "step": 2327 }, { "epoch": 0.4038861901457321, "grad_norm": 0.6548993587493896, "learning_rate": 3.999943831498395e-05, "loss": 0.6117, "step": 2328 }, { "epoch": 0.40405968077723803, "grad_norm": 0.7876485586166382, "learning_rate": 3.99993860924707e-05, "loss": 0.6306, "step": 2329 }, { "epoch": 0.40423317140874393, "grad_norm": 0.7914551496505737, "learning_rate": 3.9999331549005394e-05, "loss": 0.6366, "step": 2330 }, { "epoch": 0.40440666204024983, "grad_norm": 0.9158231019973755, "learning_rate": 3.999927468459435e-05, "loss": 0.5707, "step": 2331 }, { "epoch": 0.40458015267175573, "grad_norm": 0.8651548624038696, "learning_rate": 3.999921549924418e-05, "loss": 0.5966, "step": 2332 }, { "epoch": 0.40475364330326163, "grad_norm": 0.7848436236381531, "learning_rate": 3.999915399296175e-05, "loss": 0.6642, "step": 2333 }, { "epoch": 0.40492713393476754, "grad_norm": 0.721215009689331, "learning_rate": 3.999909016575419e-05, "loss": 0.6163, "step": 2334 }, { "epoch": 0.40510062456627344, "grad_norm": 0.8471186757087708, "learning_rate": 3.999902401762892e-05, "loss": 0.7141, "step": 2335 }, { "epoch": 0.40527411519777934, "grad_norm": 0.7264529466629028, "learning_rate": 3.99989555485936e-05, "loss": 0.7271, "step": 2336 }, { "epoch": 0.40544760582928524, "grad_norm": 0.7853133082389832, "learning_rate": 3.999888475865619e-05, "loss": 0.6869, "step": 2337 }, { "epoch": 0.40562109646079114, "grad_norm": 1.0160361528396606, "learning_rate": 3.99988116478249e-05, "loss": 0.6592, "step": 2338 }, { "epoch": 0.40579458709229704, "grad_norm": 1.0822649002075195, "learning_rate": 3.999873621610822e-05, "loss": 0.554, "step": 2339 }, { "epoch": 0.4059680777238029, "grad_norm": 0.8811540007591248, "learning_rate": 3.999865846351489e-05, "loss": 0.6926, "step": 2340 }, { "epoch": 0.4061415683553088, "grad_norm": 0.7317081689834595, "learning_rate": 3.999857839005395e-05, "loss": 0.5583, "step": 2341 }, { "epoch": 0.4063150589868147, "grad_norm": 0.7234801650047302, "learning_rate": 3.9998495995734677e-05, "loss": 0.6863, "step": 2342 }, { "epoch": 0.4064885496183206, "grad_norm": 0.7849730253219604, "learning_rate": 3.999841128056664e-05, "loss": 0.6707, "step": 2343 }, { "epoch": 0.4066620402498265, "grad_norm": 0.6559509634971619, "learning_rate": 3.999832424455968e-05, "loss": 0.6493, "step": 2344 }, { "epoch": 0.4068355308813324, "grad_norm": 0.7420839071273804, "learning_rate": 3.999823488772388e-05, "loss": 0.5861, "step": 2345 }, { "epoch": 0.4070090215128383, "grad_norm": 0.8226618766784668, "learning_rate": 3.999814321006963e-05, "loss": 0.5984, "step": 2346 }, { "epoch": 0.4071825121443442, "grad_norm": 0.8272169828414917, "learning_rate": 3.9998049211607546e-05, "loss": 0.7593, "step": 2347 }, { "epoch": 0.4073560027758501, "grad_norm": 0.6217381954193115, "learning_rate": 3.999795289234856e-05, "loss": 0.6067, "step": 2348 }, { "epoch": 0.407529493407356, "grad_norm": 1.3326365947723389, "learning_rate": 3.9997854252303826e-05, "loss": 0.524, "step": 2349 }, { "epoch": 0.4077029840388619, "grad_norm": 0.9312711954116821, "learning_rate": 3.9997753291484816e-05, "loss": 0.6646, "step": 2350 }, { "epoch": 0.4078764746703678, "grad_norm": 0.7296499609947205, "learning_rate": 3.9997650009903226e-05, "loss": 0.722, "step": 2351 }, { "epoch": 0.4080499653018737, "grad_norm": 0.6808332800865173, "learning_rate": 3.999754440757105e-05, "loss": 0.6744, "step": 2352 }, { "epoch": 0.4082234559333796, "grad_norm": 1.1659239530563354, "learning_rate": 3.999743648450055e-05, "loss": 0.5813, "step": 2353 }, { "epoch": 0.4083969465648855, "grad_norm": 1.0359535217285156, "learning_rate": 3.999732624070424e-05, "loss": 0.5223, "step": 2354 }, { "epoch": 0.4085704371963914, "grad_norm": 0.7763608694076538, "learning_rate": 3.999721367619492e-05, "loss": 0.6569, "step": 2355 }, { "epoch": 0.4087439278278973, "grad_norm": 0.8380506038665771, "learning_rate": 3.999709879098565e-05, "loss": 0.7256, "step": 2356 }, { "epoch": 0.4089174184594032, "grad_norm": 1.13246488571167, "learning_rate": 3.999698158508977e-05, "loss": 0.5399, "step": 2357 }, { "epoch": 0.4090909090909091, "grad_norm": 1.2012083530426025, "learning_rate": 3.999686205852087e-05, "loss": 0.5126, "step": 2358 }, { "epoch": 0.40926439972241496, "grad_norm": 1.7218319177627563, "learning_rate": 3.999674021129283e-05, "loss": 0.5927, "step": 2359 }, { "epoch": 0.40943789035392086, "grad_norm": 1.1242047548294067, "learning_rate": 3.999661604341978e-05, "loss": 0.4895, "step": 2360 }, { "epoch": 0.40961138098542677, "grad_norm": 0.841090977191925, "learning_rate": 3.9996489554916145e-05, "loss": 0.6304, "step": 2361 }, { "epoch": 0.40978487161693267, "grad_norm": 1.1248053312301636, "learning_rate": 3.99963607457966e-05, "loss": 0.6895, "step": 2362 }, { "epoch": 0.40995836224843857, "grad_norm": 0.9558753967285156, "learning_rate": 3.9996229616076086e-05, "loss": 0.7213, "step": 2363 }, { "epoch": 0.41013185287994447, "grad_norm": 1.1117990016937256, "learning_rate": 3.999609616576982e-05, "loss": 0.699, "step": 2364 }, { "epoch": 0.41030534351145037, "grad_norm": 0.8498603701591492, "learning_rate": 3.99959603948933e-05, "loss": 0.6716, "step": 2365 }, { "epoch": 0.4104788341429563, "grad_norm": 0.6780929565429688, "learning_rate": 3.9995822303462273e-05, "loss": 0.6489, "step": 2366 }, { "epoch": 0.4106523247744622, "grad_norm": 1.1830792427062988, "learning_rate": 3.9995681891492774e-05, "loss": 0.7004, "step": 2367 }, { "epoch": 0.4108258154059681, "grad_norm": 0.7626097202301025, "learning_rate": 3.9995539159001074e-05, "loss": 0.6608, "step": 2368 }, { "epoch": 0.410999306037474, "grad_norm": 0.8115872740745544, "learning_rate": 3.999539410600378e-05, "loss": 0.6238, "step": 2369 }, { "epoch": 0.4111727966689799, "grad_norm": 0.9422028660774231, "learning_rate": 3.999524673251768e-05, "loss": 0.6399, "step": 2370 }, { "epoch": 0.4113462873004858, "grad_norm": 0.8287187218666077, "learning_rate": 3.999509703855991e-05, "loss": 0.7007, "step": 2371 }, { "epoch": 0.4115197779319917, "grad_norm": 0.9184843301773071, "learning_rate": 3.999494502414783e-05, "loss": 0.6715, "step": 2372 }, { "epoch": 0.4116932685634976, "grad_norm": 0.680824339389801, "learning_rate": 3.999479068929907e-05, "loss": 0.6639, "step": 2373 }, { "epoch": 0.4118667591950035, "grad_norm": 1.0199631452560425, "learning_rate": 3.999463403403156e-05, "loss": 0.6597, "step": 2374 }, { "epoch": 0.4120402498265094, "grad_norm": 0.8106906414031982, "learning_rate": 3.999447505836347e-05, "loss": 0.6565, "step": 2375 }, { "epoch": 0.4122137404580153, "grad_norm": 0.8393969535827637, "learning_rate": 3.999431376231326e-05, "loss": 0.679, "step": 2376 }, { "epoch": 0.4123872310895212, "grad_norm": 1.0296335220336914, "learning_rate": 3.999415014589963e-05, "loss": 0.5259, "step": 2377 }, { "epoch": 0.4125607217210271, "grad_norm": 1.2408167123794556, "learning_rate": 3.9993984209141576e-05, "loss": 0.4758, "step": 2378 }, { "epoch": 0.41273421235253294, "grad_norm": 0.6542762517929077, "learning_rate": 3.999381595205836e-05, "loss": 0.6488, "step": 2379 }, { "epoch": 0.41290770298403884, "grad_norm": 0.9614208340644836, "learning_rate": 3.999364537466951e-05, "loss": 0.5717, "step": 2380 }, { "epoch": 0.41308119361554474, "grad_norm": 0.9062381386756897, "learning_rate": 3.999347247699481e-05, "loss": 0.6156, "step": 2381 }, { "epoch": 0.41325468424705064, "grad_norm": 0.7734870314598083, "learning_rate": 3.999329725905434e-05, "loss": 0.5577, "step": 2382 }, { "epoch": 0.41342817487855654, "grad_norm": 0.7082153558731079, "learning_rate": 3.999311972086842e-05, "loss": 0.5798, "step": 2383 }, { "epoch": 0.41360166551006244, "grad_norm": 0.83006751537323, "learning_rate": 3.999293986245766e-05, "loss": 0.6296, "step": 2384 }, { "epoch": 0.41377515614156835, "grad_norm": 1.2090071439743042, "learning_rate": 3.999275768384294e-05, "loss": 0.5714, "step": 2385 }, { "epoch": 0.41394864677307425, "grad_norm": 0.6968817710876465, "learning_rate": 3.9992573185045386e-05, "loss": 0.5883, "step": 2386 }, { "epoch": 0.41412213740458015, "grad_norm": 1.0211082696914673, "learning_rate": 3.9992386366086415e-05, "loss": 0.693, "step": 2387 }, { "epoch": 0.41429562803608605, "grad_norm": 1.071164846420288, "learning_rate": 3.9992197226987725e-05, "loss": 0.5638, "step": 2388 }, { "epoch": 0.41446911866759195, "grad_norm": 1.1762923002243042, "learning_rate": 3.9992005767771236e-05, "loss": 0.616, "step": 2389 }, { "epoch": 0.41464260929909785, "grad_norm": 1.2997300624847412, "learning_rate": 3.999181198845919e-05, "loss": 0.571, "step": 2390 }, { "epoch": 0.41481609993060375, "grad_norm": 0.7499904632568359, "learning_rate": 3.9991615889074065e-05, "loss": 0.7039, "step": 2391 }, { "epoch": 0.41498959056210966, "grad_norm": 1.0127307176589966, "learning_rate": 3.999141746963862e-05, "loss": 0.6185, "step": 2392 }, { "epoch": 0.41516308119361556, "grad_norm": 1.1571502685546875, "learning_rate": 3.999121673017589e-05, "loss": 0.5881, "step": 2393 }, { "epoch": 0.41533657182512146, "grad_norm": 0.8128833770751953, "learning_rate": 3.999101367070916e-05, "loss": 0.5934, "step": 2394 }, { "epoch": 0.41551006245662736, "grad_norm": 1.1349921226501465, "learning_rate": 3.9990808291262e-05, "loss": 0.5839, "step": 2395 }, { "epoch": 0.41568355308813326, "grad_norm": 1.5129714012145996, "learning_rate": 3.9990600591858244e-05, "loss": 0.676, "step": 2396 }, { "epoch": 0.41585704371963916, "grad_norm": 0.7934991717338562, "learning_rate": 3.9990390572522e-05, "loss": 0.6084, "step": 2397 }, { "epoch": 0.41603053435114506, "grad_norm": 0.8803777694702148, "learning_rate": 3.999017823327762e-05, "loss": 0.6567, "step": 2398 }, { "epoch": 0.4162040249826509, "grad_norm": 0.8170890808105469, "learning_rate": 3.998996357414978e-05, "loss": 0.7646, "step": 2399 }, { "epoch": 0.4163775156141568, "grad_norm": 0.7423005700111389, "learning_rate": 3.9989746595163364e-05, "loss": 0.587, "step": 2400 }, { "epoch": 0.4165510062456627, "grad_norm": 1.7788679599761963, "learning_rate": 3.998952729634357e-05, "loss": 0.6564, "step": 2401 }, { "epoch": 0.4167244968771686, "grad_norm": 1.6209176778793335, "learning_rate": 3.998930567771583e-05, "loss": 0.666, "step": 2402 }, { "epoch": 0.4168979875086745, "grad_norm": 0.9754631519317627, "learning_rate": 3.998908173930589e-05, "loss": 0.5767, "step": 2403 }, { "epoch": 0.4170714781401804, "grad_norm": 1.4091540575027466, "learning_rate": 3.998885548113971e-05, "loss": 0.5999, "step": 2404 }, { "epoch": 0.4172449687716863, "grad_norm": 2.70163631439209, "learning_rate": 3.998862690324357e-05, "loss": 0.7211, "step": 2405 }, { "epoch": 0.4174184594031922, "grad_norm": 1.098078727722168, "learning_rate": 3.998839600564398e-05, "loss": 0.5623, "step": 2406 }, { "epoch": 0.4175919500346981, "grad_norm": 1.1452560424804688, "learning_rate": 3.9988162788367744e-05, "loss": 0.6671, "step": 2407 }, { "epoch": 0.417765440666204, "grad_norm": 1.117812156677246, "learning_rate": 3.998792725144192e-05, "loss": 0.5447, "step": 2408 }, { "epoch": 0.4179389312977099, "grad_norm": 1.441041350364685, "learning_rate": 3.9987689394893855e-05, "loss": 0.6554, "step": 2409 }, { "epoch": 0.4181124219292158, "grad_norm": 1.0478700399398804, "learning_rate": 3.9987449218751134e-05, "loss": 0.66, "step": 2410 }, { "epoch": 0.41828591256072173, "grad_norm": 1.5800617933273315, "learning_rate": 3.9987206723041654e-05, "loss": 0.6804, "step": 2411 }, { "epoch": 0.41845940319222763, "grad_norm": 1.2367937564849854, "learning_rate": 3.998696190779354e-05, "loss": 0.5527, "step": 2412 }, { "epoch": 0.41863289382373353, "grad_norm": 1.0617979764938354, "learning_rate": 3.9986714773035207e-05, "loss": 0.6426, "step": 2413 }, { "epoch": 0.41880638445523943, "grad_norm": 1.3675651550292969, "learning_rate": 3.9986465318795336e-05, "loss": 0.6241, "step": 2414 }, { "epoch": 0.41897987508674533, "grad_norm": 1.568328619003296, "learning_rate": 3.998621354510288e-05, "loss": 0.5544, "step": 2415 }, { "epoch": 0.41915336571825124, "grad_norm": 0.8526886105537415, "learning_rate": 3.998595945198705e-05, "loss": 0.7128, "step": 2416 }, { "epoch": 0.41932685634975714, "grad_norm": 0.8320168852806091, "learning_rate": 3.998570303947733e-05, "loss": 0.6757, "step": 2417 }, { "epoch": 0.41950034698126304, "grad_norm": 0.9623420834541321, "learning_rate": 3.9985444307603497e-05, "loss": 0.5509, "step": 2418 }, { "epoch": 0.4196738376127689, "grad_norm": 0.8692437410354614, "learning_rate": 3.998518325639556e-05, "loss": 0.6351, "step": 2419 }, { "epoch": 0.4198473282442748, "grad_norm": 1.0957146883010864, "learning_rate": 3.998491988588381e-05, "loss": 0.6073, "step": 2420 }, { "epoch": 0.4200208188757807, "grad_norm": 1.278006672859192, "learning_rate": 3.9984654196098825e-05, "loss": 0.5454, "step": 2421 }, { "epoch": 0.4201943095072866, "grad_norm": 0.9353867173194885, "learning_rate": 3.998438618707144e-05, "loss": 0.6455, "step": 2422 }, { "epoch": 0.4203678001387925, "grad_norm": 0.9579692482948303, "learning_rate": 3.998411585883274e-05, "loss": 0.5718, "step": 2423 }, { "epoch": 0.4205412907702984, "grad_norm": 0.7727288603782654, "learning_rate": 3.9983843211414124e-05, "loss": 0.6592, "step": 2424 }, { "epoch": 0.4207147814018043, "grad_norm": 0.9824633002281189, "learning_rate": 3.998356824484721e-05, "loss": 0.6136, "step": 2425 }, { "epoch": 0.4208882720333102, "grad_norm": 1.1013127565383911, "learning_rate": 3.9983290959163914e-05, "loss": 0.6833, "step": 2426 }, { "epoch": 0.4210617626648161, "grad_norm": 1.01968252658844, "learning_rate": 3.998301135439642e-05, "loss": 0.7205, "step": 2427 }, { "epoch": 0.421235253296322, "grad_norm": 1.1779837608337402, "learning_rate": 3.998272943057717e-05, "loss": 0.7167, "step": 2428 }, { "epoch": 0.4214087439278279, "grad_norm": 1.0232571363449097, "learning_rate": 3.9982445187738885e-05, "loss": 0.6255, "step": 2429 }, { "epoch": 0.4215822345593338, "grad_norm": 0.6751450896263123, "learning_rate": 3.998215862591455e-05, "loss": 0.6083, "step": 2430 }, { "epoch": 0.4217557251908397, "grad_norm": 1.1975443363189697, "learning_rate": 3.998186974513743e-05, "loss": 0.6542, "step": 2431 }, { "epoch": 0.4219292158223456, "grad_norm": 0.7380514144897461, "learning_rate": 3.998157854544104e-05, "loss": 0.7418, "step": 2432 }, { "epoch": 0.4221027064538515, "grad_norm": 0.8603578209877014, "learning_rate": 3.998128502685917e-05, "loss": 0.6229, "step": 2433 }, { "epoch": 0.4222761970853574, "grad_norm": 1.667292594909668, "learning_rate": 3.99809891894259e-05, "loss": 0.5339, "step": 2434 }, { "epoch": 0.4224496877168633, "grad_norm": 2.614480972290039, "learning_rate": 3.998069103317555e-05, "loss": 0.597, "step": 2435 }, { "epoch": 0.4226231783483692, "grad_norm": 0.8586104512214661, "learning_rate": 3.998039055814272e-05, "loss": 0.7209, "step": 2436 }, { "epoch": 0.4227966689798751, "grad_norm": 0.9705440998077393, "learning_rate": 3.998008776436228e-05, "loss": 0.6768, "step": 2437 }, { "epoch": 0.42297015961138096, "grad_norm": 0.8429592847824097, "learning_rate": 3.9979782651869384e-05, "loss": 0.6238, "step": 2438 }, { "epoch": 0.42314365024288686, "grad_norm": 1.0563840866088867, "learning_rate": 3.997947522069942e-05, "loss": 0.5486, "step": 2439 }, { "epoch": 0.42331714087439276, "grad_norm": 2.9745025634765625, "learning_rate": 3.997916547088808e-05, "loss": 0.5063, "step": 2440 }, { "epoch": 0.42349063150589866, "grad_norm": 1.5186116695404053, "learning_rate": 3.9978853402471306e-05, "loss": 0.693, "step": 2441 }, { "epoch": 0.42366412213740456, "grad_norm": 1.7089169025421143, "learning_rate": 3.997853901548532e-05, "loss": 0.5458, "step": 2442 }, { "epoch": 0.42383761276891047, "grad_norm": 1.2330875396728516, "learning_rate": 3.9978222309966594e-05, "loss": 0.6323, "step": 2443 }, { "epoch": 0.42401110340041637, "grad_norm": 1.1696192026138306, "learning_rate": 3.9977903285951896e-05, "loss": 0.5769, "step": 2444 }, { "epoch": 0.42418459403192227, "grad_norm": 1.1445473432540894, "learning_rate": 3.9977581943478236e-05, "loss": 0.5812, "step": 2445 }, { "epoch": 0.42435808466342817, "grad_norm": 1.298656702041626, "learning_rate": 3.9977258282582916e-05, "loss": 0.5652, "step": 2446 }, { "epoch": 0.42453157529493407, "grad_norm": 0.8933175206184387, "learning_rate": 3.99769323033035e-05, "loss": 0.6921, "step": 2447 }, { "epoch": 0.42470506592644, "grad_norm": 0.7664970755577087, "learning_rate": 3.99766040056778e-05, "loss": 0.7361, "step": 2448 }, { "epoch": 0.4248785565579459, "grad_norm": 0.8455670475959778, "learning_rate": 3.997627338974394e-05, "loss": 0.6073, "step": 2449 }, { "epoch": 0.4250520471894518, "grad_norm": 1.244389533996582, "learning_rate": 3.997594045554027e-05, "loss": 0.5999, "step": 2450 }, { "epoch": 0.4252255378209577, "grad_norm": 1.0283491611480713, "learning_rate": 3.9975605203105434e-05, "loss": 0.6758, "step": 2451 }, { "epoch": 0.4253990284524636, "grad_norm": 0.840735673904419, "learning_rate": 3.9975267632478336e-05, "loss": 0.6875, "step": 2452 }, { "epoch": 0.4255725190839695, "grad_norm": 0.906441867351532, "learning_rate": 3.997492774369816e-05, "loss": 0.6664, "step": 2453 }, { "epoch": 0.4257460097154754, "grad_norm": 1.0706926584243774, "learning_rate": 3.997458553680434e-05, "loss": 0.6334, "step": 2454 }, { "epoch": 0.4259195003469813, "grad_norm": 1.6295770406723022, "learning_rate": 3.9974241011836594e-05, "loss": 0.5881, "step": 2455 }, { "epoch": 0.4260929909784872, "grad_norm": 1.4740289449691772, "learning_rate": 3.99738941688349e-05, "loss": 0.73, "step": 2456 }, { "epoch": 0.4262664816099931, "grad_norm": 0.9593019485473633, "learning_rate": 3.997354500783952e-05, "loss": 0.5723, "step": 2457 }, { "epoch": 0.42643997224149893, "grad_norm": 0.8586723804473877, "learning_rate": 3.997319352889096e-05, "loss": 0.6282, "step": 2458 }, { "epoch": 0.42661346287300483, "grad_norm": 1.7914944887161255, "learning_rate": 3.997283973203003e-05, "loss": 0.6292, "step": 2459 }, { "epoch": 0.42678695350451074, "grad_norm": 1.2020705938339233, "learning_rate": 3.997248361729777e-05, "loss": 0.588, "step": 2460 }, { "epoch": 0.42696044413601664, "grad_norm": 1.2364118099212646, "learning_rate": 3.9972125184735505e-05, "loss": 0.6376, "step": 2461 }, { "epoch": 0.42713393476752254, "grad_norm": 2.9780807495117188, "learning_rate": 3.997176443438485e-05, "loss": 0.6693, "step": 2462 }, { "epoch": 0.42730742539902844, "grad_norm": 1.4414304494857788, "learning_rate": 3.9971401366287666e-05, "loss": 0.516, "step": 2463 }, { "epoch": 0.42748091603053434, "grad_norm": 1.0700013637542725, "learning_rate": 3.997103598048607e-05, "loss": 0.551, "step": 2464 }, { "epoch": 0.42765440666204024, "grad_norm": 0.8656531572341919, "learning_rate": 3.997066827702248e-05, "loss": 0.535, "step": 2465 }, { "epoch": 0.42782789729354614, "grad_norm": 1.2760169506072998, "learning_rate": 3.9970298255939564e-05, "loss": 0.5417, "step": 2466 }, { "epoch": 0.42800138792505205, "grad_norm": 1.130365252494812, "learning_rate": 3.9969925917280276e-05, "loss": 0.6696, "step": 2467 }, { "epoch": 0.42817487855655795, "grad_norm": 0.8799391984939575, "learning_rate": 3.9969551261087806e-05, "loss": 0.5598, "step": 2468 }, { "epoch": 0.42834836918806385, "grad_norm": 1.3091599941253662, "learning_rate": 3.996917428740565e-05, "loss": 0.5651, "step": 2469 }, { "epoch": 0.42852185981956975, "grad_norm": 1.28733491897583, "learning_rate": 3.996879499627754e-05, "loss": 0.5994, "step": 2470 }, { "epoch": 0.42869535045107565, "grad_norm": 1.048340916633606, "learning_rate": 3.996841338774751e-05, "loss": 0.6121, "step": 2471 }, { "epoch": 0.42886884108258155, "grad_norm": 1.209240198135376, "learning_rate": 3.996802946185984e-05, "loss": 0.7847, "step": 2472 }, { "epoch": 0.42904233171408745, "grad_norm": 1.1296306848526, "learning_rate": 3.996764321865907e-05, "loss": 0.6157, "step": 2473 }, { "epoch": 0.42921582234559336, "grad_norm": 0.9123029708862305, "learning_rate": 3.9967254658190055e-05, "loss": 0.5289, "step": 2474 }, { "epoch": 0.42938931297709926, "grad_norm": 1.675567626953125, "learning_rate": 3.996686378049786e-05, "loss": 0.6039, "step": 2475 }, { "epoch": 0.42956280360860516, "grad_norm": 0.8229936957359314, "learning_rate": 3.996647058562786e-05, "loss": 0.6963, "step": 2476 }, { "epoch": 0.42973629424011106, "grad_norm": 1.2111272811889648, "learning_rate": 3.9966075073625684e-05, "loss": 0.6133, "step": 2477 }, { "epoch": 0.4299097848716169, "grad_norm": 2.042346715927124, "learning_rate": 3.9965677244537226e-05, "loss": 0.703, "step": 2478 }, { "epoch": 0.4300832755031228, "grad_norm": 0.8567150235176086, "learning_rate": 3.9965277098408666e-05, "loss": 0.6338, "step": 2479 }, { "epoch": 0.4302567661346287, "grad_norm": 0.9839659929275513, "learning_rate": 3.9964874635286436e-05, "loss": 0.6497, "step": 2480 }, { "epoch": 0.4304302567661346, "grad_norm": 1.4143627882003784, "learning_rate": 3.996446985521723e-05, "loss": 0.5919, "step": 2481 }, { "epoch": 0.4306037473976405, "grad_norm": 1.1674302816390991, "learning_rate": 3.996406275824804e-05, "loss": 0.6061, "step": 2482 }, { "epoch": 0.4307772380291464, "grad_norm": 1.1941511631011963, "learning_rate": 3.996365334442611e-05, "loss": 0.708, "step": 2483 }, { "epoch": 0.4309507286606523, "grad_norm": 1.0614922046661377, "learning_rate": 3.996324161379894e-05, "loss": 0.4954, "step": 2484 }, { "epoch": 0.4311242192921582, "grad_norm": 1.6599159240722656, "learning_rate": 3.996282756641432e-05, "loss": 0.6565, "step": 2485 }, { "epoch": 0.4312977099236641, "grad_norm": 1.4952170848846436, "learning_rate": 3.9962411202320296e-05, "loss": 0.5383, "step": 2486 }, { "epoch": 0.43147120055517, "grad_norm": 1.8975037336349487, "learning_rate": 3.99619925215652e-05, "loss": 0.7227, "step": 2487 }, { "epoch": 0.4316446911866759, "grad_norm": 1.1745805740356445, "learning_rate": 3.99615715241976e-05, "loss": 0.6337, "step": 2488 }, { "epoch": 0.4318181818181818, "grad_norm": 2.31195068359375, "learning_rate": 3.996114821026638e-05, "loss": 0.6847, "step": 2489 }, { "epoch": 0.4319916724496877, "grad_norm": 1.2115309238433838, "learning_rate": 3.996072257982064e-05, "loss": 0.6565, "step": 2490 }, { "epoch": 0.4321651630811936, "grad_norm": 2.326763153076172, "learning_rate": 3.996029463290978e-05, "loss": 0.693, "step": 2491 }, { "epoch": 0.4323386537126995, "grad_norm": 0.8984289169311523, "learning_rate": 3.9959864369583485e-05, "loss": 0.5587, "step": 2492 }, { "epoch": 0.43251214434420543, "grad_norm": 1.6303499937057495, "learning_rate": 3.9959431789891665e-05, "loss": 0.7015, "step": 2493 }, { "epoch": 0.43268563497571133, "grad_norm": 1.4040231704711914, "learning_rate": 3.9958996893884525e-05, "loss": 0.6107, "step": 2494 }, { "epoch": 0.43285912560721723, "grad_norm": 2.0959885120391846, "learning_rate": 3.9958559681612544e-05, "loss": 0.5842, "step": 2495 }, { "epoch": 0.43303261623872313, "grad_norm": 6.375994682312012, "learning_rate": 3.9958120153126454e-05, "loss": 0.7133, "step": 2496 }, { "epoch": 0.43320610687022904, "grad_norm": 2.2398011684417725, "learning_rate": 3.995767830847726e-05, "loss": 0.627, "step": 2497 }, { "epoch": 0.4333795975017349, "grad_norm": 0.9163916707038879, "learning_rate": 3.995723414771625e-05, "loss": 0.567, "step": 2498 }, { "epoch": 0.4335530881332408, "grad_norm": 3.787116289138794, "learning_rate": 3.9956787670894954e-05, "loss": 0.5774, "step": 2499 }, { "epoch": 0.4337265787647467, "grad_norm": 0.7828592658042908, "learning_rate": 3.9956338878065205e-05, "loss": 0.6929, "step": 2500 }, { "epoch": 0.4339000693962526, "grad_norm": 0.9084985852241516, "learning_rate": 3.995588776927907e-05, "loss": 0.5309, "step": 2501 }, { "epoch": 0.4340735600277585, "grad_norm": 1.0282264947891235, "learning_rate": 3.99554343445889e-05, "loss": 0.5529, "step": 2502 }, { "epoch": 0.4342470506592644, "grad_norm": 1.0289828777313232, "learning_rate": 3.995497860404733e-05, "loss": 0.5729, "step": 2503 }, { "epoch": 0.4344205412907703, "grad_norm": 1.0444369316101074, "learning_rate": 3.995452054770724e-05, "loss": 0.5541, "step": 2504 }, { "epoch": 0.4345940319222762, "grad_norm": 0.9299532175064087, "learning_rate": 3.995406017562179e-05, "loss": 0.6864, "step": 2505 }, { "epoch": 0.4347675225537821, "grad_norm": 2.2198166847229004, "learning_rate": 3.99535974878444e-05, "loss": 0.5369, "step": 2506 }, { "epoch": 0.434941013185288, "grad_norm": 1.6497248411178589, "learning_rate": 3.995313248442878e-05, "loss": 0.5876, "step": 2507 }, { "epoch": 0.4351145038167939, "grad_norm": 1.0134512186050415, "learning_rate": 3.995266516542887e-05, "loss": 0.7045, "step": 2508 }, { "epoch": 0.4352879944482998, "grad_norm": 0.901813805103302, "learning_rate": 3.9952195530898926e-05, "loss": 0.5161, "step": 2509 }, { "epoch": 0.4354614850798057, "grad_norm": 1.057944655418396, "learning_rate": 3.995172358089344e-05, "loss": 0.5239, "step": 2510 }, { "epoch": 0.4356349757113116, "grad_norm": 0.9936065673828125, "learning_rate": 3.9951249315467194e-05, "loss": 0.6049, "step": 2511 }, { "epoch": 0.4358084663428175, "grad_norm": 1.1527905464172363, "learning_rate": 3.995077273467521e-05, "loss": 0.5753, "step": 2512 }, { "epoch": 0.4359819569743234, "grad_norm": 5.697759628295898, "learning_rate": 3.99502938385728e-05, "loss": 0.6766, "step": 2513 }, { "epoch": 0.4361554476058293, "grad_norm": 1.6759312152862549, "learning_rate": 3.994981262721555e-05, "loss": 0.5149, "step": 2514 }, { "epoch": 0.4363289382373352, "grad_norm": 1.1284745931625366, "learning_rate": 3.994932910065929e-05, "loss": 0.5564, "step": 2515 }, { "epoch": 0.4365024288688411, "grad_norm": 0.9783058762550354, "learning_rate": 3.9948843258960154e-05, "loss": 0.5891, "step": 2516 }, { "epoch": 0.43667591950034695, "grad_norm": 1.159525990486145, "learning_rate": 3.9948355102174503e-05, "loss": 0.6738, "step": 2517 }, { "epoch": 0.43684941013185286, "grad_norm": 0.8247144222259521, "learning_rate": 3.9947864630359005e-05, "loss": 0.5682, "step": 2518 }, { "epoch": 0.43702290076335876, "grad_norm": 1.1474746465682983, "learning_rate": 3.9947371843570565e-05, "loss": 0.6608, "step": 2519 }, { "epoch": 0.43719639139486466, "grad_norm": 0.994800865650177, "learning_rate": 3.994687674186638e-05, "loss": 0.5623, "step": 2520 }, { "epoch": 0.43736988202637056, "grad_norm": 1.6168469190597534, "learning_rate": 3.994637932530391e-05, "loss": 0.6833, "step": 2521 }, { "epoch": 0.43754337265787646, "grad_norm": 0.8193566203117371, "learning_rate": 3.9945879593940874e-05, "loss": 0.7202, "step": 2522 }, { "epoch": 0.43771686328938236, "grad_norm": 1.0501503944396973, "learning_rate": 3.994537754783527e-05, "loss": 0.6912, "step": 2523 }, { "epoch": 0.43789035392088826, "grad_norm": 0.9368448257446289, "learning_rate": 3.994487318704536e-05, "loss": 0.616, "step": 2524 }, { "epoch": 0.43806384455239417, "grad_norm": 0.9665560722351074, "learning_rate": 3.994436651162969e-05, "loss": 0.5624, "step": 2525 }, { "epoch": 0.43823733518390007, "grad_norm": 0.9095457196235657, "learning_rate": 3.994385752164703e-05, "loss": 0.561, "step": 2526 }, { "epoch": 0.43841082581540597, "grad_norm": 1.1988638639450073, "learning_rate": 3.994334621715647e-05, "loss": 0.5768, "step": 2527 }, { "epoch": 0.43858431644691187, "grad_norm": 0.9439307451248169, "learning_rate": 3.9942832598217345e-05, "loss": 0.6707, "step": 2528 }, { "epoch": 0.43875780707841777, "grad_norm": 1.55705726146698, "learning_rate": 3.9942316664889255e-05, "loss": 0.543, "step": 2529 }, { "epoch": 0.4389312977099237, "grad_norm": 1.1340587139129639, "learning_rate": 3.9941798417232084e-05, "loss": 0.5366, "step": 2530 }, { "epoch": 0.4391047883414296, "grad_norm": 1.848889708518982, "learning_rate": 3.994127785530596e-05, "loss": 0.5619, "step": 2531 }, { "epoch": 0.4392782789729355, "grad_norm": 0.8365806341171265, "learning_rate": 3.9940754979171317e-05, "loss": 0.6888, "step": 2532 }, { "epoch": 0.4394517696044414, "grad_norm": 0.9113152027130127, "learning_rate": 3.994022978888882e-05, "loss": 0.5945, "step": 2533 }, { "epoch": 0.4396252602359473, "grad_norm": 0.8680764436721802, "learning_rate": 3.9939702284519416e-05, "loss": 0.5983, "step": 2534 }, { "epoch": 0.4397987508674532, "grad_norm": 1.0888547897338867, "learning_rate": 3.993917246612433e-05, "loss": 0.5275, "step": 2535 }, { "epoch": 0.4399722414989591, "grad_norm": 0.8546169400215149, "learning_rate": 3.9938640333765046e-05, "loss": 0.5636, "step": 2536 }, { "epoch": 0.44014573213046493, "grad_norm": 0.8106816411018372, "learning_rate": 3.993810588750332e-05, "loss": 0.6598, "step": 2537 }, { "epoch": 0.44031922276197083, "grad_norm": 1.2467848062515259, "learning_rate": 3.993756912740117e-05, "loss": 0.7937, "step": 2538 }, { "epoch": 0.44049271339347673, "grad_norm": 0.7479244470596313, "learning_rate": 3.993703005352089e-05, "loss": 0.6787, "step": 2539 }, { "epoch": 0.44066620402498263, "grad_norm": 0.8020234107971191, "learning_rate": 3.9936488665925045e-05, "loss": 0.7542, "step": 2540 }, { "epoch": 0.44083969465648853, "grad_norm": 0.9814688563346863, "learning_rate": 3.993594496467646e-05, "loss": 0.5591, "step": 2541 }, { "epoch": 0.44101318528799444, "grad_norm": 0.9027915596961975, "learning_rate": 3.993539894983823e-05, "loss": 0.549, "step": 2542 }, { "epoch": 0.44118667591950034, "grad_norm": 1.0879294872283936, "learning_rate": 3.993485062147372e-05, "loss": 0.5062, "step": 2543 }, { "epoch": 0.44136016655100624, "grad_norm": 0.9362106323242188, "learning_rate": 3.993429997964657e-05, "loss": 0.6223, "step": 2544 }, { "epoch": 0.44153365718251214, "grad_norm": 0.9086353778839111, "learning_rate": 3.993374702442068e-05, "loss": 0.6256, "step": 2545 }, { "epoch": 0.44170714781401804, "grad_norm": 0.7001194357872009, "learning_rate": 3.993319175586021e-05, "loss": 0.663, "step": 2546 }, { "epoch": 0.44188063844552394, "grad_norm": 0.9094083905220032, "learning_rate": 3.993263417402962e-05, "loss": 0.6094, "step": 2547 }, { "epoch": 0.44205412907702984, "grad_norm": 1.1739511489868164, "learning_rate": 3.9932074278993604e-05, "loss": 0.5942, "step": 2548 }, { "epoch": 0.44222761970853575, "grad_norm": 1.1181588172912598, "learning_rate": 3.993151207081715e-05, "loss": 0.6553, "step": 2549 }, { "epoch": 0.44240111034004165, "grad_norm": 3.685606002807617, "learning_rate": 3.993094754956549e-05, "loss": 0.7366, "step": 2550 }, { "epoch": 0.44257460097154755, "grad_norm": 0.9214552044868469, "learning_rate": 3.9930380715304143e-05, "loss": 0.7626, "step": 2551 }, { "epoch": 0.44274809160305345, "grad_norm": 0.7478047013282776, "learning_rate": 3.992981156809889e-05, "loss": 0.7194, "step": 2552 }, { "epoch": 0.44292158223455935, "grad_norm": 0.7857555747032166, "learning_rate": 3.992924010801578e-05, "loss": 0.6995, "step": 2553 }, { "epoch": 0.44309507286606525, "grad_norm": 1.4917502403259277, "learning_rate": 3.9928666335121135e-05, "loss": 0.6264, "step": 2554 }, { "epoch": 0.44326856349757116, "grad_norm": 0.5582311153411865, "learning_rate": 3.992809024948154e-05, "loss": 0.7217, "step": 2555 }, { "epoch": 0.44344205412907706, "grad_norm": 1.3408156633377075, "learning_rate": 3.992751185116385e-05, "loss": 0.5981, "step": 2556 }, { "epoch": 0.4436155447605829, "grad_norm": 0.8039281964302063, "learning_rate": 3.992693114023519e-05, "loss": 0.6396, "step": 2557 }, { "epoch": 0.4437890353920888, "grad_norm": 1.0995712280273438, "learning_rate": 3.992634811676296e-05, "loss": 0.5409, "step": 2558 }, { "epoch": 0.4439625260235947, "grad_norm": 0.8444967865943909, "learning_rate": 3.9925762780814804e-05, "loss": 0.5554, "step": 2559 }, { "epoch": 0.4441360166551006, "grad_norm": 0.9820617437362671, "learning_rate": 3.992517513245865e-05, "loss": 0.5261, "step": 2560 }, { "epoch": 0.4443095072866065, "grad_norm": 1.0747051239013672, "learning_rate": 3.992458517176272e-05, "loss": 0.5458, "step": 2561 }, { "epoch": 0.4444829979181124, "grad_norm": 0.9341891407966614, "learning_rate": 3.992399289879546e-05, "loss": 0.54, "step": 2562 }, { "epoch": 0.4446564885496183, "grad_norm": 0.7590116858482361, "learning_rate": 3.992339831362561e-05, "loss": 0.6815, "step": 2563 }, { "epoch": 0.4448299791811242, "grad_norm": 1.0929893255233765, "learning_rate": 3.992280141632216e-05, "loss": 0.6704, "step": 2564 }, { "epoch": 0.4450034698126301, "grad_norm": 0.6500886678695679, "learning_rate": 3.9922202206954395e-05, "loss": 0.5415, "step": 2565 }, { "epoch": 0.445176960444136, "grad_norm": 0.9372058510780334, "learning_rate": 3.9921600685591856e-05, "loss": 0.5028, "step": 2566 }, { "epoch": 0.4453504510756419, "grad_norm": 1.0460612773895264, "learning_rate": 3.992099685230434e-05, "loss": 0.514, "step": 2567 }, { "epoch": 0.4455239417071478, "grad_norm": 0.8441975712776184, "learning_rate": 3.9920390707161927e-05, "loss": 0.6785, "step": 2568 }, { "epoch": 0.4456974323386537, "grad_norm": 0.7510903477668762, "learning_rate": 3.991978225023497e-05, "loss": 0.6934, "step": 2569 }, { "epoch": 0.4458709229701596, "grad_norm": 0.8730303645133972, "learning_rate": 3.9919171481594056e-05, "loss": 0.5898, "step": 2570 }, { "epoch": 0.4460444136016655, "grad_norm": 0.657044529914856, "learning_rate": 3.991855840131009e-05, "loss": 0.73, "step": 2571 }, { "epoch": 0.4462179042331714, "grad_norm": 1.8791756629943848, "learning_rate": 3.9917943009454206e-05, "loss": 0.5889, "step": 2572 }, { "epoch": 0.4463913948646773, "grad_norm": 0.9651908278465271, "learning_rate": 3.991732530609783e-05, "loss": 0.5627, "step": 2573 }, { "epoch": 0.44656488549618323, "grad_norm": 1.3580819368362427, "learning_rate": 3.9916705291312646e-05, "loss": 0.5333, "step": 2574 }, { "epoch": 0.44673837612768913, "grad_norm": 0.9872630834579468, "learning_rate": 3.9916082965170604e-05, "loss": 0.7241, "step": 2575 }, { "epoch": 0.44691186675919503, "grad_norm": 0.995220959186554, "learning_rate": 3.991545832774393e-05, "loss": 0.5817, "step": 2576 }, { "epoch": 0.4470853573907009, "grad_norm": 0.8517805933952332, "learning_rate": 3.9914831379105104e-05, "loss": 0.6534, "step": 2577 }, { "epoch": 0.4472588480222068, "grad_norm": 1.203552484512329, "learning_rate": 3.9914202119326895e-05, "loss": 0.6368, "step": 2578 }, { "epoch": 0.4474323386537127, "grad_norm": 0.7733704447746277, "learning_rate": 3.991357054848233e-05, "loss": 0.5702, "step": 2579 }, { "epoch": 0.4476058292852186, "grad_norm": 0.8969932794570923, "learning_rate": 3.991293666664469e-05, "loss": 0.6219, "step": 2580 }, { "epoch": 0.4477793199167245, "grad_norm": 0.8377065062522888, "learning_rate": 3.991230047388755e-05, "loss": 0.5553, "step": 2581 }, { "epoch": 0.4479528105482304, "grad_norm": 0.7633172273635864, "learning_rate": 3.991166197028474e-05, "loss": 0.4855, "step": 2582 }, { "epoch": 0.4481263011797363, "grad_norm": 0.8832114338874817, "learning_rate": 3.9911021155910355e-05, "loss": 0.594, "step": 2583 }, { "epoch": 0.4482997918112422, "grad_norm": 0.7355130314826965, "learning_rate": 3.9910378030838765e-05, "loss": 0.632, "step": 2584 }, { "epoch": 0.4484732824427481, "grad_norm": 1.0732299089431763, "learning_rate": 3.99097325951446e-05, "loss": 0.7803, "step": 2585 }, { "epoch": 0.448646773074254, "grad_norm": 1.6468517780303955, "learning_rate": 3.990908484890277e-05, "loss": 0.6067, "step": 2586 }, { "epoch": 0.4488202637057599, "grad_norm": 1.1821500062942505, "learning_rate": 3.9908434792188443e-05, "loss": 0.5513, "step": 2587 }, { "epoch": 0.4489937543372658, "grad_norm": 0.7869590520858765, "learning_rate": 3.990778242507707e-05, "loss": 0.6683, "step": 2588 }, { "epoch": 0.4491672449687717, "grad_norm": 0.6006678938865662, "learning_rate": 3.990712774764434e-05, "loss": 0.6669, "step": 2589 }, { "epoch": 0.4493407356002776, "grad_norm": 0.6672049760818481, "learning_rate": 3.990647075996624e-05, "loss": 0.6932, "step": 2590 }, { "epoch": 0.4495142262317835, "grad_norm": 1.5705081224441528, "learning_rate": 3.9905811462119014e-05, "loss": 0.6442, "step": 2591 }, { "epoch": 0.4496877168632894, "grad_norm": 0.6579003930091858, "learning_rate": 3.9905149854179174e-05, "loss": 0.6995, "step": 2592 }, { "epoch": 0.4498612074947953, "grad_norm": 0.8032071590423584, "learning_rate": 3.990448593622349e-05, "loss": 0.5646, "step": 2593 }, { "epoch": 0.4500346981263012, "grad_norm": 1.135553240776062, "learning_rate": 3.990381970832903e-05, "loss": 0.5828, "step": 2594 }, { "epoch": 0.4502081887578071, "grad_norm": 0.9185262322425842, "learning_rate": 3.99031511705731e-05, "loss": 0.7058, "step": 2595 }, { "epoch": 0.45038167938931295, "grad_norm": 0.6164620518684387, "learning_rate": 3.9902480323033285e-05, "loss": 0.7278, "step": 2596 }, { "epoch": 0.45055517002081885, "grad_norm": 0.7947415709495544, "learning_rate": 3.990180716578744e-05, "loss": 0.5625, "step": 2597 }, { "epoch": 0.45072866065232475, "grad_norm": 0.9763660430908203, "learning_rate": 3.990113169891367e-05, "loss": 0.5795, "step": 2598 }, { "epoch": 0.45090215128383065, "grad_norm": 1.2467541694641113, "learning_rate": 3.990045392249039e-05, "loss": 0.7317, "step": 2599 }, { "epoch": 0.45107564191533656, "grad_norm": 0.9953369498252869, "learning_rate": 3.989977383659624e-05, "loss": 0.5889, "step": 2600 }, { "epoch": 0.45124913254684246, "grad_norm": 1.3223135471343994, "learning_rate": 3.989909144131015e-05, "loss": 0.6748, "step": 2601 }, { "epoch": 0.45142262317834836, "grad_norm": 0.6914749145507812, "learning_rate": 3.989840673671131e-05, "loss": 0.6632, "step": 2602 }, { "epoch": 0.45159611380985426, "grad_norm": 0.9431111812591553, "learning_rate": 3.9897719722879176e-05, "loss": 0.6514, "step": 2603 }, { "epoch": 0.45176960444136016, "grad_norm": 0.8488491773605347, "learning_rate": 3.989703039989349e-05, "loss": 0.6818, "step": 2604 }, { "epoch": 0.45194309507286606, "grad_norm": 1.092769980430603, "learning_rate": 3.989633876783423e-05, "loss": 0.6993, "step": 2605 }, { "epoch": 0.45211658570437196, "grad_norm": 1.1377992630004883, "learning_rate": 3.989564482678168e-05, "loss": 0.567, "step": 2606 }, { "epoch": 0.45229007633587787, "grad_norm": 1.0154528617858887, "learning_rate": 3.9894948576816374e-05, "loss": 0.5203, "step": 2607 }, { "epoch": 0.45246356696738377, "grad_norm": 1.028075098991394, "learning_rate": 3.989425001801909e-05, "loss": 0.4795, "step": 2608 }, { "epoch": 0.45263705759888967, "grad_norm": 1.4053622484207153, "learning_rate": 3.9893549150470925e-05, "loss": 0.6066, "step": 2609 }, { "epoch": 0.45281054823039557, "grad_norm": 1.0774238109588623, "learning_rate": 3.9892845974253184e-05, "loss": 0.7133, "step": 2610 }, { "epoch": 0.4529840388619015, "grad_norm": 0.7488763928413391, "learning_rate": 3.989214048944749e-05, "loss": 0.6543, "step": 2611 }, { "epoch": 0.4531575294934074, "grad_norm": 0.9664822816848755, "learning_rate": 3.989143269613572e-05, "loss": 0.6453, "step": 2612 }, { "epoch": 0.4533310201249133, "grad_norm": 0.7693923711776733, "learning_rate": 3.9890722594400005e-05, "loss": 0.7168, "step": 2613 }, { "epoch": 0.4535045107564192, "grad_norm": 1.0447169542312622, "learning_rate": 3.989001018432276e-05, "loss": 0.5677, "step": 2614 }, { "epoch": 0.4536780013879251, "grad_norm": 1.1141068935394287, "learning_rate": 3.988929546598665e-05, "loss": 0.7236, "step": 2615 }, { "epoch": 0.4538514920194309, "grad_norm": 1.1113327741622925, "learning_rate": 3.988857843947463e-05, "loss": 0.5212, "step": 2616 }, { "epoch": 0.4540249826509368, "grad_norm": 0.832119345664978, "learning_rate": 3.9887859104869905e-05, "loss": 0.502, "step": 2617 }, { "epoch": 0.4541984732824427, "grad_norm": 0.8675135374069214, "learning_rate": 3.988713746225596e-05, "loss": 0.6472, "step": 2618 }, { "epoch": 0.45437196391394863, "grad_norm": 0.7328503727912903, "learning_rate": 3.988641351171653e-05, "loss": 0.7312, "step": 2619 }, { "epoch": 0.45454545454545453, "grad_norm": 1.2184706926345825, "learning_rate": 3.988568725333565e-05, "loss": 0.7324, "step": 2620 }, { "epoch": 0.45471894517696043, "grad_norm": 0.8525048494338989, "learning_rate": 3.988495868719759e-05, "loss": 0.571, "step": 2621 }, { "epoch": 0.45489243580846633, "grad_norm": 0.855289876461029, "learning_rate": 3.98842278133869e-05, "loss": 0.6549, "step": 2622 }, { "epoch": 0.45506592643997223, "grad_norm": 1.3905209302902222, "learning_rate": 3.988349463198841e-05, "loss": 0.5911, "step": 2623 }, { "epoch": 0.45523941707147814, "grad_norm": 1.1628540754318237, "learning_rate": 3.9882759143087194e-05, "loss": 0.621, "step": 2624 }, { "epoch": 0.45541290770298404, "grad_norm": 1.0131633281707764, "learning_rate": 3.9882021346768613e-05, "loss": 0.6434, "step": 2625 }, { "epoch": 0.45558639833448994, "grad_norm": 0.8525959253311157, "learning_rate": 3.9881281243118285e-05, "loss": 0.6331, "step": 2626 }, { "epoch": 0.45575988896599584, "grad_norm": 0.8659221529960632, "learning_rate": 3.9880538832222105e-05, "loss": 0.7217, "step": 2627 }, { "epoch": 0.45593337959750174, "grad_norm": 1.3382600545883179, "learning_rate": 3.987979411416623e-05, "loss": 0.5326, "step": 2628 }, { "epoch": 0.45610687022900764, "grad_norm": 2.1783976554870605, "learning_rate": 3.987904708903708e-05, "loss": 0.7019, "step": 2629 }, { "epoch": 0.45628036086051355, "grad_norm": 1.4334501028060913, "learning_rate": 3.987829775692135e-05, "loss": 0.5695, "step": 2630 }, { "epoch": 0.45645385149201945, "grad_norm": 0.6840330362319946, "learning_rate": 3.987754611790601e-05, "loss": 0.64, "step": 2631 }, { "epoch": 0.45662734212352535, "grad_norm": 1.0548428297042847, "learning_rate": 3.987679217207827e-05, "loss": 0.5468, "step": 2632 }, { "epoch": 0.45680083275503125, "grad_norm": 0.8488027453422546, "learning_rate": 3.9876035919525644e-05, "loss": 0.6248, "step": 2633 }, { "epoch": 0.45697432338653715, "grad_norm": 1.8956934213638306, "learning_rate": 3.987527736033589e-05, "loss": 0.5099, "step": 2634 }, { "epoch": 0.45714781401804305, "grad_norm": 1.124916434288025, "learning_rate": 3.9874516494597035e-05, "loss": 0.6058, "step": 2635 }, { "epoch": 0.4573213046495489, "grad_norm": 2.044217348098755, "learning_rate": 3.987375332239739e-05, "loss": 0.6123, "step": 2636 }, { "epoch": 0.4574947952810548, "grad_norm": 0.7469845414161682, "learning_rate": 3.9872987843825505e-05, "loss": 0.5177, "step": 2637 }, { "epoch": 0.4576682859125607, "grad_norm": 0.9351304173469543, "learning_rate": 3.9872220058970226e-05, "loss": 0.6732, "step": 2638 }, { "epoch": 0.4578417765440666, "grad_norm": 0.7773634791374207, "learning_rate": 3.9871449967920656e-05, "loss": 0.5753, "step": 2639 }, { "epoch": 0.4580152671755725, "grad_norm": 0.9111433029174805, "learning_rate": 3.9870677570766167e-05, "loss": 0.6011, "step": 2640 }, { "epoch": 0.4581887578070784, "grad_norm": 0.6724522113800049, "learning_rate": 3.986990286759639e-05, "loss": 0.6995, "step": 2641 }, { "epoch": 0.4583622484385843, "grad_norm": 2.940087080001831, "learning_rate": 3.986912585850123e-05, "loss": 0.6014, "step": 2642 }, { "epoch": 0.4585357390700902, "grad_norm": 1.3955363035202026, "learning_rate": 3.986834654357086e-05, "loss": 0.5264, "step": 2643 }, { "epoch": 0.4587092297015961, "grad_norm": 3.199185609817505, "learning_rate": 3.9867564922895724e-05, "loss": 0.5081, "step": 2644 }, { "epoch": 0.458882720333102, "grad_norm": 0.8836493492126465, "learning_rate": 3.9866780996566525e-05, "loss": 0.6975, "step": 2645 }, { "epoch": 0.4590562109646079, "grad_norm": 0.9459909200668335, "learning_rate": 3.986599476467425e-05, "loss": 0.7307, "step": 2646 }, { "epoch": 0.4592297015961138, "grad_norm": 0.9849326014518738, "learning_rate": 3.986520622731012e-05, "loss": 0.5869, "step": 2647 }, { "epoch": 0.4594031922276197, "grad_norm": 0.746747612953186, "learning_rate": 3.9864415384565675e-05, "loss": 0.6517, "step": 2648 }, { "epoch": 0.4595766828591256, "grad_norm": 0.7429086565971375, "learning_rate": 3.986362223653267e-05, "loss": 0.7041, "step": 2649 }, { "epoch": 0.4597501734906315, "grad_norm": 0.8128641247749329, "learning_rate": 3.986282678330316e-05, "loss": 0.7878, "step": 2650 }, { "epoch": 0.4599236641221374, "grad_norm": 0.778728187084198, "learning_rate": 3.9862029024969466e-05, "loss": 0.5446, "step": 2651 }, { "epoch": 0.4600971547536433, "grad_norm": 0.6940119862556458, "learning_rate": 3.9861228961624146e-05, "loss": 0.5607, "step": 2652 }, { "epoch": 0.4602706453851492, "grad_norm": 0.772933304309845, "learning_rate": 3.986042659336007e-05, "loss": 0.5793, "step": 2653 }, { "epoch": 0.4604441360166551, "grad_norm": 1.608131766319275, "learning_rate": 3.985962192027034e-05, "loss": 0.5588, "step": 2654 }, { "epoch": 0.46061762664816097, "grad_norm": 0.9020060300827026, "learning_rate": 3.985881494244835e-05, "loss": 0.5914, "step": 2655 }, { "epoch": 0.4607911172796669, "grad_norm": 0.9139862060546875, "learning_rate": 3.985800565998775e-05, "loss": 0.6445, "step": 2656 }, { "epoch": 0.4609646079111728, "grad_norm": 1.4942809343338013, "learning_rate": 3.9857194072982445e-05, "loss": 0.5084, "step": 2657 }, { "epoch": 0.4611380985426787, "grad_norm": 0.6657776832580566, "learning_rate": 3.9856380181526634e-05, "loss": 0.6106, "step": 2658 }, { "epoch": 0.4613115891741846, "grad_norm": 1.0785088539123535, "learning_rate": 3.985556398571476e-05, "loss": 0.5094, "step": 2659 }, { "epoch": 0.4614850798056905, "grad_norm": 0.8394649028778076, "learning_rate": 3.9854745485641556e-05, "loss": 0.5852, "step": 2660 }, { "epoch": 0.4616585704371964, "grad_norm": 2.2808496952056885, "learning_rate": 3.9853924681402e-05, "loss": 0.5568, "step": 2661 }, { "epoch": 0.4618320610687023, "grad_norm": 0.7547072768211365, "learning_rate": 3.985310157309135e-05, "loss": 0.562, "step": 2662 }, { "epoch": 0.4620055517002082, "grad_norm": 0.7576550841331482, "learning_rate": 3.985227616080513e-05, "loss": 0.5585, "step": 2663 }, { "epoch": 0.4621790423317141, "grad_norm": 1.0454773902893066, "learning_rate": 3.985144844463913e-05, "loss": 0.698, "step": 2664 }, { "epoch": 0.46235253296322, "grad_norm": 2.2017455101013184, "learning_rate": 3.985061842468941e-05, "loss": 0.5635, "step": 2665 }, { "epoch": 0.4625260235947259, "grad_norm": 1.4035037755966187, "learning_rate": 3.9849786101052285e-05, "loss": 0.5897, "step": 2666 }, { "epoch": 0.4626995142262318, "grad_norm": 0.8923456072807312, "learning_rate": 3.984895147382435e-05, "loss": 0.6019, "step": 2667 }, { "epoch": 0.4628730048577377, "grad_norm": 0.717780351638794, "learning_rate": 3.984811454310248e-05, "loss": 0.6179, "step": 2668 }, { "epoch": 0.4630464954892436, "grad_norm": 1.2886556386947632, "learning_rate": 3.984727530898378e-05, "loss": 0.5853, "step": 2669 }, { "epoch": 0.4632199861207495, "grad_norm": 0.8386170268058777, "learning_rate": 3.9846433771565655e-05, "loss": 0.6455, "step": 2670 }, { "epoch": 0.4633934767522554, "grad_norm": 1.1211575269699097, "learning_rate": 3.9845589930945764e-05, "loss": 0.6292, "step": 2671 }, { "epoch": 0.4635669673837613, "grad_norm": 0.6581203937530518, "learning_rate": 3.9844743787222046e-05, "loss": 0.6748, "step": 2672 }, { "epoch": 0.4637404580152672, "grad_norm": 0.5879610776901245, "learning_rate": 3.984389534049268e-05, "loss": 0.6105, "step": 2673 }, { "epoch": 0.4639139486467731, "grad_norm": 1.0543928146362305, "learning_rate": 3.984304459085614e-05, "loss": 0.5782, "step": 2674 }, { "epoch": 0.46408743927827895, "grad_norm": 1.3818283081054688, "learning_rate": 3.9842191538411145e-05, "loss": 0.6366, "step": 2675 }, { "epoch": 0.46426092990978485, "grad_norm": 1.5274306535720825, "learning_rate": 3.984133618325671e-05, "loss": 0.6567, "step": 2676 }, { "epoch": 0.46443442054129075, "grad_norm": 0.9548752903938293, "learning_rate": 3.984047852549209e-05, "loss": 0.5369, "step": 2677 }, { "epoch": 0.46460791117279665, "grad_norm": 0.7854692339897156, "learning_rate": 3.983961856521682e-05, "loss": 0.6729, "step": 2678 }, { "epoch": 0.46478140180430255, "grad_norm": 0.6886661648750305, "learning_rate": 3.983875630253069e-05, "loss": 0.6757, "step": 2679 }, { "epoch": 0.46495489243580845, "grad_norm": 0.6751460433006287, "learning_rate": 3.983789173753378e-05, "loss": 0.6976, "step": 2680 }, { "epoch": 0.46512838306731435, "grad_norm": 0.6498127579689026, "learning_rate": 3.9837024870326425e-05, "loss": 0.6304, "step": 2681 }, { "epoch": 0.46530187369882026, "grad_norm": 0.8613525629043579, "learning_rate": 3.983615570100921e-05, "loss": 0.556, "step": 2682 }, { "epoch": 0.46547536433032616, "grad_norm": 1.1612775325775146, "learning_rate": 3.983528422968301e-05, "loss": 0.6768, "step": 2683 }, { "epoch": 0.46564885496183206, "grad_norm": 1.0510660409927368, "learning_rate": 3.9834410456448966e-05, "loss": 0.5898, "step": 2684 }, { "epoch": 0.46582234559333796, "grad_norm": 0.819597601890564, "learning_rate": 3.983353438140848e-05, "loss": 0.6868, "step": 2685 }, { "epoch": 0.46599583622484386, "grad_norm": 1.039358139038086, "learning_rate": 3.983265600466321e-05, "loss": 0.6661, "step": 2686 }, { "epoch": 0.46616932685634976, "grad_norm": 0.6790923476219177, "learning_rate": 3.983177532631511e-05, "loss": 0.6658, "step": 2687 }, { "epoch": 0.46634281748785567, "grad_norm": 0.7647500038146973, "learning_rate": 3.983089234646637e-05, "loss": 0.6296, "step": 2688 }, { "epoch": 0.46651630811936157, "grad_norm": 0.852489709854126, "learning_rate": 3.9830007065219466e-05, "loss": 0.6422, "step": 2689 }, { "epoch": 0.46668979875086747, "grad_norm": 4.935563087463379, "learning_rate": 3.9829119482677144e-05, "loss": 0.6632, "step": 2690 }, { "epoch": 0.46686328938237337, "grad_norm": 1.0070613622665405, "learning_rate": 3.982822959894239e-05, "loss": 0.681, "step": 2691 }, { "epoch": 0.46703678001387927, "grad_norm": 0.9911800622940063, "learning_rate": 3.9827337414118486e-05, "loss": 0.6112, "step": 2692 }, { "epoch": 0.4672102706453852, "grad_norm": 0.8067095875740051, "learning_rate": 3.9826442928308974e-05, "loss": 0.5524, "step": 2693 }, { "epoch": 0.4673837612768911, "grad_norm": 0.7464631199836731, "learning_rate": 3.982554614161766e-05, "loss": 0.6768, "step": 2694 }, { "epoch": 0.4675572519083969, "grad_norm": 0.872347891330719, "learning_rate": 3.9824647054148614e-05, "loss": 0.558, "step": 2695 }, { "epoch": 0.4677307425399028, "grad_norm": 0.7208455204963684, "learning_rate": 3.9823745666006176e-05, "loss": 0.5389, "step": 2696 }, { "epoch": 0.4679042331714087, "grad_norm": 0.984927237033844, "learning_rate": 3.982284197729496e-05, "loss": 0.5515, "step": 2697 }, { "epoch": 0.4680777238029146, "grad_norm": 1.1473478078842163, "learning_rate": 3.982193598811983e-05, "loss": 0.5508, "step": 2698 }, { "epoch": 0.4682512144344205, "grad_norm": 0.9905941486358643, "learning_rate": 3.982102769858593e-05, "loss": 0.6453, "step": 2699 }, { "epoch": 0.46842470506592643, "grad_norm": 1.1189990043640137, "learning_rate": 3.9820117108798666e-05, "loss": 0.4843, "step": 2700 }, { "epoch": 0.46859819569743233, "grad_norm": 1.9971749782562256, "learning_rate": 3.981920421886372e-05, "loss": 0.4984, "step": 2701 }, { "epoch": 0.46877168632893823, "grad_norm": 0.6893688440322876, "learning_rate": 3.981828902888704e-05, "loss": 0.6013, "step": 2702 }, { "epoch": 0.46894517696044413, "grad_norm": 0.9006636142730713, "learning_rate": 3.981737153897481e-05, "loss": 0.7227, "step": 2703 }, { "epoch": 0.46911866759195003, "grad_norm": 1.91812264919281, "learning_rate": 3.981645174923353e-05, "loss": 0.5508, "step": 2704 }, { "epoch": 0.46929215822345594, "grad_norm": 1.011795997619629, "learning_rate": 3.981552965976993e-05, "loss": 0.5004, "step": 2705 }, { "epoch": 0.46946564885496184, "grad_norm": 1.296972632408142, "learning_rate": 3.9814605270691025e-05, "loss": 0.5403, "step": 2706 }, { "epoch": 0.46963913948646774, "grad_norm": 0.7811582088470459, "learning_rate": 3.9813678582104095e-05, "loss": 0.5728, "step": 2707 }, { "epoch": 0.46981263011797364, "grad_norm": 0.8036185503005981, "learning_rate": 3.981274959411667e-05, "loss": 0.6035, "step": 2708 }, { "epoch": 0.46998612074947954, "grad_norm": 0.8364619016647339, "learning_rate": 3.981181830683657e-05, "loss": 0.6265, "step": 2709 }, { "epoch": 0.47015961138098544, "grad_norm": 1.0008864402770996, "learning_rate": 3.9810884720371874e-05, "loss": 0.7524, "step": 2710 }, { "epoch": 0.47033310201249134, "grad_norm": 0.7549238801002502, "learning_rate": 3.9809948834830914e-05, "loss": 0.6472, "step": 2711 }, { "epoch": 0.47050659264399725, "grad_norm": 0.9469247460365295, "learning_rate": 3.980901065032232e-05, "loss": 0.606, "step": 2712 }, { "epoch": 0.47068008327550315, "grad_norm": 2.2984249591827393, "learning_rate": 3.9808070166954945e-05, "loss": 0.6272, "step": 2713 }, { "epoch": 0.47085357390700905, "grad_norm": 0.9015179872512817, "learning_rate": 3.9807127384837955e-05, "loss": 0.5844, "step": 2714 }, { "epoch": 0.4710270645385149, "grad_norm": 0.8548340797424316, "learning_rate": 3.980618230408075e-05, "loss": 0.5885, "step": 2715 }, { "epoch": 0.4712005551700208, "grad_norm": 0.9300568103790283, "learning_rate": 3.980523492479301e-05, "loss": 0.5674, "step": 2716 }, { "epoch": 0.4713740458015267, "grad_norm": 0.9700860977172852, "learning_rate": 3.9804285247084674e-05, "loss": 0.6692, "step": 2717 }, { "epoch": 0.4715475364330326, "grad_norm": 0.7631649374961853, "learning_rate": 3.980333327106596e-05, "loss": 0.6504, "step": 2718 }, { "epoch": 0.4717210270645385, "grad_norm": 0.7961834669113159, "learning_rate": 3.980237899684735e-05, "loss": 0.7671, "step": 2719 }, { "epoch": 0.4718945176960444, "grad_norm": 0.8147475123405457, "learning_rate": 3.980142242453958e-05, "loss": 0.6982, "step": 2720 }, { "epoch": 0.4720680083275503, "grad_norm": 0.6349263787269592, "learning_rate": 3.980046355425366e-05, "loss": 0.6766, "step": 2721 }, { "epoch": 0.4722414989590562, "grad_norm": 0.7165682911872864, "learning_rate": 3.979950238610088e-05, "loss": 0.5476, "step": 2722 }, { "epoch": 0.4724149895905621, "grad_norm": 1.1357554197311401, "learning_rate": 3.979853892019278e-05, "loss": 0.6307, "step": 2723 }, { "epoch": 0.472588480222068, "grad_norm": 1.2190383672714233, "learning_rate": 3.9797573156641165e-05, "loss": 0.5709, "step": 2724 }, { "epoch": 0.4727619708535739, "grad_norm": 0.8067264556884766, "learning_rate": 3.9796605095558115e-05, "loss": 0.5641, "step": 2725 }, { "epoch": 0.4729354614850798, "grad_norm": 1.0697906017303467, "learning_rate": 3.9795634737055974e-05, "loss": 0.6151, "step": 2726 }, { "epoch": 0.4731089521165857, "grad_norm": 1.0114651918411255, "learning_rate": 3.979466208124736e-05, "loss": 0.5023, "step": 2727 }, { "epoch": 0.4732824427480916, "grad_norm": 0.6932345032691956, "learning_rate": 3.979368712824514e-05, "loss": 0.5094, "step": 2728 }, { "epoch": 0.4734559333795975, "grad_norm": 1.4069453477859497, "learning_rate": 3.9792709878162466e-05, "loss": 0.6754, "step": 2729 }, { "epoch": 0.4736294240111034, "grad_norm": 1.1037931442260742, "learning_rate": 3.979173033111275e-05, "loss": 0.5452, "step": 2730 }, { "epoch": 0.4738029146426093, "grad_norm": 0.8605200052261353, "learning_rate": 3.979074848720967e-05, "loss": 0.6757, "step": 2731 }, { "epoch": 0.4739764052741152, "grad_norm": 0.9133217334747314, "learning_rate": 3.9789764346567154e-05, "loss": 0.6819, "step": 2732 }, { "epoch": 0.4741498959056211, "grad_norm": 1.019981861114502, "learning_rate": 3.978877790929944e-05, "loss": 0.6392, "step": 2733 }, { "epoch": 0.47432338653712697, "grad_norm": 0.7765205502510071, "learning_rate": 3.978778917552099e-05, "loss": 0.6055, "step": 2734 }, { "epoch": 0.47449687716863287, "grad_norm": 0.5978204011917114, "learning_rate": 3.978679814534654e-05, "loss": 0.7339, "step": 2735 }, { "epoch": 0.47467036780013877, "grad_norm": 0.8094828724861145, "learning_rate": 3.9785804818891117e-05, "loss": 0.5305, "step": 2736 }, { "epoch": 0.47484385843164467, "grad_norm": 1.0592631101608276, "learning_rate": 3.9784809196269986e-05, "loss": 0.5562, "step": 2737 }, { "epoch": 0.4750173490631506, "grad_norm": 0.7034501433372498, "learning_rate": 3.978381127759869e-05, "loss": 0.6902, "step": 2738 }, { "epoch": 0.4751908396946565, "grad_norm": 1.376899003982544, "learning_rate": 3.978281106299305e-05, "loss": 0.5536, "step": 2739 }, { "epoch": 0.4753643303261624, "grad_norm": 0.6912811398506165, "learning_rate": 3.9781808552569134e-05, "loss": 0.624, "step": 2740 }, { "epoch": 0.4755378209576683, "grad_norm": 0.8399140238761902, "learning_rate": 3.9780803746443284e-05, "loss": 0.6537, "step": 2741 }, { "epoch": 0.4757113115891742, "grad_norm": 0.7426314949989319, "learning_rate": 3.97797966447321e-05, "loss": 0.5227, "step": 2742 }, { "epoch": 0.4758848022206801, "grad_norm": 2.417330741882324, "learning_rate": 3.9778787247552484e-05, "loss": 0.5402, "step": 2743 }, { "epoch": 0.476058292852186, "grad_norm": 0.9911115765571594, "learning_rate": 3.977777555502155e-05, "loss": 0.5673, "step": 2744 }, { "epoch": 0.4762317834836919, "grad_norm": 0.8713995814323425, "learning_rate": 3.9776761567256726e-05, "loss": 0.6201, "step": 2745 }, { "epoch": 0.4764052741151978, "grad_norm": 1.156535029411316, "learning_rate": 3.977574528437567e-05, "loss": 0.6407, "step": 2746 }, { "epoch": 0.4765787647467037, "grad_norm": 0.5964752435684204, "learning_rate": 3.977472670649634e-05, "loss": 0.672, "step": 2747 }, { "epoch": 0.4767522553782096, "grad_norm": 0.706098735332489, "learning_rate": 3.977370583373692e-05, "loss": 0.6331, "step": 2748 }, { "epoch": 0.4769257460097155, "grad_norm": 1.0353593826293945, "learning_rate": 3.9772682666215906e-05, "loss": 0.5831, "step": 2749 }, { "epoch": 0.4770992366412214, "grad_norm": 0.7834818959236145, "learning_rate": 3.9771657204052026e-05, "loss": 0.5303, "step": 2750 }, { "epoch": 0.4772727272727273, "grad_norm": 0.5836576223373413, "learning_rate": 3.977062944736429e-05, "loss": 0.5969, "step": 2751 }, { "epoch": 0.4774462179042332, "grad_norm": 0.8149217963218689, "learning_rate": 3.976959939627196e-05, "loss": 0.6594, "step": 2752 }, { "epoch": 0.4776197085357391, "grad_norm": 1.064262866973877, "learning_rate": 3.97685670508946e-05, "loss": 0.5863, "step": 2753 }, { "epoch": 0.47779319916724494, "grad_norm": 0.7263724207878113, "learning_rate": 3.9767532411351985e-05, "loss": 0.5328, "step": 2754 }, { "epoch": 0.47796668979875084, "grad_norm": 0.7789517641067505, "learning_rate": 3.976649547776421e-05, "loss": 0.6167, "step": 2755 }, { "epoch": 0.47814018043025674, "grad_norm": 1.1462479829788208, "learning_rate": 3.97654562502516e-05, "loss": 0.605, "step": 2756 }, { "epoch": 0.47831367106176265, "grad_norm": 0.6074411869049072, "learning_rate": 3.9764414728934755e-05, "loss": 0.6172, "step": 2757 }, { "epoch": 0.47848716169326855, "grad_norm": 0.9205759167671204, "learning_rate": 3.9763370913934554e-05, "loss": 0.5411, "step": 2758 }, { "epoch": 0.47866065232477445, "grad_norm": 1.2367000579833984, "learning_rate": 3.976232480537213e-05, "loss": 0.512, "step": 2759 }, { "epoch": 0.47883414295628035, "grad_norm": 1.5458433628082275, "learning_rate": 3.976127640336889e-05, "loss": 0.6302, "step": 2760 }, { "epoch": 0.47900763358778625, "grad_norm": 0.8205773234367371, "learning_rate": 3.976022570804649e-05, "loss": 0.6331, "step": 2761 }, { "epoch": 0.47918112421929215, "grad_norm": 0.8028043508529663, "learning_rate": 3.9759172719526876e-05, "loss": 0.5426, "step": 2762 }, { "epoch": 0.47935461485079806, "grad_norm": 0.8865018486976624, "learning_rate": 3.975811743793224e-05, "loss": 0.5605, "step": 2763 }, { "epoch": 0.47952810548230396, "grad_norm": 0.722575843334198, "learning_rate": 3.975705986338505e-05, "loss": 0.6724, "step": 2764 }, { "epoch": 0.47970159611380986, "grad_norm": 1.0967686176300049, "learning_rate": 3.975599999600804e-05, "loss": 0.6622, "step": 2765 }, { "epoch": 0.47987508674531576, "grad_norm": 1.0719001293182373, "learning_rate": 3.9754937835924214e-05, "loss": 0.6163, "step": 2766 }, { "epoch": 0.48004857737682166, "grad_norm": 0.7715944051742554, "learning_rate": 3.975387338325684e-05, "loss": 0.5645, "step": 2767 }, { "epoch": 0.48022206800832756, "grad_norm": 1.0731868743896484, "learning_rate": 3.9752806638129435e-05, "loss": 0.572, "step": 2768 }, { "epoch": 0.48039555863983346, "grad_norm": 0.8332193493843079, "learning_rate": 3.975173760066581e-05, "loss": 0.7068, "step": 2769 }, { "epoch": 0.48056904927133937, "grad_norm": 0.9313003420829773, "learning_rate": 3.975066627099e-05, "loss": 0.6307, "step": 2770 }, { "epoch": 0.48074253990284527, "grad_norm": 0.9903737902641296, "learning_rate": 3.974959264922638e-05, "loss": 0.6519, "step": 2771 }, { "epoch": 0.48091603053435117, "grad_norm": 0.8947104811668396, "learning_rate": 3.974851673549951e-05, "loss": 0.5934, "step": 2772 }, { "epoch": 0.48108952116585707, "grad_norm": 0.9411084055900574, "learning_rate": 3.974743852993426e-05, "loss": 0.6036, "step": 2773 }, { "epoch": 0.4812630117973629, "grad_norm": 0.6337997913360596, "learning_rate": 3.9746358032655764e-05, "loss": 0.7129, "step": 2774 }, { "epoch": 0.4814365024288688, "grad_norm": 0.6764761209487915, "learning_rate": 3.9745275243789396e-05, "loss": 0.5551, "step": 2775 }, { "epoch": 0.4816099930603747, "grad_norm": 0.9338626265525818, "learning_rate": 3.974419016346084e-05, "loss": 0.5388, "step": 2776 }, { "epoch": 0.4817834836918806, "grad_norm": 0.6472459435462952, "learning_rate": 3.9743102791796005e-05, "loss": 0.5714, "step": 2777 }, { "epoch": 0.4819569743233865, "grad_norm": 1.0440516471862793, "learning_rate": 3.974201312892109e-05, "loss": 0.6394, "step": 2778 }, { "epoch": 0.4821304649548924, "grad_norm": 0.8014706373214722, "learning_rate": 3.9740921174962545e-05, "loss": 0.6423, "step": 2779 }, { "epoch": 0.4823039555863983, "grad_norm": 0.8197331428527832, "learning_rate": 3.9739826930047095e-05, "loss": 0.6128, "step": 2780 }, { "epoch": 0.4824774462179042, "grad_norm": 1.1526602506637573, "learning_rate": 3.9738730394301726e-05, "loss": 0.6182, "step": 2781 }, { "epoch": 0.48265093684941013, "grad_norm": 0.9170807003974915, "learning_rate": 3.9737631567853695e-05, "loss": 0.6565, "step": 2782 }, { "epoch": 0.48282442748091603, "grad_norm": 1.079443097114563, "learning_rate": 3.9736530450830525e-05, "loss": 0.6467, "step": 2783 }, { "epoch": 0.48299791811242193, "grad_norm": 0.743094265460968, "learning_rate": 3.9735427043359996e-05, "loss": 0.6473, "step": 2784 }, { "epoch": 0.48317140874392783, "grad_norm": 2.7557244300842285, "learning_rate": 3.9734321345570165e-05, "loss": 0.5278, "step": 2785 }, { "epoch": 0.48334489937543373, "grad_norm": 0.9088783860206604, "learning_rate": 3.973321335758934e-05, "loss": 0.6384, "step": 2786 }, { "epoch": 0.48351839000693964, "grad_norm": 0.7157585024833679, "learning_rate": 3.9732103079546125e-05, "loss": 0.5518, "step": 2787 }, { "epoch": 0.48369188063844554, "grad_norm": 0.6420220136642456, "learning_rate": 3.973099051156934e-05, "loss": 0.6764, "step": 2788 }, { "epoch": 0.48386537126995144, "grad_norm": 0.728564441204071, "learning_rate": 3.9729875653788125e-05, "loss": 0.6217, "step": 2789 }, { "epoch": 0.48403886190145734, "grad_norm": 0.7883340716362, "learning_rate": 3.972875850633184e-05, "loss": 0.5724, "step": 2790 }, { "epoch": 0.48421235253296324, "grad_norm": 0.7076512575149536, "learning_rate": 3.972763906933015e-05, "loss": 0.6989, "step": 2791 }, { "epoch": 0.48438584316446914, "grad_norm": 1.6119723320007324, "learning_rate": 3.9726517342912954e-05, "loss": 0.5679, "step": 2792 }, { "epoch": 0.48455933379597504, "grad_norm": 0.9080114364624023, "learning_rate": 3.972539332721044e-05, "loss": 0.5646, "step": 2793 }, { "epoch": 0.4847328244274809, "grad_norm": 0.6551885008811951, "learning_rate": 3.972426702235304e-05, "loss": 0.6938, "step": 2794 }, { "epoch": 0.4849063150589868, "grad_norm": 1.1007322072982788, "learning_rate": 3.972313842847148e-05, "loss": 0.5051, "step": 2795 }, { "epoch": 0.4850798056904927, "grad_norm": 1.7170222997665405, "learning_rate": 3.972200754569671e-05, "loss": 0.5197, "step": 2796 }, { "epoch": 0.4852532963219986, "grad_norm": 0.829792857170105, "learning_rate": 3.972087437415999e-05, "loss": 0.6917, "step": 2797 }, { "epoch": 0.4854267869535045, "grad_norm": 1.100192666053772, "learning_rate": 3.9719738913992815e-05, "loss": 0.5415, "step": 2798 }, { "epoch": 0.4856002775850104, "grad_norm": 0.8359057903289795, "learning_rate": 3.971860116532696e-05, "loss": 0.5387, "step": 2799 }, { "epoch": 0.4857737682165163, "grad_norm": 0.8219515681266785, "learning_rate": 3.971746112829447e-05, "loss": 0.6611, "step": 2800 }, { "epoch": 0.4859472588480222, "grad_norm": 0.5996765494346619, "learning_rate": 3.971631880302764e-05, "loss": 0.6564, "step": 2801 }, { "epoch": 0.4861207494795281, "grad_norm": 1.5173307657241821, "learning_rate": 3.9715174189659036e-05, "loss": 0.6659, "step": 2802 }, { "epoch": 0.486294240111034, "grad_norm": 0.9307529926300049, "learning_rate": 3.97140272883215e-05, "loss": 0.585, "step": 2803 }, { "epoch": 0.4864677307425399, "grad_norm": 0.7527129650115967, "learning_rate": 3.971287809914811e-05, "loss": 0.6942, "step": 2804 }, { "epoch": 0.4866412213740458, "grad_norm": 0.67873215675354, "learning_rate": 3.9711726622272266e-05, "loss": 0.6327, "step": 2805 }, { "epoch": 0.4868147120055517, "grad_norm": 0.8369560241699219, "learning_rate": 3.971057285782757e-05, "loss": 0.5317, "step": 2806 }, { "epoch": 0.4869882026370576, "grad_norm": 0.9180094599723816, "learning_rate": 3.970941680594792e-05, "loss": 0.6156, "step": 2807 }, { "epoch": 0.4871616932685635, "grad_norm": 1.8472323417663574, "learning_rate": 3.970825846676749e-05, "loss": 0.6224, "step": 2808 }, { "epoch": 0.4873351839000694, "grad_norm": 1.179469108581543, "learning_rate": 3.9707097840420706e-05, "loss": 0.7119, "step": 2809 }, { "epoch": 0.4875086745315753, "grad_norm": 1.036863923072815, "learning_rate": 3.970593492704225e-05, "loss": 0.5345, "step": 2810 }, { "epoch": 0.4876821651630812, "grad_norm": 1.147894263267517, "learning_rate": 3.970476972676708e-05, "loss": 0.5669, "step": 2811 }, { "epoch": 0.4878556557945871, "grad_norm": 0.7939436435699463, "learning_rate": 3.9703602239730425e-05, "loss": 0.6367, "step": 2812 }, { "epoch": 0.48802914642609296, "grad_norm": 0.6466153860092163, "learning_rate": 3.970243246606777e-05, "loss": 0.7765, "step": 2813 }, { "epoch": 0.48820263705759886, "grad_norm": 4.05562686920166, "learning_rate": 3.9701260405914874e-05, "loss": 0.6578, "step": 2814 }, { "epoch": 0.48837612768910477, "grad_norm": 0.7797324657440186, "learning_rate": 3.9700086059407745e-05, "loss": 0.6892, "step": 2815 }, { "epoch": 0.48854961832061067, "grad_norm": 1.3146294355392456, "learning_rate": 3.9698909426682674e-05, "loss": 0.5365, "step": 2816 }, { "epoch": 0.48872310895211657, "grad_norm": 0.7870042324066162, "learning_rate": 3.969773050787622e-05, "loss": 0.6057, "step": 2817 }, { "epoch": 0.48889659958362247, "grad_norm": 1.0030949115753174, "learning_rate": 3.9696549303125176e-05, "loss": 0.5239, "step": 2818 }, { "epoch": 0.4890700902151284, "grad_norm": 0.6438866257667542, "learning_rate": 3.9695365812566645e-05, "loss": 0.5275, "step": 2819 }, { "epoch": 0.4892435808466343, "grad_norm": 0.847891628742218, "learning_rate": 3.969418003633795e-05, "loss": 0.5771, "step": 2820 }, { "epoch": 0.4894170714781402, "grad_norm": 0.8128512501716614, "learning_rate": 3.9692991974576725e-05, "loss": 0.5381, "step": 2821 }, { "epoch": 0.4895905621096461, "grad_norm": 1.2491570711135864, "learning_rate": 3.969180162742082e-05, "loss": 0.5964, "step": 2822 }, { "epoch": 0.489764052741152, "grad_norm": 1.1974592208862305, "learning_rate": 3.96906089950084e-05, "loss": 0.5793, "step": 2823 }, { "epoch": 0.4899375433726579, "grad_norm": 0.808745265007019, "learning_rate": 3.9689414077477865e-05, "loss": 0.6477, "step": 2824 }, { "epoch": 0.4901110340041638, "grad_norm": 0.7065175175666809, "learning_rate": 3.968821687496788e-05, "loss": 0.6975, "step": 2825 }, { "epoch": 0.4902845246356697, "grad_norm": 0.8909404873847961, "learning_rate": 3.968701738761739e-05, "loss": 0.6786, "step": 2826 }, { "epoch": 0.4904580152671756, "grad_norm": 0.8688374757766724, "learning_rate": 3.968581561556558e-05, "loss": 0.6854, "step": 2827 }, { "epoch": 0.4906315058986815, "grad_norm": 0.8573815226554871, "learning_rate": 3.968461155895194e-05, "loss": 0.6083, "step": 2828 }, { "epoch": 0.4908049965301874, "grad_norm": 0.8470245599746704, "learning_rate": 3.968340521791619e-05, "loss": 0.6835, "step": 2829 }, { "epoch": 0.4909784871616933, "grad_norm": 0.7496466040611267, "learning_rate": 3.9682196592598324e-05, "loss": 0.562, "step": 2830 }, { "epoch": 0.4911519777931992, "grad_norm": 0.7681129574775696, "learning_rate": 3.968098568313862e-05, "loss": 0.6537, "step": 2831 }, { "epoch": 0.4913254684247051, "grad_norm": 1.5018384456634521, "learning_rate": 3.967977248967758e-05, "loss": 0.6011, "step": 2832 }, { "epoch": 0.49149895905621094, "grad_norm": 1.2373415231704712, "learning_rate": 3.9678557012356014e-05, "loss": 0.5194, "step": 2833 }, { "epoch": 0.49167244968771684, "grad_norm": 0.9008753895759583, "learning_rate": 3.967733925131498e-05, "loss": 0.5525, "step": 2834 }, { "epoch": 0.49184594031922274, "grad_norm": 0.8494754433631897, "learning_rate": 3.96761192066958e-05, "loss": 0.6589, "step": 2835 }, { "epoch": 0.49201943095072864, "grad_norm": 0.7637364864349365, "learning_rate": 3.9674896878640054e-05, "loss": 0.5907, "step": 2836 }, { "epoch": 0.49219292158223454, "grad_norm": 0.844154953956604, "learning_rate": 3.9673672267289604e-05, "loss": 0.5884, "step": 2837 }, { "epoch": 0.49236641221374045, "grad_norm": 0.9407692551612854, "learning_rate": 3.9672445372786565e-05, "loss": 0.6096, "step": 2838 }, { "epoch": 0.49253990284524635, "grad_norm": 0.8381971120834351, "learning_rate": 3.967121619527331e-05, "loss": 0.5492, "step": 2839 }, { "epoch": 0.49271339347675225, "grad_norm": 0.8256162405014038, "learning_rate": 3.96699847348925e-05, "loss": 0.6206, "step": 2840 }, { "epoch": 0.49288688410825815, "grad_norm": 0.8454298973083496, "learning_rate": 3.9668750991787034e-05, "loss": 0.7008, "step": 2841 }, { "epoch": 0.49306037473976405, "grad_norm": 0.7378323674201965, "learning_rate": 3.966751496610011e-05, "loss": 0.6375, "step": 2842 }, { "epoch": 0.49323386537126995, "grad_norm": 0.6299169063568115, "learning_rate": 3.9666276657975144e-05, "loss": 0.7336, "step": 2843 }, { "epoch": 0.49340735600277585, "grad_norm": 0.684691309928894, "learning_rate": 3.966503606755586e-05, "loss": 0.5975, "step": 2844 }, { "epoch": 0.49358084663428176, "grad_norm": 0.823893129825592, "learning_rate": 3.966379319498623e-05, "loss": 0.6161, "step": 2845 }, { "epoch": 0.49375433726578766, "grad_norm": 0.8355918526649475, "learning_rate": 3.9662548040410485e-05, "loss": 0.6804, "step": 2846 }, { "epoch": 0.49392782789729356, "grad_norm": 0.9166586399078369, "learning_rate": 3.966130060397312e-05, "loss": 0.652, "step": 2847 }, { "epoch": 0.49410131852879946, "grad_norm": 0.8756088018417358, "learning_rate": 3.9660050885818925e-05, "loss": 0.6575, "step": 2848 }, { "epoch": 0.49427480916030536, "grad_norm": 1.5226891040802002, "learning_rate": 3.9658798886092904e-05, "loss": 0.553, "step": 2849 }, { "epoch": 0.49444829979181126, "grad_norm": 0.7012729644775391, "learning_rate": 3.965754460494037e-05, "loss": 0.6741, "step": 2850 }, { "epoch": 0.49462179042331716, "grad_norm": 1.1426388025283813, "learning_rate": 3.965628804250688e-05, "loss": 0.6549, "step": 2851 }, { "epoch": 0.49479528105482307, "grad_norm": 0.9859298467636108, "learning_rate": 3.9655029198938256e-05, "loss": 0.5869, "step": 2852 }, { "epoch": 0.4949687716863289, "grad_norm": 0.8353455066680908, "learning_rate": 3.965376807438059e-05, "loss": 0.6431, "step": 2853 }, { "epoch": 0.4951422623178348, "grad_norm": 1.476958155632019, "learning_rate": 3.965250466898024e-05, "loss": 0.5901, "step": 2854 }, { "epoch": 0.4953157529493407, "grad_norm": 0.6651232838630676, "learning_rate": 3.9651238982883826e-05, "loss": 0.5836, "step": 2855 }, { "epoch": 0.4954892435808466, "grad_norm": 0.6790268421173096, "learning_rate": 3.964997101623823e-05, "loss": 0.6171, "step": 2856 }, { "epoch": 0.4956627342123525, "grad_norm": 0.8696946501731873, "learning_rate": 3.964870076919059e-05, "loss": 0.5375, "step": 2857 }, { "epoch": 0.4958362248438584, "grad_norm": 0.8823839426040649, "learning_rate": 3.964742824188834e-05, "loss": 0.6649, "step": 2858 }, { "epoch": 0.4960097154753643, "grad_norm": 0.7932507395744324, "learning_rate": 3.964615343447915e-05, "loss": 0.6865, "step": 2859 }, { "epoch": 0.4961832061068702, "grad_norm": 0.653605043888092, "learning_rate": 3.9644876347110956e-05, "loss": 0.7451, "step": 2860 }, { "epoch": 0.4963566967383761, "grad_norm": 0.6188622713088989, "learning_rate": 3.964359697993198e-05, "loss": 0.7036, "step": 2861 }, { "epoch": 0.496530187369882, "grad_norm": 1.760206937789917, "learning_rate": 3.964231533309067e-05, "loss": 0.5435, "step": 2862 }, { "epoch": 0.4967036780013879, "grad_norm": 0.7955849766731262, "learning_rate": 3.964103140673579e-05, "loss": 0.5455, "step": 2863 }, { "epoch": 0.49687716863289383, "grad_norm": 0.7729914784431458, "learning_rate": 3.963974520101632e-05, "loss": 0.6177, "step": 2864 }, { "epoch": 0.49705065926439973, "grad_norm": 1.0147491693496704, "learning_rate": 3.963845671608154e-05, "loss": 0.6644, "step": 2865 }, { "epoch": 0.49722414989590563, "grad_norm": 0.9814116954803467, "learning_rate": 3.963716595208098e-05, "loss": 0.509, "step": 2866 }, { "epoch": 0.49739764052741153, "grad_norm": 1.0014759302139282, "learning_rate": 3.963587290916442e-05, "loss": 0.6312, "step": 2867 }, { "epoch": 0.49757113115891743, "grad_norm": 0.9009021520614624, "learning_rate": 3.963457758748193e-05, "loss": 0.6477, "step": 2868 }, { "epoch": 0.49774462179042334, "grad_norm": 0.8552650213241577, "learning_rate": 3.963327998718385e-05, "loss": 0.5275, "step": 2869 }, { "epoch": 0.49791811242192924, "grad_norm": 0.9464699625968933, "learning_rate": 3.963198010842073e-05, "loss": 0.5591, "step": 2870 }, { "epoch": 0.49809160305343514, "grad_norm": 0.909501314163208, "learning_rate": 3.963067795134344e-05, "loss": 0.7686, "step": 2871 }, { "epoch": 0.49826509368494104, "grad_norm": 0.853759765625, "learning_rate": 3.9629373516103114e-05, "loss": 0.6168, "step": 2872 }, { "epoch": 0.4984385843164469, "grad_norm": 0.8711340427398682, "learning_rate": 3.962806680285111e-05, "loss": 0.5017, "step": 2873 }, { "epoch": 0.4986120749479528, "grad_norm": 0.7842757105827332, "learning_rate": 3.9626757811739084e-05, "loss": 0.5222, "step": 2874 }, { "epoch": 0.4987855655794587, "grad_norm": 0.9540364742279053, "learning_rate": 3.962544654291894e-05, "loss": 0.572, "step": 2875 }, { "epoch": 0.4989590562109646, "grad_norm": 1.2070144414901733, "learning_rate": 3.962413299654286e-05, "loss": 0.6826, "step": 2876 }, { "epoch": 0.4991325468424705, "grad_norm": 0.9257821440696716, "learning_rate": 3.962281717276328e-05, "loss": 0.5488, "step": 2877 }, { "epoch": 0.4993060374739764, "grad_norm": 1.1215496063232422, "learning_rate": 3.962149907173291e-05, "loss": 0.4994, "step": 2878 }, { "epoch": 0.4994795281054823, "grad_norm": 1.1144548654556274, "learning_rate": 3.9620178693604696e-05, "loss": 0.5066, "step": 2879 }, { "epoch": 0.4996530187369882, "grad_norm": 0.8781638741493225, "learning_rate": 3.961885603853189e-05, "loss": 0.5291, "step": 2880 }, { "epoch": 0.4998265093684941, "grad_norm": 0.7669386863708496, "learning_rate": 3.961753110666798e-05, "loss": 0.599, "step": 2881 }, { "epoch": 0.5, "grad_norm": 1.8803635835647583, "learning_rate": 3.9616203898166724e-05, "loss": 0.6178, "step": 2882 }, { "epoch": 0.5001734906315058, "grad_norm": 0.7655910849571228, "learning_rate": 3.9614874413182144e-05, "loss": 0.5726, "step": 2883 }, { "epoch": 0.5003469812630118, "grad_norm": 0.6875408291816711, "learning_rate": 3.961354265186854e-05, "loss": 0.6406, "step": 2884 }, { "epoch": 0.5005204718945176, "grad_norm": 1.0279085636138916, "learning_rate": 3.961220861438045e-05, "loss": 0.6409, "step": 2885 }, { "epoch": 0.5006939625260236, "grad_norm": 0.7553526163101196, "learning_rate": 3.9610872300872704e-05, "loss": 0.6992, "step": 2886 }, { "epoch": 0.5008674531575295, "grad_norm": 1.0226480960845947, "learning_rate": 3.960953371150037e-05, "loss": 0.5773, "step": 2887 }, { "epoch": 0.5010409437890354, "grad_norm": 0.944045901298523, "learning_rate": 3.96081928464188e-05, "loss": 0.5137, "step": 2888 }, { "epoch": 0.5012144344205413, "grad_norm": 1.0751080513000488, "learning_rate": 3.9606849705783606e-05, "loss": 0.4993, "step": 2889 }, { "epoch": 0.5013879250520472, "grad_norm": 1.8217488527297974, "learning_rate": 3.960550428975066e-05, "loss": 0.6428, "step": 2890 }, { "epoch": 0.5015614156835531, "grad_norm": 0.7758296728134155, "learning_rate": 3.960415659847609e-05, "loss": 0.5029, "step": 2891 }, { "epoch": 0.501734906315059, "grad_norm": 0.9004384875297546, "learning_rate": 3.9602806632116304e-05, "loss": 0.504, "step": 2892 }, { "epoch": 0.5019083969465649, "grad_norm": 1.2992662191390991, "learning_rate": 3.960145439082797e-05, "loss": 0.5168, "step": 2893 }, { "epoch": 0.5020818875780708, "grad_norm": 0.715490460395813, "learning_rate": 3.960009987476801e-05, "loss": 0.5402, "step": 2894 }, { "epoch": 0.5022553782095767, "grad_norm": 0.7640341520309448, "learning_rate": 3.959874308409362e-05, "loss": 0.5972, "step": 2895 }, { "epoch": 0.5024288688410826, "grad_norm": 0.6937002539634705, "learning_rate": 3.959738401896227e-05, "loss": 0.7065, "step": 2896 }, { "epoch": 0.5026023594725885, "grad_norm": 0.7549835443496704, "learning_rate": 3.959602267953165e-05, "loss": 0.54, "step": 2897 }, { "epoch": 0.5027758501040944, "grad_norm": 0.736943244934082, "learning_rate": 3.9594659065959774e-05, "loss": 0.5231, "step": 2898 }, { "epoch": 0.5029493407356003, "grad_norm": 1.0537973642349243, "learning_rate": 3.9593293178404885e-05, "loss": 0.4803, "step": 2899 }, { "epoch": 0.5031228313671062, "grad_norm": 0.8774026036262512, "learning_rate": 3.959192501702548e-05, "loss": 0.6389, "step": 2900 }, { "epoch": 0.5032963219986121, "grad_norm": 0.7064281702041626, "learning_rate": 3.959055458198036e-05, "loss": 0.5803, "step": 2901 }, { "epoch": 0.5034698126301179, "grad_norm": 0.8273089528083801, "learning_rate": 3.958918187342855e-05, "loss": 0.6331, "step": 2902 }, { "epoch": 0.5036433032616239, "grad_norm": 0.6765996813774109, "learning_rate": 3.9587806891529354e-05, "loss": 0.6755, "step": 2903 }, { "epoch": 0.5038167938931297, "grad_norm": 0.7752816677093506, "learning_rate": 3.9586429636442346e-05, "loss": 0.625, "step": 2904 }, { "epoch": 0.5039902845246357, "grad_norm": 0.7052983641624451, "learning_rate": 3.958505010832735e-05, "loss": 0.8062, "step": 2905 }, { "epoch": 0.5041637751561415, "grad_norm": 0.6991101503372192, "learning_rate": 3.958366830734448e-05, "loss": 0.7192, "step": 2906 }, { "epoch": 0.5043372657876475, "grad_norm": 0.6590566635131836, "learning_rate": 3.958228423365408e-05, "loss": 0.528, "step": 2907 }, { "epoch": 0.5045107564191533, "grad_norm": 0.7325621843338013, "learning_rate": 3.958089788741677e-05, "loss": 0.5999, "step": 2908 }, { "epoch": 0.5046842470506593, "grad_norm": 0.7469881772994995, "learning_rate": 3.957950926879345e-05, "loss": 0.627, "step": 2909 }, { "epoch": 0.5048577376821651, "grad_norm": 0.8237590193748474, "learning_rate": 3.957811837794526e-05, "loss": 0.4972, "step": 2910 }, { "epoch": 0.5050312283136711, "grad_norm": 1.2984604835510254, "learning_rate": 3.9576725215033624e-05, "loss": 0.6031, "step": 2911 }, { "epoch": 0.5052047189451769, "grad_norm": 0.7125715613365173, "learning_rate": 3.9575329780220215e-05, "loss": 0.6146, "step": 2912 }, { "epoch": 0.5053782095766829, "grad_norm": 0.7786535024642944, "learning_rate": 3.957393207366697e-05, "loss": 0.5795, "step": 2913 }, { "epoch": 0.5055517002081887, "grad_norm": 1.1961077451705933, "learning_rate": 3.957253209553611e-05, "loss": 0.645, "step": 2914 }, { "epoch": 0.5057251908396947, "grad_norm": 0.7613911032676697, "learning_rate": 3.9571129845990084e-05, "loss": 0.6475, "step": 2915 }, { "epoch": 0.5058986814712005, "grad_norm": 1.0745956897735596, "learning_rate": 3.956972532519164e-05, "loss": 0.5205, "step": 2916 }, { "epoch": 0.5060721721027065, "grad_norm": 0.8047274947166443, "learning_rate": 3.956831853330376e-05, "loss": 0.6866, "step": 2917 }, { "epoch": 0.5062456627342123, "grad_norm": 1.157810091972351, "learning_rate": 3.956690947048972e-05, "loss": 0.636, "step": 2918 }, { "epoch": 0.5064191533657183, "grad_norm": 0.8943071365356445, "learning_rate": 3.956549813691304e-05, "loss": 0.5283, "step": 2919 }, { "epoch": 0.5065926439972241, "grad_norm": 0.9355053901672363, "learning_rate": 3.9564084532737495e-05, "loss": 0.6853, "step": 2920 }, { "epoch": 0.5067661346287301, "grad_norm": 0.7883349657058716, "learning_rate": 3.956266865812714e-05, "loss": 0.5863, "step": 2921 }, { "epoch": 0.506939625260236, "grad_norm": 0.890841543674469, "learning_rate": 3.9561250513246306e-05, "loss": 0.6272, "step": 2922 }, { "epoch": 0.5071131158917418, "grad_norm": 0.7559226751327515, "learning_rate": 3.9559830098259544e-05, "loss": 0.6003, "step": 2923 }, { "epoch": 0.5072866065232478, "grad_norm": 0.8822606801986694, "learning_rate": 3.955840741333171e-05, "loss": 0.4907, "step": 2924 }, { "epoch": 0.5074600971547536, "grad_norm": 0.9082823395729065, "learning_rate": 3.9556982458627905e-05, "loss": 0.5961, "step": 2925 }, { "epoch": 0.5076335877862596, "grad_norm": 0.854621946811676, "learning_rate": 3.9555555234313506e-05, "loss": 0.5956, "step": 2926 }, { "epoch": 0.5078070784177654, "grad_norm": 0.7763813138008118, "learning_rate": 3.955412574055413e-05, "loss": 0.5427, "step": 2927 }, { "epoch": 0.5079805690492714, "grad_norm": 1.4742603302001953, "learning_rate": 3.9552693977515675e-05, "loss": 0.5181, "step": 2928 }, { "epoch": 0.5081540596807772, "grad_norm": 0.8555228114128113, "learning_rate": 3.95512599453643e-05, "loss": 0.5499, "step": 2929 }, { "epoch": 0.5083275503122832, "grad_norm": 0.7972477674484253, "learning_rate": 3.9549823644266434e-05, "loss": 0.6315, "step": 2930 }, { "epoch": 0.508501040943789, "grad_norm": 0.9488426446914673, "learning_rate": 3.9548385074388745e-05, "loss": 0.6281, "step": 2931 }, { "epoch": 0.508674531575295, "grad_norm": 0.7253492474555969, "learning_rate": 3.9546944235898194e-05, "loss": 0.7048, "step": 2932 }, { "epoch": 0.5088480222068008, "grad_norm": 0.7153453826904297, "learning_rate": 3.9545501128961985e-05, "loss": 0.6403, "step": 2933 }, { "epoch": 0.5090215128383068, "grad_norm": 0.7560308575630188, "learning_rate": 3.954405575374759e-05, "loss": 0.731, "step": 2934 }, { "epoch": 0.5091950034698126, "grad_norm": 1.0532622337341309, "learning_rate": 3.9542608110422764e-05, "loss": 0.582, "step": 2935 }, { "epoch": 0.5093684941013186, "grad_norm": 0.9665968418121338, "learning_rate": 3.954115819915549e-05, "loss": 0.5809, "step": 2936 }, { "epoch": 0.5095419847328244, "grad_norm": 2.0514838695526123, "learning_rate": 3.953970602011404e-05, "loss": 0.5996, "step": 2937 }, { "epoch": 0.5097154753643304, "grad_norm": 1.7059575319290161, "learning_rate": 3.9538251573466926e-05, "loss": 0.6071, "step": 2938 }, { "epoch": 0.5098889659958362, "grad_norm": 2.3512909412384033, "learning_rate": 3.9536794859382966e-05, "loss": 0.5767, "step": 2939 }, { "epoch": 0.5100624566273422, "grad_norm": 0.9228242635726929, "learning_rate": 3.9535335878031185e-05, "loss": 0.6882, "step": 2940 }, { "epoch": 0.510235947258848, "grad_norm": 0.7073661684989929, "learning_rate": 3.953387462958092e-05, "loss": 0.536, "step": 2941 }, { "epoch": 0.5104094378903539, "grad_norm": 0.866476833820343, "learning_rate": 3.953241111420174e-05, "loss": 0.5959, "step": 2942 }, { "epoch": 0.5105829285218598, "grad_norm": 0.7014033198356628, "learning_rate": 3.9530945332063486e-05, "loss": 0.5774, "step": 2943 }, { "epoch": 0.5107564191533657, "grad_norm": 0.7500950694084167, "learning_rate": 3.9529477283336274e-05, "loss": 0.6086, "step": 2944 }, { "epoch": 0.5109299097848716, "grad_norm": 0.8323651552200317, "learning_rate": 3.952800696819046e-05, "loss": 0.6919, "step": 2945 }, { "epoch": 0.5111034004163775, "grad_norm": 0.7332131862640381, "learning_rate": 3.9526534386796696e-05, "loss": 0.5884, "step": 2946 }, { "epoch": 0.5112768910478834, "grad_norm": 1.0053178071975708, "learning_rate": 3.9525059539325854e-05, "loss": 0.5651, "step": 2947 }, { "epoch": 0.5114503816793893, "grad_norm": 0.8901483416557312, "learning_rate": 3.95235824259491e-05, "loss": 0.5414, "step": 2948 }, { "epoch": 0.5116238723108952, "grad_norm": 0.9375765919685364, "learning_rate": 3.952210304683786e-05, "loss": 0.6737, "step": 2949 }, { "epoch": 0.5117973629424011, "grad_norm": 0.7333534359931946, "learning_rate": 3.952062140216381e-05, "loss": 0.5692, "step": 2950 }, { "epoch": 0.511970853573907, "grad_norm": 0.9000279307365417, "learning_rate": 3.951913749209891e-05, "loss": 0.644, "step": 2951 }, { "epoch": 0.5121443442054129, "grad_norm": 1.2508001327514648, "learning_rate": 3.951765131681535e-05, "loss": 0.6993, "step": 2952 }, { "epoch": 0.5123178348369188, "grad_norm": 1.0223735570907593, "learning_rate": 3.951616287648561e-05, "loss": 0.6294, "step": 2953 }, { "epoch": 0.5124913254684247, "grad_norm": 1.4913530349731445, "learning_rate": 3.9514672171282435e-05, "loss": 0.5161, "step": 2954 }, { "epoch": 0.5126648160999306, "grad_norm": 0.7307369112968445, "learning_rate": 3.951317920137881e-05, "loss": 0.6313, "step": 2955 }, { "epoch": 0.5128383067314365, "grad_norm": 2.8054757118225098, "learning_rate": 3.951168396694801e-05, "loss": 0.5863, "step": 2956 }, { "epoch": 0.5130117973629424, "grad_norm": 2.5261077880859375, "learning_rate": 3.951018646816354e-05, "loss": 0.7458, "step": 2957 }, { "epoch": 0.5131852879944483, "grad_norm": 1.7429360151290894, "learning_rate": 3.9508686705199196e-05, "loss": 0.525, "step": 2958 }, { "epoch": 0.5133587786259542, "grad_norm": 1.2919009923934937, "learning_rate": 3.9507184678229035e-05, "loss": 0.4951, "step": 2959 }, { "epoch": 0.5135322692574601, "grad_norm": 0.6654407978057861, "learning_rate": 3.950568038742736e-05, "loss": 0.7129, "step": 2960 }, { "epoch": 0.5137057598889659, "grad_norm": 0.7495904564857483, "learning_rate": 3.9504173832968744e-05, "loss": 0.6887, "step": 2961 }, { "epoch": 0.5138792505204719, "grad_norm": 0.9177446365356445, "learning_rate": 3.950266501502803e-05, "loss": 0.5347, "step": 2962 }, { "epoch": 0.5140527411519777, "grad_norm": 0.8417816758155823, "learning_rate": 3.9501153933780314e-05, "loss": 0.5763, "step": 2963 }, { "epoch": 0.5142262317834837, "grad_norm": 0.6979896426200867, "learning_rate": 3.9499640589400964e-05, "loss": 0.6661, "step": 2964 }, { "epoch": 0.5143997224149895, "grad_norm": 1.1703006029129028, "learning_rate": 3.94981249820656e-05, "loss": 0.6267, "step": 2965 }, { "epoch": 0.5145732130464955, "grad_norm": 0.7633793950080872, "learning_rate": 3.949660711195011e-05, "loss": 0.7529, "step": 2966 }, { "epoch": 0.5147467036780013, "grad_norm": 0.8751875162124634, "learning_rate": 3.9495086979230656e-05, "loss": 0.553, "step": 2967 }, { "epoch": 0.5149201943095073, "grad_norm": 0.9142911434173584, "learning_rate": 3.949356458408363e-05, "loss": 0.621, "step": 2968 }, { "epoch": 0.5150936849410132, "grad_norm": 1.1330902576446533, "learning_rate": 3.9492039926685724e-05, "loss": 0.5168, "step": 2969 }, { "epoch": 0.5152671755725191, "grad_norm": 0.8150011301040649, "learning_rate": 3.9490513007213874e-05, "loss": 0.5708, "step": 2970 }, { "epoch": 0.515440666204025, "grad_norm": 0.7211820483207703, "learning_rate": 3.948898382584528e-05, "loss": 0.5453, "step": 2971 }, { "epoch": 0.5156141568355309, "grad_norm": 1.0376445055007935, "learning_rate": 3.94874523827574e-05, "loss": 0.6359, "step": 2972 }, { "epoch": 0.5157876474670368, "grad_norm": 0.5772063136100769, "learning_rate": 3.9485918678127954e-05, "loss": 0.7893, "step": 2973 }, { "epoch": 0.5159611380985427, "grad_norm": 0.7940547466278076, "learning_rate": 3.9484382712134956e-05, "loss": 0.689, "step": 2974 }, { "epoch": 0.5161346287300486, "grad_norm": 0.7835409641265869, "learning_rate": 3.948284448495663e-05, "loss": 0.6183, "step": 2975 }, { "epoch": 0.5163081193615545, "grad_norm": 0.9172401428222656, "learning_rate": 3.9481303996771505e-05, "loss": 0.6893, "step": 2976 }, { "epoch": 0.5164816099930604, "grad_norm": 0.7964392304420471, "learning_rate": 3.947976124775835e-05, "loss": 0.657, "step": 2977 }, { "epoch": 0.5166551006245663, "grad_norm": 0.7399853467941284, "learning_rate": 3.9478216238096206e-05, "loss": 0.5171, "step": 2978 }, { "epoch": 0.5168285912560722, "grad_norm": 0.7355174422264099, "learning_rate": 3.947666896796436e-05, "loss": 0.6783, "step": 2979 }, { "epoch": 0.5170020818875781, "grad_norm": 0.6980626583099365, "learning_rate": 3.94751194375424e-05, "loss": 0.7009, "step": 2980 }, { "epoch": 0.517175572519084, "grad_norm": 0.8751781582832336, "learning_rate": 3.947356764701013e-05, "loss": 0.5306, "step": 2981 }, { "epoch": 0.5173490631505898, "grad_norm": 1.3187061548233032, "learning_rate": 3.9472013596547646e-05, "loss": 0.5841, "step": 2982 }, { "epoch": 0.5175225537820958, "grad_norm": 1.0359468460083008, "learning_rate": 3.947045728633529e-05, "loss": 0.5577, "step": 2983 }, { "epoch": 0.5176960444136016, "grad_norm": 0.7905299067497253, "learning_rate": 3.946889871655368e-05, "loss": 0.5211, "step": 2984 }, { "epoch": 0.5178695350451076, "grad_norm": 1.22454035282135, "learning_rate": 3.9467337887383695e-05, "loss": 0.5479, "step": 2985 }, { "epoch": 0.5180430256766134, "grad_norm": 0.8180719017982483, "learning_rate": 3.946577479900645e-05, "loss": 0.6879, "step": 2986 }, { "epoch": 0.5182165163081194, "grad_norm": 1.2501583099365234, "learning_rate": 3.9464209451603367e-05, "loss": 0.5841, "step": 2987 }, { "epoch": 0.5183900069396252, "grad_norm": 0.5989814400672913, "learning_rate": 3.9462641845356096e-05, "loss": 0.6218, "step": 2988 }, { "epoch": 0.5185634975711312, "grad_norm": 0.7976348996162415, "learning_rate": 3.946107198044656e-05, "loss": 0.5869, "step": 2989 }, { "epoch": 0.518736988202637, "grad_norm": 0.8901609778404236, "learning_rate": 3.945949985705694e-05, "loss": 0.6271, "step": 2990 }, { "epoch": 0.518910478834143, "grad_norm": 1.047589659690857, "learning_rate": 3.945792547536969e-05, "loss": 0.6718, "step": 2991 }, { "epoch": 0.5190839694656488, "grad_norm": 1.1930838823318481, "learning_rate": 3.945634883556752e-05, "loss": 0.5968, "step": 2992 }, { "epoch": 0.5192574600971548, "grad_norm": 0.8060930967330933, "learning_rate": 3.945476993783339e-05, "loss": 0.5712, "step": 2993 }, { "epoch": 0.5194309507286606, "grad_norm": 0.889964759349823, "learning_rate": 3.945318878235054e-05, "loss": 0.5232, "step": 2994 }, { "epoch": 0.5196044413601666, "grad_norm": 0.7452549934387207, "learning_rate": 3.945160536930247e-05, "loss": 0.6582, "step": 2995 }, { "epoch": 0.5197779319916724, "grad_norm": 0.7570145130157471, "learning_rate": 3.945001969887293e-05, "loss": 0.598, "step": 2996 }, { "epoch": 0.5199514226231784, "grad_norm": 0.6366786956787109, "learning_rate": 3.944843177124593e-05, "loss": 0.6699, "step": 2997 }, { "epoch": 0.5201249132546842, "grad_norm": 0.7185344099998474, "learning_rate": 3.944684158660577e-05, "loss": 0.5645, "step": 2998 }, { "epoch": 0.5202984038861902, "grad_norm": 0.8471353054046631, "learning_rate": 3.944524914513698e-05, "loss": 0.5682, "step": 2999 }, { "epoch": 0.520471894517696, "grad_norm": 0.7987390160560608, "learning_rate": 3.944365444702437e-05, "loss": 0.6149, "step": 3000 }, { "epoch": 0.5206453851492019, "grad_norm": 0.8394932150840759, "learning_rate": 3.944205749245301e-05, "loss": 0.6683, "step": 3001 }, { "epoch": 0.5208188757807078, "grad_norm": 0.8959985375404358, "learning_rate": 3.944045828160822e-05, "loss": 0.5833, "step": 3002 }, { "epoch": 0.5209923664122137, "grad_norm": 0.6436522603034973, "learning_rate": 3.943885681467559e-05, "loss": 0.6285, "step": 3003 }, { "epoch": 0.5211658570437196, "grad_norm": 0.7823279500007629, "learning_rate": 3.943725309184098e-05, "loss": 0.6475, "step": 3004 }, { "epoch": 0.5213393476752255, "grad_norm": 1.139787197113037, "learning_rate": 3.94356471132905e-05, "loss": 0.6207, "step": 3005 }, { "epoch": 0.5215128383067315, "grad_norm": 0.8308555483818054, "learning_rate": 3.943403887921052e-05, "loss": 0.6595, "step": 3006 }, { "epoch": 0.5216863289382373, "grad_norm": 0.9472867846488953, "learning_rate": 3.943242838978769e-05, "loss": 0.5508, "step": 3007 }, { "epoch": 0.5218598195697433, "grad_norm": 0.6742684245109558, "learning_rate": 3.94308156452089e-05, "loss": 0.6439, "step": 3008 }, { "epoch": 0.5220333102012491, "grad_norm": 0.7607699036598206, "learning_rate": 3.942920064566131e-05, "loss": 0.6178, "step": 3009 }, { "epoch": 0.5222068008327551, "grad_norm": 0.7258752584457397, "learning_rate": 3.9427583391332354e-05, "loss": 0.6803, "step": 3010 }, { "epoch": 0.5223802914642609, "grad_norm": 2.0244803428649902, "learning_rate": 3.94259638824097e-05, "loss": 0.6411, "step": 3011 }, { "epoch": 0.5225537820957669, "grad_norm": 1.4871636629104614, "learning_rate": 3.94243421190813e-05, "loss": 0.6649, "step": 3012 }, { "epoch": 0.5227272727272727, "grad_norm": 1.0151525735855103, "learning_rate": 3.942271810153537e-05, "loss": 0.5286, "step": 3013 }, { "epoch": 0.5229007633587787, "grad_norm": 0.8217700123786926, "learning_rate": 3.9421091829960364e-05, "loss": 0.562, "step": 3014 }, { "epoch": 0.5230742539902845, "grad_norm": 1.2036974430084229, "learning_rate": 3.941946330454503e-05, "loss": 0.5923, "step": 3015 }, { "epoch": 0.5232477446217905, "grad_norm": 0.8942238688468933, "learning_rate": 3.9417832525478344e-05, "loss": 0.6196, "step": 3016 }, { "epoch": 0.5234212352532963, "grad_norm": 0.6654651165008545, "learning_rate": 3.941619949294957e-05, "loss": 0.5917, "step": 3017 }, { "epoch": 0.5235947258848023, "grad_norm": 1.251190185546875, "learning_rate": 3.941456420714822e-05, "loss": 0.5902, "step": 3018 }, { "epoch": 0.5237682165163081, "grad_norm": 0.7630736827850342, "learning_rate": 3.941292666826408e-05, "loss": 0.4933, "step": 3019 }, { "epoch": 0.5239417071478141, "grad_norm": 0.8213485479354858, "learning_rate": 3.941128687648717e-05, "loss": 0.6637, "step": 3020 }, { "epoch": 0.5241151977793199, "grad_norm": 0.7555518746376038, "learning_rate": 3.94096448320078e-05, "loss": 0.6387, "step": 3021 }, { "epoch": 0.5242886884108258, "grad_norm": 1.5528206825256348, "learning_rate": 3.940800053501653e-05, "loss": 0.6968, "step": 3022 }, { "epoch": 0.5244621790423317, "grad_norm": 0.8396145105361938, "learning_rate": 3.940635398570418e-05, "loss": 0.5979, "step": 3023 }, { "epoch": 0.5246356696738376, "grad_norm": 1.0485159158706665, "learning_rate": 3.9404705184261846e-05, "loss": 0.5519, "step": 3024 }, { "epoch": 0.5248091603053435, "grad_norm": 0.7583789229393005, "learning_rate": 3.9403054130880864e-05, "loss": 0.5808, "step": 3025 }, { "epoch": 0.5249826509368494, "grad_norm": 0.5559424757957458, "learning_rate": 3.9401400825752835e-05, "loss": 0.7402, "step": 3026 }, { "epoch": 0.5251561415683553, "grad_norm": 1.0415542125701904, "learning_rate": 3.9399745269069636e-05, "loss": 0.7172, "step": 3027 }, { "epoch": 0.5253296321998612, "grad_norm": 0.8398486971855164, "learning_rate": 3.939808746102339e-05, "loss": 0.5929, "step": 3028 }, { "epoch": 0.5255031228313671, "grad_norm": 0.8926632404327393, "learning_rate": 3.93964274018065e-05, "loss": 0.5403, "step": 3029 }, { "epoch": 0.525676613462873, "grad_norm": 0.825466513633728, "learning_rate": 3.9394765091611596e-05, "loss": 0.5776, "step": 3030 }, { "epoch": 0.5258501040943789, "grad_norm": 1.4896812438964844, "learning_rate": 3.939310053063161e-05, "loss": 0.6487, "step": 3031 }, { "epoch": 0.5260235947258848, "grad_norm": 0.8119390606880188, "learning_rate": 3.939143371905971e-05, "loss": 0.7996, "step": 3032 }, { "epoch": 0.5261970853573907, "grad_norm": 0.7745625376701355, "learning_rate": 3.9389764657089334e-05, "loss": 0.6672, "step": 3033 }, { "epoch": 0.5263705759888966, "grad_norm": 0.5133965611457825, "learning_rate": 3.938809334491417e-05, "loss": 0.6941, "step": 3034 }, { "epoch": 0.5265440666204025, "grad_norm": 0.8933323621749878, "learning_rate": 3.938641978272819e-05, "loss": 0.6237, "step": 3035 }, { "epoch": 0.5267175572519084, "grad_norm": 2.33898663520813, "learning_rate": 3.9384743970725596e-05, "loss": 0.5834, "step": 3036 }, { "epoch": 0.5268910478834143, "grad_norm": 0.7867479920387268, "learning_rate": 3.938306590910088e-05, "loss": 0.6361, "step": 3037 }, { "epoch": 0.5270645385149202, "grad_norm": 1.6359503269195557, "learning_rate": 3.938138559804878e-05, "loss": 0.5477, "step": 3038 }, { "epoch": 0.5272380291464261, "grad_norm": 1.137347936630249, "learning_rate": 3.937970303776429e-05, "loss": 0.5846, "step": 3039 }, { "epoch": 0.527411519777932, "grad_norm": 0.8294497132301331, "learning_rate": 3.9378018228442696e-05, "loss": 0.6381, "step": 3040 }, { "epoch": 0.5275850104094378, "grad_norm": 0.9856337904930115, "learning_rate": 3.93763311702795e-05, "loss": 0.6945, "step": 3041 }, { "epoch": 0.5277585010409438, "grad_norm": 0.6542000770568848, "learning_rate": 3.937464186347049e-05, "loss": 0.6139, "step": 3042 }, { "epoch": 0.5279319916724496, "grad_norm": 0.8927829265594482, "learning_rate": 3.9372950308211715e-05, "loss": 0.5177, "step": 3043 }, { "epoch": 0.5281054823039556, "grad_norm": 1.0479310750961304, "learning_rate": 3.9371256504699486e-05, "loss": 0.5997, "step": 3044 }, { "epoch": 0.5282789729354614, "grad_norm": 1.0101724863052368, "learning_rate": 3.9369560453130366e-05, "loss": 0.6899, "step": 3045 }, { "epoch": 0.5284524635669674, "grad_norm": 0.889836847782135, "learning_rate": 3.936786215370119e-05, "loss": 0.571, "step": 3046 }, { "epoch": 0.5286259541984732, "grad_norm": 0.8853172659873962, "learning_rate": 3.9366161606609045e-05, "loss": 0.6084, "step": 3047 }, { "epoch": 0.5287994448299792, "grad_norm": 1.596792221069336, "learning_rate": 3.936445881205127e-05, "loss": 0.6294, "step": 3048 }, { "epoch": 0.528972935461485, "grad_norm": 0.6516552567481995, "learning_rate": 3.936275377022549e-05, "loss": 0.6698, "step": 3049 }, { "epoch": 0.529146426092991, "grad_norm": 0.9162889122962952, "learning_rate": 3.936104648132957e-05, "loss": 0.5614, "step": 3050 }, { "epoch": 0.5293199167244969, "grad_norm": 1.008646845817566, "learning_rate": 3.9359336945561656e-05, "loss": 0.6289, "step": 3051 }, { "epoch": 0.5294934073560028, "grad_norm": 0.7039345502853394, "learning_rate": 3.935762516312012e-05, "loss": 0.6151, "step": 3052 }, { "epoch": 0.5296668979875087, "grad_norm": 0.8550469875335693, "learning_rate": 3.9355911134203626e-05, "loss": 0.4678, "step": 3053 }, { "epoch": 0.5298403886190146, "grad_norm": 0.8338708877563477, "learning_rate": 3.9354194859011105e-05, "loss": 0.5522, "step": 3054 }, { "epoch": 0.5300138792505205, "grad_norm": 0.7659257650375366, "learning_rate": 3.935247633774171e-05, "loss": 0.6072, "step": 3055 }, { "epoch": 0.5301873698820264, "grad_norm": 0.6845202445983887, "learning_rate": 3.935075557059488e-05, "loss": 0.7031, "step": 3056 }, { "epoch": 0.5303608605135323, "grad_norm": 1.2755361795425415, "learning_rate": 3.934903255777033e-05, "loss": 0.6149, "step": 3057 }, { "epoch": 0.5305343511450382, "grad_norm": 0.8341980576515198, "learning_rate": 3.934730729946799e-05, "loss": 0.5929, "step": 3058 }, { "epoch": 0.5307078417765441, "grad_norm": 0.6358599066734314, "learning_rate": 3.934557979588811e-05, "loss": 0.6218, "step": 3059 }, { "epoch": 0.5308813324080499, "grad_norm": 0.808149516582489, "learning_rate": 3.9343850047231144e-05, "loss": 0.5128, "step": 3060 }, { "epoch": 0.5310548230395559, "grad_norm": 0.9138489961624146, "learning_rate": 3.9342118053697837e-05, "loss": 0.5391, "step": 3061 }, { "epoch": 0.5312283136710617, "grad_norm": 1.1662746667861938, "learning_rate": 3.9340383815489204e-05, "loss": 0.5781, "step": 3062 }, { "epoch": 0.5314018043025677, "grad_norm": 0.7738902568817139, "learning_rate": 3.933864733280648e-05, "loss": 0.5934, "step": 3063 }, { "epoch": 0.5315752949340735, "grad_norm": 0.7514986991882324, "learning_rate": 3.933690860585121e-05, "loss": 0.5417, "step": 3064 }, { "epoch": 0.5317487855655795, "grad_norm": 0.7755056023597717, "learning_rate": 3.933516763482516e-05, "loss": 0.6454, "step": 3065 }, { "epoch": 0.5319222761970853, "grad_norm": 0.8597734570503235, "learning_rate": 3.933342441993037e-05, "loss": 0.6802, "step": 3066 }, { "epoch": 0.5320957668285913, "grad_norm": 0.7433542609214783, "learning_rate": 3.9331678961369156e-05, "loss": 0.6699, "step": 3067 }, { "epoch": 0.5322692574600971, "grad_norm": 0.8031440377235413, "learning_rate": 3.932993125934407e-05, "loss": 0.5856, "step": 3068 }, { "epoch": 0.5324427480916031, "grad_norm": 0.599829912185669, "learning_rate": 3.932818131405794e-05, "loss": 0.665, "step": 3069 }, { "epoch": 0.5326162387231089, "grad_norm": 0.669053316116333, "learning_rate": 3.932642912571385e-05, "loss": 0.5923, "step": 3070 }, { "epoch": 0.5327897293546149, "grad_norm": 0.6360391974449158, "learning_rate": 3.9324674694515126e-05, "loss": 0.6462, "step": 3071 }, { "epoch": 0.5329632199861207, "grad_norm": 1.3935229778289795, "learning_rate": 3.932291802066539e-05, "loss": 0.5499, "step": 3072 }, { "epoch": 0.5331367106176267, "grad_norm": 0.7656862735748291, "learning_rate": 3.932115910436851e-05, "loss": 0.5592, "step": 3073 }, { "epoch": 0.5333102012491325, "grad_norm": 0.8608168959617615, "learning_rate": 3.93193979458286e-05, "loss": 0.5165, "step": 3074 }, { "epoch": 0.5334836918806385, "grad_norm": 0.6383200287818909, "learning_rate": 3.931763454525005e-05, "loss": 0.6392, "step": 3075 }, { "epoch": 0.5336571825121443, "grad_norm": 0.7783990502357483, "learning_rate": 3.93158689028375e-05, "loss": 0.5022, "step": 3076 }, { "epoch": 0.5338306731436503, "grad_norm": 0.9632290005683899, "learning_rate": 3.931410101879585e-05, "loss": 0.5671, "step": 3077 }, { "epoch": 0.5340041637751561, "grad_norm": 0.6838582158088684, "learning_rate": 3.931233089333027e-05, "loss": 0.5509, "step": 3078 }, { "epoch": 0.5341776544066621, "grad_norm": 0.9874531030654907, "learning_rate": 3.931055852664619e-05, "loss": 0.6674, "step": 3079 }, { "epoch": 0.5343511450381679, "grad_norm": 0.9669835567474365, "learning_rate": 3.9308783918949296e-05, "loss": 0.5638, "step": 3080 }, { "epoch": 0.5345246356696738, "grad_norm": 0.7132205963134766, "learning_rate": 3.930700707044552e-05, "loss": 0.6456, "step": 3081 }, { "epoch": 0.5346981263011797, "grad_norm": 0.7564111948013306, "learning_rate": 3.9305227981341085e-05, "loss": 0.6383, "step": 3082 }, { "epoch": 0.5348716169326856, "grad_norm": 0.8441155552864075, "learning_rate": 3.9303446651842444e-05, "loss": 0.6904, "step": 3083 }, { "epoch": 0.5350451075641915, "grad_norm": 0.8988627791404724, "learning_rate": 3.930166308215633e-05, "loss": 0.6704, "step": 3084 }, { "epoch": 0.5352185981956974, "grad_norm": 0.9254375696182251, "learning_rate": 3.929987727248972e-05, "loss": 0.629, "step": 3085 }, { "epoch": 0.5353920888272033, "grad_norm": 0.9920805096626282, "learning_rate": 3.929808922304987e-05, "loss": 0.549, "step": 3086 }, { "epoch": 0.5355655794587092, "grad_norm": 0.5927398204803467, "learning_rate": 3.929629893404428e-05, "loss": 0.59, "step": 3087 }, { "epoch": 0.5357390700902152, "grad_norm": 1.3722820281982422, "learning_rate": 3.92945064056807e-05, "loss": 0.5322, "step": 3088 }, { "epoch": 0.535912560721721, "grad_norm": 0.7934204936027527, "learning_rate": 3.929271163816718e-05, "loss": 0.6578, "step": 3089 }, { "epoch": 0.536086051353227, "grad_norm": 1.265750765800476, "learning_rate": 3.929091463171199e-05, "loss": 0.5594, "step": 3090 }, { "epoch": 0.5362595419847328, "grad_norm": 0.7382822036743164, "learning_rate": 3.9289115386523676e-05, "loss": 0.6552, "step": 3091 }, { "epoch": 0.5364330326162388, "grad_norm": 0.8243357539176941, "learning_rate": 3.928731390281105e-05, "loss": 0.6023, "step": 3092 }, { "epoch": 0.5366065232477446, "grad_norm": 0.7092339396476746, "learning_rate": 3.928551018078317e-05, "loss": 0.6342, "step": 3093 }, { "epoch": 0.5367800138792506, "grad_norm": 0.6945857405662537, "learning_rate": 3.928370422064936e-05, "loss": 0.6372, "step": 3094 }, { "epoch": 0.5369535045107564, "grad_norm": 0.6603047251701355, "learning_rate": 3.928189602261921e-05, "loss": 0.7356, "step": 3095 }, { "epoch": 0.5371269951422624, "grad_norm": 0.7895171642303467, "learning_rate": 3.928008558690255e-05, "loss": 0.6509, "step": 3096 }, { "epoch": 0.5373004857737682, "grad_norm": 1.4371894598007202, "learning_rate": 3.927827291370951e-05, "loss": 0.6134, "step": 3097 }, { "epoch": 0.5374739764052742, "grad_norm": 0.7677549719810486, "learning_rate": 3.927645800325041e-05, "loss": 0.5552, "step": 3098 }, { "epoch": 0.53764746703678, "grad_norm": 0.8762229681015015, "learning_rate": 3.9274640855735914e-05, "loss": 0.7253, "step": 3099 }, { "epoch": 0.5378209576682859, "grad_norm": 0.6970749497413635, "learning_rate": 3.927282147137688e-05, "loss": 0.707, "step": 3100 }, { "epoch": 0.5379944482997918, "grad_norm": 0.9581252932548523, "learning_rate": 3.927099985038446e-05, "loss": 0.4913, "step": 3101 }, { "epoch": 0.5381679389312977, "grad_norm": 0.9242482781410217, "learning_rate": 3.9269175992970055e-05, "loss": 0.5629, "step": 3102 }, { "epoch": 0.5383414295628036, "grad_norm": 1.2358778715133667, "learning_rate": 3.926734989934532e-05, "loss": 0.5692, "step": 3103 }, { "epoch": 0.5385149201943095, "grad_norm": 0.9537628889083862, "learning_rate": 3.9265521569722176e-05, "loss": 0.626, "step": 3104 }, { "epoch": 0.5386884108258154, "grad_norm": 0.7534976005554199, "learning_rate": 3.9263691004312804e-05, "loss": 0.7104, "step": 3105 }, { "epoch": 0.5388619014573213, "grad_norm": 0.6508181095123291, "learning_rate": 3.926185820332965e-05, "loss": 0.5377, "step": 3106 }, { "epoch": 0.5390353920888272, "grad_norm": 0.6256936192512512, "learning_rate": 3.9260023166985407e-05, "loss": 0.5938, "step": 3107 }, { "epoch": 0.5392088827203331, "grad_norm": 0.8080528378486633, "learning_rate": 3.9258185895493026e-05, "loss": 0.6345, "step": 3108 }, { "epoch": 0.539382373351839, "grad_norm": 0.6470572352409363, "learning_rate": 3.925634638906574e-05, "loss": 0.592, "step": 3109 }, { "epoch": 0.5395558639833449, "grad_norm": 0.9834403395652771, "learning_rate": 3.925450464791701e-05, "loss": 0.5455, "step": 3110 }, { "epoch": 0.5397293546148508, "grad_norm": 0.9988872408866882, "learning_rate": 3.925266067226058e-05, "loss": 0.5189, "step": 3111 }, { "epoch": 0.5399028452463567, "grad_norm": 0.608731210231781, "learning_rate": 3.925081446231045e-05, "loss": 0.5674, "step": 3112 }, { "epoch": 0.5400763358778626, "grad_norm": 0.7310196757316589, "learning_rate": 3.924896601828087e-05, "loss": 0.5524, "step": 3113 }, { "epoch": 0.5402498265093685, "grad_norm": 0.6233857274055481, "learning_rate": 3.924711534038635e-05, "loss": 0.5348, "step": 3114 }, { "epoch": 0.5404233171408744, "grad_norm": 0.6901100873947144, "learning_rate": 3.924526242884167e-05, "loss": 0.736, "step": 3115 }, { "epoch": 0.5405968077723803, "grad_norm": 0.9781090617179871, "learning_rate": 3.9243407283861866e-05, "loss": 0.6023, "step": 3116 }, { "epoch": 0.5407702984038862, "grad_norm": 0.9208560585975647, "learning_rate": 3.924154990566222e-05, "loss": 0.6281, "step": 3117 }, { "epoch": 0.5409437890353921, "grad_norm": 0.7432438731193542, "learning_rate": 3.923969029445828e-05, "loss": 0.7271, "step": 3118 }, { "epoch": 0.5411172796668979, "grad_norm": 0.6769471168518066, "learning_rate": 3.9237828450465866e-05, "loss": 0.6505, "step": 3119 }, { "epoch": 0.5412907702984039, "grad_norm": 0.7981407642364502, "learning_rate": 3.923596437390105e-05, "loss": 0.5497, "step": 3120 }, { "epoch": 0.5414642609299097, "grad_norm": 1.166545033454895, "learning_rate": 3.9234098064980145e-05, "loss": 0.6946, "step": 3121 }, { "epoch": 0.5416377515614157, "grad_norm": 0.9433131814002991, "learning_rate": 3.923222952391975e-05, "loss": 0.6458, "step": 3122 }, { "epoch": 0.5418112421929215, "grad_norm": 1.5492539405822754, "learning_rate": 3.923035875093671e-05, "loss": 0.6902, "step": 3123 }, { "epoch": 0.5419847328244275, "grad_norm": 0.9652863144874573, "learning_rate": 3.9228485746248134e-05, "loss": 0.5554, "step": 3124 }, { "epoch": 0.5421582234559333, "grad_norm": 0.9015517830848694, "learning_rate": 3.922661051007137e-05, "loss": 0.5894, "step": 3125 }, { "epoch": 0.5423317140874393, "grad_norm": 1.0942059755325317, "learning_rate": 3.922473304262406e-05, "loss": 0.5625, "step": 3126 }, { "epoch": 0.5425052047189451, "grad_norm": 0.9288303256034851, "learning_rate": 3.922285334412408e-05, "loss": 0.5653, "step": 3127 }, { "epoch": 0.5426786953504511, "grad_norm": 0.7825427055358887, "learning_rate": 3.922097141478957e-05, "loss": 0.691, "step": 3128 }, { "epoch": 0.542852185981957, "grad_norm": 0.8568305969238281, "learning_rate": 3.9219087254838925e-05, "loss": 0.6082, "step": 3129 }, { "epoch": 0.5430256766134629, "grad_norm": 1.0168123245239258, "learning_rate": 3.921720086449082e-05, "loss": 0.6301, "step": 3130 }, { "epoch": 0.5431991672449688, "grad_norm": 1.1532608270645142, "learning_rate": 3.921531224396415e-05, "loss": 0.5955, "step": 3131 }, { "epoch": 0.5433726578764747, "grad_norm": 0.7430927753448486, "learning_rate": 3.921342139347811e-05, "loss": 0.5428, "step": 3132 }, { "epoch": 0.5435461485079806, "grad_norm": 1.0280706882476807, "learning_rate": 3.921152831325213e-05, "loss": 0.5558, "step": 3133 }, { "epoch": 0.5437196391394865, "grad_norm": 0.6628148555755615, "learning_rate": 3.92096330035059e-05, "loss": 0.6188, "step": 3134 }, { "epoch": 0.5438931297709924, "grad_norm": 1.6297130584716797, "learning_rate": 3.920773546445938e-05, "loss": 0.6769, "step": 3135 }, { "epoch": 0.5440666204024983, "grad_norm": 0.6971448659896851, "learning_rate": 3.9205835696332775e-05, "loss": 0.585, "step": 3136 }, { "epoch": 0.5442401110340042, "grad_norm": 0.9060026407241821, "learning_rate": 3.9203933699346555e-05, "loss": 0.5519, "step": 3137 }, { "epoch": 0.5444136016655101, "grad_norm": 0.7070338129997253, "learning_rate": 3.920202947372146e-05, "loss": 0.5883, "step": 3138 }, { "epoch": 0.544587092297016, "grad_norm": 0.9702456593513489, "learning_rate": 3.9200123019678467e-05, "loss": 0.5353, "step": 3139 }, { "epoch": 0.5447605829285218, "grad_norm": 1.4283995628356934, "learning_rate": 3.919821433743882e-05, "loss": 0.7267, "step": 3140 }, { "epoch": 0.5449340735600278, "grad_norm": 0.7251282930374146, "learning_rate": 3.9196303427224036e-05, "loss": 0.6918, "step": 3141 }, { "epoch": 0.5451075641915336, "grad_norm": 0.8550883531570435, "learning_rate": 3.919439028925587e-05, "loss": 0.6157, "step": 3142 }, { "epoch": 0.5452810548230396, "grad_norm": 1.5072968006134033, "learning_rate": 3.919247492375634e-05, "loss": 0.6156, "step": 3143 }, { "epoch": 0.5454545454545454, "grad_norm": 0.7578117251396179, "learning_rate": 3.919055733094774e-05, "loss": 0.5886, "step": 3144 }, { "epoch": 0.5456280360860514, "grad_norm": 0.8776355981826782, "learning_rate": 3.918863751105259e-05, "loss": 0.5455, "step": 3145 }, { "epoch": 0.5458015267175572, "grad_norm": 0.7595939636230469, "learning_rate": 3.91867154642937e-05, "loss": 0.6096, "step": 3146 }, { "epoch": 0.5459750173490632, "grad_norm": 1.0227677822113037, "learning_rate": 3.918479119089413e-05, "loss": 0.5476, "step": 3147 }, { "epoch": 0.546148507980569, "grad_norm": 0.7157589197158813, "learning_rate": 3.918286469107718e-05, "loss": 0.6128, "step": 3148 }, { "epoch": 0.546321998612075, "grad_norm": 0.995482861995697, "learning_rate": 3.918093596506643e-05, "loss": 0.5902, "step": 3149 }, { "epoch": 0.5464954892435808, "grad_norm": 0.7816616892814636, "learning_rate": 3.917900501308572e-05, "loss": 0.6029, "step": 3150 }, { "epoch": 0.5466689798750868, "grad_norm": 1.0965698957443237, "learning_rate": 3.917707183535913e-05, "loss": 0.6873, "step": 3151 }, { "epoch": 0.5468424705065926, "grad_norm": 0.5522381663322449, "learning_rate": 3.9175136432111e-05, "loss": 0.6611, "step": 3152 }, { "epoch": 0.5470159611380986, "grad_norm": 1.5331714153289795, "learning_rate": 3.917319880356594e-05, "loss": 0.6505, "step": 3153 }, { "epoch": 0.5471894517696044, "grad_norm": 1.0777934789657593, "learning_rate": 3.9171258949948827e-05, "loss": 0.5635, "step": 3154 }, { "epoch": 0.5473629424011104, "grad_norm": 0.8994247913360596, "learning_rate": 3.916931687148477e-05, "loss": 0.5776, "step": 3155 }, { "epoch": 0.5475364330326162, "grad_norm": 1.6511632204055786, "learning_rate": 3.916737256839916e-05, "loss": 0.5968, "step": 3156 }, { "epoch": 0.5477099236641222, "grad_norm": 0.8579990267753601, "learning_rate": 3.916542604091762e-05, "loss": 0.5574, "step": 3157 }, { "epoch": 0.547883414295628, "grad_norm": 1.2080751657485962, "learning_rate": 3.916347728926606e-05, "loss": 0.7275, "step": 3158 }, { "epoch": 0.5480569049271339, "grad_norm": 0.7340068221092224, "learning_rate": 3.916152631367063e-05, "loss": 0.5321, "step": 3159 }, { "epoch": 0.5482303955586398, "grad_norm": 0.6173186302185059, "learning_rate": 3.915957311435774e-05, "loss": 0.5669, "step": 3160 }, { "epoch": 0.5484038861901457, "grad_norm": 0.7848614454269409, "learning_rate": 3.915761769155407e-05, "loss": 0.5255, "step": 3161 }, { "epoch": 0.5485773768216516, "grad_norm": 0.5917115807533264, "learning_rate": 3.915566004548654e-05, "loss": 0.5249, "step": 3162 }, { "epoch": 0.5487508674531575, "grad_norm": 1.5309841632843018, "learning_rate": 3.9153700176382344e-05, "loss": 0.6642, "step": 3163 }, { "epoch": 0.5489243580846634, "grad_norm": 0.7895891666412354, "learning_rate": 3.915173808446892e-05, "loss": 0.6454, "step": 3164 }, { "epoch": 0.5490978487161693, "grad_norm": 0.8298311233520508, "learning_rate": 3.9149773769973985e-05, "loss": 0.4957, "step": 3165 }, { "epoch": 0.5492713393476752, "grad_norm": 1.134933590888977, "learning_rate": 3.914780723312548e-05, "loss": 0.5394, "step": 3166 }, { "epoch": 0.5494448299791811, "grad_norm": 1.13173246383667, "learning_rate": 3.9145838474151633e-05, "loss": 0.5302, "step": 3167 }, { "epoch": 0.549618320610687, "grad_norm": 1.4297860860824585, "learning_rate": 3.914386749328093e-05, "loss": 0.6407, "step": 3168 }, { "epoch": 0.5497918112421929, "grad_norm": 0.7096014618873596, "learning_rate": 3.914189429074209e-05, "loss": 0.6606, "step": 3169 }, { "epoch": 0.5499653018736989, "grad_norm": 1.2400507926940918, "learning_rate": 3.913991886676412e-05, "loss": 0.6764, "step": 3170 }, { "epoch": 0.5501387925052047, "grad_norm": 0.7265576720237732, "learning_rate": 3.913794122157626e-05, "loss": 0.6105, "step": 3171 }, { "epoch": 0.5503122831367107, "grad_norm": 0.8316229581832886, "learning_rate": 3.9135961355408024e-05, "loss": 0.564, "step": 3172 }, { "epoch": 0.5504857737682165, "grad_norm": 1.0690675973892212, "learning_rate": 3.913397926848917e-05, "loss": 0.5505, "step": 3173 }, { "epoch": 0.5506592643997225, "grad_norm": 0.6928693056106567, "learning_rate": 3.913199496104972e-05, "loss": 0.6057, "step": 3174 }, { "epoch": 0.5508327550312283, "grad_norm": 0.5716657638549805, "learning_rate": 3.9130008433319974e-05, "loss": 0.7322, "step": 3175 }, { "epoch": 0.5510062456627343, "grad_norm": 0.6816802024841309, "learning_rate": 3.912801968553045e-05, "loss": 0.6383, "step": 3176 }, { "epoch": 0.5511797362942401, "grad_norm": 0.6757462024688721, "learning_rate": 3.912602871791196e-05, "loss": 0.6283, "step": 3177 }, { "epoch": 0.5513532269257461, "grad_norm": 0.7462841272354126, "learning_rate": 3.9124035530695546e-05, "loss": 0.6243, "step": 3178 }, { "epoch": 0.5515267175572519, "grad_norm": 0.875676155090332, "learning_rate": 3.912204012411253e-05, "loss": 0.5619, "step": 3179 }, { "epoch": 0.5517002081887578, "grad_norm": 0.673236072063446, "learning_rate": 3.912004249839447e-05, "loss": 0.5929, "step": 3180 }, { "epoch": 0.5518736988202637, "grad_norm": 0.9303373694419861, "learning_rate": 3.9118042653773194e-05, "loss": 0.6926, "step": 3181 }, { "epoch": 0.5520471894517696, "grad_norm": 0.684323787689209, "learning_rate": 3.91160405904808e-05, "loss": 0.6083, "step": 3182 }, { "epoch": 0.5522206800832755, "grad_norm": 0.7034314274787903, "learning_rate": 3.9114036308749625e-05, "loss": 0.5876, "step": 3183 }, { "epoch": 0.5523941707147814, "grad_norm": 2.226392984390259, "learning_rate": 3.911202980881226e-05, "loss": 0.5818, "step": 3184 }, { "epoch": 0.5525676613462873, "grad_norm": 0.9508465528488159, "learning_rate": 3.911002109090156e-05, "loss": 0.6475, "step": 3185 }, { "epoch": 0.5527411519777932, "grad_norm": 0.8926861882209778, "learning_rate": 3.910801015525064e-05, "loss": 0.613, "step": 3186 }, { "epoch": 0.5529146426092991, "grad_norm": 0.8146591782569885, "learning_rate": 3.9105997002092896e-05, "loss": 0.6594, "step": 3187 }, { "epoch": 0.553088133240805, "grad_norm": 1.1827658414840698, "learning_rate": 3.910398163166192e-05, "loss": 0.6274, "step": 3188 }, { "epoch": 0.5532616238723109, "grad_norm": 1.186378836631775, "learning_rate": 3.910196404419163e-05, "loss": 0.5598, "step": 3189 }, { "epoch": 0.5534351145038168, "grad_norm": 1.9686994552612305, "learning_rate": 3.909994423991614e-05, "loss": 0.5443, "step": 3190 }, { "epoch": 0.5536086051353227, "grad_norm": 0.8391484618186951, "learning_rate": 3.909792221906987e-05, "loss": 0.5592, "step": 3191 }, { "epoch": 0.5537820957668286, "grad_norm": 0.9526604413986206, "learning_rate": 3.909589798188747e-05, "loss": 0.614, "step": 3192 }, { "epoch": 0.5539555863983345, "grad_norm": 1.4479925632476807, "learning_rate": 3.909387152860386e-05, "loss": 0.5209, "step": 3193 }, { "epoch": 0.5541290770298404, "grad_norm": 2.3782713413238525, "learning_rate": 3.909184285945421e-05, "loss": 0.5903, "step": 3194 }, { "epoch": 0.5543025676613463, "grad_norm": 1.63461434841156, "learning_rate": 3.908981197467396e-05, "loss": 0.5917, "step": 3195 }, { "epoch": 0.5544760582928522, "grad_norm": 0.9873148202896118, "learning_rate": 3.908777887449877e-05, "loss": 0.7415, "step": 3196 }, { "epoch": 0.5546495489243581, "grad_norm": 0.669289231300354, "learning_rate": 3.908574355916461e-05, "loss": 0.6958, "step": 3197 }, { "epoch": 0.554823039555864, "grad_norm": 0.6251704096794128, "learning_rate": 3.9083706028907665e-05, "loss": 0.6174, "step": 3198 }, { "epoch": 0.5549965301873698, "grad_norm": 0.7469822764396667, "learning_rate": 3.90816662839644e-05, "loss": 0.6473, "step": 3199 }, { "epoch": 0.5551700208188758, "grad_norm": 0.8963797092437744, "learning_rate": 3.9079624324571536e-05, "loss": 0.5769, "step": 3200 }, { "epoch": 0.5553435114503816, "grad_norm": 0.5527542233467102, "learning_rate": 3.907758015096603e-05, "loss": 0.6652, "step": 3201 }, { "epoch": 0.5555170020818876, "grad_norm": 0.6194865107536316, "learning_rate": 3.9075533763385116e-05, "loss": 0.7543, "step": 3202 }, { "epoch": 0.5556904927133934, "grad_norm": 2.4148449897766113, "learning_rate": 3.907348516206629e-05, "loss": 0.6011, "step": 3203 }, { "epoch": 0.5558639833448994, "grad_norm": 0.6149037480354309, "learning_rate": 3.9071434347247275e-05, "loss": 0.6194, "step": 3204 }, { "epoch": 0.5560374739764052, "grad_norm": 0.7264496684074402, "learning_rate": 3.906938131916609e-05, "loss": 0.5947, "step": 3205 }, { "epoch": 0.5562109646079112, "grad_norm": 1.004270076751709, "learning_rate": 3.906732607806098e-05, "loss": 0.5686, "step": 3206 }, { "epoch": 0.556384455239417, "grad_norm": 0.5807811617851257, "learning_rate": 3.906526862417046e-05, "loss": 0.6287, "step": 3207 }, { "epoch": 0.556557945870923, "grad_norm": 0.8036131262779236, "learning_rate": 3.906320895773329e-05, "loss": 0.6539, "step": 3208 }, { "epoch": 0.5567314365024288, "grad_norm": 0.7775804996490479, "learning_rate": 3.9061147078988526e-05, "loss": 0.6222, "step": 3209 }, { "epoch": 0.5569049271339348, "grad_norm": 0.962177038192749, "learning_rate": 3.905908298817543e-05, "loss": 0.6736, "step": 3210 }, { "epoch": 0.5570784177654406, "grad_norm": 1.0258029699325562, "learning_rate": 3.905701668553353e-05, "loss": 0.6505, "step": 3211 }, { "epoch": 0.5572519083969466, "grad_norm": 0.8902332186698914, "learning_rate": 3.905494817130265e-05, "loss": 0.6418, "step": 3212 }, { "epoch": 0.5574253990284525, "grad_norm": 0.803525984287262, "learning_rate": 3.905287744572283e-05, "loss": 0.5302, "step": 3213 }, { "epoch": 0.5575988896599584, "grad_norm": 0.9576848745346069, "learning_rate": 3.9050804509034383e-05, "loss": 0.5084, "step": 3214 }, { "epoch": 0.5577723802914643, "grad_norm": 2.1501924991607666, "learning_rate": 3.904872936147787e-05, "loss": 0.5497, "step": 3215 }, { "epoch": 0.5579458709229702, "grad_norm": 1.1034975051879883, "learning_rate": 3.904665200329411e-05, "loss": 0.5637, "step": 3216 }, { "epoch": 0.5581193615544761, "grad_norm": 0.7428118586540222, "learning_rate": 3.904457243472421e-05, "loss": 0.613, "step": 3217 }, { "epoch": 0.5582928521859819, "grad_norm": 1.164508581161499, "learning_rate": 3.904249065600948e-05, "loss": 0.6355, "step": 3218 }, { "epoch": 0.5584663428174879, "grad_norm": 0.8807821869850159, "learning_rate": 3.904040666739151e-05, "loss": 0.6489, "step": 3219 }, { "epoch": 0.5586398334489937, "grad_norm": 0.7938810586929321, "learning_rate": 3.903832046911218e-05, "loss": 0.6605, "step": 3220 }, { "epoch": 0.5588133240804997, "grad_norm": 0.7629036903381348, "learning_rate": 3.903623206141356e-05, "loss": 0.5359, "step": 3221 }, { "epoch": 0.5589868147120055, "grad_norm": 0.7561957836151123, "learning_rate": 3.9034141444538034e-05, "loss": 0.6113, "step": 3222 }, { "epoch": 0.5591603053435115, "grad_norm": 1.1009613275527954, "learning_rate": 3.903204861872821e-05, "loss": 0.6989, "step": 3223 }, { "epoch": 0.5593337959750173, "grad_norm": 0.6956751942634583, "learning_rate": 3.902995358422697e-05, "loss": 0.5776, "step": 3224 }, { "epoch": 0.5595072866065233, "grad_norm": 0.6899406909942627, "learning_rate": 3.902785634127744e-05, "loss": 0.7062, "step": 3225 }, { "epoch": 0.5596807772380291, "grad_norm": 0.9534558057785034, "learning_rate": 3.902575689012301e-05, "loss": 0.6958, "step": 3226 }, { "epoch": 0.5598542678695351, "grad_norm": 1.1844556331634521, "learning_rate": 3.9023655231007325e-05, "loss": 0.5811, "step": 3227 }, { "epoch": 0.5600277585010409, "grad_norm": 0.6327782869338989, "learning_rate": 3.9021551364174286e-05, "loss": 0.5557, "step": 3228 }, { "epoch": 0.5602012491325469, "grad_norm": 0.8286023139953613, "learning_rate": 3.901944528986804e-05, "loss": 0.6006, "step": 3229 }, { "epoch": 0.5603747397640527, "grad_norm": 0.9115365743637085, "learning_rate": 3.901733700833301e-05, "loss": 0.6746, "step": 3230 }, { "epoch": 0.5605482303955587, "grad_norm": 0.7401015162467957, "learning_rate": 3.9015226519813864e-05, "loss": 0.6975, "step": 3231 }, { "epoch": 0.5607217210270645, "grad_norm": 1.462897539138794, "learning_rate": 3.9013113824555515e-05, "loss": 0.7756, "step": 3232 }, { "epoch": 0.5608952116585705, "grad_norm": 1.277316689491272, "learning_rate": 3.901099892280316e-05, "loss": 0.6306, "step": 3233 }, { "epoch": 0.5610687022900763, "grad_norm": 0.779032826423645, "learning_rate": 3.9008881814802225e-05, "loss": 0.6066, "step": 3234 }, { "epoch": 0.5612421929215823, "grad_norm": 0.7906771898269653, "learning_rate": 3.900676250079841e-05, "loss": 0.6125, "step": 3235 }, { "epoch": 0.5614156835530881, "grad_norm": 0.7157325148582458, "learning_rate": 3.900464098103765e-05, "loss": 0.6409, "step": 3236 }, { "epoch": 0.5615891741845941, "grad_norm": 0.7426790595054626, "learning_rate": 3.9002517255766163e-05, "loss": 0.5331, "step": 3237 }, { "epoch": 0.5617626648160999, "grad_norm": 0.8521499633789062, "learning_rate": 3.9000391325230405e-05, "loss": 0.5262, "step": 3238 }, { "epoch": 0.5619361554476058, "grad_norm": 0.8248336911201477, "learning_rate": 3.89982631896771e-05, "loss": 0.6337, "step": 3239 }, { "epoch": 0.5621096460791117, "grad_norm": 0.874073326587677, "learning_rate": 3.899613284935321e-05, "loss": 0.5125, "step": 3240 }, { "epoch": 0.5622831367106176, "grad_norm": 0.6740594506263733, "learning_rate": 3.899400030450597e-05, "loss": 0.5568, "step": 3241 }, { "epoch": 0.5624566273421235, "grad_norm": 0.867178201675415, "learning_rate": 3.899186555538286e-05, "loss": 0.6821, "step": 3242 }, { "epoch": 0.5626301179736294, "grad_norm": 0.7415544986724854, "learning_rate": 3.8989728602231623e-05, "loss": 0.6562, "step": 3243 }, { "epoch": 0.5628036086051353, "grad_norm": 1.0366650819778442, "learning_rate": 3.898758944530025e-05, "loss": 0.616, "step": 3244 }, { "epoch": 0.5629770992366412, "grad_norm": 0.976502001285553, "learning_rate": 3.8985448084837e-05, "loss": 0.5437, "step": 3245 }, { "epoch": 0.5631505898681471, "grad_norm": 0.8038862943649292, "learning_rate": 3.898330452109038e-05, "loss": 0.5486, "step": 3246 }, { "epoch": 0.563324080499653, "grad_norm": 0.7974483370780945, "learning_rate": 3.8981158754309156e-05, "loss": 0.5627, "step": 3247 }, { "epoch": 0.563497571131159, "grad_norm": 0.7054277062416077, "learning_rate": 3.897901078474233e-05, "loss": 0.6125, "step": 3248 }, { "epoch": 0.5636710617626648, "grad_norm": 0.6825677752494812, "learning_rate": 3.897686061263919e-05, "loss": 0.5743, "step": 3249 }, { "epoch": 0.5638445523941708, "grad_norm": 1.8333244323730469, "learning_rate": 3.897470823824927e-05, "loss": 0.551, "step": 3250 }, { "epoch": 0.5640180430256766, "grad_norm": 0.9035657644271851, "learning_rate": 3.8972553661822334e-05, "loss": 0.6897, "step": 3251 }, { "epoch": 0.5641915336571826, "grad_norm": 1.166638731956482, "learning_rate": 3.897039688360845e-05, "loss": 0.6208, "step": 3252 }, { "epoch": 0.5643650242886884, "grad_norm": 1.1381759643554688, "learning_rate": 3.8968237903857906e-05, "loss": 0.5958, "step": 3253 }, { "epoch": 0.5645385149201944, "grad_norm": 0.7991777658462524, "learning_rate": 3.8966076722821245e-05, "loss": 0.6111, "step": 3254 }, { "epoch": 0.5647120055517002, "grad_norm": 0.9115409851074219, "learning_rate": 3.896391334074928e-05, "loss": 0.5956, "step": 3255 }, { "epoch": 0.5648854961832062, "grad_norm": 1.2931652069091797, "learning_rate": 3.8961747757893075e-05, "loss": 0.5609, "step": 3256 }, { "epoch": 0.565058986814712, "grad_norm": 1.0862016677856445, "learning_rate": 3.8959579974503947e-05, "loss": 0.5239, "step": 3257 }, { "epoch": 0.5652324774462179, "grad_norm": 0.8347268104553223, "learning_rate": 3.895740999083347e-05, "loss": 0.5426, "step": 3258 }, { "epoch": 0.5654059680777238, "grad_norm": 1.2553099393844604, "learning_rate": 3.8955237807133485e-05, "loss": 0.5894, "step": 3259 }, { "epoch": 0.5655794587092297, "grad_norm": 0.8644529581069946, "learning_rate": 3.8953063423656055e-05, "loss": 0.615, "step": 3260 }, { "epoch": 0.5657529493407356, "grad_norm": 0.7972590923309326, "learning_rate": 3.8950886840653524e-05, "loss": 0.5249, "step": 3261 }, { "epoch": 0.5659264399722415, "grad_norm": 0.6203532814979553, "learning_rate": 3.8948708058378504e-05, "loss": 0.7021, "step": 3262 }, { "epoch": 0.5660999306037474, "grad_norm": 0.6847710013389587, "learning_rate": 3.894652707708383e-05, "loss": 0.6949, "step": 3263 }, { "epoch": 0.5662734212352533, "grad_norm": 1.1506133079528809, "learning_rate": 3.894434389702261e-05, "loss": 0.5867, "step": 3264 }, { "epoch": 0.5664469118667592, "grad_norm": 1.1352781057357788, "learning_rate": 3.894215851844821e-05, "loss": 0.5782, "step": 3265 }, { "epoch": 0.5666204024982651, "grad_norm": 0.7782225012779236, "learning_rate": 3.8939970941614247e-05, "loss": 0.5499, "step": 3266 }, { "epoch": 0.566793893129771, "grad_norm": 2.2187085151672363, "learning_rate": 3.893778116677457e-05, "loss": 0.5884, "step": 3267 }, { "epoch": 0.5669673837612769, "grad_norm": 1.034565806388855, "learning_rate": 3.893558919418334e-05, "loss": 0.5935, "step": 3268 }, { "epoch": 0.5671408743927828, "grad_norm": 0.6328162550926208, "learning_rate": 3.8933395024094904e-05, "loss": 0.6771, "step": 3269 }, { "epoch": 0.5673143650242887, "grad_norm": 1.196004033088684, "learning_rate": 3.893119865676393e-05, "loss": 0.5558, "step": 3270 }, { "epoch": 0.5674878556557946, "grad_norm": 1.0808980464935303, "learning_rate": 3.892900009244528e-05, "loss": 0.6027, "step": 3271 }, { "epoch": 0.5676613462873005, "grad_norm": 0.7612327933311462, "learning_rate": 3.892679933139412e-05, "loss": 0.5385, "step": 3272 }, { "epoch": 0.5678348369188064, "grad_norm": 0.8031080365180969, "learning_rate": 3.8924596373865834e-05, "loss": 0.6525, "step": 3273 }, { "epoch": 0.5680083275503123, "grad_norm": 0.6683812737464905, "learning_rate": 3.8922391220116094e-05, "loss": 0.5999, "step": 3274 }, { "epoch": 0.5681818181818182, "grad_norm": 0.7505687475204468, "learning_rate": 3.89201838704008e-05, "loss": 0.6857, "step": 3275 }, { "epoch": 0.5683553088133241, "grad_norm": 0.8488569855690002, "learning_rate": 3.891797432497613e-05, "loss": 0.4913, "step": 3276 }, { "epoch": 0.5685287994448299, "grad_norm": 0.9531639814376831, "learning_rate": 3.8915762584098484e-05, "loss": 0.4996, "step": 3277 }, { "epoch": 0.5687022900763359, "grad_norm": 1.6543718576431274, "learning_rate": 3.891354864802455e-05, "loss": 0.6038, "step": 3278 }, { "epoch": 0.5688757807078417, "grad_norm": 0.8249922394752502, "learning_rate": 3.891133251701127e-05, "loss": 0.6492, "step": 3279 }, { "epoch": 0.5690492713393477, "grad_norm": 0.7629801630973816, "learning_rate": 3.89091141913158e-05, "loss": 0.5842, "step": 3280 }, { "epoch": 0.5692227619708535, "grad_norm": 1.0736356973648071, "learning_rate": 3.890689367119561e-05, "loss": 0.5034, "step": 3281 }, { "epoch": 0.5693962526023595, "grad_norm": 0.8037618398666382, "learning_rate": 3.890467095690837e-05, "loss": 0.6332, "step": 3282 }, { "epoch": 0.5695697432338653, "grad_norm": 1.05054771900177, "learning_rate": 3.890244604871203e-05, "loss": 0.6654, "step": 3283 }, { "epoch": 0.5697432338653713, "grad_norm": 0.8441582322120667, "learning_rate": 3.890021894686481e-05, "loss": 0.651, "step": 3284 }, { "epoch": 0.5699167244968771, "grad_norm": 0.895268440246582, "learning_rate": 3.889798965162516e-05, "loss": 0.7266, "step": 3285 }, { "epoch": 0.5700902151283831, "grad_norm": 0.8688643574714661, "learning_rate": 3.8895758163251783e-05, "loss": 0.652, "step": 3286 }, { "epoch": 0.5702637057598889, "grad_norm": 0.9622326493263245, "learning_rate": 3.889352448200366e-05, "loss": 0.5823, "step": 3287 }, { "epoch": 0.5704371963913949, "grad_norm": 0.8369941115379333, "learning_rate": 3.889128860814e-05, "loss": 0.5424, "step": 3288 }, { "epoch": 0.5706106870229007, "grad_norm": 1.4068269729614258, "learning_rate": 3.8889050541920285e-05, "loss": 0.5499, "step": 3289 }, { "epoch": 0.5707841776544067, "grad_norm": 0.8182569146156311, "learning_rate": 3.8886810283604245e-05, "loss": 0.5638, "step": 3290 }, { "epoch": 0.5709576682859125, "grad_norm": 1.9418103694915771, "learning_rate": 3.888456783345187e-05, "loss": 0.5587, "step": 3291 }, { "epoch": 0.5711311589174185, "grad_norm": 1.0349828004837036, "learning_rate": 3.888232319172338e-05, "loss": 0.6218, "step": 3292 }, { "epoch": 0.5713046495489243, "grad_norm": 0.7936072945594788, "learning_rate": 3.8880076358679295e-05, "loss": 0.6178, "step": 3293 }, { "epoch": 0.5714781401804303, "grad_norm": 0.9973400831222534, "learning_rate": 3.887782733458034e-05, "loss": 0.5144, "step": 3294 }, { "epoch": 0.5716516308119362, "grad_norm": 0.9298374056816101, "learning_rate": 3.887557611968753e-05, "loss": 0.5629, "step": 3295 }, { "epoch": 0.5718251214434421, "grad_norm": 0.887874960899353, "learning_rate": 3.887332271426211e-05, "loss": 0.6729, "step": 3296 }, { "epoch": 0.571998612074948, "grad_norm": 1.6880004405975342, "learning_rate": 3.887106711856561e-05, "loss": 0.5826, "step": 3297 }, { "epoch": 0.5721721027064538, "grad_norm": 0.9786666035652161, "learning_rate": 3.886880933285977e-05, "loss": 0.6461, "step": 3298 }, { "epoch": 0.5723455933379598, "grad_norm": 1.2034778594970703, "learning_rate": 3.886654935740663e-05, "loss": 0.6392, "step": 3299 }, { "epoch": 0.5725190839694656, "grad_norm": 0.7050906419754028, "learning_rate": 3.886428719246845e-05, "loss": 0.6788, "step": 3300 }, { "epoch": 0.5726925746009716, "grad_norm": 0.8306242823600769, "learning_rate": 3.886202283830776e-05, "loss": 0.6774, "step": 3301 }, { "epoch": 0.5728660652324774, "grad_norm": 0.8787949085235596, "learning_rate": 3.885975629518734e-05, "loss": 0.5353, "step": 3302 }, { "epoch": 0.5730395558639834, "grad_norm": 1.0400038957595825, "learning_rate": 3.885748756337022e-05, "loss": 0.5448, "step": 3303 }, { "epoch": 0.5732130464954892, "grad_norm": 0.7915439605712891, "learning_rate": 3.8855216643119697e-05, "loss": 0.657, "step": 3304 }, { "epoch": 0.5733865371269952, "grad_norm": 0.8591414093971252, "learning_rate": 3.885294353469932e-05, "loss": 0.5073, "step": 3305 }, { "epoch": 0.573560027758501, "grad_norm": 1.1922606229782104, "learning_rate": 3.885066823837287e-05, "loss": 0.6675, "step": 3306 }, { "epoch": 0.573733518390007, "grad_norm": 0.7343970537185669, "learning_rate": 3.884839075440441e-05, "loss": 0.5431, "step": 3307 }, { "epoch": 0.5739070090215128, "grad_norm": 0.8767739534378052, "learning_rate": 3.884611108305824e-05, "loss": 0.6619, "step": 3308 }, { "epoch": 0.5740804996530188, "grad_norm": 1.115496039390564, "learning_rate": 3.884382922459891e-05, "loss": 0.6133, "step": 3309 }, { "epoch": 0.5742539902845246, "grad_norm": 0.813336193561554, "learning_rate": 3.8841545179291254e-05, "loss": 0.6106, "step": 3310 }, { "epoch": 0.5744274809160306, "grad_norm": 0.7772932052612305, "learning_rate": 3.8839258947400325e-05, "loss": 0.6854, "step": 3311 }, { "epoch": 0.5746009715475364, "grad_norm": 0.823560357093811, "learning_rate": 3.883697052919143e-05, "loss": 0.5686, "step": 3312 }, { "epoch": 0.5747744621790424, "grad_norm": 0.7215649485588074, "learning_rate": 3.883467992493017e-05, "loss": 0.6415, "step": 3313 }, { "epoch": 0.5749479528105482, "grad_norm": 0.746321976184845, "learning_rate": 3.883238713488235e-05, "loss": 0.5688, "step": 3314 }, { "epoch": 0.5751214434420542, "grad_norm": 0.6839783787727356, "learning_rate": 3.883009215931406e-05, "loss": 0.6318, "step": 3315 }, { "epoch": 0.57529493407356, "grad_norm": 0.7651140689849854, "learning_rate": 3.882779499849163e-05, "loss": 0.6566, "step": 3316 }, { "epoch": 0.5754684247050659, "grad_norm": 1.2375156879425049, "learning_rate": 3.8825495652681653e-05, "loss": 0.6702, "step": 3317 }, { "epoch": 0.5756419153365718, "grad_norm": 0.7743232250213623, "learning_rate": 3.8823194122150975e-05, "loss": 0.5146, "step": 3318 }, { "epoch": 0.5758154059680777, "grad_norm": 1.2336572408676147, "learning_rate": 3.8820890407166683e-05, "loss": 0.7351, "step": 3319 }, { "epoch": 0.5759888965995836, "grad_norm": 0.9283602237701416, "learning_rate": 3.881858450799612e-05, "loss": 0.6333, "step": 3320 }, { "epoch": 0.5761623872310895, "grad_norm": 0.7599928379058838, "learning_rate": 3.881627642490691e-05, "loss": 0.6134, "step": 3321 }, { "epoch": 0.5763358778625954, "grad_norm": 1.3574687242507935, "learning_rate": 3.8813966158166894e-05, "loss": 0.6586, "step": 3322 }, { "epoch": 0.5765093684941013, "grad_norm": 1.052731990814209, "learning_rate": 3.8811653708044176e-05, "loss": 0.5859, "step": 3323 }, { "epoch": 0.5766828591256072, "grad_norm": 0.8059406280517578, "learning_rate": 3.8809339074807125e-05, "loss": 0.4701, "step": 3324 }, { "epoch": 0.5768563497571131, "grad_norm": 0.7636902332305908, "learning_rate": 3.880702225872437e-05, "loss": 0.5542, "step": 3325 }, { "epoch": 0.577029840388619, "grad_norm": 0.8611479997634888, "learning_rate": 3.8804703260064756e-05, "loss": 0.5389, "step": 3326 }, { "epoch": 0.5772033310201249, "grad_norm": 0.8257524371147156, "learning_rate": 3.880238207909742e-05, "loss": 0.4996, "step": 3327 }, { "epoch": 0.5773768216516308, "grad_norm": 1.1965727806091309, "learning_rate": 3.880005871609173e-05, "loss": 0.5571, "step": 3328 }, { "epoch": 0.5775503122831367, "grad_norm": 0.7814649343490601, "learning_rate": 3.879773317131732e-05, "loss": 0.4926, "step": 3329 }, { "epoch": 0.5777238029146426, "grad_norm": 0.9013784527778625, "learning_rate": 3.879540544504408e-05, "loss": 0.5669, "step": 3330 }, { "epoch": 0.5778972935461485, "grad_norm": 0.834071695804596, "learning_rate": 3.879307553754213e-05, "loss": 0.7269, "step": 3331 }, { "epoch": 0.5780707841776545, "grad_norm": 0.7171045541763306, "learning_rate": 3.879074344908187e-05, "loss": 0.7321, "step": 3332 }, { "epoch": 0.5782442748091603, "grad_norm": 0.5801519751548767, "learning_rate": 3.878840917993393e-05, "loss": 0.6796, "step": 3333 }, { "epoch": 0.5784177654406663, "grad_norm": 1.0420644283294678, "learning_rate": 3.878607273036922e-05, "loss": 0.6821, "step": 3334 }, { "epoch": 0.5785912560721721, "grad_norm": 0.7662199139595032, "learning_rate": 3.8783734100658874e-05, "loss": 0.5862, "step": 3335 }, { "epoch": 0.5787647467036781, "grad_norm": 0.8534917831420898, "learning_rate": 3.8781393291074296e-05, "loss": 0.6056, "step": 3336 }, { "epoch": 0.5789382373351839, "grad_norm": 0.7719308733940125, "learning_rate": 3.877905030188715e-05, "loss": 0.5311, "step": 3337 }, { "epoch": 0.5791117279666897, "grad_norm": 0.8380523920059204, "learning_rate": 3.8776705133369333e-05, "loss": 0.5789, "step": 3338 }, { "epoch": 0.5792852185981957, "grad_norm": 0.7678638100624084, "learning_rate": 3.8774357785793e-05, "loss": 0.6484, "step": 3339 }, { "epoch": 0.5794587092297016, "grad_norm": 0.788212776184082, "learning_rate": 3.8772008259430575e-05, "loss": 0.6382, "step": 3340 }, { "epoch": 0.5796321998612075, "grad_norm": 0.738366961479187, "learning_rate": 3.8769656554554716e-05, "loss": 0.6045, "step": 3341 }, { "epoch": 0.5798056904927134, "grad_norm": 0.9916058778762817, "learning_rate": 3.876730267143834e-05, "loss": 0.6257, "step": 3342 }, { "epoch": 0.5799791811242193, "grad_norm": 1.277799129486084, "learning_rate": 3.8764946610354626e-05, "loss": 0.5104, "step": 3343 }, { "epoch": 0.5801526717557252, "grad_norm": 0.9335923194885254, "learning_rate": 3.876258837157699e-05, "loss": 0.5049, "step": 3344 }, { "epoch": 0.5803261623872311, "grad_norm": 0.7122304439544678, "learning_rate": 3.876022795537911e-05, "loss": 0.5026, "step": 3345 }, { "epoch": 0.580499653018737, "grad_norm": 1.5594797134399414, "learning_rate": 3.8757865362034914e-05, "loss": 0.5314, "step": 3346 }, { "epoch": 0.5806731436502429, "grad_norm": 1.2827972173690796, "learning_rate": 3.875550059181859e-05, "loss": 0.6298, "step": 3347 }, { "epoch": 0.5808466342817488, "grad_norm": 1.3870747089385986, "learning_rate": 3.875313364500456e-05, "loss": 0.6448, "step": 3348 }, { "epoch": 0.5810201249132547, "grad_norm": 1.5545334815979004, "learning_rate": 3.8750764521867526e-05, "loss": 0.5092, "step": 3349 }, { "epoch": 0.5811936155447606, "grad_norm": 0.780291736125946, "learning_rate": 3.8748393222682425e-05, "loss": 0.5072, "step": 3350 }, { "epoch": 0.5813671061762665, "grad_norm": 1.0323545932769775, "learning_rate": 3.8746019747724436e-05, "loss": 0.5936, "step": 3351 }, { "epoch": 0.5815405968077724, "grad_norm": 0.9434840679168701, "learning_rate": 3.874364409726901e-05, "loss": 0.651, "step": 3352 }, { "epoch": 0.5817140874392783, "grad_norm": 1.0047508478164673, "learning_rate": 3.8741266271591846e-05, "loss": 0.6742, "step": 3353 }, { "epoch": 0.5818875780707842, "grad_norm": 1.0800899267196655, "learning_rate": 3.87388862709689e-05, "loss": 0.5039, "step": 3354 }, { "epoch": 0.5820610687022901, "grad_norm": 0.9108862280845642, "learning_rate": 3.8736504095676364e-05, "loss": 0.5164, "step": 3355 }, { "epoch": 0.582234559333796, "grad_norm": 0.7775710821151733, "learning_rate": 3.8734119745990696e-05, "loss": 0.6127, "step": 3356 }, { "epoch": 0.5824080499653018, "grad_norm": 1.023120641708374, "learning_rate": 3.8731733222188605e-05, "loss": 0.641, "step": 3357 }, { "epoch": 0.5825815405968078, "grad_norm": 0.6763226985931396, "learning_rate": 3.872934452454704e-05, "loss": 0.5679, "step": 3358 }, { "epoch": 0.5827550312283136, "grad_norm": 0.5067607164382935, "learning_rate": 3.8726953653343226e-05, "loss": 0.6472, "step": 3359 }, { "epoch": 0.5829285218598196, "grad_norm": 0.7462106347084045, "learning_rate": 3.872456060885461e-05, "loss": 0.594, "step": 3360 }, { "epoch": 0.5831020124913254, "grad_norm": 0.8248808979988098, "learning_rate": 3.8722165391358926e-05, "loss": 0.4955, "step": 3361 }, { "epoch": 0.5832755031228314, "grad_norm": 0.7597683072090149, "learning_rate": 3.8719768001134124e-05, "loss": 0.6084, "step": 3362 }, { "epoch": 0.5834489937543372, "grad_norm": 1.114116907119751, "learning_rate": 3.8717368438458435e-05, "loss": 0.5118, "step": 3363 }, { "epoch": 0.5836224843858432, "grad_norm": 1.0715887546539307, "learning_rate": 3.871496670361033e-05, "loss": 0.6173, "step": 3364 }, { "epoch": 0.583795975017349, "grad_norm": 0.9598300457000732, "learning_rate": 3.871256279686854e-05, "loss": 0.6216, "step": 3365 }, { "epoch": 0.583969465648855, "grad_norm": 1.053706407546997, "learning_rate": 3.871015671851202e-05, "loss": 0.5491, "step": 3366 }, { "epoch": 0.5841429562803608, "grad_norm": 1.0202147960662842, "learning_rate": 3.8707748468820024e-05, "loss": 0.6046, "step": 3367 }, { "epoch": 0.5843164469118668, "grad_norm": 0.9622864127159119, "learning_rate": 3.870533804807201e-05, "loss": 0.5989, "step": 3368 }, { "epoch": 0.5844899375433726, "grad_norm": 0.8145611882209778, "learning_rate": 3.870292545654772e-05, "loss": 0.5956, "step": 3369 }, { "epoch": 0.5846634281748786, "grad_norm": 0.9293409585952759, "learning_rate": 3.870051069452714e-05, "loss": 0.5316, "step": 3370 }, { "epoch": 0.5848369188063844, "grad_norm": 0.7547447681427002, "learning_rate": 3.86980937622905e-05, "loss": 0.6106, "step": 3371 }, { "epoch": 0.5850104094378904, "grad_norm": 0.7704901099205017, "learning_rate": 3.8695674660118294e-05, "loss": 0.7321, "step": 3372 }, { "epoch": 0.5851839000693962, "grad_norm": 1.2197743654251099, "learning_rate": 3.8693253388291256e-05, "loss": 0.7271, "step": 3373 }, { "epoch": 0.5853573907009022, "grad_norm": 0.7334229946136475, "learning_rate": 3.8690829947090386e-05, "loss": 0.5839, "step": 3374 }, { "epoch": 0.585530881332408, "grad_norm": 0.7082890272140503, "learning_rate": 3.868840433679692e-05, "loss": 0.6974, "step": 3375 }, { "epoch": 0.5857043719639139, "grad_norm": 0.7663260102272034, "learning_rate": 3.868597655769235e-05, "loss": 0.6614, "step": 3376 }, { "epoch": 0.5858778625954199, "grad_norm": 0.7415555715560913, "learning_rate": 3.8683546610058434e-05, "loss": 0.5551, "step": 3377 }, { "epoch": 0.5860513532269257, "grad_norm": 0.8075070381164551, "learning_rate": 3.868111449417716e-05, "loss": 0.6012, "step": 3378 }, { "epoch": 0.5862248438584317, "grad_norm": 0.704871416091919, "learning_rate": 3.867868021033078e-05, "loss": 0.6234, "step": 3379 }, { "epoch": 0.5863983344899375, "grad_norm": 0.6264182925224304, "learning_rate": 3.867624375880179e-05, "loss": 0.6178, "step": 3380 }, { "epoch": 0.5865718251214435, "grad_norm": 1.559768557548523, "learning_rate": 3.8673805139872966e-05, "loss": 0.502, "step": 3381 }, { "epoch": 0.5867453157529493, "grad_norm": 0.8009780645370483, "learning_rate": 3.8671364353827284e-05, "loss": 0.6245, "step": 3382 }, { "epoch": 0.5869188063844553, "grad_norm": 0.746608555316925, "learning_rate": 3.8668921400948015e-05, "loss": 0.5422, "step": 3383 }, { "epoch": 0.5870922970159611, "grad_norm": 0.8048222064971924, "learning_rate": 3.8666476281518665e-05, "loss": 0.5168, "step": 3384 }, { "epoch": 0.5872657876474671, "grad_norm": 0.7640017867088318, "learning_rate": 3.866402899582299e-05, "loss": 0.7377, "step": 3385 }, { "epoch": 0.5874392782789729, "grad_norm": 0.8351792693138123, "learning_rate": 3.8661579544145e-05, "loss": 0.6089, "step": 3386 }, { "epoch": 0.5876127689104789, "grad_norm": 0.6824077367782593, "learning_rate": 3.865912792676897e-05, "loss": 0.5981, "step": 3387 }, { "epoch": 0.5877862595419847, "grad_norm": 0.9101720452308655, "learning_rate": 3.8656674143979386e-05, "loss": 0.5658, "step": 3388 }, { "epoch": 0.5879597501734907, "grad_norm": 0.753296434879303, "learning_rate": 3.865421819606104e-05, "loss": 0.6121, "step": 3389 }, { "epoch": 0.5881332408049965, "grad_norm": 0.7643285989761353, "learning_rate": 3.8651760083298926e-05, "loss": 0.5245, "step": 3390 }, { "epoch": 0.5883067314365025, "grad_norm": 0.6890398859977722, "learning_rate": 3.8649299805978324e-05, "loss": 0.6505, "step": 3391 }, { "epoch": 0.5884802220680083, "grad_norm": 0.638952374458313, "learning_rate": 3.864683736438475e-05, "loss": 0.6943, "step": 3392 }, { "epoch": 0.5886537126995143, "grad_norm": 0.8157287240028381, "learning_rate": 3.864437275880398e-05, "loss": 0.5846, "step": 3393 }, { "epoch": 0.5888272033310201, "grad_norm": 0.7618674039840698, "learning_rate": 3.8641905989522016e-05, "loss": 0.6025, "step": 3394 }, { "epoch": 0.5890006939625261, "grad_norm": 1.0098158121109009, "learning_rate": 3.8639437056825146e-05, "loss": 0.5103, "step": 3395 }, { "epoch": 0.5891741845940319, "grad_norm": 0.9587876796722412, "learning_rate": 3.863696596099988e-05, "loss": 0.5607, "step": 3396 }, { "epoch": 0.5893476752255378, "grad_norm": 0.5911865830421448, "learning_rate": 3.8634492702333e-05, "loss": 0.6744, "step": 3397 }, { "epoch": 0.5895211658570437, "grad_norm": 0.850542426109314, "learning_rate": 3.863201728111153e-05, "loss": 0.6318, "step": 3398 }, { "epoch": 0.5896946564885496, "grad_norm": 0.9997515678405762, "learning_rate": 3.8629539697622746e-05, "loss": 0.7252, "step": 3399 }, { "epoch": 0.5898681471200555, "grad_norm": 0.9618603587150574, "learning_rate": 3.862705995215417e-05, "loss": 0.5009, "step": 3400 }, { "epoch": 0.5900416377515614, "grad_norm": 0.8880718350410461, "learning_rate": 3.862457804499358e-05, "loss": 0.562, "step": 3401 }, { "epoch": 0.5902151283830673, "grad_norm": 0.7900727987289429, "learning_rate": 3.862209397642901e-05, "loss": 0.6401, "step": 3402 }, { "epoch": 0.5903886190145732, "grad_norm": 0.847778856754303, "learning_rate": 3.861960774674874e-05, "loss": 0.4969, "step": 3403 }, { "epoch": 0.5905621096460791, "grad_norm": 0.9456307291984558, "learning_rate": 3.861711935624129e-05, "loss": 0.5255, "step": 3404 }, { "epoch": 0.590735600277585, "grad_norm": 1.0695728063583374, "learning_rate": 3.861462880519543e-05, "loss": 0.6191, "step": 3405 }, { "epoch": 0.5909090909090909, "grad_norm": 0.7595635056495667, "learning_rate": 3.8612136093900224e-05, "loss": 0.5824, "step": 3406 }, { "epoch": 0.5910825815405968, "grad_norm": 11.357579231262207, "learning_rate": 3.860964122264493e-05, "loss": 0.691, "step": 3407 }, { "epoch": 0.5912560721721027, "grad_norm": 1.208006739616394, "learning_rate": 3.860714419171909e-05, "loss": 0.5909, "step": 3408 }, { "epoch": 0.5914295628036086, "grad_norm": 0.839406430721283, "learning_rate": 3.860464500141249e-05, "loss": 0.572, "step": 3409 }, { "epoch": 0.5916030534351145, "grad_norm": 1.164560317993164, "learning_rate": 3.860214365201515e-05, "loss": 0.6687, "step": 3410 }, { "epoch": 0.5917765440666204, "grad_norm": 1.1396112442016602, "learning_rate": 3.859964014381737e-05, "loss": 0.6771, "step": 3411 }, { "epoch": 0.5919500346981263, "grad_norm": 0.8461340665817261, "learning_rate": 3.8597134477109674e-05, "loss": 0.7394, "step": 3412 }, { "epoch": 0.5921235253296322, "grad_norm": 0.7491380572319031, "learning_rate": 3.859462665218286e-05, "loss": 0.6487, "step": 3413 }, { "epoch": 0.5922970159611382, "grad_norm": 0.7270049452781677, "learning_rate": 3.8592116669327945e-05, "loss": 0.5641, "step": 3414 }, { "epoch": 0.592470506592644, "grad_norm": 0.7229993343353271, "learning_rate": 3.858960452883623e-05, "loss": 0.6256, "step": 3415 }, { "epoch": 0.5926439972241498, "grad_norm": 0.7910577058792114, "learning_rate": 3.858709023099925e-05, "loss": 0.5709, "step": 3416 }, { "epoch": 0.5928174878556558, "grad_norm": 0.7861661911010742, "learning_rate": 3.8584573776108794e-05, "loss": 0.578, "step": 3417 }, { "epoch": 0.5929909784871616, "grad_norm": 0.7965797781944275, "learning_rate": 3.858205516445689e-05, "loss": 0.5209, "step": 3418 }, { "epoch": 0.5931644691186676, "grad_norm": 1.3844408988952637, "learning_rate": 3.8579534396335835e-05, "loss": 0.6051, "step": 3419 }, { "epoch": 0.5933379597501734, "grad_norm": 1.988527536392212, "learning_rate": 3.857701147203816e-05, "loss": 0.5623, "step": 3420 }, { "epoch": 0.5935114503816794, "grad_norm": 0.8353315591812134, "learning_rate": 3.8574486391856655e-05, "loss": 0.4869, "step": 3421 }, { "epoch": 0.5936849410131853, "grad_norm": 0.7921696305274963, "learning_rate": 3.857195915608437e-05, "loss": 0.6094, "step": 3422 }, { "epoch": 0.5938584316446912, "grad_norm": 1.1405901908874512, "learning_rate": 3.856942976501458e-05, "loss": 0.6217, "step": 3423 }, { "epoch": 0.594031922276197, "grad_norm": 0.9423086643218994, "learning_rate": 3.8566898218940825e-05, "loss": 0.5925, "step": 3424 }, { "epoch": 0.594205412907703, "grad_norm": 0.6545419096946716, "learning_rate": 3.85643645181569e-05, "loss": 0.5238, "step": 3425 }, { "epoch": 0.5943789035392089, "grad_norm": 0.9272714853286743, "learning_rate": 3.856182866295684e-05, "loss": 0.5372, "step": 3426 }, { "epoch": 0.5945523941707148, "grad_norm": 0.8032915592193604, "learning_rate": 3.855929065363493e-05, "loss": 0.6144, "step": 3427 }, { "epoch": 0.5947258848022207, "grad_norm": 0.9311384558677673, "learning_rate": 3.8556750490485724e-05, "loss": 0.6423, "step": 3428 }, { "epoch": 0.5948993754337266, "grad_norm": 0.9234505295753479, "learning_rate": 3.8554208173804e-05, "loss": 0.5463, "step": 3429 }, { "epoch": 0.5950728660652325, "grad_norm": 0.9996792674064636, "learning_rate": 3.855166370388479e-05, "loss": 0.6635, "step": 3430 }, { "epoch": 0.5952463566967384, "grad_norm": 0.7507815361022949, "learning_rate": 3.854911708102339e-05, "loss": 0.6295, "step": 3431 }, { "epoch": 0.5954198473282443, "grad_norm": 0.8840140104293823, "learning_rate": 3.8546568305515345e-05, "loss": 0.5497, "step": 3432 }, { "epoch": 0.5955933379597502, "grad_norm": 1.1633116006851196, "learning_rate": 3.854401737765644e-05, "loss": 0.5869, "step": 3433 }, { "epoch": 0.5957668285912561, "grad_norm": 0.8514946103096008, "learning_rate": 3.85414642977427e-05, "loss": 0.6907, "step": 3434 }, { "epoch": 0.5959403192227619, "grad_norm": 1.305191159248352, "learning_rate": 3.853890906607043e-05, "loss": 0.5542, "step": 3435 }, { "epoch": 0.5961138098542679, "grad_norm": 0.9533877968788147, "learning_rate": 3.8536351682936155e-05, "loss": 0.6359, "step": 3436 }, { "epoch": 0.5962873004857737, "grad_norm": 0.7394745945930481, "learning_rate": 3.853379214863667e-05, "loss": 0.58, "step": 3437 }, { "epoch": 0.5964607911172797, "grad_norm": 0.6518771648406982, "learning_rate": 3.8531230463469015e-05, "loss": 0.6638, "step": 3438 }, { "epoch": 0.5966342817487855, "grad_norm": 0.9110044240951538, "learning_rate": 3.852866662773047e-05, "loss": 0.7, "step": 3439 }, { "epoch": 0.5968077723802915, "grad_norm": 1.4987447261810303, "learning_rate": 3.852610064171857e-05, "loss": 0.728, "step": 3440 }, { "epoch": 0.5969812630117973, "grad_norm": 0.9062539935112, "learning_rate": 3.85235325057311e-05, "loss": 0.6445, "step": 3441 }, { "epoch": 0.5971547536433033, "grad_norm": 1.3974779844284058, "learning_rate": 3.85209622200661e-05, "loss": 0.6837, "step": 3442 }, { "epoch": 0.5973282442748091, "grad_norm": 0.6922855973243713, "learning_rate": 3.851838978502186e-05, "loss": 0.6296, "step": 3443 }, { "epoch": 0.5975017349063151, "grad_norm": 0.7842308878898621, "learning_rate": 3.8515815200896905e-05, "loss": 0.5752, "step": 3444 }, { "epoch": 0.5976752255378209, "grad_norm": 0.7984820604324341, "learning_rate": 3.851323846799002e-05, "loss": 0.6096, "step": 3445 }, { "epoch": 0.5978487161693269, "grad_norm": 1.3559335470199585, "learning_rate": 3.851065958660023e-05, "loss": 0.6342, "step": 3446 }, { "epoch": 0.5980222068008327, "grad_norm": 0.9452993273735046, "learning_rate": 3.8508078557026835e-05, "loss": 0.7297, "step": 3447 }, { "epoch": 0.5981956974323387, "grad_norm": 0.634530246257782, "learning_rate": 3.8505495379569354e-05, "loss": 0.5994, "step": 3448 }, { "epoch": 0.5983691880638445, "grad_norm": 0.8228620290756226, "learning_rate": 3.850291005452757e-05, "loss": 0.5238, "step": 3449 }, { "epoch": 0.5985426786953505, "grad_norm": 1.0455272197723389, "learning_rate": 3.850032258220152e-05, "loss": 0.5449, "step": 3450 }, { "epoch": 0.5987161693268563, "grad_norm": 0.7928491234779358, "learning_rate": 3.849773296289147e-05, "loss": 0.6968, "step": 3451 }, { "epoch": 0.5988896599583623, "grad_norm": 1.4088414907455444, "learning_rate": 3.849514119689796e-05, "loss": 0.5996, "step": 3452 }, { "epoch": 0.5990631505898681, "grad_norm": 0.9783432483673096, "learning_rate": 3.849254728452176e-05, "loss": 0.5813, "step": 3453 }, { "epoch": 0.5992366412213741, "grad_norm": 0.7994115352630615, "learning_rate": 3.84899512260639e-05, "loss": 0.6154, "step": 3454 }, { "epoch": 0.59941013185288, "grad_norm": 0.754689633846283, "learning_rate": 3.848735302182566e-05, "loss": 0.584, "step": 3455 }, { "epoch": 0.5995836224843858, "grad_norm": 2.09782075881958, "learning_rate": 3.848475267210856e-05, "loss": 0.5867, "step": 3456 }, { "epoch": 0.5997571131158917, "grad_norm": 1.2456940412521362, "learning_rate": 3.848215017721437e-05, "loss": 0.5066, "step": 3457 }, { "epoch": 0.5999306037473976, "grad_norm": 0.7088345289230347, "learning_rate": 3.8479545537445115e-05, "loss": 0.6174, "step": 3458 }, { "epoch": 0.6001040943789036, "grad_norm": 2.3128747940063477, "learning_rate": 3.8476938753103066e-05, "loss": 0.47, "step": 3459 }, { "epoch": 0.6002775850104094, "grad_norm": 1.0873178243637085, "learning_rate": 3.847432982449075e-05, "loss": 0.6378, "step": 3460 }, { "epoch": 0.6004510756419154, "grad_norm": 0.6794579029083252, "learning_rate": 3.8471718751910926e-05, "loss": 0.661, "step": 3461 }, { "epoch": 0.6006245662734212, "grad_norm": 0.8252881765365601, "learning_rate": 3.846910553566662e-05, "loss": 0.5981, "step": 3462 }, { "epoch": 0.6007980569049272, "grad_norm": 0.7919279336929321, "learning_rate": 3.846649017606109e-05, "loss": 0.6964, "step": 3463 }, { "epoch": 0.600971547536433, "grad_norm": 0.8595472574234009, "learning_rate": 3.846387267339787e-05, "loss": 0.6184, "step": 3464 }, { "epoch": 0.601145038167939, "grad_norm": 0.7740964889526367, "learning_rate": 3.84612530279807e-05, "loss": 0.6564, "step": 3465 }, { "epoch": 0.6013185287994448, "grad_norm": 0.8735344409942627, "learning_rate": 3.845863124011361e-05, "loss": 0.6528, "step": 3466 }, { "epoch": 0.6014920194309508, "grad_norm": 1.1475926637649536, "learning_rate": 3.845600731010085e-05, "loss": 0.5629, "step": 3467 }, { "epoch": 0.6016655100624566, "grad_norm": 0.6880063414573669, "learning_rate": 3.845338123824694e-05, "loss": 0.6332, "step": 3468 }, { "epoch": 0.6018390006939626, "grad_norm": 2.3320505619049072, "learning_rate": 3.845075302485664e-05, "loss": 0.6035, "step": 3469 }, { "epoch": 0.6020124913254684, "grad_norm": 0.8664422631263733, "learning_rate": 3.844812267023495e-05, "loss": 0.6415, "step": 3470 }, { "epoch": 0.6021859819569744, "grad_norm": 0.9907106161117554, "learning_rate": 3.844549017468712e-05, "loss": 0.563, "step": 3471 }, { "epoch": 0.6023594725884802, "grad_norm": 0.7867692112922668, "learning_rate": 3.8442855538518667e-05, "loss": 0.5328, "step": 3472 }, { "epoch": 0.6025329632199862, "grad_norm": 1.5940982103347778, "learning_rate": 3.844021876203534e-05, "loss": 0.516, "step": 3473 }, { "epoch": 0.602706453851492, "grad_norm": 0.8625582456588745, "learning_rate": 3.8437579845543133e-05, "loss": 0.67, "step": 3474 }, { "epoch": 0.6028799444829979, "grad_norm": 1.81771719455719, "learning_rate": 3.843493878934831e-05, "loss": 0.6008, "step": 3475 }, { "epoch": 0.6030534351145038, "grad_norm": 1.882793664932251, "learning_rate": 3.843229559375735e-05, "loss": 0.6074, "step": 3476 }, { "epoch": 0.6032269257460097, "grad_norm": 0.9315357804298401, "learning_rate": 3.842965025907701e-05, "loss": 0.5636, "step": 3477 }, { "epoch": 0.6034004163775156, "grad_norm": 1.3701409101486206, "learning_rate": 3.842700278561429e-05, "loss": 0.4924, "step": 3478 }, { "epoch": 0.6035739070090215, "grad_norm": 1.117668867111206, "learning_rate": 3.842435317367642e-05, "loss": 0.6133, "step": 3479 }, { "epoch": 0.6037473976405274, "grad_norm": 1.1667253971099854, "learning_rate": 3.8421701423570895e-05, "loss": 0.601, "step": 3480 }, { "epoch": 0.6039208882720333, "grad_norm": 1.159469485282898, "learning_rate": 3.8419047535605456e-05, "loss": 0.724, "step": 3481 }, { "epoch": 0.6040943789035392, "grad_norm": 1.304949402809143, "learning_rate": 3.841639151008809e-05, "loss": 0.5074, "step": 3482 }, { "epoch": 0.6042678695350451, "grad_norm": 1.1901766061782837, "learning_rate": 3.8413733347327024e-05, "loss": 0.5042, "step": 3483 }, { "epoch": 0.604441360166551, "grad_norm": 1.2651426792144775, "learning_rate": 3.8411073047630745e-05, "loss": 0.5394, "step": 3484 }, { "epoch": 0.6046148507980569, "grad_norm": 0.7895896434783936, "learning_rate": 3.8408410611308e-05, "loss": 0.5227, "step": 3485 }, { "epoch": 0.6047883414295628, "grad_norm": 1.146105408668518, "learning_rate": 3.840574603866774e-05, "loss": 0.5277, "step": 3486 }, { "epoch": 0.6049618320610687, "grad_norm": 1.4869883060455322, "learning_rate": 3.840307933001921e-05, "loss": 0.5737, "step": 3487 }, { "epoch": 0.6051353226925746, "grad_norm": 1.337009072303772, "learning_rate": 3.840041048567188e-05, "loss": 0.5547, "step": 3488 }, { "epoch": 0.6053088133240805, "grad_norm": 1.0347092151641846, "learning_rate": 3.839773950593547e-05, "loss": 0.5471, "step": 3489 }, { "epoch": 0.6054823039555864, "grad_norm": 0.7365402579307556, "learning_rate": 3.839506639111996e-05, "loss": 0.6643, "step": 3490 }, { "epoch": 0.6056557945870923, "grad_norm": 0.7664414644241333, "learning_rate": 3.839239114153555e-05, "loss": 0.5186, "step": 3491 }, { "epoch": 0.6058292852185982, "grad_norm": 0.8788231611251831, "learning_rate": 3.838971375749272e-05, "loss": 0.496, "step": 3492 }, { "epoch": 0.6060027758501041, "grad_norm": 1.0685210227966309, "learning_rate": 3.838703423930218e-05, "loss": 0.6027, "step": 3493 }, { "epoch": 0.6061762664816099, "grad_norm": 1.3240433931350708, "learning_rate": 3.83843525872749e-05, "loss": 0.687, "step": 3494 }, { "epoch": 0.6063497571131159, "grad_norm": 0.7077116370201111, "learning_rate": 3.838166880172207e-05, "loss": 0.5708, "step": 3495 }, { "epoch": 0.6065232477446217, "grad_norm": 0.778710663318634, "learning_rate": 3.837898288295516e-05, "loss": 0.572, "step": 3496 }, { "epoch": 0.6066967383761277, "grad_norm": 1.6770925521850586, "learning_rate": 3.837629483128587e-05, "loss": 0.6476, "step": 3497 }, { "epoch": 0.6068702290076335, "grad_norm": 0.8148396015167236, "learning_rate": 3.837360464702616e-05, "loss": 0.589, "step": 3498 }, { "epoch": 0.6070437196391395, "grad_norm": 0.9807456731796265, "learning_rate": 3.837091233048821e-05, "loss": 0.5621, "step": 3499 }, { "epoch": 0.6072172102706453, "grad_norm": 1.5750974416732788, "learning_rate": 3.8368217881984484e-05, "loss": 0.515, "step": 3500 }, { "epoch": 0.6073907009021513, "grad_norm": 0.8710517883300781, "learning_rate": 3.836552130182766e-05, "loss": 0.5878, "step": 3501 }, { "epoch": 0.6075641915336571, "grad_norm": 0.8788391351699829, "learning_rate": 3.83628225903307e-05, "loss": 0.5024, "step": 3502 }, { "epoch": 0.6077376821651631, "grad_norm": 0.8667015433311462, "learning_rate": 3.836012174780678e-05, "loss": 0.5999, "step": 3503 }, { "epoch": 0.607911172796669, "grad_norm": 0.7130902409553528, "learning_rate": 3.8357418774569335e-05, "loss": 0.6808, "step": 3504 }, { "epoch": 0.6080846634281749, "grad_norm": 1.0192363262176514, "learning_rate": 3.835471367093205e-05, "loss": 0.5347, "step": 3505 }, { "epoch": 0.6082581540596808, "grad_norm": 2.0837340354919434, "learning_rate": 3.835200643720886e-05, "loss": 0.5211, "step": 3506 }, { "epoch": 0.6084316446911867, "grad_norm": 0.8674420714378357, "learning_rate": 3.834929707371394e-05, "loss": 0.6078, "step": 3507 }, { "epoch": 0.6086051353226926, "grad_norm": 0.8269285559654236, "learning_rate": 3.8346585580761705e-05, "loss": 0.6461, "step": 3508 }, { "epoch": 0.6087786259541985, "grad_norm": 0.7027774453163147, "learning_rate": 3.834387195866684e-05, "loss": 0.6669, "step": 3509 }, { "epoch": 0.6089521165857044, "grad_norm": 1.2846192121505737, "learning_rate": 3.8341156207744254e-05, "loss": 0.5911, "step": 3510 }, { "epoch": 0.6091256072172103, "grad_norm": 0.6821696162223816, "learning_rate": 3.8338438328309126e-05, "loss": 0.542, "step": 3511 }, { "epoch": 0.6092990978487162, "grad_norm": 0.8131824135780334, "learning_rate": 3.833571832067685e-05, "loss": 0.5776, "step": 3512 }, { "epoch": 0.6094725884802221, "grad_norm": 0.6710551977157593, "learning_rate": 3.833299618516311e-05, "loss": 0.5839, "step": 3513 }, { "epoch": 0.609646079111728, "grad_norm": 1.05852210521698, "learning_rate": 3.8330271922083795e-05, "loss": 0.6022, "step": 3514 }, { "epoch": 0.6098195697432338, "grad_norm": 0.6794431209564209, "learning_rate": 3.832754553175507e-05, "loss": 0.536, "step": 3515 }, { "epoch": 0.6099930603747398, "grad_norm": 4.972042083740234, "learning_rate": 3.8324817014493326e-05, "loss": 0.5684, "step": 3516 }, { "epoch": 0.6101665510062456, "grad_norm": 0.8742214441299438, "learning_rate": 3.832208637061522e-05, "loss": 0.5333, "step": 3517 }, { "epoch": 0.6103400416377516, "grad_norm": 0.9846548438072205, "learning_rate": 3.831935360043763e-05, "loss": 0.6179, "step": 3518 }, { "epoch": 0.6105135322692574, "grad_norm": 1.2224737405776978, "learning_rate": 3.8316618704277715e-05, "loss": 0.6616, "step": 3519 }, { "epoch": 0.6106870229007634, "grad_norm": 0.6444560885429382, "learning_rate": 3.8313881682452854e-05, "loss": 0.5886, "step": 3520 }, { "epoch": 0.6108605135322692, "grad_norm": 1.0943591594696045, "learning_rate": 3.8311142535280684e-05, "loss": 0.5546, "step": 3521 }, { "epoch": 0.6110340041637752, "grad_norm": 0.7637893557548523, "learning_rate": 3.830840126307909e-05, "loss": 0.575, "step": 3522 }, { "epoch": 0.611207494795281, "grad_norm": 0.822937548160553, "learning_rate": 3.830565786616619e-05, "loss": 0.5853, "step": 3523 }, { "epoch": 0.611380985426787, "grad_norm": 0.9483650326728821, "learning_rate": 3.830291234486037e-05, "loss": 0.5626, "step": 3524 }, { "epoch": 0.6115544760582928, "grad_norm": 0.7164247632026672, "learning_rate": 3.8300164699480246e-05, "loss": 0.5519, "step": 3525 }, { "epoch": 0.6117279666897988, "grad_norm": 0.7991425395011902, "learning_rate": 3.8297414930344684e-05, "loss": 0.6359, "step": 3526 }, { "epoch": 0.6119014573213046, "grad_norm": 0.918340802192688, "learning_rate": 3.8294663037772794e-05, "loss": 0.6689, "step": 3527 }, { "epoch": 0.6120749479528106, "grad_norm": 1.0987683534622192, "learning_rate": 3.829190902208394e-05, "loss": 0.5338, "step": 3528 }, { "epoch": 0.6122484385843164, "grad_norm": 1.5816664695739746, "learning_rate": 3.828915288359774e-05, "loss": 0.5103, "step": 3529 }, { "epoch": 0.6124219292158224, "grad_norm": 0.9146783351898193, "learning_rate": 3.828639462263403e-05, "loss": 0.525, "step": 3530 }, { "epoch": 0.6125954198473282, "grad_norm": 1.0526752471923828, "learning_rate": 3.828363423951291e-05, "loss": 0.6726, "step": 3531 }, { "epoch": 0.6127689104788342, "grad_norm": 0.8325849175453186, "learning_rate": 3.8280871734554746e-05, "loss": 0.5735, "step": 3532 }, { "epoch": 0.61294240111034, "grad_norm": 1.0969488620758057, "learning_rate": 3.8278107108080104e-05, "loss": 0.6194, "step": 3533 }, { "epoch": 0.6131158917418459, "grad_norm": 0.6949189305305481, "learning_rate": 3.827534036040984e-05, "loss": 0.7388, "step": 3534 }, { "epoch": 0.6132893823733518, "grad_norm": 0.9849112629890442, "learning_rate": 3.827257149186502e-05, "loss": 0.6366, "step": 3535 }, { "epoch": 0.6134628730048577, "grad_norm": 0.8824640512466431, "learning_rate": 3.8269800502767e-05, "loss": 0.4998, "step": 3536 }, { "epoch": 0.6136363636363636, "grad_norm": 1.1439348459243774, "learning_rate": 3.826702739343734e-05, "loss": 0.5786, "step": 3537 }, { "epoch": 0.6138098542678695, "grad_norm": 0.8941345810890198, "learning_rate": 3.8264252164197866e-05, "loss": 0.599, "step": 3538 }, { "epoch": 0.6139833448993754, "grad_norm": 0.8163082003593445, "learning_rate": 3.826147481537065e-05, "loss": 0.5389, "step": 3539 }, { "epoch": 0.6141568355308813, "grad_norm": 1.1131253242492676, "learning_rate": 3.825869534727799e-05, "loss": 0.5281, "step": 3540 }, { "epoch": 0.6143303261623873, "grad_norm": 1.7549223899841309, "learning_rate": 3.825591376024247e-05, "loss": 0.5359, "step": 3541 }, { "epoch": 0.6145038167938931, "grad_norm": 0.6971312761306763, "learning_rate": 3.8253130054586886e-05, "loss": 0.5726, "step": 3542 }, { "epoch": 0.614677307425399, "grad_norm": 0.8808236122131348, "learning_rate": 3.825034423063429e-05, "loss": 0.5706, "step": 3543 }, { "epoch": 0.6148507980569049, "grad_norm": 0.9417666792869568, "learning_rate": 3.824755628870797e-05, "loss": 0.6189, "step": 3544 }, { "epoch": 0.6150242886884109, "grad_norm": 1.137608289718628, "learning_rate": 3.824476622913149e-05, "loss": 0.5914, "step": 3545 }, { "epoch": 0.6151977793199167, "grad_norm": 0.6162335276603699, "learning_rate": 3.824197405222863e-05, "loss": 0.7371, "step": 3546 }, { "epoch": 0.6153712699514227, "grad_norm": 0.6862666606903076, "learning_rate": 3.8239179758323424e-05, "loss": 0.666, "step": 3547 }, { "epoch": 0.6155447605829285, "grad_norm": 0.8268684148788452, "learning_rate": 3.8236383347740146e-05, "loss": 0.6798, "step": 3548 }, { "epoch": 0.6157182512144345, "grad_norm": 0.7263082265853882, "learning_rate": 3.823358482080334e-05, "loss": 0.6742, "step": 3549 }, { "epoch": 0.6158917418459403, "grad_norm": 0.8222211003303528, "learning_rate": 3.823078417783777e-05, "loss": 0.6381, "step": 3550 }, { "epoch": 0.6160652324774463, "grad_norm": 0.8517816662788391, "learning_rate": 3.8227981419168445e-05, "loss": 0.5504, "step": 3551 }, { "epoch": 0.6162387231089521, "grad_norm": 1.240667700767517, "learning_rate": 3.8225176545120646e-05, "loss": 0.5581, "step": 3552 }, { "epoch": 0.6164122137404581, "grad_norm": 0.7769044041633606, "learning_rate": 3.822236955601987e-05, "loss": 0.6517, "step": 3553 }, { "epoch": 0.6165857043719639, "grad_norm": 1.2511776685714722, "learning_rate": 3.821956045219186e-05, "loss": 0.6025, "step": 3554 }, { "epoch": 0.6167591950034698, "grad_norm": 0.6716343760490417, "learning_rate": 3.821674923396265e-05, "loss": 0.5631, "step": 3555 }, { "epoch": 0.6169326856349757, "grad_norm": 1.228094458580017, "learning_rate": 3.821393590165845e-05, "loss": 0.5713, "step": 3556 }, { "epoch": 0.6171061762664816, "grad_norm": 0.6799286603927612, "learning_rate": 3.8211120455605774e-05, "loss": 0.6575, "step": 3557 }, { "epoch": 0.6172796668979875, "grad_norm": 0.5899439454078674, "learning_rate": 3.8208302896131344e-05, "loss": 0.6927, "step": 3558 }, { "epoch": 0.6174531575294934, "grad_norm": 0.7722693085670471, "learning_rate": 3.820548322356215e-05, "loss": 0.6003, "step": 3559 }, { "epoch": 0.6176266481609993, "grad_norm": 0.8632433414459229, "learning_rate": 3.820266143822541e-05, "loss": 0.4967, "step": 3560 }, { "epoch": 0.6178001387925052, "grad_norm": 0.756759762763977, "learning_rate": 3.81998375404486e-05, "loss": 0.6062, "step": 3561 }, { "epoch": 0.6179736294240111, "grad_norm": 0.6955695748329163, "learning_rate": 3.819701153055944e-05, "loss": 0.5519, "step": 3562 }, { "epoch": 0.618147120055517, "grad_norm": 0.6327463984489441, "learning_rate": 3.8194183408885885e-05, "loss": 0.657, "step": 3563 }, { "epoch": 0.6183206106870229, "grad_norm": 0.7696917057037354, "learning_rate": 3.8191353175756145e-05, "loss": 0.6415, "step": 3564 }, { "epoch": 0.6184941013185288, "grad_norm": 0.7328816056251526, "learning_rate": 3.818852083149867e-05, "loss": 0.6356, "step": 3565 }, { "epoch": 0.6186675919500347, "grad_norm": 1.2973989248275757, "learning_rate": 3.818568637644217e-05, "loss": 0.6426, "step": 3566 }, { "epoch": 0.6188410825815406, "grad_norm": 1.6103951930999756, "learning_rate": 3.818284981091556e-05, "loss": 0.5831, "step": 3567 }, { "epoch": 0.6190145732130465, "grad_norm": 1.0352448225021362, "learning_rate": 3.8180011135248055e-05, "loss": 0.6628, "step": 3568 }, { "epoch": 0.6191880638445524, "grad_norm": 0.9566216468811035, "learning_rate": 3.8177170349769064e-05, "loss": 0.561, "step": 3569 }, { "epoch": 0.6193615544760583, "grad_norm": 0.8397760987281799, "learning_rate": 3.8174327454808275e-05, "loss": 0.6062, "step": 3570 }, { "epoch": 0.6195350451075642, "grad_norm": 0.9678451418876648, "learning_rate": 3.8171482450695616e-05, "loss": 0.5275, "step": 3571 }, { "epoch": 0.6197085357390701, "grad_norm": 1.1903417110443115, "learning_rate": 3.816863533776124e-05, "loss": 0.6281, "step": 3572 }, { "epoch": 0.619882026370576, "grad_norm": 0.8403670191764832, "learning_rate": 3.816578611633556e-05, "loss": 0.707, "step": 3573 }, { "epoch": 0.6200555170020818, "grad_norm": 0.7110308408737183, "learning_rate": 3.816293478674923e-05, "loss": 0.5867, "step": 3574 }, { "epoch": 0.6202290076335878, "grad_norm": 0.7667453289031982, "learning_rate": 3.816008134933317e-05, "loss": 0.4829, "step": 3575 }, { "epoch": 0.6204024982650936, "grad_norm": 1.0670323371887207, "learning_rate": 3.815722580441849e-05, "loss": 0.5897, "step": 3576 }, { "epoch": 0.6205759888965996, "grad_norm": 0.7509621381759644, "learning_rate": 3.8154368152336606e-05, "loss": 0.6483, "step": 3577 }, { "epoch": 0.6207494795281054, "grad_norm": 0.9665395617485046, "learning_rate": 3.815150839341915e-05, "loss": 0.6711, "step": 3578 }, { "epoch": 0.6209229701596114, "grad_norm": 0.6299415826797485, "learning_rate": 3.814864652799798e-05, "loss": 0.7004, "step": 3579 }, { "epoch": 0.6210964607911172, "grad_norm": 0.620185375213623, "learning_rate": 3.8145782556405244e-05, "loss": 0.7126, "step": 3580 }, { "epoch": 0.6212699514226232, "grad_norm": 0.8498899340629578, "learning_rate": 3.81429164789733e-05, "loss": 0.5984, "step": 3581 }, { "epoch": 0.621443442054129, "grad_norm": 0.8451429605484009, "learning_rate": 3.814004829603475e-05, "loss": 0.5302, "step": 3582 }, { "epoch": 0.621616932685635, "grad_norm": 0.6915079355239868, "learning_rate": 3.813717800792246e-05, "loss": 0.6641, "step": 3583 }, { "epoch": 0.6217904233171409, "grad_norm": 1.105806589126587, "learning_rate": 3.813430561496953e-05, "loss": 0.6299, "step": 3584 }, { "epoch": 0.6219639139486468, "grad_norm": 0.5665948390960693, "learning_rate": 3.81314311175093e-05, "loss": 0.6212, "step": 3585 }, { "epoch": 0.6221374045801527, "grad_norm": 0.7293710708618164, "learning_rate": 3.812855451587537e-05, "loss": 0.541, "step": 3586 }, { "epoch": 0.6223108952116586, "grad_norm": 0.6838366985321045, "learning_rate": 3.812567581040155e-05, "loss": 0.6431, "step": 3587 }, { "epoch": 0.6224843858431645, "grad_norm": 0.8503236174583435, "learning_rate": 3.812279500142194e-05, "loss": 0.6522, "step": 3588 }, { "epoch": 0.6226578764746704, "grad_norm": 0.882392406463623, "learning_rate": 3.811991208927085e-05, "loss": 0.5879, "step": 3589 }, { "epoch": 0.6228313671061763, "grad_norm": 0.8290567994117737, "learning_rate": 3.811702707428285e-05, "loss": 0.6743, "step": 3590 }, { "epoch": 0.6230048577376822, "grad_norm": 0.9682883620262146, "learning_rate": 3.811413995679275e-05, "loss": 0.5339, "step": 3591 }, { "epoch": 0.6231783483691881, "grad_norm": 0.7133485078811646, "learning_rate": 3.81112507371356e-05, "loss": 0.5846, "step": 3592 }, { "epoch": 0.6233518390006939, "grad_norm": 0.6190349459648132, "learning_rate": 3.8108359415646694e-05, "loss": 0.6305, "step": 3593 }, { "epoch": 0.6235253296321999, "grad_norm": 0.5973116755485535, "learning_rate": 3.810546599266158e-05, "loss": 0.5449, "step": 3594 }, { "epoch": 0.6236988202637057, "grad_norm": 0.7279934287071228, "learning_rate": 3.810257046851604e-05, "loss": 0.4998, "step": 3595 }, { "epoch": 0.6238723108952117, "grad_norm": 0.8145825862884521, "learning_rate": 3.8099672843546106e-05, "loss": 0.5234, "step": 3596 }, { "epoch": 0.6240458015267175, "grad_norm": 0.8200207352638245, "learning_rate": 3.8096773118088045e-05, "loss": 0.5153, "step": 3597 }, { "epoch": 0.6242192921582235, "grad_norm": 0.8757025599479675, "learning_rate": 3.809387129247838e-05, "loss": 0.5671, "step": 3598 }, { "epoch": 0.6243927827897293, "grad_norm": 0.7421393394470215, "learning_rate": 3.809096736705387e-05, "loss": 0.6555, "step": 3599 }, { "epoch": 0.6245662734212353, "grad_norm": 0.8077309131622314, "learning_rate": 3.808806134215151e-05, "loss": 0.6855, "step": 3600 }, { "epoch": 0.6247397640527411, "grad_norm": 0.7182208299636841, "learning_rate": 3.8085153218108555e-05, "loss": 0.6501, "step": 3601 }, { "epoch": 0.6249132546842471, "grad_norm": 0.8924763202667236, "learning_rate": 3.80822429952625e-05, "loss": 0.5684, "step": 3602 }, { "epoch": 0.6250867453157529, "grad_norm": 0.6226588487625122, "learning_rate": 3.807933067395108e-05, "loss": 0.6013, "step": 3603 }, { "epoch": 0.6252602359472589, "grad_norm": 0.7973635196685791, "learning_rate": 3.8076416254512256e-05, "loss": 0.701, "step": 3604 }, { "epoch": 0.6254337265787647, "grad_norm": 0.5976009368896484, "learning_rate": 3.807349973728427e-05, "loss": 0.5522, "step": 3605 }, { "epoch": 0.6256072172102707, "grad_norm": 0.7112151384353638, "learning_rate": 3.807058112260558e-05, "loss": 0.6797, "step": 3606 }, { "epoch": 0.6257807078417765, "grad_norm": 0.8459869623184204, "learning_rate": 3.8067660410814895e-05, "loss": 0.6395, "step": 3607 }, { "epoch": 0.6259541984732825, "grad_norm": 0.9730987548828125, "learning_rate": 3.8064737602251155e-05, "loss": 0.6216, "step": 3608 }, { "epoch": 0.6261276891047883, "grad_norm": 0.7733995318412781, "learning_rate": 3.8061812697253576e-05, "loss": 0.6182, "step": 3609 }, { "epoch": 0.6263011797362943, "grad_norm": 0.6619139313697815, "learning_rate": 3.8058885696161595e-05, "loss": 0.5614, "step": 3610 }, { "epoch": 0.6264746703678001, "grad_norm": 0.7139784693717957, "learning_rate": 3.805595659931487e-05, "loss": 0.6194, "step": 3611 }, { "epoch": 0.6266481609993061, "grad_norm": 0.7814990282058716, "learning_rate": 3.805302540705335e-05, "loss": 0.6265, "step": 3612 }, { "epoch": 0.6268216516308119, "grad_norm": 0.7852691411972046, "learning_rate": 3.80500921197172e-05, "loss": 0.5624, "step": 3613 }, { "epoch": 0.6269951422623178, "grad_norm": 1.1444884538650513, "learning_rate": 3.8047156737646825e-05, "loss": 0.6121, "step": 3614 }, { "epoch": 0.6271686328938237, "grad_norm": 0.6964013576507568, "learning_rate": 3.8044219261182876e-05, "loss": 0.6388, "step": 3615 }, { "epoch": 0.6273421235253296, "grad_norm": 0.7965458035469055, "learning_rate": 3.8041279690666254e-05, "loss": 0.5717, "step": 3616 }, { "epoch": 0.6275156141568355, "grad_norm": 0.8811926245689392, "learning_rate": 3.8038338026438116e-05, "loss": 0.5613, "step": 3617 }, { "epoch": 0.6276891047883414, "grad_norm": 0.8348861932754517, "learning_rate": 3.803539426883982e-05, "loss": 0.6429, "step": 3618 }, { "epoch": 0.6278625954198473, "grad_norm": 0.8193329572677612, "learning_rate": 3.803244841821301e-05, "loss": 0.571, "step": 3619 }, { "epoch": 0.6280360860513532, "grad_norm": 0.7115561366081238, "learning_rate": 3.8029500474899544e-05, "loss": 0.5219, "step": 3620 }, { "epoch": 0.6282095766828591, "grad_norm": 0.7684637904167175, "learning_rate": 3.8026550439241535e-05, "loss": 0.6139, "step": 3621 }, { "epoch": 0.628383067314365, "grad_norm": 0.6806290745735168, "learning_rate": 3.802359831158135e-05, "loss": 0.5044, "step": 3622 }, { "epoch": 0.628556557945871, "grad_norm": 0.7397187352180481, "learning_rate": 3.802064409226158e-05, "loss": 0.5122, "step": 3623 }, { "epoch": 0.6287300485773768, "grad_norm": 2.4987173080444336, "learning_rate": 3.801768778162506e-05, "loss": 0.5508, "step": 3624 }, { "epoch": 0.6289035392088828, "grad_norm": 0.8442872762680054, "learning_rate": 3.801472938001488e-05, "loss": 0.5547, "step": 3625 }, { "epoch": 0.6290770298403886, "grad_norm": 0.7826525568962097, "learning_rate": 3.8011768887774365e-05, "loss": 0.7209, "step": 3626 }, { "epoch": 0.6292505204718946, "grad_norm": 0.8531131744384766, "learning_rate": 3.8008806305247083e-05, "loss": 0.5614, "step": 3627 }, { "epoch": 0.6294240111034004, "grad_norm": 1.044183373451233, "learning_rate": 3.800584163277684e-05, "loss": 0.6444, "step": 3628 }, { "epoch": 0.6295975017349064, "grad_norm": 0.8884148001670837, "learning_rate": 3.80028748707077e-05, "loss": 0.5234, "step": 3629 }, { "epoch": 0.6297709923664122, "grad_norm": 0.9937842488288879, "learning_rate": 3.7999906019383954e-05, "loss": 0.5386, "step": 3630 }, { "epoch": 0.6299444829979182, "grad_norm": 0.8778430819511414, "learning_rate": 3.799693507915014e-05, "loss": 0.6859, "step": 3631 }, { "epoch": 0.630117973629424, "grad_norm": 0.9054518938064575, "learning_rate": 3.799396205035104e-05, "loss": 0.4907, "step": 3632 }, { "epoch": 0.6302914642609299, "grad_norm": 0.9029878377914429, "learning_rate": 3.799098693333167e-05, "loss": 0.526, "step": 3633 }, { "epoch": 0.6304649548924358, "grad_norm": 1.7739448547363281, "learning_rate": 3.7988009728437304e-05, "loss": 0.4902, "step": 3634 }, { "epoch": 0.6306384455239417, "grad_norm": 0.6787098050117493, "learning_rate": 3.7985030436013454e-05, "loss": 0.5785, "step": 3635 }, { "epoch": 0.6308119361554476, "grad_norm": 0.8755621314048767, "learning_rate": 3.7982049056405866e-05, "loss": 0.5568, "step": 3636 }, { "epoch": 0.6309854267869535, "grad_norm": 0.662980318069458, "learning_rate": 3.797906558996053e-05, "loss": 0.5785, "step": 3637 }, { "epoch": 0.6311589174184594, "grad_norm": 0.9010483622550964, "learning_rate": 3.797608003702368e-05, "loss": 0.5588, "step": 3638 }, { "epoch": 0.6313324080499653, "grad_norm": 1.4660987854003906, "learning_rate": 3.79730923979418e-05, "loss": 0.515, "step": 3639 }, { "epoch": 0.6315058986814712, "grad_norm": 0.8304885625839233, "learning_rate": 3.79701026730616e-05, "loss": 0.574, "step": 3640 }, { "epoch": 0.6316793893129771, "grad_norm": 0.8260068893432617, "learning_rate": 3.7967110862730045e-05, "loss": 0.5349, "step": 3641 }, { "epoch": 0.631852879944483, "grad_norm": 0.65542072057724, "learning_rate": 3.796411696729434e-05, "loss": 0.6261, "step": 3642 }, { "epoch": 0.6320263705759889, "grad_norm": 0.7199093699455261, "learning_rate": 3.7961120987101933e-05, "loss": 0.7023, "step": 3643 }, { "epoch": 0.6321998612074948, "grad_norm": 0.9273139238357544, "learning_rate": 3.79581229225005e-05, "loss": 0.5454, "step": 3644 }, { "epoch": 0.6323733518390007, "grad_norm": 0.7068122029304504, "learning_rate": 3.795512277383798e-05, "loss": 0.6456, "step": 3645 }, { "epoch": 0.6325468424705066, "grad_norm": 0.6215282678604126, "learning_rate": 3.795212054146254e-05, "loss": 0.6597, "step": 3646 }, { "epoch": 0.6327203331020125, "grad_norm": 1.4631701707839966, "learning_rate": 3.794911622572259e-05, "loss": 0.6924, "step": 3647 }, { "epoch": 0.6328938237335184, "grad_norm": 1.1630256175994873, "learning_rate": 3.794610982696679e-05, "loss": 0.4807, "step": 3648 }, { "epoch": 0.6330673143650243, "grad_norm": 0.5245219469070435, "learning_rate": 3.794310134554403e-05, "loss": 0.6442, "step": 3649 }, { "epoch": 0.6332408049965302, "grad_norm": 0.7798471450805664, "learning_rate": 3.7940090781803454e-05, "loss": 0.5171, "step": 3650 }, { "epoch": 0.6334142956280361, "grad_norm": 1.0232371091842651, "learning_rate": 3.793707813609444e-05, "loss": 0.5538, "step": 3651 }, { "epoch": 0.6335877862595419, "grad_norm": 1.0044523477554321, "learning_rate": 3.7934063408766606e-05, "loss": 0.6938, "step": 3652 }, { "epoch": 0.6337612768910479, "grad_norm": 0.8687499165534973, "learning_rate": 3.7931046600169815e-05, "loss": 0.6128, "step": 3653 }, { "epoch": 0.6339347675225537, "grad_norm": 0.7496787309646606, "learning_rate": 3.792802771065417e-05, "loss": 0.5076, "step": 3654 }, { "epoch": 0.6341082581540597, "grad_norm": 0.714073657989502, "learning_rate": 3.792500674057002e-05, "loss": 0.6096, "step": 3655 }, { "epoch": 0.6342817487855655, "grad_norm": 0.9160600304603577, "learning_rate": 3.792198369026796e-05, "loss": 0.5463, "step": 3656 }, { "epoch": 0.6344552394170715, "grad_norm": 1.1145319938659668, "learning_rate": 3.79189585600988e-05, "loss": 0.63, "step": 3657 }, { "epoch": 0.6346287300485773, "grad_norm": 1.1254013776779175, "learning_rate": 3.791593135041362e-05, "loss": 0.7301, "step": 3658 }, { "epoch": 0.6348022206800833, "grad_norm": 0.9876613020896912, "learning_rate": 3.791290206156373e-05, "loss": 0.6985, "step": 3659 }, { "epoch": 0.6349757113115891, "grad_norm": 0.6043176651000977, "learning_rate": 3.790987069390069e-05, "loss": 0.6089, "step": 3660 }, { "epoch": 0.6351492019430951, "grad_norm": 0.5808443427085876, "learning_rate": 3.790683724777628e-05, "loss": 0.6603, "step": 3661 }, { "epoch": 0.635322692574601, "grad_norm": 0.9439184665679932, "learning_rate": 3.790380172354255e-05, "loss": 0.5812, "step": 3662 }, { "epoch": 0.6354961832061069, "grad_norm": 0.6527266502380371, "learning_rate": 3.790076412155176e-05, "loss": 0.6115, "step": 3663 }, { "epoch": 0.6356696738376127, "grad_norm": 0.7660706639289856, "learning_rate": 3.789772444215644e-05, "loss": 0.6599, "step": 3664 }, { "epoch": 0.6358431644691187, "grad_norm": 0.9501388072967529, "learning_rate": 3.7894682685709335e-05, "loss": 0.749, "step": 3665 }, { "epoch": 0.6360166551006246, "grad_norm": 1.4506312608718872, "learning_rate": 3.7891638852563455e-05, "loss": 0.494, "step": 3666 }, { "epoch": 0.6361901457321305, "grad_norm": 0.711625337600708, "learning_rate": 3.788859294307204e-05, "loss": 0.6405, "step": 3667 }, { "epoch": 0.6363636363636364, "grad_norm": 0.6802700161933899, "learning_rate": 3.788554495758858e-05, "loss": 0.5615, "step": 3668 }, { "epoch": 0.6365371269951423, "grad_norm": 0.7881873846054077, "learning_rate": 3.788249489646677e-05, "loss": 0.5312, "step": 3669 }, { "epoch": 0.6367106176266482, "grad_norm": 0.7416797876358032, "learning_rate": 3.7879442760060604e-05, "loss": 0.5347, "step": 3670 }, { "epoch": 0.6368841082581541, "grad_norm": 0.6100966334342957, "learning_rate": 3.7876388548724256e-05, "loss": 0.6012, "step": 3671 }, { "epoch": 0.63705759888966, "grad_norm": 0.918250560760498, "learning_rate": 3.78733322628122e-05, "loss": 0.5327, "step": 3672 }, { "epoch": 0.6372310895211658, "grad_norm": 0.8421688079833984, "learning_rate": 3.787027390267911e-05, "loss": 0.6249, "step": 3673 }, { "epoch": 0.6374045801526718, "grad_norm": 0.6886511445045471, "learning_rate": 3.786721346867991e-05, "loss": 0.6, "step": 3674 }, { "epoch": 0.6375780707841776, "grad_norm": 0.7019809484481812, "learning_rate": 3.786415096116976e-05, "loss": 0.6454, "step": 3675 }, { "epoch": 0.6377515614156836, "grad_norm": 0.7325215339660645, "learning_rate": 3.786108638050408e-05, "loss": 0.6024, "step": 3676 }, { "epoch": 0.6379250520471894, "grad_norm": 0.6856330037117004, "learning_rate": 3.785801972703851e-05, "loss": 0.6418, "step": 3677 }, { "epoch": 0.6380985426786954, "grad_norm": 0.5166242122650146, "learning_rate": 3.785495100112894e-05, "loss": 0.7063, "step": 3678 }, { "epoch": 0.6382720333102012, "grad_norm": 0.6169303059577942, "learning_rate": 3.7851880203131506e-05, "loss": 0.7209, "step": 3679 }, { "epoch": 0.6384455239417072, "grad_norm": 0.7382290959358215, "learning_rate": 3.784880733340257e-05, "loss": 0.7397, "step": 3680 }, { "epoch": 0.638619014573213, "grad_norm": 0.6344757080078125, "learning_rate": 3.7845732392298746e-05, "loss": 0.6554, "step": 3681 }, { "epoch": 0.638792505204719, "grad_norm": 1.1019749641418457, "learning_rate": 3.784265538017689e-05, "loss": 0.5975, "step": 3682 }, { "epoch": 0.6389659958362248, "grad_norm": 0.8122680187225342, "learning_rate": 3.783957629739408e-05, "loss": 0.6364, "step": 3683 }, { "epoch": 0.6391394864677308, "grad_norm": 0.7165302038192749, "learning_rate": 3.7836495144307644e-05, "loss": 0.5143, "step": 3684 }, { "epoch": 0.6393129770992366, "grad_norm": 0.6541818976402283, "learning_rate": 3.783341192127518e-05, "loss": 0.5791, "step": 3685 }, { "epoch": 0.6394864677307426, "grad_norm": 0.5491472482681274, "learning_rate": 3.783032662865447e-05, "loss": 0.5684, "step": 3686 }, { "epoch": 0.6396599583622484, "grad_norm": 2.529473066329956, "learning_rate": 3.7827239266803584e-05, "loss": 0.5333, "step": 3687 }, { "epoch": 0.6398334489937544, "grad_norm": 0.7565951943397522, "learning_rate": 3.782414983608081e-05, "loss": 0.5792, "step": 3688 }, { "epoch": 0.6400069396252602, "grad_norm": 0.8166025280952454, "learning_rate": 3.7821058336844676e-05, "loss": 0.5707, "step": 3689 }, { "epoch": 0.6401804302567662, "grad_norm": 0.6751120090484619, "learning_rate": 3.7817964769453956e-05, "loss": 0.6344, "step": 3690 }, { "epoch": 0.640353920888272, "grad_norm": 0.6678987145423889, "learning_rate": 3.781486913426766e-05, "loss": 0.6392, "step": 3691 }, { "epoch": 0.6405274115197779, "grad_norm": 0.7985641360282898, "learning_rate": 3.781177143164505e-05, "loss": 0.5919, "step": 3692 }, { "epoch": 0.6407009021512838, "grad_norm": 0.740614116191864, "learning_rate": 3.7808671661945606e-05, "loss": 0.5917, "step": 3693 }, { "epoch": 0.6408743927827897, "grad_norm": 0.5903328657150269, "learning_rate": 3.7805569825529055e-05, "loss": 0.6633, "step": 3694 }, { "epoch": 0.6410478834142956, "grad_norm": 0.6017022132873535, "learning_rate": 3.780246592275539e-05, "loss": 0.7225, "step": 3695 }, { "epoch": 0.6412213740458015, "grad_norm": 0.919784426689148, "learning_rate": 3.779935995398481e-05, "loss": 0.5295, "step": 3696 }, { "epoch": 0.6413948646773074, "grad_norm": 0.5991265773773193, "learning_rate": 3.7796251919577764e-05, "loss": 0.6859, "step": 3697 }, { "epoch": 0.6415683553088133, "grad_norm": 0.6057894825935364, "learning_rate": 3.7793141819894955e-05, "loss": 0.6577, "step": 3698 }, { "epoch": 0.6417418459403192, "grad_norm": 0.7780317664146423, "learning_rate": 3.779002965529729e-05, "loss": 0.5516, "step": 3699 }, { "epoch": 0.6419153365718251, "grad_norm": 1.1641170978546143, "learning_rate": 3.778691542614596e-05, "loss": 0.6179, "step": 3700 }, { "epoch": 0.642088827203331, "grad_norm": 0.7804569602012634, "learning_rate": 3.7783799132802365e-05, "loss": 0.6097, "step": 3701 }, { "epoch": 0.6422623178348369, "grad_norm": 0.7090769410133362, "learning_rate": 3.778068077562817e-05, "loss": 0.6624, "step": 3702 }, { "epoch": 0.6424358084663429, "grad_norm": 0.6512264609336853, "learning_rate": 3.7777560354985246e-05, "loss": 0.6213, "step": 3703 }, { "epoch": 0.6426092990978487, "grad_norm": 0.7044447660446167, "learning_rate": 3.7774437871235724e-05, "loss": 0.5912, "step": 3704 }, { "epoch": 0.6427827897293547, "grad_norm": 0.7511945366859436, "learning_rate": 3.7771313324741974e-05, "loss": 0.5898, "step": 3705 }, { "epoch": 0.6429562803608605, "grad_norm": 1.123663306236267, "learning_rate": 3.776818671586662e-05, "loss": 0.568, "step": 3706 }, { "epoch": 0.6431297709923665, "grad_norm": 0.8128359913825989, "learning_rate": 3.776505804497248e-05, "loss": 0.5587, "step": 3707 }, { "epoch": 0.6433032616238723, "grad_norm": 0.7907178997993469, "learning_rate": 3.776192731242265e-05, "loss": 0.6127, "step": 3708 }, { "epoch": 0.6434767522553783, "grad_norm": 0.7276610136032104, "learning_rate": 3.775879451858047e-05, "loss": 0.5898, "step": 3709 }, { "epoch": 0.6436502428868841, "grad_norm": 0.6831201910972595, "learning_rate": 3.775565966380949e-05, "loss": 0.547, "step": 3710 }, { "epoch": 0.6438237335183901, "grad_norm": 0.752452552318573, "learning_rate": 3.7752522748473517e-05, "loss": 0.6072, "step": 3711 }, { "epoch": 0.6439972241498959, "grad_norm": 0.9028571248054504, "learning_rate": 3.774938377293659e-05, "loss": 0.6353, "step": 3712 }, { "epoch": 0.6441707147814018, "grad_norm": 0.7592943906784058, "learning_rate": 3.774624273756299e-05, "loss": 0.5831, "step": 3713 }, { "epoch": 0.6443442054129077, "grad_norm": 2.0297415256500244, "learning_rate": 3.774309964271725e-05, "loss": 0.6412, "step": 3714 }, { "epoch": 0.6445176960444136, "grad_norm": 0.8096716403961182, "learning_rate": 3.773995448876412e-05, "loss": 0.6069, "step": 3715 }, { "epoch": 0.6446911866759195, "grad_norm": 0.7666990160942078, "learning_rate": 3.7736807276068604e-05, "loss": 0.6991, "step": 3716 }, { "epoch": 0.6448646773074254, "grad_norm": 0.6890394687652588, "learning_rate": 3.773365800499592e-05, "loss": 0.6167, "step": 3717 }, { "epoch": 0.6450381679389313, "grad_norm": 0.6246633529663086, "learning_rate": 3.773050667591158e-05, "loss": 0.5603, "step": 3718 }, { "epoch": 0.6452116585704372, "grad_norm": 0.950532853603363, "learning_rate": 3.772735328918127e-05, "loss": 0.5162, "step": 3719 }, { "epoch": 0.6453851492019431, "grad_norm": 0.8982919454574585, "learning_rate": 3.772419784517095e-05, "loss": 0.6329, "step": 3720 }, { "epoch": 0.645558639833449, "grad_norm": 1.2791414260864258, "learning_rate": 3.7721040344246824e-05, "loss": 0.543, "step": 3721 }, { "epoch": 0.6457321304649549, "grad_norm": 0.6306223273277283, "learning_rate": 3.771788078677532e-05, "loss": 0.5756, "step": 3722 }, { "epoch": 0.6459056210964608, "grad_norm": 0.8798757195472717, "learning_rate": 3.7714719173123104e-05, "loss": 0.4753, "step": 3723 }, { "epoch": 0.6460791117279667, "grad_norm": 0.6941823959350586, "learning_rate": 3.771155550365708e-05, "loss": 0.5959, "step": 3724 }, { "epoch": 0.6462526023594726, "grad_norm": 0.70530766248703, "learning_rate": 3.770838977874441e-05, "loss": 0.6239, "step": 3725 }, { "epoch": 0.6464260929909785, "grad_norm": 0.8281103372573853, "learning_rate": 3.770522199875247e-05, "loss": 0.5128, "step": 3726 }, { "epoch": 0.6465995836224844, "grad_norm": 1.0173561573028564, "learning_rate": 3.770205216404888e-05, "loss": 0.6163, "step": 3727 }, { "epoch": 0.6467730742539903, "grad_norm": 0.9786710143089294, "learning_rate": 3.7698880275001516e-05, "loss": 0.4941, "step": 3728 }, { "epoch": 0.6469465648854962, "grad_norm": 0.6810334920883179, "learning_rate": 3.7695706331978465e-05, "loss": 0.6027, "step": 3729 }, { "epoch": 0.6471200555170021, "grad_norm": 0.8536402583122253, "learning_rate": 3.769253033534808e-05, "loss": 0.6367, "step": 3730 }, { "epoch": 0.647293546148508, "grad_norm": 0.8468443751335144, "learning_rate": 3.768935228547894e-05, "loss": 0.625, "step": 3731 }, { "epoch": 0.6474670367800138, "grad_norm": 0.7573752403259277, "learning_rate": 3.7686172182739845e-05, "loss": 0.5385, "step": 3732 }, { "epoch": 0.6476405274115198, "grad_norm": 0.7154396176338196, "learning_rate": 3.7682990027499864e-05, "loss": 0.6071, "step": 3733 }, { "epoch": 0.6478140180430256, "grad_norm": 1.3461333513259888, "learning_rate": 3.767980582012828e-05, "loss": 0.6571, "step": 3734 }, { "epoch": 0.6479875086745316, "grad_norm": 0.6264582276344299, "learning_rate": 3.767661956099464e-05, "loss": 0.6262, "step": 3735 }, { "epoch": 0.6481609993060374, "grad_norm": 0.6777341961860657, "learning_rate": 3.7673431250468695e-05, "loss": 0.6062, "step": 3736 }, { "epoch": 0.6483344899375434, "grad_norm": 1.1197324991226196, "learning_rate": 3.767024088892046e-05, "loss": 0.5386, "step": 3737 }, { "epoch": 0.6485079805690492, "grad_norm": 0.6859471797943115, "learning_rate": 3.766704847672018e-05, "loss": 0.6283, "step": 3738 }, { "epoch": 0.6486814712005552, "grad_norm": 0.9123736619949341, "learning_rate": 3.7663854014238344e-05, "loss": 0.5356, "step": 3739 }, { "epoch": 0.648854961832061, "grad_norm": 1.0681298971176147, "learning_rate": 3.766065750184566e-05, "loss": 0.5234, "step": 3740 }, { "epoch": 0.649028452463567, "grad_norm": 0.7163435220718384, "learning_rate": 3.765745893991309e-05, "loss": 0.4725, "step": 3741 }, { "epoch": 0.6492019430950728, "grad_norm": 1.132663369178772, "learning_rate": 3.7654258328811856e-05, "loss": 0.5359, "step": 3742 }, { "epoch": 0.6493754337265788, "grad_norm": 1.1401888132095337, "learning_rate": 3.765105566891335e-05, "loss": 0.516, "step": 3743 }, { "epoch": 0.6495489243580846, "grad_norm": 0.5803067088127136, "learning_rate": 3.764785096058927e-05, "loss": 0.5691, "step": 3744 }, { "epoch": 0.6497224149895906, "grad_norm": 2.6133241653442383, "learning_rate": 3.764464420421153e-05, "loss": 0.6187, "step": 3745 }, { "epoch": 0.6498959056210964, "grad_norm": 0.6367717385292053, "learning_rate": 3.764143540015227e-05, "loss": 0.6381, "step": 3746 }, { "epoch": 0.6500693962526024, "grad_norm": 0.5755173563957214, "learning_rate": 3.763822454878387e-05, "loss": 0.6499, "step": 3747 }, { "epoch": 0.6502428868841083, "grad_norm": 0.6816663146018982, "learning_rate": 3.763501165047896e-05, "loss": 0.6312, "step": 3748 }, { "epoch": 0.6504163775156142, "grad_norm": 0.7217597961425781, "learning_rate": 3.76317967056104e-05, "loss": 0.488, "step": 3749 }, { "epoch": 0.65058986814712, "grad_norm": 0.7203596234321594, "learning_rate": 3.7628579714551285e-05, "loss": 0.6619, "step": 3750 }, { "epoch": 0.6507633587786259, "grad_norm": 0.9814811944961548, "learning_rate": 3.762536067767495e-05, "loss": 0.7832, "step": 3751 }, { "epoch": 0.6509368494101319, "grad_norm": 0.719393253326416, "learning_rate": 3.7622139595354976e-05, "loss": 0.6853, "step": 3752 }, { "epoch": 0.6511103400416377, "grad_norm": 0.8610318303108215, "learning_rate": 3.761891646796517e-05, "loss": 0.5215, "step": 3753 }, { "epoch": 0.6512838306731437, "grad_norm": 1.0605509281158447, "learning_rate": 3.7615691295879574e-05, "loss": 0.5273, "step": 3754 }, { "epoch": 0.6514573213046495, "grad_norm": 0.6611588597297668, "learning_rate": 3.7612464079472474e-05, "loss": 0.6794, "step": 3755 }, { "epoch": 0.6516308119361555, "grad_norm": 0.7887797355651855, "learning_rate": 3.76092348191184e-05, "loss": 0.6007, "step": 3756 }, { "epoch": 0.6518043025676613, "grad_norm": 0.7849190831184387, "learning_rate": 3.7606003515192103e-05, "loss": 0.6798, "step": 3757 }, { "epoch": 0.6519777931991673, "grad_norm": 0.8868094086647034, "learning_rate": 3.7602770168068586e-05, "loss": 0.6073, "step": 3758 }, { "epoch": 0.6521512838306731, "grad_norm": 0.6700664162635803, "learning_rate": 3.7599534778123074e-05, "loss": 0.5991, "step": 3759 }, { "epoch": 0.6523247744621791, "grad_norm": 0.7111955285072327, "learning_rate": 3.759629734573105e-05, "loss": 0.6134, "step": 3760 }, { "epoch": 0.6524982650936849, "grad_norm": 0.5644627809524536, "learning_rate": 3.759305787126821e-05, "loss": 0.6559, "step": 3761 }, { "epoch": 0.6526717557251909, "grad_norm": 0.6465210914611816, "learning_rate": 3.758981635511051e-05, "loss": 0.6991, "step": 3762 }, { "epoch": 0.6528452463566967, "grad_norm": 0.8938121795654297, "learning_rate": 3.758657279763412e-05, "loss": 0.7344, "step": 3763 }, { "epoch": 0.6530187369882027, "grad_norm": 0.8159437775611877, "learning_rate": 3.758332719921547e-05, "loss": 0.6278, "step": 3764 }, { "epoch": 0.6531922276197085, "grad_norm": 0.661339282989502, "learning_rate": 3.758007956023121e-05, "loss": 0.5828, "step": 3765 }, { "epoch": 0.6533657182512145, "grad_norm": 0.5961443185806274, "learning_rate": 3.757682988105823e-05, "loss": 0.6519, "step": 3766 }, { "epoch": 0.6535392088827203, "grad_norm": 0.7474210262298584, "learning_rate": 3.757357816207366e-05, "loss": 0.5505, "step": 3767 }, { "epoch": 0.6537126995142263, "grad_norm": 0.5833430290222168, "learning_rate": 3.7570324403654866e-05, "loss": 0.5767, "step": 3768 }, { "epoch": 0.6538861901457321, "grad_norm": 0.8346602320671082, "learning_rate": 3.756706860617945e-05, "loss": 0.6614, "step": 3769 }, { "epoch": 0.6540596807772381, "grad_norm": 0.6843047738075256, "learning_rate": 3.756381077002526e-05, "loss": 0.6467, "step": 3770 }, { "epoch": 0.6542331714087439, "grad_norm": 0.7140724062919617, "learning_rate": 3.756055089557036e-05, "loss": 0.5745, "step": 3771 }, { "epoch": 0.6544066620402498, "grad_norm": 0.7032603621482849, "learning_rate": 3.755728898319306e-05, "loss": 0.6626, "step": 3772 }, { "epoch": 0.6545801526717557, "grad_norm": 1.0892679691314697, "learning_rate": 3.7554025033271923e-05, "loss": 0.5334, "step": 3773 }, { "epoch": 0.6547536433032616, "grad_norm": 0.6795757412910461, "learning_rate": 3.7550759046185726e-05, "loss": 0.5492, "step": 3774 }, { "epoch": 0.6549271339347675, "grad_norm": 0.7899928092956543, "learning_rate": 3.754749102231349e-05, "loss": 0.5258, "step": 3775 }, { "epoch": 0.6551006245662734, "grad_norm": 0.9099841117858887, "learning_rate": 3.7544220962034475e-05, "loss": 0.5477, "step": 3776 }, { "epoch": 0.6552741151977793, "grad_norm": 0.7497731447219849, "learning_rate": 3.7540948865728174e-05, "loss": 0.6212, "step": 3777 }, { "epoch": 0.6554476058292852, "grad_norm": 0.724162220954895, "learning_rate": 3.7537674733774315e-05, "loss": 0.6899, "step": 3778 }, { "epoch": 0.6556210964607911, "grad_norm": 0.7789028882980347, "learning_rate": 3.7534398566552866e-05, "loss": 0.6855, "step": 3779 }, { "epoch": 0.655794587092297, "grad_norm": 0.6694778800010681, "learning_rate": 3.753112036444404e-05, "loss": 0.5677, "step": 3780 }, { "epoch": 0.655968077723803, "grad_norm": 0.671506404876709, "learning_rate": 3.7527840127828256e-05, "loss": 0.5542, "step": 3781 }, { "epoch": 0.6561415683553088, "grad_norm": 0.9021669626235962, "learning_rate": 3.752455785708622e-05, "loss": 0.7089, "step": 3782 }, { "epoch": 0.6563150589868147, "grad_norm": 0.925146222114563, "learning_rate": 3.752127355259881e-05, "loss": 0.5023, "step": 3783 }, { "epoch": 0.6564885496183206, "grad_norm": 0.7789936065673828, "learning_rate": 3.7517987214747186e-05, "loss": 0.4883, "step": 3784 }, { "epoch": 0.6566620402498266, "grad_norm": 2.8964085578918457, "learning_rate": 3.751469884391274e-05, "loss": 0.5497, "step": 3785 }, { "epoch": 0.6568355308813324, "grad_norm": 0.6731738448143005, "learning_rate": 3.751140844047708e-05, "loss": 0.5436, "step": 3786 }, { "epoch": 0.6570090215128384, "grad_norm": 0.6144465804100037, "learning_rate": 3.750811600482207e-05, "loss": 0.6362, "step": 3787 }, { "epoch": 0.6571825121443442, "grad_norm": 1.0024034976959229, "learning_rate": 3.7504821537329795e-05, "loss": 0.6335, "step": 3788 }, { "epoch": 0.6573560027758502, "grad_norm": 0.6064333915710449, "learning_rate": 3.750152503838258e-05, "loss": 0.6765, "step": 3789 }, { "epoch": 0.657529493407356, "grad_norm": 0.8006238341331482, "learning_rate": 3.7498226508362996e-05, "loss": 0.5896, "step": 3790 }, { "epoch": 0.6577029840388618, "grad_norm": 0.9184333682060242, "learning_rate": 3.7494925947653835e-05, "loss": 0.5553, "step": 3791 }, { "epoch": 0.6578764746703678, "grad_norm": 0.774874746799469, "learning_rate": 3.749162335663813e-05, "loss": 0.7189, "step": 3792 }, { "epoch": 0.6580499653018737, "grad_norm": 0.68658846616745, "learning_rate": 3.7488318735699154e-05, "loss": 0.7041, "step": 3793 }, { "epoch": 0.6582234559333796, "grad_norm": 1.0522260665893555, "learning_rate": 3.7485012085220416e-05, "loss": 0.6862, "step": 3794 }, { "epoch": 0.6583969465648855, "grad_norm": 0.6765912175178528, "learning_rate": 3.7481703405585646e-05, "loss": 0.5156, "step": 3795 }, { "epoch": 0.6585704371963914, "grad_norm": 0.63466876745224, "learning_rate": 3.747839269717882e-05, "loss": 0.6035, "step": 3796 }, { "epoch": 0.6587439278278973, "grad_norm": 0.736077070236206, "learning_rate": 3.747507996038416e-05, "loss": 0.554, "step": 3797 }, { "epoch": 0.6589174184594032, "grad_norm": 2.358635663986206, "learning_rate": 3.7471765195586115e-05, "loss": 0.5072, "step": 3798 }, { "epoch": 0.6590909090909091, "grad_norm": 0.7252426743507385, "learning_rate": 3.746844840316935e-05, "loss": 0.5627, "step": 3799 }, { "epoch": 0.659264399722415, "grad_norm": 0.9332752227783203, "learning_rate": 3.74651295835188e-05, "loss": 0.5043, "step": 3800 }, { "epoch": 0.6594378903539209, "grad_norm": 0.8140218257904053, "learning_rate": 3.7461808737019606e-05, "loss": 0.5044, "step": 3801 }, { "epoch": 0.6596113809854268, "grad_norm": 0.7537535429000854, "learning_rate": 3.745848586405717e-05, "loss": 0.6608, "step": 3802 }, { "epoch": 0.6597848716169327, "grad_norm": 0.8436901569366455, "learning_rate": 3.745516096501709e-05, "loss": 0.5093, "step": 3803 }, { "epoch": 0.6599583622484386, "grad_norm": 0.9159920811653137, "learning_rate": 3.745183404028525e-05, "loss": 0.4623, "step": 3804 }, { "epoch": 0.6601318528799445, "grad_norm": 1.2931699752807617, "learning_rate": 3.744850509024774e-05, "loss": 0.5604, "step": 3805 }, { "epoch": 0.6603053435114504, "grad_norm": 0.6823956370353699, "learning_rate": 3.7445174115290875e-05, "loss": 0.5564, "step": 3806 }, { "epoch": 0.6604788341429563, "grad_norm": 0.720169186592102, "learning_rate": 3.744184111580123e-05, "loss": 0.5358, "step": 3807 }, { "epoch": 0.6606523247744622, "grad_norm": 0.9941638708114624, "learning_rate": 3.74385060921656e-05, "loss": 0.5621, "step": 3808 }, { "epoch": 0.6608258154059681, "grad_norm": 0.7703692317008972, "learning_rate": 3.7435169044771016e-05, "loss": 0.58, "step": 3809 }, { "epoch": 0.6609993060374739, "grad_norm": 0.8619455695152283, "learning_rate": 3.743182997400475e-05, "loss": 0.593, "step": 3810 }, { "epoch": 0.6611727966689799, "grad_norm": 1.0943324565887451, "learning_rate": 3.74284888802543e-05, "loss": 0.4762, "step": 3811 }, { "epoch": 0.6613462873004857, "grad_norm": 1.0455622673034668, "learning_rate": 3.742514576390741e-05, "loss": 0.5531, "step": 3812 }, { "epoch": 0.6615197779319917, "grad_norm": 4.09170389175415, "learning_rate": 3.742180062535205e-05, "loss": 0.6555, "step": 3813 }, { "epoch": 0.6616932685634975, "grad_norm": 0.5392979383468628, "learning_rate": 3.741845346497643e-05, "loss": 0.7412, "step": 3814 }, { "epoch": 0.6618667591950035, "grad_norm": 0.5717268586158752, "learning_rate": 3.741510428316898e-05, "loss": 0.5106, "step": 3815 }, { "epoch": 0.6620402498265093, "grad_norm": 0.8426308035850525, "learning_rate": 3.741175308031839e-05, "loss": 0.5457, "step": 3816 }, { "epoch": 0.6622137404580153, "grad_norm": 1.7442169189453125, "learning_rate": 3.7408399856813565e-05, "loss": 0.6315, "step": 3817 }, { "epoch": 0.6623872310895211, "grad_norm": 0.6281899213790894, "learning_rate": 3.740504461304366e-05, "loss": 0.7258, "step": 3818 }, { "epoch": 0.6625607217210271, "grad_norm": 0.624485194683075, "learning_rate": 3.7401687349398044e-05, "loss": 0.5144, "step": 3819 }, { "epoch": 0.6627342123525329, "grad_norm": 0.7151901125907898, "learning_rate": 3.739832806626632e-05, "loss": 0.4746, "step": 3820 }, { "epoch": 0.6629077029840389, "grad_norm": 0.48618757724761963, "learning_rate": 3.7394966764038366e-05, "loss": 0.7042, "step": 3821 }, { "epoch": 0.6630811936155447, "grad_norm": 0.9319300055503845, "learning_rate": 3.7391603443104244e-05, "loss": 0.6274, "step": 3822 }, { "epoch": 0.6632546842470507, "grad_norm": 0.5655573010444641, "learning_rate": 3.738823810385428e-05, "loss": 0.629, "step": 3823 }, { "epoch": 0.6634281748785565, "grad_norm": 0.5381420850753784, "learning_rate": 3.738487074667902e-05, "loss": 0.6809, "step": 3824 }, { "epoch": 0.6636016655100625, "grad_norm": 0.5153032541275024, "learning_rate": 3.738150137196925e-05, "loss": 0.7103, "step": 3825 }, { "epoch": 0.6637751561415683, "grad_norm": 0.8185628056526184, "learning_rate": 3.7378129980116e-05, "loss": 0.6635, "step": 3826 }, { "epoch": 0.6639486467730743, "grad_norm": 0.705909013748169, "learning_rate": 3.737475657151051e-05, "loss": 0.6787, "step": 3827 }, { "epoch": 0.6641221374045801, "grad_norm": 0.6243531703948975, "learning_rate": 3.7371381146544276e-05, "loss": 0.6552, "step": 3828 }, { "epoch": 0.6642956280360861, "grad_norm": 0.7824167013168335, "learning_rate": 3.736800370560902e-05, "loss": 0.6104, "step": 3829 }, { "epoch": 0.664469118667592, "grad_norm": 1.479079008102417, "learning_rate": 3.736462424909669e-05, "loss": 0.5413, "step": 3830 }, { "epoch": 0.6646426092990978, "grad_norm": 1.029030442237854, "learning_rate": 3.736124277739949e-05, "loss": 0.6205, "step": 3831 }, { "epoch": 0.6648160999306038, "grad_norm": 0.8108117580413818, "learning_rate": 3.735785929090983e-05, "loss": 0.5165, "step": 3832 }, { "epoch": 0.6649895905621096, "grad_norm": 0.5535688996315002, "learning_rate": 3.7354473790020375e-05, "loss": 0.666, "step": 3833 }, { "epoch": 0.6651630811936156, "grad_norm": 0.6154541373252869, "learning_rate": 3.7351086275124023e-05, "loss": 0.6436, "step": 3834 }, { "epoch": 0.6653365718251214, "grad_norm": 0.6031832695007324, "learning_rate": 3.734769674661388e-05, "loss": 0.7224, "step": 3835 }, { "epoch": 0.6655100624566274, "grad_norm": 1.4116318225860596, "learning_rate": 3.7344305204883326e-05, "loss": 0.5701, "step": 3836 }, { "epoch": 0.6656835530881332, "grad_norm": 0.553779125213623, "learning_rate": 3.7340911650325934e-05, "loss": 0.6707, "step": 3837 }, { "epoch": 0.6658570437196392, "grad_norm": 2.773503303527832, "learning_rate": 3.7337516083335536e-05, "loss": 0.6321, "step": 3838 }, { "epoch": 0.666030534351145, "grad_norm": 1.2878464460372925, "learning_rate": 3.73341185043062e-05, "loss": 0.6041, "step": 3839 }, { "epoch": 0.666204024982651, "grad_norm": 0.9436421394348145, "learning_rate": 3.7330718913632215e-05, "loss": 0.6665, "step": 3840 }, { "epoch": 0.6663775156141568, "grad_norm": 0.6700578927993774, "learning_rate": 3.7327317311708095e-05, "loss": 0.6097, "step": 3841 }, { "epoch": 0.6665510062456628, "grad_norm": 0.658366858959198, "learning_rate": 3.732391369892862e-05, "loss": 0.5806, "step": 3842 }, { "epoch": 0.6667244968771686, "grad_norm": 0.688478410243988, "learning_rate": 3.732050807568878e-05, "loss": 0.589, "step": 3843 }, { "epoch": 0.6668979875086746, "grad_norm": 0.7162792086601257, "learning_rate": 3.731710044238378e-05, "loss": 0.5082, "step": 3844 }, { "epoch": 0.6670714781401804, "grad_norm": 0.749352216720581, "learning_rate": 3.731369079940911e-05, "loss": 0.495, "step": 3845 }, { "epoch": 0.6672449687716864, "grad_norm": 0.7571706771850586, "learning_rate": 3.731027914716044e-05, "loss": 0.5282, "step": 3846 }, { "epoch": 0.6674184594031922, "grad_norm": 1.0114901065826416, "learning_rate": 3.73068654860337e-05, "loss": 0.5433, "step": 3847 }, { "epoch": 0.6675919500346982, "grad_norm": 0.8244972229003906, "learning_rate": 3.7303449816425066e-05, "loss": 0.5514, "step": 3848 }, { "epoch": 0.667765440666204, "grad_norm": 0.9451310634613037, "learning_rate": 3.730003213873091e-05, "loss": 0.6599, "step": 3849 }, { "epoch": 0.6679389312977099, "grad_norm": 0.7476672530174255, "learning_rate": 3.729661245334787e-05, "loss": 0.5265, "step": 3850 }, { "epoch": 0.6681124219292158, "grad_norm": 1.3498326539993286, "learning_rate": 3.72931907606728e-05, "loss": 0.6234, "step": 3851 }, { "epoch": 0.6682859125607217, "grad_norm": 0.7990555763244629, "learning_rate": 3.728976706110278e-05, "loss": 0.6112, "step": 3852 }, { "epoch": 0.6684594031922276, "grad_norm": 0.702285647392273, "learning_rate": 3.7286341355035155e-05, "loss": 0.6128, "step": 3853 }, { "epoch": 0.6686328938237335, "grad_norm": 1.068147897720337, "learning_rate": 3.7282913642867484e-05, "loss": 0.5525, "step": 3854 }, { "epoch": 0.6688063844552394, "grad_norm": 0.9744014143943787, "learning_rate": 3.7279483924997534e-05, "loss": 0.6964, "step": 3855 }, { "epoch": 0.6689798750867453, "grad_norm": 0.6208134293556213, "learning_rate": 3.727605220182334e-05, "loss": 0.5535, "step": 3856 }, { "epoch": 0.6691533657182512, "grad_norm": 0.6973011493682861, "learning_rate": 3.727261847374316e-05, "loss": 0.5404, "step": 3857 }, { "epoch": 0.6693268563497571, "grad_norm": 0.6904463171958923, "learning_rate": 3.726918274115548e-05, "loss": 0.5416, "step": 3858 }, { "epoch": 0.669500346981263, "grad_norm": 0.59099280834198, "learning_rate": 3.726574500445902e-05, "loss": 0.6119, "step": 3859 }, { "epoch": 0.6696738376127689, "grad_norm": 0.644543468952179, "learning_rate": 3.726230526405273e-05, "loss": 0.6818, "step": 3860 }, { "epoch": 0.6698473282442748, "grad_norm": 0.7885875701904297, "learning_rate": 3.7258863520335804e-05, "loss": 0.5179, "step": 3861 }, { "epoch": 0.6700208188757807, "grad_norm": 0.8245733380317688, "learning_rate": 3.725541977370765e-05, "loss": 0.5963, "step": 3862 }, { "epoch": 0.6701943095072866, "grad_norm": 0.8566779494285583, "learning_rate": 3.725197402456793e-05, "loss": 0.493, "step": 3863 }, { "epoch": 0.6703678001387925, "grad_norm": 0.8461937308311462, "learning_rate": 3.7248526273316524e-05, "loss": 0.5311, "step": 3864 }, { "epoch": 0.6705412907702984, "grad_norm": 0.5525814890861511, "learning_rate": 3.724507652035354e-05, "loss": 0.5729, "step": 3865 }, { "epoch": 0.6707147814018043, "grad_norm": 0.7559514045715332, "learning_rate": 3.724162476607933e-05, "loss": 0.5732, "step": 3866 }, { "epoch": 0.6708882720333103, "grad_norm": 1.441557765007019, "learning_rate": 3.723817101089448e-05, "loss": 0.697, "step": 3867 }, { "epoch": 0.6710617626648161, "grad_norm": 0.8079794049263, "learning_rate": 3.72347152551998e-05, "loss": 0.594, "step": 3868 }, { "epoch": 0.6712352532963219, "grad_norm": 0.6825093030929565, "learning_rate": 3.723125749939633e-05, "loss": 0.6075, "step": 3869 }, { "epoch": 0.6714087439278279, "grad_norm": 1.3218646049499512, "learning_rate": 3.722779774388535e-05, "loss": 0.6593, "step": 3870 }, { "epoch": 0.6715822345593337, "grad_norm": 1.1525205373764038, "learning_rate": 3.722433598906836e-05, "loss": 0.6132, "step": 3871 }, { "epoch": 0.6717557251908397, "grad_norm": 1.3912229537963867, "learning_rate": 3.722087223534711e-05, "loss": 0.4697, "step": 3872 }, { "epoch": 0.6719292158223455, "grad_norm": 1.0367560386657715, "learning_rate": 3.7217406483123575e-05, "loss": 0.6156, "step": 3873 }, { "epoch": 0.6721027064538515, "grad_norm": 0.9154960513114929, "learning_rate": 3.721393873279996e-05, "loss": 0.5574, "step": 3874 }, { "epoch": 0.6722761970853574, "grad_norm": 0.7309114336967468, "learning_rate": 3.721046898477869e-05, "loss": 0.6003, "step": 3875 }, { "epoch": 0.6724496877168633, "grad_norm": 0.6787078976631165, "learning_rate": 3.720699723946244e-05, "loss": 0.6046, "step": 3876 }, { "epoch": 0.6726231783483692, "grad_norm": 0.6763220429420471, "learning_rate": 3.720352349725411e-05, "loss": 0.6923, "step": 3877 }, { "epoch": 0.6727966689798751, "grad_norm": 0.8732578158378601, "learning_rate": 3.720004775855684e-05, "loss": 0.6654, "step": 3878 }, { "epoch": 0.672970159611381, "grad_norm": 0.6277561783790588, "learning_rate": 3.719657002377397e-05, "loss": 0.5978, "step": 3879 }, { "epoch": 0.6731436502428869, "grad_norm": 1.2320107221603394, "learning_rate": 3.719309029330912e-05, "loss": 0.4968, "step": 3880 }, { "epoch": 0.6733171408743928, "grad_norm": 0.7353063225746155, "learning_rate": 3.718960856756611e-05, "loss": 0.58, "step": 3881 }, { "epoch": 0.6734906315058987, "grad_norm": 0.8003371357917786, "learning_rate": 3.7186124846948995e-05, "loss": 0.63, "step": 3882 }, { "epoch": 0.6736641221374046, "grad_norm": 0.6182820796966553, "learning_rate": 3.718263913186206e-05, "loss": 0.6464, "step": 3883 }, { "epoch": 0.6738376127689105, "grad_norm": 0.9211560487747192, "learning_rate": 3.7179151422709845e-05, "loss": 0.6189, "step": 3884 }, { "epoch": 0.6740111034004164, "grad_norm": 0.8812063336372375, "learning_rate": 3.717566171989708e-05, "loss": 0.7413, "step": 3885 }, { "epoch": 0.6741845940319223, "grad_norm": 0.9910178780555725, "learning_rate": 3.717217002382875e-05, "loss": 0.5789, "step": 3886 }, { "epoch": 0.6743580846634282, "grad_norm": 0.730434238910675, "learning_rate": 3.716867633491009e-05, "loss": 0.5742, "step": 3887 }, { "epoch": 0.6745315752949341, "grad_norm": 0.7126477956771851, "learning_rate": 3.716518065354654e-05, "loss": 0.6165, "step": 3888 }, { "epoch": 0.67470506592644, "grad_norm": 0.9688850045204163, "learning_rate": 3.7161682980143766e-05, "loss": 0.5385, "step": 3889 }, { "epoch": 0.6748785565579458, "grad_norm": 0.7977421283721924, "learning_rate": 3.715818331510769e-05, "loss": 0.6541, "step": 3890 }, { "epoch": 0.6750520471894518, "grad_norm": 0.782105028629303, "learning_rate": 3.715468165884444e-05, "loss": 0.5095, "step": 3891 }, { "epoch": 0.6752255378209576, "grad_norm": 1.3656920194625854, "learning_rate": 3.71511780117604e-05, "loss": 0.6842, "step": 3892 }, { "epoch": 0.6753990284524636, "grad_norm": 0.7901886701583862, "learning_rate": 3.7147672374262165e-05, "loss": 0.5476, "step": 3893 }, { "epoch": 0.6755725190839694, "grad_norm": 0.7951087355613708, "learning_rate": 3.714416474675657e-05, "loss": 0.4933, "step": 3894 }, { "epoch": 0.6757460097154754, "grad_norm": 0.8243618011474609, "learning_rate": 3.7140655129650676e-05, "loss": 0.4677, "step": 3895 }, { "epoch": 0.6759195003469812, "grad_norm": 0.8517226576805115, "learning_rate": 3.7137143523351787e-05, "loss": 0.639, "step": 3896 }, { "epoch": 0.6760929909784872, "grad_norm": 1.2318670749664307, "learning_rate": 3.713362992826742e-05, "loss": 0.6829, "step": 3897 }, { "epoch": 0.676266481609993, "grad_norm": 0.7173767685890198, "learning_rate": 3.713011434480534e-05, "loss": 0.6592, "step": 3898 }, { "epoch": 0.676439972241499, "grad_norm": 0.7821794152259827, "learning_rate": 3.712659677337352e-05, "loss": 0.6282, "step": 3899 }, { "epoch": 0.6766134628730048, "grad_norm": 0.7882617712020874, "learning_rate": 3.71230772143802e-05, "loss": 0.5424, "step": 3900 }, { "epoch": 0.6767869535045108, "grad_norm": 0.8059515953063965, "learning_rate": 3.711955566823381e-05, "loss": 0.5006, "step": 3901 }, { "epoch": 0.6769604441360166, "grad_norm": 0.7536359429359436, "learning_rate": 3.711603213534303e-05, "loss": 0.5542, "step": 3902 }, { "epoch": 0.6771339347675226, "grad_norm": 0.7734538912773132, "learning_rate": 3.7112506616116794e-05, "loss": 0.6211, "step": 3903 }, { "epoch": 0.6773074253990284, "grad_norm": 0.6895074248313904, "learning_rate": 3.710897911096421e-05, "loss": 0.5533, "step": 3904 }, { "epoch": 0.6774809160305344, "grad_norm": 1.085900902748108, "learning_rate": 3.710544962029467e-05, "loss": 0.4974, "step": 3905 }, { "epoch": 0.6776544066620402, "grad_norm": 0.7151042222976685, "learning_rate": 3.710191814451777e-05, "loss": 0.5927, "step": 3906 }, { "epoch": 0.6778278972935462, "grad_norm": 0.8727800846099854, "learning_rate": 3.709838468404334e-05, "loss": 0.6307, "step": 3907 }, { "epoch": 0.678001387925052, "grad_norm": 1.0264941453933716, "learning_rate": 3.7094849239281444e-05, "loss": 0.5277, "step": 3908 }, { "epoch": 0.6781748785565579, "grad_norm": 1.1419225931167603, "learning_rate": 3.709131181064238e-05, "loss": 0.4739, "step": 3909 }, { "epoch": 0.6783483691880638, "grad_norm": 0.7475083470344543, "learning_rate": 3.7087772398536656e-05, "loss": 0.5382, "step": 3910 }, { "epoch": 0.6785218598195697, "grad_norm": 0.8183844089508057, "learning_rate": 3.708423100337504e-05, "loss": 0.4733, "step": 3911 }, { "epoch": 0.6786953504510757, "grad_norm": 0.8566032648086548, "learning_rate": 3.70806876255685e-05, "loss": 0.5155, "step": 3912 }, { "epoch": 0.6788688410825815, "grad_norm": 0.6601811051368713, "learning_rate": 3.707714226552827e-05, "loss": 0.6111, "step": 3913 }, { "epoch": 0.6790423317140875, "grad_norm": 0.8909444212913513, "learning_rate": 3.7073594923665774e-05, "loss": 0.6725, "step": 3914 }, { "epoch": 0.6792158223455933, "grad_norm": 0.5450756549835205, "learning_rate": 3.70700456003927e-05, "loss": 0.5869, "step": 3915 }, { "epoch": 0.6793893129770993, "grad_norm": 0.8035801649093628, "learning_rate": 3.7066494296120935e-05, "loss": 0.512, "step": 3916 }, { "epoch": 0.6795628036086051, "grad_norm": 0.6066131591796875, "learning_rate": 3.7062941011262624e-05, "loss": 0.6162, "step": 3917 }, { "epoch": 0.6797362942401111, "grad_norm": 0.7695337533950806, "learning_rate": 3.705938574623012e-05, "loss": 0.5311, "step": 3918 }, { "epoch": 0.6799097848716169, "grad_norm": 0.7239415645599365, "learning_rate": 3.705582850143603e-05, "loss": 0.5833, "step": 3919 }, { "epoch": 0.6800832755031229, "grad_norm": 0.5847352147102356, "learning_rate": 3.705226927729317e-05, "loss": 0.5315, "step": 3920 }, { "epoch": 0.6802567661346287, "grad_norm": 1.0682518482208252, "learning_rate": 3.7048708074214586e-05, "loss": 0.6537, "step": 3921 }, { "epoch": 0.6804302567661347, "grad_norm": 0.690758466720581, "learning_rate": 3.704514489261357e-05, "loss": 0.637, "step": 3922 }, { "epoch": 0.6806037473976405, "grad_norm": 0.7355716824531555, "learning_rate": 3.7041579732903617e-05, "loss": 0.5459, "step": 3923 }, { "epoch": 0.6807772380291465, "grad_norm": 1.3008263111114502, "learning_rate": 3.703801259549848e-05, "loss": 0.647, "step": 3924 }, { "epoch": 0.6809507286606523, "grad_norm": 0.6505176424980164, "learning_rate": 3.7034443480812144e-05, "loss": 0.5614, "step": 3925 }, { "epoch": 0.6811242192921583, "grad_norm": 0.6328716278076172, "learning_rate": 3.7030872389258777e-05, "loss": 0.5817, "step": 3926 }, { "epoch": 0.6812977099236641, "grad_norm": 0.5805076956748962, "learning_rate": 3.7027299321252825e-05, "loss": 0.6594, "step": 3927 }, { "epoch": 0.6814712005551701, "grad_norm": 1.107621192932129, "learning_rate": 3.702372427720895e-05, "loss": 0.5508, "step": 3928 }, { "epoch": 0.6816446911866759, "grad_norm": 0.7718678712844849, "learning_rate": 3.702014725754204e-05, "loss": 0.5469, "step": 3929 }, { "epoch": 0.6818181818181818, "grad_norm": 0.722978949546814, "learning_rate": 3.701656826266721e-05, "loss": 0.5425, "step": 3930 }, { "epoch": 0.6819916724496877, "grad_norm": 0.7684771418571472, "learning_rate": 3.701298729299979e-05, "loss": 0.6063, "step": 3931 }, { "epoch": 0.6821651630811936, "grad_norm": 0.6785157322883606, "learning_rate": 3.7009404348955385e-05, "loss": 0.6631, "step": 3932 }, { "epoch": 0.6823386537126995, "grad_norm": 0.8750243782997131, "learning_rate": 3.700581943094978e-05, "loss": 0.5497, "step": 3933 }, { "epoch": 0.6825121443442054, "grad_norm": 0.6999068856239319, "learning_rate": 3.7002232539399014e-05, "loss": 0.7249, "step": 3934 }, { "epoch": 0.6826856349757113, "grad_norm": 0.7761996984481812, "learning_rate": 3.699864367471935e-05, "loss": 0.6375, "step": 3935 }, { "epoch": 0.6828591256072172, "grad_norm": 0.7740322351455688, "learning_rate": 3.6995052837327274e-05, "loss": 0.5499, "step": 3936 }, { "epoch": 0.6830326162387231, "grad_norm": 0.7279134392738342, "learning_rate": 3.699146002763953e-05, "loss": 0.7603, "step": 3937 }, { "epoch": 0.683206106870229, "grad_norm": 1.0422098636627197, "learning_rate": 3.6987865246073035e-05, "loss": 0.5532, "step": 3938 }, { "epoch": 0.6833795975017349, "grad_norm": 0.8805818557739258, "learning_rate": 3.6984268493044994e-05, "loss": 0.5599, "step": 3939 }, { "epoch": 0.6835530881332408, "grad_norm": 0.8069884181022644, "learning_rate": 3.6980669768972795e-05, "loss": 0.6333, "step": 3940 }, { "epoch": 0.6837265787647467, "grad_norm": 0.7256645560264587, "learning_rate": 3.697706907427409e-05, "loss": 0.7054, "step": 3941 }, { "epoch": 0.6839000693962526, "grad_norm": 0.6287798285484314, "learning_rate": 3.6973466409366735e-05, "loss": 0.7194, "step": 3942 }, { "epoch": 0.6840735600277585, "grad_norm": 1.039019227027893, "learning_rate": 3.696986177466882e-05, "loss": 0.5857, "step": 3943 }, { "epoch": 0.6842470506592644, "grad_norm": 1.0137076377868652, "learning_rate": 3.696625517059868e-05, "loss": 0.5593, "step": 3944 }, { "epoch": 0.6844205412907703, "grad_norm": 0.8316372036933899, "learning_rate": 3.696264659757485e-05, "loss": 0.605, "step": 3945 }, { "epoch": 0.6845940319222762, "grad_norm": 0.7433950901031494, "learning_rate": 3.695903605601612e-05, "loss": 0.6311, "step": 3946 }, { "epoch": 0.6847675225537821, "grad_norm": 0.7588471174240112, "learning_rate": 3.6955423546341494e-05, "loss": 0.5829, "step": 3947 }, { "epoch": 0.684941013185288, "grad_norm": 0.7198483943939209, "learning_rate": 3.695180906897021e-05, "loss": 0.5868, "step": 3948 }, { "epoch": 0.6851145038167938, "grad_norm": 0.7661027908325195, "learning_rate": 3.694819262432173e-05, "loss": 0.4917, "step": 3949 }, { "epoch": 0.6852879944482998, "grad_norm": 0.6911413669586182, "learning_rate": 3.694457421281575e-05, "loss": 0.6302, "step": 3950 }, { "epoch": 0.6854614850798056, "grad_norm": 0.6780367493629456, "learning_rate": 3.694095383487219e-05, "loss": 0.6379, "step": 3951 }, { "epoch": 0.6856349757113116, "grad_norm": 0.6340448260307312, "learning_rate": 3.693733149091119e-05, "loss": 0.6678, "step": 3952 }, { "epoch": 0.6858084663428174, "grad_norm": 0.6367129683494568, "learning_rate": 3.693370718135314e-05, "loss": 0.6665, "step": 3953 }, { "epoch": 0.6859819569743234, "grad_norm": 0.9811844825744629, "learning_rate": 3.693008090661864e-05, "loss": 0.5535, "step": 3954 }, { "epoch": 0.6861554476058292, "grad_norm": 1.003396987915039, "learning_rate": 3.692645266712852e-05, "loss": 0.7156, "step": 3955 }, { "epoch": 0.6863289382373352, "grad_norm": 1.047627568244934, "learning_rate": 3.6922822463303846e-05, "loss": 0.4995, "step": 3956 }, { "epoch": 0.686502428868841, "grad_norm": 0.689240038394928, "learning_rate": 3.691919029556591e-05, "loss": 0.5833, "step": 3957 }, { "epoch": 0.686675919500347, "grad_norm": 0.8044683933258057, "learning_rate": 3.691555616433622e-05, "loss": 0.6375, "step": 3958 }, { "epoch": 0.6868494101318529, "grad_norm": 0.7325993776321411, "learning_rate": 3.691192007003652e-05, "loss": 0.6768, "step": 3959 }, { "epoch": 0.6870229007633588, "grad_norm": 1.0790187120437622, "learning_rate": 3.69082820130888e-05, "loss": 0.6622, "step": 3960 }, { "epoch": 0.6871963913948647, "grad_norm": 0.856217622756958, "learning_rate": 3.690464199391525e-05, "loss": 0.5355, "step": 3961 }, { "epoch": 0.6873698820263706, "grad_norm": 0.6712712049484253, "learning_rate": 3.69010000129383e-05, "loss": 0.5717, "step": 3962 }, { "epoch": 0.6875433726578765, "grad_norm": 0.5234697461128235, "learning_rate": 3.6897356070580596e-05, "loss": 0.642, "step": 3963 }, { "epoch": 0.6877168632893824, "grad_norm": 0.7027401924133301, "learning_rate": 3.689371016726504e-05, "loss": 0.5718, "step": 3964 }, { "epoch": 0.6878903539208883, "grad_norm": 0.6622681617736816, "learning_rate": 3.6890062303414734e-05, "loss": 0.5706, "step": 3965 }, { "epoch": 0.6880638445523942, "grad_norm": 1.1115381717681885, "learning_rate": 3.6886412479453004e-05, "loss": 0.6348, "step": 3966 }, { "epoch": 0.6882373351839001, "grad_norm": 0.6777711510658264, "learning_rate": 3.6882760695803444e-05, "loss": 0.5442, "step": 3967 }, { "epoch": 0.6884108258154059, "grad_norm": 0.8778414130210876, "learning_rate": 3.6879106952889826e-05, "loss": 0.5205, "step": 3968 }, { "epoch": 0.6885843164469119, "grad_norm": 0.753093957901001, "learning_rate": 3.687545125113618e-05, "loss": 0.6105, "step": 3969 }, { "epoch": 0.6887578070784177, "grad_norm": 0.8035931587219238, "learning_rate": 3.687179359096675e-05, "loss": 0.568, "step": 3970 }, { "epoch": 0.6889312977099237, "grad_norm": 0.6357128620147705, "learning_rate": 3.686813397280602e-05, "loss": 0.548, "step": 3971 }, { "epoch": 0.6891047883414295, "grad_norm": 0.9069519639015198, "learning_rate": 3.686447239707868e-05, "loss": 0.6504, "step": 3972 }, { "epoch": 0.6892782789729355, "grad_norm": 0.6657199859619141, "learning_rate": 3.686080886420968e-05, "loss": 0.5139, "step": 3973 }, { "epoch": 0.6894517696044413, "grad_norm": 0.7213208675384521, "learning_rate": 3.685714337462415e-05, "loss": 0.5786, "step": 3974 }, { "epoch": 0.6896252602359473, "grad_norm": 0.6772657036781311, "learning_rate": 3.685347592874749e-05, "loss": 0.6503, "step": 3975 }, { "epoch": 0.6897987508674531, "grad_norm": 0.7594093084335327, "learning_rate": 3.6849806527005316e-05, "loss": 0.708, "step": 3976 }, { "epoch": 0.6899722414989591, "grad_norm": 1.1025997400283813, "learning_rate": 3.684613516982346e-05, "loss": 0.5925, "step": 3977 }, { "epoch": 0.6901457321304649, "grad_norm": 0.7050821781158447, "learning_rate": 3.6842461857627986e-05, "loss": 0.525, "step": 3978 }, { "epoch": 0.6903192227619709, "grad_norm": 0.8308770060539246, "learning_rate": 3.683878659084519e-05, "loss": 0.5809, "step": 3979 }, { "epoch": 0.6904927133934767, "grad_norm": 0.612371563911438, "learning_rate": 3.6835109369901586e-05, "loss": 0.5927, "step": 3980 }, { "epoch": 0.6906662040249827, "grad_norm": 0.6579895615577698, "learning_rate": 3.6831430195223927e-05, "loss": 0.6783, "step": 3981 }, { "epoch": 0.6908396946564885, "grad_norm": 0.8142983317375183, "learning_rate": 3.682774906723918e-05, "loss": 0.4998, "step": 3982 }, { "epoch": 0.6910131852879945, "grad_norm": 0.7529022097587585, "learning_rate": 3.6824065986374546e-05, "loss": 0.6178, "step": 3983 }, { "epoch": 0.6911866759195003, "grad_norm": 1.1721090078353882, "learning_rate": 3.6820380953057446e-05, "loss": 0.536, "step": 3984 }, { "epoch": 0.6913601665510063, "grad_norm": 0.7267649173736572, "learning_rate": 3.681669396771554e-05, "loss": 0.5693, "step": 3985 }, { "epoch": 0.6915336571825121, "grad_norm": 0.8769500851631165, "learning_rate": 3.681300503077671e-05, "loss": 0.5223, "step": 3986 }, { "epoch": 0.6917071478140181, "grad_norm": 0.7207356691360474, "learning_rate": 3.6809314142669044e-05, "loss": 0.6375, "step": 3987 }, { "epoch": 0.6918806384455239, "grad_norm": 0.6993891000747681, "learning_rate": 3.680562130382089e-05, "loss": 0.6946, "step": 3988 }, { "epoch": 0.6920541290770298, "grad_norm": 0.5964895486831665, "learning_rate": 3.68019265146608e-05, "loss": 0.6268, "step": 3989 }, { "epoch": 0.6922276197085357, "grad_norm": 0.887861967086792, "learning_rate": 3.679822977561756e-05, "loss": 0.6449, "step": 3990 }, { "epoch": 0.6924011103400416, "grad_norm": 0.4853559732437134, "learning_rate": 3.679453108712018e-05, "loss": 0.6294, "step": 3991 }, { "epoch": 0.6925746009715475, "grad_norm": 1.1081832647323608, "learning_rate": 3.67908304495979e-05, "loss": 0.5466, "step": 3992 }, { "epoch": 0.6927480916030534, "grad_norm": 0.8581452369689941, "learning_rate": 3.678712786348018e-05, "loss": 0.5555, "step": 3993 }, { "epoch": 0.6929215822345594, "grad_norm": 0.7335177659988403, "learning_rate": 3.678342332919671e-05, "loss": 0.6768, "step": 3994 }, { "epoch": 0.6930950728660652, "grad_norm": 0.6366673707962036, "learning_rate": 3.677971684717741e-05, "loss": 0.7219, "step": 3995 }, { "epoch": 0.6932685634975712, "grad_norm": 0.6510780453681946, "learning_rate": 3.6776008417852415e-05, "loss": 0.6423, "step": 3996 }, { "epoch": 0.693442054129077, "grad_norm": 0.7125526070594788, "learning_rate": 3.6772298041652095e-05, "loss": 0.6307, "step": 3997 }, { "epoch": 0.693615544760583, "grad_norm": 0.920978844165802, "learning_rate": 3.676858571900704e-05, "loss": 0.6475, "step": 3998 }, { "epoch": 0.6937890353920888, "grad_norm": 0.6296781897544861, "learning_rate": 3.676487145034808e-05, "loss": 0.621, "step": 3999 }, { "epoch": 0.6939625260235948, "grad_norm": 0.8165046572685242, "learning_rate": 3.6761155236106246e-05, "loss": 0.6057, "step": 4000 }, { "epoch": 0.6941360166551006, "grad_norm": 0.5971564650535583, "learning_rate": 3.675743707671282e-05, "loss": 0.6636, "step": 4001 }, { "epoch": 0.6943095072866066, "grad_norm": 0.5804475545883179, "learning_rate": 3.67537169725993e-05, "loss": 0.6046, "step": 4002 }, { "epoch": 0.6944829979181124, "grad_norm": 0.8879976868629456, "learning_rate": 3.6749994924197394e-05, "loss": 0.5135, "step": 4003 }, { "epoch": 0.6946564885496184, "grad_norm": 0.6747565865516663, "learning_rate": 3.6746270931939064e-05, "loss": 0.5944, "step": 4004 }, { "epoch": 0.6948299791811242, "grad_norm": 0.8641384243965149, "learning_rate": 3.674254499625648e-05, "loss": 0.54, "step": 4005 }, { "epoch": 0.6950034698126302, "grad_norm": 0.7296652793884277, "learning_rate": 3.6738817117582045e-05, "loss": 0.4667, "step": 4006 }, { "epoch": 0.695176960444136, "grad_norm": 0.9644712209701538, "learning_rate": 3.6735087296348366e-05, "loss": 0.6002, "step": 4007 }, { "epoch": 0.6953504510756419, "grad_norm": 1.2004969120025635, "learning_rate": 3.6731355532988315e-05, "loss": 0.537, "step": 4008 }, { "epoch": 0.6955239417071478, "grad_norm": 0.5749496817588806, "learning_rate": 3.672762182793496e-05, "loss": 0.6667, "step": 4009 }, { "epoch": 0.6956974323386537, "grad_norm": 0.6510900855064392, "learning_rate": 3.6723886181621595e-05, "loss": 0.6166, "step": 4010 }, { "epoch": 0.6958709229701596, "grad_norm": 0.723423182964325, "learning_rate": 3.672014859448175e-05, "loss": 0.6903, "step": 4011 }, { "epoch": 0.6960444136016655, "grad_norm": 1.0655949115753174, "learning_rate": 3.6716409066949184e-05, "loss": 0.5122, "step": 4012 }, { "epoch": 0.6962179042331714, "grad_norm": 0.749419629573822, "learning_rate": 3.671266759945786e-05, "loss": 0.6561, "step": 4013 }, { "epoch": 0.6963913948646773, "grad_norm": 0.7480120658874512, "learning_rate": 3.670892419244199e-05, "loss": 0.5352, "step": 4014 }, { "epoch": 0.6965648854961832, "grad_norm": 0.6904357671737671, "learning_rate": 3.6705178846336004e-05, "loss": 0.606, "step": 4015 }, { "epoch": 0.6967383761276891, "grad_norm": 0.6453255414962769, "learning_rate": 3.670143156157454e-05, "loss": 0.6123, "step": 4016 }, { "epoch": 0.696911866759195, "grad_norm": 0.8253284096717834, "learning_rate": 3.669768233859249e-05, "loss": 0.5403, "step": 4017 }, { "epoch": 0.6970853573907009, "grad_norm": 0.5644474029541016, "learning_rate": 3.6693931177824934e-05, "loss": 0.5294, "step": 4018 }, { "epoch": 0.6972588480222068, "grad_norm": 0.6671839952468872, "learning_rate": 3.6690178079707226e-05, "loss": 0.5482, "step": 4019 }, { "epoch": 0.6974323386537127, "grad_norm": 0.7043505907058716, "learning_rate": 3.66864230446749e-05, "loss": 0.5681, "step": 4020 }, { "epoch": 0.6976058292852186, "grad_norm": 0.6432799100875854, "learning_rate": 3.668266607316373e-05, "loss": 0.6671, "step": 4021 }, { "epoch": 0.6977793199167245, "grad_norm": 0.7257577180862427, "learning_rate": 3.667890716560973e-05, "loss": 0.6791, "step": 4022 }, { "epoch": 0.6979528105482304, "grad_norm": 0.685150682926178, "learning_rate": 3.667514632244912e-05, "loss": 0.6329, "step": 4023 }, { "epoch": 0.6981263011797363, "grad_norm": 0.584290087223053, "learning_rate": 3.667138354411834e-05, "loss": 0.6116, "step": 4024 }, { "epoch": 0.6982997918112422, "grad_norm": 1.7903361320495605, "learning_rate": 3.666761883105408e-05, "loss": 0.7131, "step": 4025 }, { "epoch": 0.6984732824427481, "grad_norm": 1.1780725717544556, "learning_rate": 3.666385218369324e-05, "loss": 0.575, "step": 4026 }, { "epoch": 0.6986467730742539, "grad_norm": 0.793237030506134, "learning_rate": 3.6660083602472924e-05, "loss": 0.6355, "step": 4027 }, { "epoch": 0.6988202637057599, "grad_norm": 0.8836570978164673, "learning_rate": 3.6656313087830505e-05, "loss": 0.5863, "step": 4028 }, { "epoch": 0.6989937543372657, "grad_norm": 0.68492192029953, "learning_rate": 3.665254064020353e-05, "loss": 0.4689, "step": 4029 }, { "epoch": 0.6991672449687717, "grad_norm": 0.7318260669708252, "learning_rate": 3.664876626002982e-05, "loss": 0.717, "step": 4030 }, { "epoch": 0.6993407356002775, "grad_norm": 0.6521219611167908, "learning_rate": 3.664498994774738e-05, "loss": 0.6057, "step": 4031 }, { "epoch": 0.6995142262317835, "grad_norm": 0.8591718673706055, "learning_rate": 3.6641211703794466e-05, "loss": 0.6348, "step": 4032 }, { "epoch": 0.6996877168632893, "grad_norm": 0.47968804836273193, "learning_rate": 3.663743152860954e-05, "loss": 0.728, "step": 4033 }, { "epoch": 0.6998612074947953, "grad_norm": 0.9473422765731812, "learning_rate": 3.66336494226313e-05, "loss": 0.6333, "step": 4034 }, { "epoch": 0.7000346981263011, "grad_norm": 0.6778231263160706, "learning_rate": 3.662986538629866e-05, "loss": 0.5881, "step": 4035 }, { "epoch": 0.7002081887578071, "grad_norm": 1.6221238374710083, "learning_rate": 3.662607942005077e-05, "loss": 0.6375, "step": 4036 }, { "epoch": 0.700381679389313, "grad_norm": 0.6425724029541016, "learning_rate": 3.6622291524326986e-05, "loss": 0.5854, "step": 4037 }, { "epoch": 0.7005551700208189, "grad_norm": 0.6844794750213623, "learning_rate": 3.66185016995669e-05, "loss": 0.5521, "step": 4038 }, { "epoch": 0.7007286606523248, "grad_norm": 0.902301013469696, "learning_rate": 3.661470994621033e-05, "loss": 0.5486, "step": 4039 }, { "epoch": 0.7009021512838307, "grad_norm": 0.6516269445419312, "learning_rate": 3.661091626469731e-05, "loss": 0.582, "step": 4040 }, { "epoch": 0.7010756419153366, "grad_norm": 0.7208936214447021, "learning_rate": 3.66071206554681e-05, "loss": 0.7527, "step": 4041 }, { "epoch": 0.7012491325468425, "grad_norm": 0.48527052998542786, "learning_rate": 3.6603323118963194e-05, "loss": 0.6077, "step": 4042 }, { "epoch": 0.7014226231783484, "grad_norm": 0.9502211809158325, "learning_rate": 3.6599523655623285e-05, "loss": 0.6543, "step": 4043 }, { "epoch": 0.7015961138098543, "grad_norm": 0.622065544128418, "learning_rate": 3.659572226588932e-05, "loss": 0.5192, "step": 4044 }, { "epoch": 0.7017696044413602, "grad_norm": 0.6672574281692505, "learning_rate": 3.659191895020244e-05, "loss": 0.5074, "step": 4045 }, { "epoch": 0.7019430950728661, "grad_norm": 0.6417238712310791, "learning_rate": 3.658811370900404e-05, "loss": 0.6324, "step": 4046 }, { "epoch": 0.702116585704372, "grad_norm": 0.7055219411849976, "learning_rate": 3.6584306542735715e-05, "loss": 0.6863, "step": 4047 }, { "epoch": 0.7022900763358778, "grad_norm": 0.7298030853271484, "learning_rate": 3.658049745183928e-05, "loss": 0.5736, "step": 4048 }, { "epoch": 0.7024635669673838, "grad_norm": 0.6878877282142639, "learning_rate": 3.657668643675681e-05, "loss": 0.5702, "step": 4049 }, { "epoch": 0.7026370575988896, "grad_norm": 0.5812045335769653, "learning_rate": 3.657287349793056e-05, "loss": 0.6903, "step": 4050 }, { "epoch": 0.7028105482303956, "grad_norm": 0.7609200477600098, "learning_rate": 3.656905863580302e-05, "loss": 0.5286, "step": 4051 }, { "epoch": 0.7029840388619014, "grad_norm": 0.7479817271232605, "learning_rate": 3.656524185081693e-05, "loss": 0.5653, "step": 4052 }, { "epoch": 0.7031575294934074, "grad_norm": 0.6225118041038513, "learning_rate": 3.6561423143415216e-05, "loss": 0.6211, "step": 4053 }, { "epoch": 0.7033310201249132, "grad_norm": 0.6613042950630188, "learning_rate": 3.655760251404105e-05, "loss": 0.6046, "step": 4054 }, { "epoch": 0.7035045107564192, "grad_norm": 0.8066621422767639, "learning_rate": 3.655377996313782e-05, "loss": 0.5394, "step": 4055 }, { "epoch": 0.703678001387925, "grad_norm": 0.7806961536407471, "learning_rate": 3.654995549114913e-05, "loss": 0.5581, "step": 4056 }, { "epoch": 0.703851492019431, "grad_norm": 0.803566038608551, "learning_rate": 3.654612909851882e-05, "loss": 0.4973, "step": 4057 }, { "epoch": 0.7040249826509368, "grad_norm": 0.998016893863678, "learning_rate": 3.6542300785690954e-05, "loss": 0.5757, "step": 4058 }, { "epoch": 0.7041984732824428, "grad_norm": 0.49902257323265076, "learning_rate": 3.653847055310981e-05, "loss": 0.6404, "step": 4059 }, { "epoch": 0.7043719639139486, "grad_norm": 1.0214604139328003, "learning_rate": 3.6534638401219874e-05, "loss": 0.5183, "step": 4060 }, { "epoch": 0.7045454545454546, "grad_norm": 1.037064552307129, "learning_rate": 3.653080433046589e-05, "loss": 0.4678, "step": 4061 }, { "epoch": 0.7047189451769604, "grad_norm": 0.6829334497451782, "learning_rate": 3.652696834129281e-05, "loss": 0.5582, "step": 4062 }, { "epoch": 0.7048924358084664, "grad_norm": 1.581860065460205, "learning_rate": 3.652313043414579e-05, "loss": 0.5304, "step": 4063 }, { "epoch": 0.7050659264399722, "grad_norm": 0.5808870792388916, "learning_rate": 3.6519290609470225e-05, "loss": 0.6938, "step": 4064 }, { "epoch": 0.7052394170714782, "grad_norm": 0.6855258941650391, "learning_rate": 3.651544886771174e-05, "loss": 0.5574, "step": 4065 }, { "epoch": 0.705412907702984, "grad_norm": 0.970683217048645, "learning_rate": 3.651160520931617e-05, "loss": 0.5873, "step": 4066 }, { "epoch": 0.7055863983344899, "grad_norm": 0.6593789458274841, "learning_rate": 3.650775963472958e-05, "loss": 0.5842, "step": 4067 }, { "epoch": 0.7057598889659958, "grad_norm": 0.8516277074813843, "learning_rate": 3.650391214439825e-05, "loss": 0.5527, "step": 4068 }, { "epoch": 0.7059333795975017, "grad_norm": 0.7127820253372192, "learning_rate": 3.6500062738768675e-05, "loss": 0.598, "step": 4069 }, { "epoch": 0.7061068702290076, "grad_norm": 0.6315264105796814, "learning_rate": 3.64962114182876e-05, "loss": 0.563, "step": 4070 }, { "epoch": 0.7062803608605135, "grad_norm": 0.6952970623970032, "learning_rate": 3.649235818340197e-05, "loss": 0.5381, "step": 4071 }, { "epoch": 0.7064538514920194, "grad_norm": 0.9195808172225952, "learning_rate": 3.648850303455895e-05, "loss": 0.5012, "step": 4072 }, { "epoch": 0.7066273421235253, "grad_norm": 0.7608962655067444, "learning_rate": 3.648464597220594e-05, "loss": 0.6113, "step": 4073 }, { "epoch": 0.7068008327550312, "grad_norm": 0.8608875274658203, "learning_rate": 3.6480786996790554e-05, "loss": 0.4757, "step": 4074 }, { "epoch": 0.7069743233865371, "grad_norm": 0.5480713844299316, "learning_rate": 3.647692610876064e-05, "loss": 0.5889, "step": 4075 }, { "epoch": 0.707147814018043, "grad_norm": 0.7620058655738831, "learning_rate": 3.647306330856425e-05, "loss": 0.5334, "step": 4076 }, { "epoch": 0.7073213046495489, "grad_norm": 0.7351611256599426, "learning_rate": 3.6469198596649663e-05, "loss": 0.6351, "step": 4077 }, { "epoch": 0.7074947952810549, "grad_norm": 0.877390444278717, "learning_rate": 3.646533197346539e-05, "loss": 0.6279, "step": 4078 }, { "epoch": 0.7076682859125607, "grad_norm": 0.5145160555839539, "learning_rate": 3.6461463439460156e-05, "loss": 0.5768, "step": 4079 }, { "epoch": 0.7078417765440667, "grad_norm": 0.6730548739433289, "learning_rate": 3.6457592995082915e-05, "loss": 0.6096, "step": 4080 }, { "epoch": 0.7080152671755725, "grad_norm": 0.6766643524169922, "learning_rate": 3.645372064078282e-05, "loss": 0.5292, "step": 4081 }, { "epoch": 0.7081887578070785, "grad_norm": 0.7172795534133911, "learning_rate": 3.644984637700928e-05, "loss": 0.6473, "step": 4082 }, { "epoch": 0.7083622484385843, "grad_norm": 0.6874622702598572, "learning_rate": 3.644597020421189e-05, "loss": 0.5645, "step": 4083 }, { "epoch": 0.7085357390700903, "grad_norm": 0.8783832788467407, "learning_rate": 3.6442092122840505e-05, "loss": 0.5518, "step": 4084 }, { "epoch": 0.7087092297015961, "grad_norm": 0.7163074016571045, "learning_rate": 3.6438212133345164e-05, "loss": 0.5182, "step": 4085 }, { "epoch": 0.7088827203331021, "grad_norm": 0.7176882028579712, "learning_rate": 3.643433023617616e-05, "loss": 0.6757, "step": 4086 }, { "epoch": 0.7090562109646079, "grad_norm": 0.6373384594917297, "learning_rate": 3.643044643178397e-05, "loss": 0.598, "step": 4087 }, { "epoch": 0.7092297015961138, "grad_norm": 0.6766403317451477, "learning_rate": 3.642656072061933e-05, "loss": 0.6661, "step": 4088 }, { "epoch": 0.7094031922276197, "grad_norm": 0.9148829579353333, "learning_rate": 3.6422673103133186e-05, "loss": 0.5396, "step": 4089 }, { "epoch": 0.7095766828591256, "grad_norm": 0.7896472811698914, "learning_rate": 3.641878357977668e-05, "loss": 0.5364, "step": 4090 }, { "epoch": 0.7097501734906315, "grad_norm": 0.7675262689590454, "learning_rate": 3.641489215100122e-05, "loss": 0.5026, "step": 4091 }, { "epoch": 0.7099236641221374, "grad_norm": 0.7292678952217102, "learning_rate": 3.641099881725839e-05, "loss": 0.6086, "step": 4092 }, { "epoch": 0.7100971547536433, "grad_norm": 0.9237269759178162, "learning_rate": 3.6407103579000024e-05, "loss": 0.5127, "step": 4093 }, { "epoch": 0.7102706453851492, "grad_norm": 0.7068149447441101, "learning_rate": 3.6403206436678173e-05, "loss": 0.4916, "step": 4094 }, { "epoch": 0.7104441360166551, "grad_norm": 1.187597393989563, "learning_rate": 3.63993073907451e-05, "loss": 0.4594, "step": 4095 }, { "epoch": 0.710617626648161, "grad_norm": 1.7699358463287354, "learning_rate": 3.63954064416533e-05, "loss": 0.5876, "step": 4096 }, { "epoch": 0.7107911172796669, "grad_norm": 0.8028684258460999, "learning_rate": 3.639150358985547e-05, "loss": 0.5413, "step": 4097 }, { "epoch": 0.7109646079111728, "grad_norm": 0.7019853591918945, "learning_rate": 3.6387598835804555e-05, "loss": 0.6101, "step": 4098 }, { "epoch": 0.7111380985426787, "grad_norm": 0.8166154623031616, "learning_rate": 3.63836921799537e-05, "loss": 0.588, "step": 4099 }, { "epoch": 0.7113115891741846, "grad_norm": 1.1425904035568237, "learning_rate": 3.6379783622756275e-05, "loss": 0.611, "step": 4100 }, { "epoch": 0.7114850798056905, "grad_norm": 0.6073325276374817, "learning_rate": 3.637587316466587e-05, "loss": 0.5802, "step": 4101 }, { "epoch": 0.7116585704371964, "grad_norm": 0.864340603351593, "learning_rate": 3.6371960806136313e-05, "loss": 0.5504, "step": 4102 }, { "epoch": 0.7118320610687023, "grad_norm": 0.8540159463882446, "learning_rate": 3.636804654762162e-05, "loss": 0.5891, "step": 4103 }, { "epoch": 0.7120055517002082, "grad_norm": 0.9849295616149902, "learning_rate": 3.636413038957605e-05, "loss": 0.5096, "step": 4104 }, { "epoch": 0.7121790423317141, "grad_norm": 0.8594913482666016, "learning_rate": 3.6360212332454087e-05, "loss": 0.5494, "step": 4105 }, { "epoch": 0.71235253296322, "grad_norm": 0.7813236713409424, "learning_rate": 3.635629237671041e-05, "loss": 0.5718, "step": 4106 }, { "epoch": 0.7125260235947258, "grad_norm": 0.8800257444381714, "learning_rate": 3.6352370522799956e-05, "loss": 0.5593, "step": 4107 }, { "epoch": 0.7126995142262318, "grad_norm": 0.680556058883667, "learning_rate": 3.634844677117784e-05, "loss": 0.5435, "step": 4108 }, { "epoch": 0.7128730048577376, "grad_norm": 0.6794553399085999, "learning_rate": 3.634452112229942e-05, "loss": 0.5635, "step": 4109 }, { "epoch": 0.7130464954892436, "grad_norm": 0.762883722782135, "learning_rate": 3.63405935766203e-05, "loss": 0.547, "step": 4110 }, { "epoch": 0.7132199861207494, "grad_norm": 0.7484919428825378, "learning_rate": 3.633666413459624e-05, "loss": 0.5802, "step": 4111 }, { "epoch": 0.7133934767522554, "grad_norm": 0.7314563393592834, "learning_rate": 3.633273279668327e-05, "loss": 0.5463, "step": 4112 }, { "epoch": 0.7135669673837612, "grad_norm": 0.7025768160820007, "learning_rate": 3.632879956333763e-05, "loss": 0.5608, "step": 4113 }, { "epoch": 0.7137404580152672, "grad_norm": 0.6107587218284607, "learning_rate": 3.632486443501578e-05, "loss": 0.5657, "step": 4114 }, { "epoch": 0.713913948646773, "grad_norm": 0.6207438111305237, "learning_rate": 3.632092741217438e-05, "loss": 0.6782, "step": 4115 }, { "epoch": 0.714087439278279, "grad_norm": 0.7434753775596619, "learning_rate": 3.631698849527034e-05, "loss": 0.6816, "step": 4116 }, { "epoch": 0.7142609299097848, "grad_norm": 0.7651646733283997, "learning_rate": 3.631304768476078e-05, "loss": 0.5726, "step": 4117 }, { "epoch": 0.7144344205412908, "grad_norm": 0.6007829904556274, "learning_rate": 3.630910498110302e-05, "loss": 0.5927, "step": 4118 }, { "epoch": 0.7146079111727967, "grad_norm": 0.6312075853347778, "learning_rate": 3.630516038475462e-05, "loss": 0.6816, "step": 4119 }, { "epoch": 0.7147814018043026, "grad_norm": 0.9027377963066101, "learning_rate": 3.630121389617336e-05, "loss": 0.6041, "step": 4120 }, { "epoch": 0.7149548924358085, "grad_norm": 0.628914475440979, "learning_rate": 3.6297265515817234e-05, "loss": 0.6572, "step": 4121 }, { "epoch": 0.7151283830673144, "grad_norm": 0.7190338969230652, "learning_rate": 3.629331524414446e-05, "loss": 0.5398, "step": 4122 }, { "epoch": 0.7153018736988203, "grad_norm": 0.9281529784202576, "learning_rate": 3.628936308161346e-05, "loss": 0.6353, "step": 4123 }, { "epoch": 0.7154753643303262, "grad_norm": 0.8841820955276489, "learning_rate": 3.6285409028682895e-05, "loss": 0.6016, "step": 4124 }, { "epoch": 0.7156488549618321, "grad_norm": 0.7699342370033264, "learning_rate": 3.6281453085811634e-05, "loss": 0.5547, "step": 4125 }, { "epoch": 0.7158223455933379, "grad_norm": 0.8891981244087219, "learning_rate": 3.627749525345878e-05, "loss": 0.5593, "step": 4126 }, { "epoch": 0.7159958362248439, "grad_norm": 0.8777641654014587, "learning_rate": 3.627353553208362e-05, "loss": 0.5215, "step": 4127 }, { "epoch": 0.7161693268563497, "grad_norm": 0.6126023530960083, "learning_rate": 3.626957392214571e-05, "loss": 0.5089, "step": 4128 }, { "epoch": 0.7163428174878557, "grad_norm": 1.155997395515442, "learning_rate": 3.626561042410479e-05, "loss": 0.5682, "step": 4129 }, { "epoch": 0.7165163081193615, "grad_norm": 0.8990711569786072, "learning_rate": 3.626164503842082e-05, "loss": 0.4742, "step": 4130 }, { "epoch": 0.7166897987508675, "grad_norm": 0.8453020453453064, "learning_rate": 3.6257677765553996e-05, "loss": 0.5734, "step": 4131 }, { "epoch": 0.7168632893823733, "grad_norm": 0.8402464389801025, "learning_rate": 3.6253708605964724e-05, "loss": 0.6011, "step": 4132 }, { "epoch": 0.7170367800138793, "grad_norm": 0.7821096777915955, "learning_rate": 3.624973756011363e-05, "loss": 0.5906, "step": 4133 }, { "epoch": 0.7172102706453851, "grad_norm": 0.5626051425933838, "learning_rate": 3.6245764628461556e-05, "loss": 0.6475, "step": 4134 }, { "epoch": 0.7173837612768911, "grad_norm": 0.6475934982299805, "learning_rate": 3.624178981146956e-05, "loss": 0.5848, "step": 4135 }, { "epoch": 0.7175572519083969, "grad_norm": 0.88841313123703, "learning_rate": 3.6237813109598944e-05, "loss": 0.7047, "step": 4136 }, { "epoch": 0.7177307425399029, "grad_norm": 0.6983802914619446, "learning_rate": 3.623383452331119e-05, "loss": 0.5806, "step": 4137 }, { "epoch": 0.7179042331714087, "grad_norm": 0.7312623858451843, "learning_rate": 3.622985405306803e-05, "loss": 0.5026, "step": 4138 }, { "epoch": 0.7180777238029147, "grad_norm": 1.1107406616210938, "learning_rate": 3.622587169933138e-05, "loss": 0.5413, "step": 4139 }, { "epoch": 0.7182512144344205, "grad_norm": 0.672258198261261, "learning_rate": 3.622188746256343e-05, "loss": 0.5979, "step": 4140 }, { "epoch": 0.7184247050659265, "grad_norm": 0.5452670454978943, "learning_rate": 3.6217901343226526e-05, "loss": 0.6443, "step": 4141 }, { "epoch": 0.7185981956974323, "grad_norm": 0.6215023398399353, "learning_rate": 3.621391334178328e-05, "loss": 0.6107, "step": 4142 }, { "epoch": 0.7187716863289383, "grad_norm": 0.8545822501182556, "learning_rate": 3.620992345869649e-05, "loss": 0.6942, "step": 4143 }, { "epoch": 0.7189451769604441, "grad_norm": 1.0239531993865967, "learning_rate": 3.62059316944292e-05, "loss": 0.6053, "step": 4144 }, { "epoch": 0.7191186675919501, "grad_norm": 0.798069953918457, "learning_rate": 3.6201938049444654e-05, "loss": 0.5001, "step": 4145 }, { "epoch": 0.7192921582234559, "grad_norm": 0.8938401937484741, "learning_rate": 3.619794252420632e-05, "loss": 0.5348, "step": 4146 }, { "epoch": 0.7194656488549618, "grad_norm": 0.8637401461601257, "learning_rate": 3.619394511917788e-05, "loss": 0.6536, "step": 4147 }, { "epoch": 0.7196391394864677, "grad_norm": 0.9881258010864258, "learning_rate": 3.618994583482323e-05, "loss": 0.5515, "step": 4148 }, { "epoch": 0.7198126301179736, "grad_norm": 0.9566797018051147, "learning_rate": 3.618594467160651e-05, "loss": 0.6193, "step": 4149 }, { "epoch": 0.7199861207494795, "grad_norm": 1.0019598007202148, "learning_rate": 3.618194162999205e-05, "loss": 0.7379, "step": 4150 }, { "epoch": 0.7201596113809854, "grad_norm": 1.4422179460525513, "learning_rate": 3.617793671044441e-05, "loss": 0.5209, "step": 4151 }, { "epoch": 0.7203331020124913, "grad_norm": 0.6101250648498535, "learning_rate": 3.617392991342836e-05, "loss": 0.5112, "step": 4152 }, { "epoch": 0.7205065926439972, "grad_norm": 0.7908729910850525, "learning_rate": 3.6169921239408894e-05, "loss": 0.5563, "step": 4153 }, { "epoch": 0.7206800832755031, "grad_norm": 0.8282957673072815, "learning_rate": 3.616591068885123e-05, "loss": 0.5977, "step": 4154 }, { "epoch": 0.720853573907009, "grad_norm": 0.7281785011291504, "learning_rate": 3.61618982622208e-05, "loss": 0.5371, "step": 4155 }, { "epoch": 0.721027064538515, "grad_norm": 0.5883955955505371, "learning_rate": 3.6157883959983234e-05, "loss": 0.6997, "step": 4156 }, { "epoch": 0.7212005551700208, "grad_norm": 0.8235392570495605, "learning_rate": 3.615386778260441e-05, "loss": 0.582, "step": 4157 }, { "epoch": 0.7213740458015268, "grad_norm": 0.762932538986206, "learning_rate": 3.614984973055041e-05, "loss": 0.5698, "step": 4158 }, { "epoch": 0.7215475364330326, "grad_norm": 0.9984418153762817, "learning_rate": 3.6145829804287526e-05, "loss": 0.5438, "step": 4159 }, { "epoch": 0.7217210270645386, "grad_norm": 0.5380730032920837, "learning_rate": 3.614180800428228e-05, "loss": 0.6814, "step": 4160 }, { "epoch": 0.7218945176960444, "grad_norm": 0.65350341796875, "learning_rate": 3.613778433100141e-05, "loss": 0.6564, "step": 4161 }, { "epoch": 0.7220680083275504, "grad_norm": 0.7141516804695129, "learning_rate": 3.6133758784911864e-05, "loss": 0.5327, "step": 4162 }, { "epoch": 0.7222414989590562, "grad_norm": 0.8622221350669861, "learning_rate": 3.612973136648081e-05, "loss": 0.6542, "step": 4163 }, { "epoch": 0.7224149895905622, "grad_norm": 0.6311085224151611, "learning_rate": 3.6125702076175636e-05, "loss": 0.6011, "step": 4164 }, { "epoch": 0.722588480222068, "grad_norm": 0.929018497467041, "learning_rate": 3.612167091446394e-05, "loss": 0.6543, "step": 4165 }, { "epoch": 0.7227619708535739, "grad_norm": 0.7394356727600098, "learning_rate": 3.611763788181356e-05, "loss": 0.5372, "step": 4166 }, { "epoch": 0.7229354614850798, "grad_norm": 0.8344247937202454, "learning_rate": 3.6113602978692514e-05, "loss": 0.5393, "step": 4167 }, { "epoch": 0.7231089521165857, "grad_norm": 0.6699580550193787, "learning_rate": 3.610956620556907e-05, "loss": 0.491, "step": 4168 }, { "epoch": 0.7232824427480916, "grad_norm": 1.0480327606201172, "learning_rate": 3.61055275629117e-05, "loss": 0.5978, "step": 4169 }, { "epoch": 0.7234559333795975, "grad_norm": 0.6671690940856934, "learning_rate": 3.610148705118908e-05, "loss": 0.6597, "step": 4170 }, { "epoch": 0.7236294240111034, "grad_norm": 0.7958476543426514, "learning_rate": 3.6097444670870136e-05, "loss": 0.635, "step": 4171 }, { "epoch": 0.7238029146426093, "grad_norm": 0.6575822830200195, "learning_rate": 3.609340042242397e-05, "loss": 0.5558, "step": 4172 }, { "epoch": 0.7239764052741152, "grad_norm": 0.7265298366546631, "learning_rate": 3.608935430631994e-05, "loss": 0.5389, "step": 4173 }, { "epoch": 0.7241498959056211, "grad_norm": 0.7857984900474548, "learning_rate": 3.6085306323027596e-05, "loss": 0.5946, "step": 4174 }, { "epoch": 0.724323386537127, "grad_norm": 0.7398806214332581, "learning_rate": 3.608125647301671e-05, "loss": 0.53, "step": 4175 }, { "epoch": 0.7244968771686329, "grad_norm": 0.5658463835716248, "learning_rate": 3.607720475675727e-05, "loss": 0.5669, "step": 4176 }, { "epoch": 0.7246703678001388, "grad_norm": 0.6145365834236145, "learning_rate": 3.607315117471948e-05, "loss": 0.647, "step": 4177 }, { "epoch": 0.7248438584316447, "grad_norm": 0.8134177923202515, "learning_rate": 3.606909572737378e-05, "loss": 0.5913, "step": 4178 }, { "epoch": 0.7250173490631506, "grad_norm": 0.8853358626365662, "learning_rate": 3.6065038415190775e-05, "loss": 0.6432, "step": 4179 }, { "epoch": 0.7251908396946565, "grad_norm": 0.5950773358345032, "learning_rate": 3.6060979238641363e-05, "loss": 0.5945, "step": 4180 }, { "epoch": 0.7253643303261624, "grad_norm": 0.6152825951576233, "learning_rate": 3.605691819819659e-05, "loss": 0.5674, "step": 4181 }, { "epoch": 0.7255378209576683, "grad_norm": 0.8700108528137207, "learning_rate": 3.6052855294327746e-05, "loss": 0.6201, "step": 4182 }, { "epoch": 0.7257113115891742, "grad_norm": 0.7685799598693848, "learning_rate": 3.604879052750634e-05, "loss": 0.5131, "step": 4183 }, { "epoch": 0.7258848022206801, "grad_norm": 1.0418882369995117, "learning_rate": 3.604472389820409e-05, "loss": 0.5507, "step": 4184 }, { "epoch": 0.7260582928521859, "grad_norm": 0.6010865569114685, "learning_rate": 3.604065540689295e-05, "loss": 0.5939, "step": 4185 }, { "epoch": 0.7262317834836919, "grad_norm": 0.9228698015213013, "learning_rate": 3.6036585054045044e-05, "loss": 0.5513, "step": 4186 }, { "epoch": 0.7264052741151977, "grad_norm": 1.3044053316116333, "learning_rate": 3.603251284013276e-05, "loss": 0.5715, "step": 4187 }, { "epoch": 0.7265787647467037, "grad_norm": 0.7584335207939148, "learning_rate": 3.602843876562868e-05, "loss": 0.4867, "step": 4188 }, { "epoch": 0.7267522553782095, "grad_norm": 0.6108629703521729, "learning_rate": 3.602436283100561e-05, "loss": 0.6052, "step": 4189 }, { "epoch": 0.7269257460097155, "grad_norm": 0.6309239864349365, "learning_rate": 3.6020285036736554e-05, "loss": 0.6798, "step": 4190 }, { "epoch": 0.7270992366412213, "grad_norm": 0.7907925844192505, "learning_rate": 3.601620538329476e-05, "loss": 0.5789, "step": 4191 }, { "epoch": 0.7272727272727273, "grad_norm": 0.6239023208618164, "learning_rate": 3.601212387115366e-05, "loss": 0.5896, "step": 4192 }, { "epoch": 0.7274462179042331, "grad_norm": 0.6699426770210266, "learning_rate": 3.6008040500786926e-05, "loss": 0.5713, "step": 4193 }, { "epoch": 0.7276197085357391, "grad_norm": 1.3908271789550781, "learning_rate": 3.6003955272668444e-05, "loss": 0.6373, "step": 4194 }, { "epoch": 0.7277931991672449, "grad_norm": 0.7028026580810547, "learning_rate": 3.599986818727231e-05, "loss": 0.5668, "step": 4195 }, { "epoch": 0.7279666897987509, "grad_norm": 0.5071716904640198, "learning_rate": 3.5995779245072816e-05, "loss": 0.589, "step": 4196 }, { "epoch": 0.7281401804302567, "grad_norm": 0.5421550869941711, "learning_rate": 3.599168844654451e-05, "loss": 0.7155, "step": 4197 }, { "epoch": 0.7283136710617627, "grad_norm": 0.5536943078041077, "learning_rate": 3.5987595792162126e-05, "loss": 0.5199, "step": 4198 }, { "epoch": 0.7284871616932685, "grad_norm": 0.7120516300201416, "learning_rate": 3.5983501282400617e-05, "loss": 0.6636, "step": 4199 }, { "epoch": 0.7286606523247745, "grad_norm": 1.147731900215149, "learning_rate": 3.597940491773516e-05, "loss": 0.5612, "step": 4200 }, { "epoch": 0.7288341429562804, "grad_norm": 0.6481512188911438, "learning_rate": 3.597530669864115e-05, "loss": 0.6503, "step": 4201 }, { "epoch": 0.7290076335877863, "grad_norm": 0.8503965139389038, "learning_rate": 3.5971206625594176e-05, "loss": 0.5345, "step": 4202 }, { "epoch": 0.7291811242192922, "grad_norm": 0.49129992723464966, "learning_rate": 3.596710469907006e-05, "loss": 0.6738, "step": 4203 }, { "epoch": 0.7293546148507981, "grad_norm": 0.6439663171768188, "learning_rate": 3.5963000919544844e-05, "loss": 0.6027, "step": 4204 }, { "epoch": 0.729528105482304, "grad_norm": 0.7014073729515076, "learning_rate": 3.595889528749477e-05, "loss": 0.6725, "step": 4205 }, { "epoch": 0.7297015961138098, "grad_norm": 0.6441563963890076, "learning_rate": 3.59547878033963e-05, "loss": 0.5385, "step": 4206 }, { "epoch": 0.7298750867453158, "grad_norm": 0.6568038463592529, "learning_rate": 3.595067846772612e-05, "loss": 0.5377, "step": 4207 }, { "epoch": 0.7300485773768216, "grad_norm": 0.7476823925971985, "learning_rate": 3.594656728096111e-05, "loss": 0.5637, "step": 4208 }, { "epoch": 0.7302220680083276, "grad_norm": 0.7711260914802551, "learning_rate": 3.594245424357839e-05, "loss": 0.5879, "step": 4209 }, { "epoch": 0.7303955586398334, "grad_norm": 0.6500536203384399, "learning_rate": 3.5938339356055274e-05, "loss": 0.6674, "step": 4210 }, { "epoch": 0.7305690492713394, "grad_norm": 0.6398093700408936, "learning_rate": 3.593422261886931e-05, "loss": 0.5714, "step": 4211 }, { "epoch": 0.7307425399028452, "grad_norm": 0.9162209630012512, "learning_rate": 3.593010403249824e-05, "loss": 0.6091, "step": 4212 }, { "epoch": 0.7309160305343512, "grad_norm": 0.7451730370521545, "learning_rate": 3.592598359742004e-05, "loss": 0.549, "step": 4213 }, { "epoch": 0.731089521165857, "grad_norm": 0.6620838642120361, "learning_rate": 3.592186131411288e-05, "loss": 0.5551, "step": 4214 }, { "epoch": 0.731263011797363, "grad_norm": 0.603359043598175, "learning_rate": 3.591773718305517e-05, "loss": 0.7273, "step": 4215 }, { "epoch": 0.7314365024288688, "grad_norm": 0.637779176235199, "learning_rate": 3.5913611204725496e-05, "loss": 0.6625, "step": 4216 }, { "epoch": 0.7316099930603748, "grad_norm": 0.8620924949645996, "learning_rate": 3.590948337960271e-05, "loss": 0.5035, "step": 4217 }, { "epoch": 0.7317834836918806, "grad_norm": 0.6196661591529846, "learning_rate": 3.590535370816584e-05, "loss": 0.4924, "step": 4218 }, { "epoch": 0.7319569743233866, "grad_norm": 0.7694399356842041, "learning_rate": 3.5901222190894136e-05, "loss": 0.6069, "step": 4219 }, { "epoch": 0.7321304649548924, "grad_norm": 0.7322472333908081, "learning_rate": 3.589708882826707e-05, "loss": 0.6136, "step": 4220 }, { "epoch": 0.7323039555863984, "grad_norm": 0.9868308901786804, "learning_rate": 3.589295362076432e-05, "loss": 0.5369, "step": 4221 }, { "epoch": 0.7324774462179042, "grad_norm": 0.8929972648620605, "learning_rate": 3.588881656886578e-05, "loss": 0.5729, "step": 4222 }, { "epoch": 0.7326509368494102, "grad_norm": 0.9018761515617371, "learning_rate": 3.588467767305157e-05, "loss": 0.5753, "step": 4223 }, { "epoch": 0.732824427480916, "grad_norm": 0.6200088262557983, "learning_rate": 3.5880536933802e-05, "loss": 0.6248, "step": 4224 }, { "epoch": 0.7329979181124219, "grad_norm": 0.5684300065040588, "learning_rate": 3.587639435159762e-05, "loss": 0.6528, "step": 4225 }, { "epoch": 0.7331714087439278, "grad_norm": 0.8975379467010498, "learning_rate": 3.587224992691917e-05, "loss": 0.5836, "step": 4226 }, { "epoch": 0.7333448993754337, "grad_norm": 0.6046488285064697, "learning_rate": 3.586810366024763e-05, "loss": 0.5603, "step": 4227 }, { "epoch": 0.7335183900069396, "grad_norm": 0.9245457649230957, "learning_rate": 3.586395555206417e-05, "loss": 0.5519, "step": 4228 }, { "epoch": 0.7336918806384455, "grad_norm": 0.681410014629364, "learning_rate": 3.585980560285017e-05, "loss": 0.5819, "step": 4229 }, { "epoch": 0.7338653712699514, "grad_norm": 0.8801109194755554, "learning_rate": 3.585565381308726e-05, "loss": 0.5975, "step": 4230 }, { "epoch": 0.7340388619014573, "grad_norm": 0.6511202454566956, "learning_rate": 3.5851500183257246e-05, "loss": 0.5861, "step": 4231 }, { "epoch": 0.7342123525329632, "grad_norm": 0.8441863656044006, "learning_rate": 3.584734471384217e-05, "loss": 0.5149, "step": 4232 }, { "epoch": 0.7343858431644691, "grad_norm": 0.7297527194023132, "learning_rate": 3.5843187405324266e-05, "loss": 0.5925, "step": 4233 }, { "epoch": 0.734559333795975, "grad_norm": 0.7790377140045166, "learning_rate": 3.5839028258186014e-05, "loss": 0.5204, "step": 4234 }, { "epoch": 0.7347328244274809, "grad_norm": 0.6037988662719727, "learning_rate": 3.583486727291007e-05, "loss": 0.6438, "step": 4235 }, { "epoch": 0.7349063150589868, "grad_norm": 0.9271818399429321, "learning_rate": 3.583070444997932e-05, "loss": 0.4866, "step": 4236 }, { "epoch": 0.7350798056904927, "grad_norm": 0.5512932538986206, "learning_rate": 3.5826539789876885e-05, "loss": 0.6483, "step": 4237 }, { "epoch": 0.7352532963219987, "grad_norm": 1.7557094097137451, "learning_rate": 3.5822373293086055e-05, "loss": 0.6453, "step": 4238 }, { "epoch": 0.7354267869535045, "grad_norm": 0.8460390567779541, "learning_rate": 3.581820496009038e-05, "loss": 0.559, "step": 4239 }, { "epoch": 0.7356002775850105, "grad_norm": 0.6969012022018433, "learning_rate": 3.581403479137358e-05, "loss": 0.5341, "step": 4240 }, { "epoch": 0.7357737682165163, "grad_norm": 0.6169527769088745, "learning_rate": 3.580986278741961e-05, "loss": 0.709, "step": 4241 }, { "epoch": 0.7359472588480223, "grad_norm": 0.6563184261322021, "learning_rate": 3.580568894871265e-05, "loss": 0.7075, "step": 4242 }, { "epoch": 0.7361207494795281, "grad_norm": 0.6531060338020325, "learning_rate": 3.580151327573707e-05, "loss": 0.6005, "step": 4243 }, { "epoch": 0.7362942401110341, "grad_norm": 0.7275852560997009, "learning_rate": 3.579733576897746e-05, "loss": 0.5295, "step": 4244 }, { "epoch": 0.7364677307425399, "grad_norm": 0.6438325643539429, "learning_rate": 3.579315642891862e-05, "loss": 0.681, "step": 4245 }, { "epoch": 0.7366412213740458, "grad_norm": 0.6312209963798523, "learning_rate": 3.578897525604558e-05, "loss": 0.7383, "step": 4246 }, { "epoch": 0.7368147120055517, "grad_norm": 1.2075726985931396, "learning_rate": 3.5784792250843564e-05, "loss": 0.5753, "step": 4247 }, { "epoch": 0.7369882026370576, "grad_norm": 0.7225402593612671, "learning_rate": 3.578060741379801e-05, "loss": 0.6145, "step": 4248 }, { "epoch": 0.7371616932685635, "grad_norm": 0.685030996799469, "learning_rate": 3.5776420745394584e-05, "loss": 0.5864, "step": 4249 }, { "epoch": 0.7373351839000694, "grad_norm": 0.668433666229248, "learning_rate": 3.577223224611915e-05, "loss": 0.6794, "step": 4250 }, { "epoch": 0.7375086745315753, "grad_norm": 0.6833786964416504, "learning_rate": 3.576804191645778e-05, "loss": 0.6241, "step": 4251 }, { "epoch": 0.7376821651630812, "grad_norm": 0.529434323310852, "learning_rate": 3.576384975689677e-05, "loss": 0.6364, "step": 4252 }, { "epoch": 0.7378556557945871, "grad_norm": 1.049576997756958, "learning_rate": 3.5759655767922624e-05, "loss": 0.5231, "step": 4253 }, { "epoch": 0.738029146426093, "grad_norm": 1.0075207948684692, "learning_rate": 3.575545995002207e-05, "loss": 0.4973, "step": 4254 }, { "epoch": 0.7382026370575989, "grad_norm": 0.8046914935112, "learning_rate": 3.5751262303682034e-05, "loss": 0.5264, "step": 4255 }, { "epoch": 0.7383761276891048, "grad_norm": 0.8025881052017212, "learning_rate": 3.574706282938964e-05, "loss": 0.6394, "step": 4256 }, { "epoch": 0.7385496183206107, "grad_norm": 0.5996862649917603, "learning_rate": 3.574286152763226e-05, "loss": 0.636, "step": 4257 }, { "epoch": 0.7387231089521166, "grad_norm": 0.8055362105369568, "learning_rate": 3.573865839889746e-05, "loss": 0.541, "step": 4258 }, { "epoch": 0.7388965995836225, "grad_norm": 0.7214866280555725, "learning_rate": 3.573445344367302e-05, "loss": 0.6044, "step": 4259 }, { "epoch": 0.7390700902151284, "grad_norm": 0.5948684811592102, "learning_rate": 3.5730246662446916e-05, "loss": 0.6464, "step": 4260 }, { "epoch": 0.7392435808466343, "grad_norm": 0.8364885449409485, "learning_rate": 3.572603805570736e-05, "loss": 0.6919, "step": 4261 }, { "epoch": 0.7394170714781402, "grad_norm": 0.5774275064468384, "learning_rate": 3.572182762394276e-05, "loss": 0.5867, "step": 4262 }, { "epoch": 0.7395905621096461, "grad_norm": 0.7644405364990234, "learning_rate": 3.571761536764174e-05, "loss": 0.6997, "step": 4263 }, { "epoch": 0.739764052741152, "grad_norm": 1.6703912019729614, "learning_rate": 3.571340128729315e-05, "loss": 0.593, "step": 4264 }, { "epoch": 0.7399375433726578, "grad_norm": 0.6243732571601868, "learning_rate": 3.5709185383386024e-05, "loss": 0.6183, "step": 4265 }, { "epoch": 0.7401110340041638, "grad_norm": 0.7216456532478333, "learning_rate": 3.570496765640964e-05, "loss": 0.5013, "step": 4266 }, { "epoch": 0.7402845246356696, "grad_norm": 0.7056600451469421, "learning_rate": 3.570074810685345e-05, "loss": 0.4917, "step": 4267 }, { "epoch": 0.7404580152671756, "grad_norm": 0.8297005295753479, "learning_rate": 3.569652673520715e-05, "loss": 0.5697, "step": 4268 }, { "epoch": 0.7406315058986814, "grad_norm": 0.7679122686386108, "learning_rate": 3.569230354196063e-05, "loss": 0.5293, "step": 4269 }, { "epoch": 0.7408049965301874, "grad_norm": 0.7023241519927979, "learning_rate": 3.5688078527604e-05, "loss": 0.6606, "step": 4270 }, { "epoch": 0.7409784871616932, "grad_norm": 0.6871032118797302, "learning_rate": 3.568385169262758e-05, "loss": 0.6316, "step": 4271 }, { "epoch": 0.7411519777931992, "grad_norm": 1.1948171854019165, "learning_rate": 3.56796230375219e-05, "loss": 0.495, "step": 4272 }, { "epoch": 0.741325468424705, "grad_norm": 0.5721275806427002, "learning_rate": 3.567539256277769e-05, "loss": 0.5731, "step": 4273 }, { "epoch": 0.741498959056211, "grad_norm": 1.6316579580307007, "learning_rate": 3.567116026888591e-05, "loss": 0.6626, "step": 4274 }, { "epoch": 0.7416724496877168, "grad_norm": 0.6861583590507507, "learning_rate": 3.566692615633771e-05, "loss": 0.6213, "step": 4275 }, { "epoch": 0.7418459403192228, "grad_norm": 0.932975709438324, "learning_rate": 3.5662690225624484e-05, "loss": 0.5396, "step": 4276 }, { "epoch": 0.7420194309507286, "grad_norm": 0.8106069564819336, "learning_rate": 3.56584524772378e-05, "loss": 0.4543, "step": 4277 }, { "epoch": 0.7421929215822346, "grad_norm": 1.3101338148117065, "learning_rate": 3.565421291166946e-05, "loss": 0.4977, "step": 4278 }, { "epoch": 0.7423664122137404, "grad_norm": 0.7285441756248474, "learning_rate": 3.564997152941148e-05, "loss": 0.605, "step": 4279 }, { "epoch": 0.7425399028452464, "grad_norm": 0.619378924369812, "learning_rate": 3.5645728330956074e-05, "loss": 0.7219, "step": 4280 }, { "epoch": 0.7427133934767522, "grad_norm": 1.1070685386657715, "learning_rate": 3.564148331679565e-05, "loss": 0.5995, "step": 4281 }, { "epoch": 0.7428868841082582, "grad_norm": 0.656654417514801, "learning_rate": 3.563723648742286e-05, "loss": 0.6273, "step": 4282 }, { "epoch": 0.743060374739764, "grad_norm": 0.8392003774642944, "learning_rate": 3.563298784333056e-05, "loss": 0.5298, "step": 4283 }, { "epoch": 0.7432338653712699, "grad_norm": 0.889798641204834, "learning_rate": 3.5628737385011814e-05, "loss": 0.5413, "step": 4284 }, { "epoch": 0.7434073560027759, "grad_norm": 0.8371496796607971, "learning_rate": 3.562448511295987e-05, "loss": 0.6377, "step": 4285 }, { "epoch": 0.7435808466342817, "grad_norm": 0.5523166656494141, "learning_rate": 3.562023102766822e-05, "loss": 0.6639, "step": 4286 }, { "epoch": 0.7437543372657877, "grad_norm": 0.9057916402816772, "learning_rate": 3.561597512963057e-05, "loss": 0.5361, "step": 4287 }, { "epoch": 0.7439278278972935, "grad_norm": 0.7643842697143555, "learning_rate": 3.561171741934081e-05, "loss": 0.4995, "step": 4288 }, { "epoch": 0.7441013185287995, "grad_norm": 0.7651995420455933, "learning_rate": 3.560745789729304e-05, "loss": 0.5859, "step": 4289 }, { "epoch": 0.7442748091603053, "grad_norm": 0.7631704807281494, "learning_rate": 3.56031965639816e-05, "loss": 0.5461, "step": 4290 }, { "epoch": 0.7444482997918113, "grad_norm": 0.928712010383606, "learning_rate": 3.559893341990102e-05, "loss": 0.6956, "step": 4291 }, { "epoch": 0.7446217904233171, "grad_norm": 0.5718164443969727, "learning_rate": 3.559466846554604e-05, "loss": 0.6771, "step": 4292 }, { "epoch": 0.7447952810548231, "grad_norm": 0.7099377512931824, "learning_rate": 3.559040170141161e-05, "loss": 0.6006, "step": 4293 }, { "epoch": 0.7449687716863289, "grad_norm": 0.7230462431907654, "learning_rate": 3.5586133127992904e-05, "loss": 0.6356, "step": 4294 }, { "epoch": 0.7451422623178349, "grad_norm": 0.7968019843101501, "learning_rate": 3.558186274578527e-05, "loss": 0.5314, "step": 4295 }, { "epoch": 0.7453157529493407, "grad_norm": 0.7601330280303955, "learning_rate": 3.557759055528433e-05, "loss": 0.5615, "step": 4296 }, { "epoch": 0.7454892435808467, "grad_norm": 0.7466877698898315, "learning_rate": 3.5573316556985845e-05, "loss": 0.6106, "step": 4297 }, { "epoch": 0.7456627342123525, "grad_norm": 0.6433128118515015, "learning_rate": 3.5569040751385825e-05, "loss": 0.619, "step": 4298 }, { "epoch": 0.7458362248438585, "grad_norm": 0.7198663353919983, "learning_rate": 3.556476313898048e-05, "loss": 0.5938, "step": 4299 }, { "epoch": 0.7460097154753643, "grad_norm": 0.8491593599319458, "learning_rate": 3.556048372026625e-05, "loss": 0.6785, "step": 4300 }, { "epoch": 0.7461832061068703, "grad_norm": 0.7524433732032776, "learning_rate": 3.5556202495739736e-05, "loss": 0.5038, "step": 4301 }, { "epoch": 0.7463566967383761, "grad_norm": 0.4702468514442444, "learning_rate": 3.555191946589781e-05, "loss": 0.5743, "step": 4302 }, { "epoch": 0.7465301873698821, "grad_norm": 0.6478613018989563, "learning_rate": 3.55476346312375e-05, "loss": 0.5717, "step": 4303 }, { "epoch": 0.7467036780013879, "grad_norm": 0.6415278911590576, "learning_rate": 3.554334799225608e-05, "loss": 0.4797, "step": 4304 }, { "epoch": 0.7468771686328938, "grad_norm": 0.6717489361763, "learning_rate": 3.5539059549451e-05, "loss": 0.5934, "step": 4305 }, { "epoch": 0.7470506592643997, "grad_norm": 0.7758067846298218, "learning_rate": 3.553476930331996e-05, "loss": 0.6525, "step": 4306 }, { "epoch": 0.7472241498959056, "grad_norm": 0.6344873905181885, "learning_rate": 3.553047725436085e-05, "loss": 0.547, "step": 4307 }, { "epoch": 0.7473976405274115, "grad_norm": 0.6888913512229919, "learning_rate": 3.5526183403071754e-05, "loss": 0.5333, "step": 4308 }, { "epoch": 0.7475711311589174, "grad_norm": 0.871826708316803, "learning_rate": 3.552188774995098e-05, "loss": 0.6178, "step": 4309 }, { "epoch": 0.7477446217904233, "grad_norm": 0.9191585183143616, "learning_rate": 3.551759029549705e-05, "loss": 0.6038, "step": 4310 }, { "epoch": 0.7479181124219292, "grad_norm": 0.7116936445236206, "learning_rate": 3.5513291040208674e-05, "loss": 0.675, "step": 4311 }, { "epoch": 0.7480916030534351, "grad_norm": 0.7276498675346375, "learning_rate": 3.550898998458481e-05, "loss": 0.6112, "step": 4312 }, { "epoch": 0.748265093684941, "grad_norm": 0.7814944982528687, "learning_rate": 3.550468712912458e-05, "loss": 0.5112, "step": 4313 }, { "epoch": 0.7484385843164469, "grad_norm": 0.5744490623474121, "learning_rate": 3.550038247432734e-05, "loss": 0.6537, "step": 4314 }, { "epoch": 0.7486120749479528, "grad_norm": 0.6232500076293945, "learning_rate": 3.549607602069265e-05, "loss": 0.5288, "step": 4315 }, { "epoch": 0.7487855655794587, "grad_norm": 1.3707828521728516, "learning_rate": 3.549176776872029e-05, "loss": 0.5879, "step": 4316 }, { "epoch": 0.7489590562109646, "grad_norm": 0.8000961542129517, "learning_rate": 3.5487457718910226e-05, "loss": 0.5605, "step": 4317 }, { "epoch": 0.7491325468424705, "grad_norm": 0.8479990363121033, "learning_rate": 3.5483145871762646e-05, "loss": 0.5392, "step": 4318 }, { "epoch": 0.7493060374739764, "grad_norm": 0.7323888540267944, "learning_rate": 3.5478832227777945e-05, "loss": 0.562, "step": 4319 }, { "epoch": 0.7494795281054824, "grad_norm": 0.7212268710136414, "learning_rate": 3.547451678745673e-05, "loss": 0.5671, "step": 4320 }, { "epoch": 0.7496530187369882, "grad_norm": 1.0141124725341797, "learning_rate": 3.547019955129981e-05, "loss": 0.5107, "step": 4321 }, { "epoch": 0.7498265093684942, "grad_norm": 0.853689968585968, "learning_rate": 3.54658805198082e-05, "loss": 0.5624, "step": 4322 }, { "epoch": 0.75, "grad_norm": 0.6390842199325562, "learning_rate": 3.546155969348315e-05, "loss": 0.5446, "step": 4323 }, { "epoch": 0.7501734906315058, "grad_norm": 0.7222588658332825, "learning_rate": 3.545723707282606e-05, "loss": 0.6731, "step": 4324 }, { "epoch": 0.7503469812630118, "grad_norm": 0.7966939210891724, "learning_rate": 3.5452912658338605e-05, "loss": 0.5018, "step": 4325 }, { "epoch": 0.7505204718945176, "grad_norm": 0.6748339533805847, "learning_rate": 3.5448586450522635e-05, "loss": 0.6138, "step": 4326 }, { "epoch": 0.7506939625260236, "grad_norm": 0.7996774911880493, "learning_rate": 3.5444258449880205e-05, "loss": 0.4681, "step": 4327 }, { "epoch": 0.7508674531575295, "grad_norm": 0.8768160939216614, "learning_rate": 3.5439928656913586e-05, "loss": 0.4862, "step": 4328 }, { "epoch": 0.7510409437890354, "grad_norm": 0.957241415977478, "learning_rate": 3.543559707212525e-05, "loss": 0.5496, "step": 4329 }, { "epoch": 0.7512144344205413, "grad_norm": 0.8147217631340027, "learning_rate": 3.543126369601789e-05, "loss": 0.6323, "step": 4330 }, { "epoch": 0.7513879250520472, "grad_norm": 0.7651244401931763, "learning_rate": 3.54269285290944e-05, "loss": 0.6366, "step": 4331 }, { "epoch": 0.7515614156835531, "grad_norm": 0.7931744456291199, "learning_rate": 3.542259157185787e-05, "loss": 0.6262, "step": 4332 }, { "epoch": 0.751734906315059, "grad_norm": 0.9671867489814758, "learning_rate": 3.541825282481162e-05, "loss": 0.5729, "step": 4333 }, { "epoch": 0.7519083969465649, "grad_norm": 0.8910104632377625, "learning_rate": 3.5413912288459174e-05, "loss": 0.4968, "step": 4334 }, { "epoch": 0.7520818875780708, "grad_norm": 0.6812747716903687, "learning_rate": 3.540956996330424e-05, "loss": 0.5334, "step": 4335 }, { "epoch": 0.7522553782095767, "grad_norm": 0.6249709129333496, "learning_rate": 3.5405225849850754e-05, "loss": 0.5815, "step": 4336 }, { "epoch": 0.7524288688410826, "grad_norm": 0.6938480138778687, "learning_rate": 3.5400879948602854e-05, "loss": 0.5894, "step": 4337 }, { "epoch": 0.7526023594725885, "grad_norm": 0.8001736402511597, "learning_rate": 3.53965322600649e-05, "loss": 0.5944, "step": 4338 }, { "epoch": 0.7527758501040944, "grad_norm": 0.6530340313911438, "learning_rate": 3.539218278474143e-05, "loss": 0.641, "step": 4339 }, { "epoch": 0.7529493407356003, "grad_norm": 0.736674964427948, "learning_rate": 3.5387831523137216e-05, "loss": 0.6057, "step": 4340 }, { "epoch": 0.7531228313671062, "grad_norm": 0.7169548869132996, "learning_rate": 3.538347847575722e-05, "loss": 0.578, "step": 4341 }, { "epoch": 0.7532963219986121, "grad_norm": 0.9289293885231018, "learning_rate": 3.5379123643106625e-05, "loss": 0.5378, "step": 4342 }, { "epoch": 0.7534698126301179, "grad_norm": 0.6985144019126892, "learning_rate": 3.537476702569081e-05, "loss": 0.5327, "step": 4343 }, { "epoch": 0.7536433032616239, "grad_norm": 0.8143550753593445, "learning_rate": 3.5370408624015364e-05, "loss": 0.5986, "step": 4344 }, { "epoch": 0.7538167938931297, "grad_norm": 0.563184916973114, "learning_rate": 3.536604843858609e-05, "loss": 0.6035, "step": 4345 }, { "epoch": 0.7539902845246357, "grad_norm": 0.8249099254608154, "learning_rate": 3.536168646990899e-05, "loss": 0.6566, "step": 4346 }, { "epoch": 0.7541637751561415, "grad_norm": 0.8906046152114868, "learning_rate": 3.535732271849028e-05, "loss": 0.573, "step": 4347 }, { "epoch": 0.7543372657876475, "grad_norm": 0.7494427561759949, "learning_rate": 3.535295718483636e-05, "loss": 0.4839, "step": 4348 }, { "epoch": 0.7545107564191533, "grad_norm": 0.6854152679443359, "learning_rate": 3.5348589869453874e-05, "loss": 0.5288, "step": 4349 }, { "epoch": 0.7546842470506593, "grad_norm": 0.7171089053153992, "learning_rate": 3.5344220772849654e-05, "loss": 0.6676, "step": 4350 }, { "epoch": 0.7548577376821651, "grad_norm": 0.6131109595298767, "learning_rate": 3.533984989553073e-05, "loss": 0.6404, "step": 4351 }, { "epoch": 0.7550312283136711, "grad_norm": 0.6121817827224731, "learning_rate": 3.533547723800435e-05, "loss": 0.6564, "step": 4352 }, { "epoch": 0.7552047189451769, "grad_norm": 0.6851575970649719, "learning_rate": 3.533110280077797e-05, "loss": 0.4674, "step": 4353 }, { "epoch": 0.7553782095766829, "grad_norm": 0.8325938582420349, "learning_rate": 3.532672658435925e-05, "loss": 0.525, "step": 4354 }, { "epoch": 0.7555517002081887, "grad_norm": 0.8352726697921753, "learning_rate": 3.5322348589256044e-05, "loss": 0.5544, "step": 4355 }, { "epoch": 0.7557251908396947, "grad_norm": 0.8470213413238525, "learning_rate": 3.531796881597643e-05, "loss": 0.5129, "step": 4356 }, { "epoch": 0.7558986814712005, "grad_norm": 0.8853175044059753, "learning_rate": 3.5313587265028686e-05, "loss": 0.4796, "step": 4357 }, { "epoch": 0.7560721721027065, "grad_norm": 0.6890913844108582, "learning_rate": 3.53092039369213e-05, "loss": 0.527, "step": 4358 }, { "epoch": 0.7562456627342123, "grad_norm": 0.6278190016746521, "learning_rate": 3.5304818832162956e-05, "loss": 0.5302, "step": 4359 }, { "epoch": 0.7564191533657183, "grad_norm": 0.7474961280822754, "learning_rate": 3.530043195126255e-05, "loss": 0.5416, "step": 4360 }, { "epoch": 0.7565926439972241, "grad_norm": 0.6712853908538818, "learning_rate": 3.529604329472919e-05, "loss": 0.5948, "step": 4361 }, { "epoch": 0.7567661346287301, "grad_norm": 0.6338025331497192, "learning_rate": 3.529165286307219e-05, "loss": 0.6205, "step": 4362 }, { "epoch": 0.756939625260236, "grad_norm": 0.6051141619682312, "learning_rate": 3.5287260656801044e-05, "loss": 0.6592, "step": 4363 }, { "epoch": 0.7571131158917418, "grad_norm": 0.6509369611740112, "learning_rate": 3.528286667642549e-05, "loss": 0.5748, "step": 4364 }, { "epoch": 0.7572866065232478, "grad_norm": 1.2799158096313477, "learning_rate": 3.5278470922455453e-05, "loss": 0.5452, "step": 4365 }, { "epoch": 0.7574600971547536, "grad_norm": 1.0939301252365112, "learning_rate": 3.527407339540106e-05, "loss": 0.729, "step": 4366 }, { "epoch": 0.7576335877862596, "grad_norm": 0.759294331073761, "learning_rate": 3.5269674095772654e-05, "loss": 0.5856, "step": 4367 }, { "epoch": 0.7578070784177654, "grad_norm": 0.782491147518158, "learning_rate": 3.5265273024080776e-05, "loss": 0.5333, "step": 4368 }, { "epoch": 0.7579805690492714, "grad_norm": 0.6015889644622803, "learning_rate": 3.526087018083617e-05, "loss": 0.6262, "step": 4369 }, { "epoch": 0.7581540596807772, "grad_norm": 0.5154745578765869, "learning_rate": 3.52564655665498e-05, "loss": 0.7069, "step": 4370 }, { "epoch": 0.7583275503122832, "grad_norm": 0.7049304842948914, "learning_rate": 3.525205918173283e-05, "loss": 0.5781, "step": 4371 }, { "epoch": 0.758501040943789, "grad_norm": 0.7382335662841797, "learning_rate": 3.524765102689662e-05, "loss": 0.5284, "step": 4372 }, { "epoch": 0.758674531575295, "grad_norm": 0.8790127038955688, "learning_rate": 3.524324110255273e-05, "loss": 0.5142, "step": 4373 }, { "epoch": 0.7588480222068008, "grad_norm": 0.9203531742095947, "learning_rate": 3.523882940921296e-05, "loss": 0.4869, "step": 4374 }, { "epoch": 0.7590215128383068, "grad_norm": 1.163057565689087, "learning_rate": 3.523441594738927e-05, "loss": 0.5157, "step": 4375 }, { "epoch": 0.7591950034698126, "grad_norm": 0.6108386516571045, "learning_rate": 3.523000071759387e-05, "loss": 0.5833, "step": 4376 }, { "epoch": 0.7593684941013186, "grad_norm": 0.5940057039260864, "learning_rate": 3.522558372033912e-05, "loss": 0.6936, "step": 4377 }, { "epoch": 0.7595419847328244, "grad_norm": 0.6746407151222229, "learning_rate": 3.522116495613766e-05, "loss": 0.5966, "step": 4378 }, { "epoch": 0.7597154753643304, "grad_norm": 0.8779783248901367, "learning_rate": 3.521674442550226e-05, "loss": 0.5787, "step": 4379 }, { "epoch": 0.7598889659958362, "grad_norm": 0.8782005906105042, "learning_rate": 3.521232212894594e-05, "loss": 0.5933, "step": 4380 }, { "epoch": 0.7600624566273422, "grad_norm": 0.5405778884887695, "learning_rate": 3.520789806698191e-05, "loss": 0.6982, "step": 4381 }, { "epoch": 0.760235947258848, "grad_norm": 0.5692259669303894, "learning_rate": 3.5203472240123594e-05, "loss": 0.6979, "step": 4382 }, { "epoch": 0.7604094378903539, "grad_norm": 1.0693467855453491, "learning_rate": 3.5199044648884605e-05, "loss": 0.5969, "step": 4383 }, { "epoch": 0.7605829285218598, "grad_norm": 1.04296875, "learning_rate": 3.519461529377877e-05, "loss": 0.5542, "step": 4384 }, { "epoch": 0.7607564191533657, "grad_norm": 0.725059449672699, "learning_rate": 3.519018417532013e-05, "loss": 0.6002, "step": 4385 }, { "epoch": 0.7609299097848716, "grad_norm": 0.8393305540084839, "learning_rate": 3.5185751294022914e-05, "loss": 0.5996, "step": 4386 }, { "epoch": 0.7611034004163775, "grad_norm": 0.745979905128479, "learning_rate": 3.518131665040157e-05, "loss": 0.5409, "step": 4387 }, { "epoch": 0.7612768910478834, "grad_norm": 0.6680980920791626, "learning_rate": 3.5176880244970735e-05, "loss": 0.5209, "step": 4388 }, { "epoch": 0.7614503816793893, "grad_norm": 1.4826394319534302, "learning_rate": 3.517244207824526e-05, "loss": 0.6648, "step": 4389 }, { "epoch": 0.7616238723108952, "grad_norm": 0.6956479549407959, "learning_rate": 3.516800215074021e-05, "loss": 0.5179, "step": 4390 }, { "epoch": 0.7617973629424011, "grad_norm": 0.7217515110969543, "learning_rate": 3.516356046297083e-05, "loss": 0.6899, "step": 4391 }, { "epoch": 0.761970853573907, "grad_norm": 0.7801789045333862, "learning_rate": 3.515911701545259e-05, "loss": 0.5912, "step": 4392 }, { "epoch": 0.7621443442054129, "grad_norm": 0.902409553527832, "learning_rate": 3.515467180870116e-05, "loss": 0.5863, "step": 4393 }, { "epoch": 0.7623178348369188, "grad_norm": 0.6760614514350891, "learning_rate": 3.5150224843232405e-05, "loss": 0.5063, "step": 4394 }, { "epoch": 0.7624913254684247, "grad_norm": 0.8342067003250122, "learning_rate": 3.51457761195624e-05, "loss": 0.5487, "step": 4395 }, { "epoch": 0.7626648160999306, "grad_norm": 0.7199931144714355, "learning_rate": 3.514132563820744e-05, "loss": 0.5853, "step": 4396 }, { "epoch": 0.7628383067314365, "grad_norm": 0.9212760329246521, "learning_rate": 3.513687339968399e-05, "loss": 0.6139, "step": 4397 }, { "epoch": 0.7630117973629424, "grad_norm": 0.6572199463844299, "learning_rate": 3.513241940450874e-05, "loss": 0.5898, "step": 4398 }, { "epoch": 0.7631852879944483, "grad_norm": 0.9196416735649109, "learning_rate": 3.5127963653198583e-05, "loss": 0.6492, "step": 4399 }, { "epoch": 0.7633587786259542, "grad_norm": 0.66299968957901, "learning_rate": 3.512350614627062e-05, "loss": 0.5554, "step": 4400 }, { "epoch": 0.7635322692574601, "grad_norm": 0.7973070740699768, "learning_rate": 3.511904688424215e-05, "loss": 0.6779, "step": 4401 }, { "epoch": 0.7637057598889659, "grad_norm": 0.8950045704841614, "learning_rate": 3.511458586763067e-05, "loss": 0.499, "step": 4402 }, { "epoch": 0.7638792505204719, "grad_norm": 0.8955358862876892, "learning_rate": 3.511012309695389e-05, "loss": 0.5558, "step": 4403 }, { "epoch": 0.7640527411519777, "grad_norm": 1.0587538480758667, "learning_rate": 3.510565857272972e-05, "loss": 0.5178, "step": 4404 }, { "epoch": 0.7642262317834837, "grad_norm": 0.8319945335388184, "learning_rate": 3.510119229547626e-05, "loss": 0.5959, "step": 4405 }, { "epoch": 0.7643997224149895, "grad_norm": 0.9041110277175903, "learning_rate": 3.509672426571185e-05, "loss": 0.5969, "step": 4406 }, { "epoch": 0.7645732130464955, "grad_norm": 0.8114092350006104, "learning_rate": 3.509225448395499e-05, "loss": 0.532, "step": 4407 }, { "epoch": 0.7647467036780013, "grad_norm": 0.7527754902839661, "learning_rate": 3.508778295072441e-05, "loss": 0.5211, "step": 4408 }, { "epoch": 0.7649201943095073, "grad_norm": 0.9807412624359131, "learning_rate": 3.5083309666539043e-05, "loss": 0.5741, "step": 4409 }, { "epoch": 0.7650936849410132, "grad_norm": 0.7295682430267334, "learning_rate": 3.5078834631918014e-05, "loss": 0.6608, "step": 4410 }, { "epoch": 0.7652671755725191, "grad_norm": 0.8115662336349487, "learning_rate": 3.507435784738065e-05, "loss": 0.5958, "step": 4411 }, { "epoch": 0.765440666204025, "grad_norm": 0.6505253314971924, "learning_rate": 3.506987931344649e-05, "loss": 0.6276, "step": 4412 }, { "epoch": 0.7656141568355309, "grad_norm": 0.6952764391899109, "learning_rate": 3.5065399030635286e-05, "loss": 0.5837, "step": 4413 }, { "epoch": 0.7657876474670368, "grad_norm": 0.9617361426353455, "learning_rate": 3.506091699946697e-05, "loss": 0.5822, "step": 4414 }, { "epoch": 0.7659611380985427, "grad_norm": 0.6262912154197693, "learning_rate": 3.505643322046168e-05, "loss": 0.5688, "step": 4415 }, { "epoch": 0.7661346287300486, "grad_norm": 0.8222533464431763, "learning_rate": 3.505194769413977e-05, "loss": 0.4937, "step": 4416 }, { "epoch": 0.7663081193615545, "grad_norm": 0.7947388887405396, "learning_rate": 3.5047460421021796e-05, "loss": 0.5691, "step": 4417 }, { "epoch": 0.7664816099930604, "grad_norm": 0.9605175852775574, "learning_rate": 3.504297140162851e-05, "loss": 0.5865, "step": 4418 }, { "epoch": 0.7666551006245663, "grad_norm": 0.8637752532958984, "learning_rate": 3.503848063648086e-05, "loss": 0.637, "step": 4419 }, { "epoch": 0.7668285912560722, "grad_norm": 0.7540271878242493, "learning_rate": 3.50339881261e-05, "loss": 0.5406, "step": 4420 }, { "epoch": 0.7670020818875781, "grad_norm": 0.6558127403259277, "learning_rate": 3.502949387100731e-05, "loss": 0.6517, "step": 4421 }, { "epoch": 0.767175572519084, "grad_norm": 0.6077901124954224, "learning_rate": 3.502499787172434e-05, "loss": 0.5991, "step": 4422 }, { "epoch": 0.7673490631505898, "grad_norm": 1.032698154449463, "learning_rate": 3.502050012877286e-05, "loss": 0.5167, "step": 4423 }, { "epoch": 0.7675225537820958, "grad_norm": 0.7792657613754272, "learning_rate": 3.5016000642674836e-05, "loss": 0.6167, "step": 4424 }, { "epoch": 0.7676960444136016, "grad_norm": 0.6926698684692383, "learning_rate": 3.501149941395245e-05, "loss": 0.6359, "step": 4425 }, { "epoch": 0.7678695350451076, "grad_norm": 0.7828586101531982, "learning_rate": 3.500699644312805e-05, "loss": 0.6838, "step": 4426 }, { "epoch": 0.7680430256766134, "grad_norm": 0.7258651256561279, "learning_rate": 3.5002491730724235e-05, "loss": 0.6031, "step": 4427 }, { "epoch": 0.7682165163081194, "grad_norm": 0.7690781354904175, "learning_rate": 3.4997985277263765e-05, "loss": 0.5748, "step": 4428 }, { "epoch": 0.7683900069396252, "grad_norm": 0.5877462029457092, "learning_rate": 3.499347708326964e-05, "loss": 0.5717, "step": 4429 }, { "epoch": 0.7685634975711312, "grad_norm": 0.5916820168495178, "learning_rate": 3.498896714926502e-05, "loss": 0.5902, "step": 4430 }, { "epoch": 0.768736988202637, "grad_norm": 0.7683703303337097, "learning_rate": 3.4984455475773304e-05, "loss": 0.6587, "step": 4431 }, { "epoch": 0.768910478834143, "grad_norm": 0.7843685746192932, "learning_rate": 3.4979942063318066e-05, "loss": 0.5276, "step": 4432 }, { "epoch": 0.7690839694656488, "grad_norm": 0.620903491973877, "learning_rate": 3.497542691242309e-05, "loss": 0.6189, "step": 4433 }, { "epoch": 0.7692574600971548, "grad_norm": 0.5340049862861633, "learning_rate": 3.497091002361238e-05, "loss": 0.6627, "step": 4434 }, { "epoch": 0.7694309507286606, "grad_norm": 1.0713303089141846, "learning_rate": 3.496639139741011e-05, "loss": 0.5232, "step": 4435 }, { "epoch": 0.7696044413601666, "grad_norm": 0.6281771063804626, "learning_rate": 3.496187103434069e-05, "loss": 0.4862, "step": 4436 }, { "epoch": 0.7697779319916724, "grad_norm": 0.7040949463844299, "learning_rate": 3.495734893492869e-05, "loss": 0.5281, "step": 4437 }, { "epoch": 0.7699514226231784, "grad_norm": 0.6082890033721924, "learning_rate": 3.4952825099698926e-05, "loss": 0.6719, "step": 4438 }, { "epoch": 0.7701249132546842, "grad_norm": 0.6842986941337585, "learning_rate": 3.494829952917638e-05, "loss": 0.7261, "step": 4439 }, { "epoch": 0.7702984038861902, "grad_norm": 1.3153560161590576, "learning_rate": 3.4943772223886264e-05, "loss": 0.5721, "step": 4440 }, { "epoch": 0.770471894517696, "grad_norm": 0.6036491990089417, "learning_rate": 3.493924318435395e-05, "loss": 0.6052, "step": 4441 }, { "epoch": 0.7706453851492019, "grad_norm": 0.7688340544700623, "learning_rate": 3.493471241110507e-05, "loss": 0.5905, "step": 4442 }, { "epoch": 0.7708188757807078, "grad_norm": 0.8035319447517395, "learning_rate": 3.493017990466542e-05, "loss": 0.6389, "step": 4443 }, { "epoch": 0.7709923664122137, "grad_norm": 0.7157716751098633, "learning_rate": 3.492564566556098e-05, "loss": 0.5776, "step": 4444 }, { "epoch": 0.7711658570437196, "grad_norm": 0.9449665546417236, "learning_rate": 3.4921109694317974e-05, "loss": 0.5808, "step": 4445 }, { "epoch": 0.7713393476752255, "grad_norm": 1.098851203918457, "learning_rate": 3.491657199146281e-05, "loss": 0.6249, "step": 4446 }, { "epoch": 0.7715128383067315, "grad_norm": 1.1828197240829468, "learning_rate": 3.4912032557522075e-05, "loss": 0.6592, "step": 4447 }, { "epoch": 0.7716863289382373, "grad_norm": 0.6309101581573486, "learning_rate": 3.490749139302258e-05, "loss": 0.5276, "step": 4448 }, { "epoch": 0.7718598195697433, "grad_norm": 0.7277196645736694, "learning_rate": 3.4902948498491357e-05, "loss": 0.5864, "step": 4449 }, { "epoch": 0.7720333102012491, "grad_norm": 0.8082743287086487, "learning_rate": 3.4898403874455584e-05, "loss": 0.6727, "step": 4450 }, { "epoch": 0.7722068008327551, "grad_norm": 1.0545870065689087, "learning_rate": 3.489385752144268e-05, "loss": 0.6199, "step": 4451 }, { "epoch": 0.7723802914642609, "grad_norm": 0.7641955018043518, "learning_rate": 3.4889309439980256e-05, "loss": 0.6483, "step": 4452 }, { "epoch": 0.7725537820957669, "grad_norm": 0.6104955077171326, "learning_rate": 3.4884759630596124e-05, "loss": 0.6729, "step": 4453 }, { "epoch": 0.7727272727272727, "grad_norm": 0.9830421209335327, "learning_rate": 3.488020809381829e-05, "loss": 0.5653, "step": 4454 }, { "epoch": 0.7729007633587787, "grad_norm": 0.8413013815879822, "learning_rate": 3.4875654830174975e-05, "loss": 0.5801, "step": 4455 }, { "epoch": 0.7730742539902845, "grad_norm": 0.956383228302002, "learning_rate": 3.4871099840194575e-05, "loss": 0.585, "step": 4456 }, { "epoch": 0.7732477446217905, "grad_norm": 0.6991445422172546, "learning_rate": 3.4866543124405714e-05, "loss": 0.611, "step": 4457 }, { "epoch": 0.7734212352532963, "grad_norm": 0.7721594572067261, "learning_rate": 3.4861984683337205e-05, "loss": 0.5787, "step": 4458 }, { "epoch": 0.7735947258848023, "grad_norm": 0.7705695629119873, "learning_rate": 3.485742451751805e-05, "loss": 0.5342, "step": 4459 }, { "epoch": 0.7737682165163081, "grad_norm": 0.7413557767868042, "learning_rate": 3.485286262747747e-05, "loss": 0.6382, "step": 4460 }, { "epoch": 0.7739417071478141, "grad_norm": 0.7928658127784729, "learning_rate": 3.484829901374487e-05, "loss": 0.5946, "step": 4461 }, { "epoch": 0.7741151977793199, "grad_norm": 0.81223464012146, "learning_rate": 3.4843733676849876e-05, "loss": 0.6542, "step": 4462 }, { "epoch": 0.7742886884108258, "grad_norm": 1.0212039947509766, "learning_rate": 3.4839166617322285e-05, "loss": 0.6143, "step": 4463 }, { "epoch": 0.7744621790423317, "grad_norm": 0.9145832657814026, "learning_rate": 3.4834597835692117e-05, "loss": 0.5707, "step": 4464 }, { "epoch": 0.7746356696738376, "grad_norm": 0.7010418176651001, "learning_rate": 3.483002733248959e-05, "loss": 0.6993, "step": 4465 }, { "epoch": 0.7748091603053435, "grad_norm": 1.256196141242981, "learning_rate": 3.482545510824511e-05, "loss": 0.5703, "step": 4466 }, { "epoch": 0.7749826509368494, "grad_norm": 0.8264151811599731, "learning_rate": 3.4820881163489284e-05, "loss": 0.5764, "step": 4467 }, { "epoch": 0.7751561415683553, "grad_norm": 0.9197185039520264, "learning_rate": 3.481630549875293e-05, "loss": 0.6288, "step": 4468 }, { "epoch": 0.7753296321998612, "grad_norm": 0.9843990206718445, "learning_rate": 3.481172811456707e-05, "loss": 0.5416, "step": 4469 }, { "epoch": 0.7755031228313671, "grad_norm": 1.1497993469238281, "learning_rate": 3.480714901146289e-05, "loss": 0.5309, "step": 4470 }, { "epoch": 0.775676613462873, "grad_norm": 1.3240598440170288, "learning_rate": 3.4802568189971814e-05, "loss": 0.6338, "step": 4471 }, { "epoch": 0.7758501040943789, "grad_norm": 0.811797559261322, "learning_rate": 3.479798565062546e-05, "loss": 0.5841, "step": 4472 }, { "epoch": 0.7760235947258848, "grad_norm": 0.6281471848487854, "learning_rate": 3.479340139395562e-05, "loss": 0.7012, "step": 4473 }, { "epoch": 0.7761970853573907, "grad_norm": 0.5829970836639404, "learning_rate": 3.478881542049432e-05, "loss": 0.5142, "step": 4474 }, { "epoch": 0.7763705759888966, "grad_norm": 0.6744431257247925, "learning_rate": 3.478422773077375e-05, "loss": 0.6349, "step": 4475 }, { "epoch": 0.7765440666204025, "grad_norm": 1.3758318424224854, "learning_rate": 3.4779638325326326e-05, "loss": 0.5693, "step": 4476 }, { "epoch": 0.7767175572519084, "grad_norm": 0.8429315090179443, "learning_rate": 3.477504720468465e-05, "loss": 0.5776, "step": 4477 }, { "epoch": 0.7768910478834143, "grad_norm": 1.0065104961395264, "learning_rate": 3.477045436938154e-05, "loss": 0.5262, "step": 4478 }, { "epoch": 0.7770645385149202, "grad_norm": 0.9056709408760071, "learning_rate": 3.4765859819949977e-05, "loss": 0.545, "step": 4479 }, { "epoch": 0.7772380291464261, "grad_norm": 0.7382376790046692, "learning_rate": 3.476126355692318e-05, "loss": 0.7358, "step": 4480 }, { "epoch": 0.777411519777932, "grad_norm": 0.6907861828804016, "learning_rate": 3.475666558083455e-05, "loss": 0.6331, "step": 4481 }, { "epoch": 0.7775850104094378, "grad_norm": 0.852795422077179, "learning_rate": 3.475206589221768e-05, "loss": 0.4722, "step": 4482 }, { "epoch": 0.7777585010409438, "grad_norm": 0.8108128309249878, "learning_rate": 3.4747464491606376e-05, "loss": 0.5543, "step": 4483 }, { "epoch": 0.7779319916724496, "grad_norm": 0.6749537587165833, "learning_rate": 3.4742861379534636e-05, "loss": 0.6477, "step": 4484 }, { "epoch": 0.7781054823039556, "grad_norm": 0.8126578330993652, "learning_rate": 3.4738256556536654e-05, "loss": 0.6495, "step": 4485 }, { "epoch": 0.7782789729354614, "grad_norm": 1.5315734148025513, "learning_rate": 3.473365002314682e-05, "loss": 0.5951, "step": 4486 }, { "epoch": 0.7784524635669674, "grad_norm": 0.9686574935913086, "learning_rate": 3.4729041779899736e-05, "loss": 0.4756, "step": 4487 }, { "epoch": 0.7786259541984732, "grad_norm": 1.302496075630188, "learning_rate": 3.4724431827330196e-05, "loss": 0.4712, "step": 4488 }, { "epoch": 0.7787994448299792, "grad_norm": 1.0299724340438843, "learning_rate": 3.471982016597317e-05, "loss": 0.5264, "step": 4489 }, { "epoch": 0.778972935461485, "grad_norm": 0.5932894945144653, "learning_rate": 3.4715206796363876e-05, "loss": 0.6855, "step": 4490 }, { "epoch": 0.779146426092991, "grad_norm": 0.752143144607544, "learning_rate": 3.4710591719037685e-05, "loss": 0.6594, "step": 4491 }, { "epoch": 0.7793199167244969, "grad_norm": 2.0506062507629395, "learning_rate": 3.470597493453018e-05, "loss": 0.6189, "step": 4492 }, { "epoch": 0.7794934073560028, "grad_norm": 2.3081183433532715, "learning_rate": 3.470135644337715e-05, "loss": 0.6396, "step": 4493 }, { "epoch": 0.7796668979875087, "grad_norm": 0.7178435325622559, "learning_rate": 3.469673624611457e-05, "loss": 0.5553, "step": 4494 }, { "epoch": 0.7798403886190146, "grad_norm": 0.7959488034248352, "learning_rate": 3.4692114343278626e-05, "loss": 0.5393, "step": 4495 }, { "epoch": 0.7800138792505205, "grad_norm": 1.1394597291946411, "learning_rate": 3.4687490735405696e-05, "loss": 0.4987, "step": 4496 }, { "epoch": 0.7801873698820264, "grad_norm": 0.7853170037269592, "learning_rate": 3.468286542303235e-05, "loss": 0.4734, "step": 4497 }, { "epoch": 0.7803608605135323, "grad_norm": 0.6878113150596619, "learning_rate": 3.467823840669536e-05, "loss": 0.439, "step": 4498 }, { "epoch": 0.7805343511450382, "grad_norm": 0.646817147731781, "learning_rate": 3.4673609686931697e-05, "loss": 0.6442, "step": 4499 }, { "epoch": 0.7807078417765441, "grad_norm": 0.7293394804000854, "learning_rate": 3.466897926427854e-05, "loss": 0.5532, "step": 4500 }, { "epoch": 0.7808813324080499, "grad_norm": 0.8035613298416138, "learning_rate": 3.4664347139273245e-05, "loss": 0.6101, "step": 4501 }, { "epoch": 0.7810548230395559, "grad_norm": 0.8770453333854675, "learning_rate": 3.465971331245337e-05, "loss": 0.525, "step": 4502 }, { "epoch": 0.7812283136710617, "grad_norm": 0.7347700595855713, "learning_rate": 3.465507778435669e-05, "loss": 0.4902, "step": 4503 }, { "epoch": 0.7814018043025677, "grad_norm": 0.5748605132102966, "learning_rate": 3.465044055552116e-05, "loss": 0.6661, "step": 4504 }, { "epoch": 0.7815752949340735, "grad_norm": 0.5695153474807739, "learning_rate": 3.464580162648492e-05, "loss": 0.6606, "step": 4505 }, { "epoch": 0.7817487855655795, "grad_norm": 0.7740654349327087, "learning_rate": 3.464116099778634e-05, "loss": 0.5966, "step": 4506 }, { "epoch": 0.7819222761970853, "grad_norm": 3.157895088195801, "learning_rate": 3.463651866996397e-05, "loss": 0.5482, "step": 4507 }, { "epoch": 0.7820957668285913, "grad_norm": 0.9121180176734924, "learning_rate": 3.463187464355655e-05, "loss": 0.676, "step": 4508 }, { "epoch": 0.7822692574600971, "grad_norm": 0.6632499694824219, "learning_rate": 3.462722891910303e-05, "loss": 0.6271, "step": 4509 }, { "epoch": 0.7824427480916031, "grad_norm": 0.696527361869812, "learning_rate": 3.462258149714255e-05, "loss": 0.6219, "step": 4510 }, { "epoch": 0.7826162387231089, "grad_norm": 1.442875623703003, "learning_rate": 3.461793237821445e-05, "loss": 0.6343, "step": 4511 }, { "epoch": 0.7827897293546149, "grad_norm": 0.8152379393577576, "learning_rate": 3.461328156285826e-05, "loss": 0.7207, "step": 4512 }, { "epoch": 0.7829632199861207, "grad_norm": 1.4653974771499634, "learning_rate": 3.460862905161372e-05, "loss": 0.6526, "step": 4513 }, { "epoch": 0.7831367106176267, "grad_norm": 0.8740278482437134, "learning_rate": 3.4603974845020754e-05, "loss": 0.5209, "step": 4514 }, { "epoch": 0.7833102012491325, "grad_norm": 0.7870283722877502, "learning_rate": 3.459931894361949e-05, "loss": 0.5172, "step": 4515 }, { "epoch": 0.7834836918806385, "grad_norm": 0.7225337624549866, "learning_rate": 3.4594661347950255e-05, "loss": 0.6119, "step": 4516 }, { "epoch": 0.7836571825121443, "grad_norm": 0.9904224276542664, "learning_rate": 3.459000205855356e-05, "loss": 0.571, "step": 4517 }, { "epoch": 0.7838306731436503, "grad_norm": 0.6905709505081177, "learning_rate": 3.458534107597013e-05, "loss": 0.6287, "step": 4518 }, { "epoch": 0.7840041637751561, "grad_norm": 0.8519802689552307, "learning_rate": 3.458067840074087e-05, "loss": 0.5784, "step": 4519 }, { "epoch": 0.7841776544066621, "grad_norm": 1.2368385791778564, "learning_rate": 3.457601403340689e-05, "loss": 0.5349, "step": 4520 }, { "epoch": 0.7843511450381679, "grad_norm": 0.8582872748374939, "learning_rate": 3.45713479745095e-05, "loss": 0.55, "step": 4521 }, { "epoch": 0.7845246356696738, "grad_norm": 0.8825411200523376, "learning_rate": 3.45666802245902e-05, "loss": 0.5625, "step": 4522 }, { "epoch": 0.7846981263011797, "grad_norm": 0.8189730048179626, "learning_rate": 3.456201078419068e-05, "loss": 0.4833, "step": 4523 }, { "epoch": 0.7848716169326856, "grad_norm": 0.8698748350143433, "learning_rate": 3.455733965385284e-05, "loss": 0.5504, "step": 4524 }, { "epoch": 0.7850451075641915, "grad_norm": 2.195985794067383, "learning_rate": 3.455266683411878e-05, "loss": 0.5345, "step": 4525 }, { "epoch": 0.7852185981956974, "grad_norm": 0.6514781713485718, "learning_rate": 3.454799232553077e-05, "loss": 0.5372, "step": 4526 }, { "epoch": 0.7853920888272033, "grad_norm": 1.0037038326263428, "learning_rate": 3.45433161286313e-05, "loss": 0.4568, "step": 4527 }, { "epoch": 0.7855655794587092, "grad_norm": 2.1790852546691895, "learning_rate": 3.4538638243963045e-05, "loss": 0.5914, "step": 4528 }, { "epoch": 0.7857390700902152, "grad_norm": 0.7083577513694763, "learning_rate": 3.453395867206888e-05, "loss": 0.5159, "step": 4529 }, { "epoch": 0.785912560721721, "grad_norm": 0.737430989742279, "learning_rate": 3.4529277413491885e-05, "loss": 0.6462, "step": 4530 }, { "epoch": 0.786086051353227, "grad_norm": 0.9406954646110535, "learning_rate": 3.452459446877531e-05, "loss": 0.5854, "step": 4531 }, { "epoch": 0.7862595419847328, "grad_norm": 0.6856603026390076, "learning_rate": 3.451990983846262e-05, "loss": 0.5818, "step": 4532 }, { "epoch": 0.7864330326162388, "grad_norm": 1.1390035152435303, "learning_rate": 3.4515223523097476e-05, "loss": 0.5536, "step": 4533 }, { "epoch": 0.7866065232477446, "grad_norm": 0.7555428147315979, "learning_rate": 3.451053552322373e-05, "loss": 0.52, "step": 4534 }, { "epoch": 0.7867800138792506, "grad_norm": 1.1655290126800537, "learning_rate": 3.4505845839385426e-05, "loss": 0.5337, "step": 4535 }, { "epoch": 0.7869535045107564, "grad_norm": 0.8107268214225769, "learning_rate": 3.4501154472126815e-05, "loss": 0.531, "step": 4536 }, { "epoch": 0.7871269951422624, "grad_norm": 0.6117629408836365, "learning_rate": 3.449646142199233e-05, "loss": 0.5676, "step": 4537 }, { "epoch": 0.7873004857737682, "grad_norm": 0.5959532856941223, "learning_rate": 3.4491766689526596e-05, "loss": 0.703, "step": 4538 }, { "epoch": 0.7874739764052742, "grad_norm": 1.2634477615356445, "learning_rate": 3.4487070275274454e-05, "loss": 0.5743, "step": 4539 }, { "epoch": 0.78764746703678, "grad_norm": 0.7086024880409241, "learning_rate": 3.448237217978093e-05, "loss": 0.6033, "step": 4540 }, { "epoch": 0.7878209576682859, "grad_norm": 0.9089869260787964, "learning_rate": 3.447767240359124e-05, "loss": 0.7173, "step": 4541 }, { "epoch": 0.7879944482997918, "grad_norm": 0.8030762672424316, "learning_rate": 3.4472970947250794e-05, "loss": 0.5846, "step": 4542 }, { "epoch": 0.7881679389312977, "grad_norm": 0.5570784211158752, "learning_rate": 3.4468267811305206e-05, "loss": 0.5676, "step": 4543 }, { "epoch": 0.7883414295628036, "grad_norm": 0.7393231391906738, "learning_rate": 3.446356299630028e-05, "loss": 0.5032, "step": 4544 }, { "epoch": 0.7885149201943095, "grad_norm": 0.7207363843917847, "learning_rate": 3.4458856502782016e-05, "loss": 0.5482, "step": 4545 }, { "epoch": 0.7886884108258154, "grad_norm": 0.9359233975410461, "learning_rate": 3.4454148331296606e-05, "loss": 0.5486, "step": 4546 }, { "epoch": 0.7888619014573213, "grad_norm": 0.8161929249763489, "learning_rate": 3.444943848239044e-05, "loss": 0.5615, "step": 4547 }, { "epoch": 0.7890353920888272, "grad_norm": 1.0180940628051758, "learning_rate": 3.44447269566101e-05, "loss": 0.5215, "step": 4548 }, { "epoch": 0.7892088827203331, "grad_norm": 0.7428413033485413, "learning_rate": 3.444001375450237e-05, "loss": 0.5164, "step": 4549 }, { "epoch": 0.789382373351839, "grad_norm": 1.1766921281814575, "learning_rate": 3.443529887661421e-05, "loss": 0.5972, "step": 4550 }, { "epoch": 0.7895558639833449, "grad_norm": 0.6415600180625916, "learning_rate": 3.4430582323492805e-05, "loss": 0.5316, "step": 4551 }, { "epoch": 0.7897293546148508, "grad_norm": 0.6624864339828491, "learning_rate": 3.44258640956855e-05, "loss": 0.5677, "step": 4552 }, { "epoch": 0.7899028452463567, "grad_norm": 0.7604926228523254, "learning_rate": 3.4421144193739856e-05, "loss": 0.6213, "step": 4553 }, { "epoch": 0.7900763358778626, "grad_norm": 0.6163475513458252, "learning_rate": 3.441642261820363e-05, "loss": 0.5591, "step": 4554 }, { "epoch": 0.7902498265093685, "grad_norm": 1.185647964477539, "learning_rate": 3.441169936962475e-05, "loss": 0.5846, "step": 4555 }, { "epoch": 0.7904233171408744, "grad_norm": 0.9624270796775818, "learning_rate": 3.440697444855137e-05, "loss": 0.5084, "step": 4556 }, { "epoch": 0.7905968077723803, "grad_norm": 0.5973623991012573, "learning_rate": 3.440224785553183e-05, "loss": 0.6094, "step": 4557 }, { "epoch": 0.7907702984038862, "grad_norm": 0.6004007458686829, "learning_rate": 3.439751959111463e-05, "loss": 0.6891, "step": 4558 }, { "epoch": 0.7909437890353921, "grad_norm": 0.9249184131622314, "learning_rate": 3.439278965584851e-05, "loss": 0.5569, "step": 4559 }, { "epoch": 0.7911172796668979, "grad_norm": 0.8773783445358276, "learning_rate": 3.438805805028238e-05, "loss": 0.5096, "step": 4560 }, { "epoch": 0.7912907702984039, "grad_norm": 0.7292712330818176, "learning_rate": 3.438332477496534e-05, "loss": 0.6202, "step": 4561 }, { "epoch": 0.7914642609299097, "grad_norm": 1.348038911819458, "learning_rate": 3.4378589830446714e-05, "loss": 0.6636, "step": 4562 }, { "epoch": 0.7916377515614157, "grad_norm": 0.7116172909736633, "learning_rate": 3.437385321727597e-05, "loss": 0.6177, "step": 4563 }, { "epoch": 0.7918112421929215, "grad_norm": 0.6307856440544128, "learning_rate": 3.436911493600282e-05, "loss": 0.698, "step": 4564 }, { "epoch": 0.7919847328244275, "grad_norm": 0.7494140267372131, "learning_rate": 3.436437498717713e-05, "loss": 0.6954, "step": 4565 }, { "epoch": 0.7921582234559333, "grad_norm": 0.830976128578186, "learning_rate": 3.4359633371349e-05, "loss": 0.5254, "step": 4566 }, { "epoch": 0.7923317140874393, "grad_norm": 0.6158875226974487, "learning_rate": 3.435489008906867e-05, "loss": 0.6903, "step": 4567 }, { "epoch": 0.7925052047189451, "grad_norm": 0.6615442037582397, "learning_rate": 3.435014514088662e-05, "loss": 0.694, "step": 4568 }, { "epoch": 0.7926786953504511, "grad_norm": 1.397637963294983, "learning_rate": 3.434539852735352e-05, "loss": 0.5242, "step": 4569 }, { "epoch": 0.792852185981957, "grad_norm": 1.1062660217285156, "learning_rate": 3.434065024902019e-05, "loss": 0.5393, "step": 4570 }, { "epoch": 0.7930256766134629, "grad_norm": 0.7684928178787231, "learning_rate": 3.4335900306437694e-05, "loss": 0.6217, "step": 4571 }, { "epoch": 0.7931991672449688, "grad_norm": 0.6392742395401001, "learning_rate": 3.4331148700157263e-05, "loss": 0.5267, "step": 4572 }, { "epoch": 0.7933726578764747, "grad_norm": 0.6729644536972046, "learning_rate": 3.4326395430730325e-05, "loss": 0.7158, "step": 4573 }, { "epoch": 0.7935461485079806, "grad_norm": 1.0804266929626465, "learning_rate": 3.43216404987085e-05, "loss": 0.5745, "step": 4574 }, { "epoch": 0.7937196391394865, "grad_norm": 0.7597807049751282, "learning_rate": 3.431688390464361e-05, "loss": 0.5743, "step": 4575 }, { "epoch": 0.7938931297709924, "grad_norm": 0.9156981110572815, "learning_rate": 3.4312125649087664e-05, "loss": 0.5352, "step": 4576 }, { "epoch": 0.7940666204024983, "grad_norm": 0.7099980115890503, "learning_rate": 3.4307365732592854e-05, "loss": 0.6041, "step": 4577 }, { "epoch": 0.7942401110340042, "grad_norm": 0.8593411445617676, "learning_rate": 3.430260415571158e-05, "loss": 0.4979, "step": 4578 }, { "epoch": 0.7944136016655101, "grad_norm": 0.7119089365005493, "learning_rate": 3.429784091899644e-05, "loss": 0.5009, "step": 4579 }, { "epoch": 0.794587092297016, "grad_norm": 0.6532433032989502, "learning_rate": 3.429307602300019e-05, "loss": 0.6216, "step": 4580 }, { "epoch": 0.7947605829285218, "grad_norm": 0.6321079134941101, "learning_rate": 3.428830946827581e-05, "loss": 0.6637, "step": 4581 }, { "epoch": 0.7949340735600278, "grad_norm": 0.6303501129150391, "learning_rate": 3.4283541255376466e-05, "loss": 0.6283, "step": 4582 }, { "epoch": 0.7951075641915336, "grad_norm": 0.6134404540061951, "learning_rate": 3.427877138485552e-05, "loss": 0.626, "step": 4583 }, { "epoch": 0.7952810548230396, "grad_norm": 0.7335180044174194, "learning_rate": 3.427399985726652e-05, "loss": 0.4523, "step": 4584 }, { "epoch": 0.7954545454545454, "grad_norm": 0.836480975151062, "learning_rate": 3.4269226673163204e-05, "loss": 0.5232, "step": 4585 }, { "epoch": 0.7956280360860514, "grad_norm": 0.7442110180854797, "learning_rate": 3.42644518330995e-05, "loss": 0.5751, "step": 4586 }, { "epoch": 0.7958015267175572, "grad_norm": 0.6837741136550903, "learning_rate": 3.425967533762954e-05, "loss": 0.6285, "step": 4587 }, { "epoch": 0.7959750173490632, "grad_norm": 0.8320590257644653, "learning_rate": 3.425489718730765e-05, "loss": 0.6012, "step": 4588 }, { "epoch": 0.796148507980569, "grad_norm": 0.7570087313652039, "learning_rate": 3.425011738268832e-05, "loss": 0.5479, "step": 4589 }, { "epoch": 0.796321998612075, "grad_norm": 0.69992595911026, "learning_rate": 3.4245335924326274e-05, "loss": 0.5217, "step": 4590 }, { "epoch": 0.7964954892435808, "grad_norm": 0.8826947808265686, "learning_rate": 3.424055281277638e-05, "loss": 0.5259, "step": 4591 }, { "epoch": 0.7966689798750868, "grad_norm": 0.7894955277442932, "learning_rate": 3.423576804859375e-05, "loss": 0.507, "step": 4592 }, { "epoch": 0.7968424705065926, "grad_norm": 1.5117219686508179, "learning_rate": 3.423098163233365e-05, "loss": 0.5177, "step": 4593 }, { "epoch": 0.7970159611380986, "grad_norm": 0.9371479153633118, "learning_rate": 3.422619356455154e-05, "loss": 0.6943, "step": 4594 }, { "epoch": 0.7971894517696044, "grad_norm": 0.8013246059417725, "learning_rate": 3.42214038458031e-05, "loss": 0.5144, "step": 4595 }, { "epoch": 0.7973629424011104, "grad_norm": 0.6921529173851013, "learning_rate": 3.421661247664417e-05, "loss": 0.5118, "step": 4596 }, { "epoch": 0.7975364330326162, "grad_norm": 0.7774759531021118, "learning_rate": 3.4211819457630795e-05, "loss": 0.578, "step": 4597 }, { "epoch": 0.7977099236641222, "grad_norm": 0.7389329671859741, "learning_rate": 3.420702478931921e-05, "loss": 0.5549, "step": 4598 }, { "epoch": 0.797883414295628, "grad_norm": 1.7281907796859741, "learning_rate": 3.420222847226585e-05, "loss": 0.7001, "step": 4599 }, { "epoch": 0.7980569049271339, "grad_norm": 0.6511208415031433, "learning_rate": 3.419743050702732e-05, "loss": 0.5558, "step": 4600 }, { "epoch": 0.7982303955586398, "grad_norm": 0.9398014545440674, "learning_rate": 3.4192630894160435e-05, "loss": 0.4885, "step": 4601 }, { "epoch": 0.7984038861901457, "grad_norm": 0.6670880913734436, "learning_rate": 3.4187829634222205e-05, "loss": 0.5802, "step": 4602 }, { "epoch": 0.7985773768216516, "grad_norm": 1.0299681425094604, "learning_rate": 3.4183026727769806e-05, "loss": 0.4952, "step": 4603 }, { "epoch": 0.7987508674531575, "grad_norm": 0.7633277773857117, "learning_rate": 3.417822217536064e-05, "loss": 0.5039, "step": 4604 }, { "epoch": 0.7989243580846634, "grad_norm": 0.5957623720169067, "learning_rate": 3.417341597755226e-05, "loss": 0.689, "step": 4605 }, { "epoch": 0.7990978487161693, "grad_norm": 0.8645452260971069, "learning_rate": 3.4168608134902443e-05, "loss": 0.6707, "step": 4606 }, { "epoch": 0.7992713393476752, "grad_norm": 0.6606653928756714, "learning_rate": 3.416379864796914e-05, "loss": 0.6157, "step": 4607 }, { "epoch": 0.7994448299791811, "grad_norm": 0.6899295449256897, "learning_rate": 3.4158987517310506e-05, "loss": 0.6846, "step": 4608 }, { "epoch": 0.799618320610687, "grad_norm": 0.7298061847686768, "learning_rate": 3.4154174743484865e-05, "loss": 0.5833, "step": 4609 }, { "epoch": 0.7997918112421929, "grad_norm": 0.6306266784667969, "learning_rate": 3.414936032705075e-05, "loss": 0.6777, "step": 4610 }, { "epoch": 0.7999653018736989, "grad_norm": 0.6362050175666809, "learning_rate": 3.414454426856689e-05, "loss": 0.5823, "step": 4611 }, { "epoch": 0.8001387925052047, "grad_norm": 0.563105583190918, "learning_rate": 3.413972656859218e-05, "loss": 0.5461, "step": 4612 }, { "epoch": 0.8003122831367107, "grad_norm": 0.7751722931861877, "learning_rate": 3.413490722768573e-05, "loss": 0.5826, "step": 4613 }, { "epoch": 0.8004857737682165, "grad_norm": 0.6509333252906799, "learning_rate": 3.413008624640683e-05, "loss": 0.5813, "step": 4614 }, { "epoch": 0.8006592643997225, "grad_norm": 0.697648823261261, "learning_rate": 3.412526362531495e-05, "loss": 0.5458, "step": 4615 }, { "epoch": 0.8008327550312283, "grad_norm": 0.7336834073066711, "learning_rate": 3.4120439364969766e-05, "loss": 0.5037, "step": 4616 }, { "epoch": 0.8010062456627343, "grad_norm": 0.643248438835144, "learning_rate": 3.4115613465931145e-05, "loss": 0.6239, "step": 4617 }, { "epoch": 0.8011797362942401, "grad_norm": 0.860309898853302, "learning_rate": 3.411078592875912e-05, "loss": 0.5322, "step": 4618 }, { "epoch": 0.8013532269257461, "grad_norm": 0.6955852508544922, "learning_rate": 3.4105956754013966e-05, "loss": 0.6305, "step": 4619 }, { "epoch": 0.8015267175572519, "grad_norm": 0.5716445446014404, "learning_rate": 3.410112594225607e-05, "loss": 0.5771, "step": 4620 }, { "epoch": 0.8017002081887578, "grad_norm": 0.517090380191803, "learning_rate": 3.4096293494046103e-05, "loss": 0.6381, "step": 4621 }, { "epoch": 0.8018736988202637, "grad_norm": 0.6566348671913147, "learning_rate": 3.4091459409944836e-05, "loss": 0.6127, "step": 4622 }, { "epoch": 0.8020471894517696, "grad_norm": 0.6508132815361023, "learning_rate": 3.408662369051329e-05, "loss": 0.5328, "step": 4623 }, { "epoch": 0.8022206800832755, "grad_norm": 0.7261322736740112, "learning_rate": 3.408178633631265e-05, "loss": 0.5547, "step": 4624 }, { "epoch": 0.8023941707147814, "grad_norm": 0.6083239316940308, "learning_rate": 3.407694734790429e-05, "loss": 0.5652, "step": 4625 }, { "epoch": 0.8025676613462873, "grad_norm": 0.6434155702590942, "learning_rate": 3.407210672584979e-05, "loss": 0.5436, "step": 4626 }, { "epoch": 0.8027411519777932, "grad_norm": 0.9602734446525574, "learning_rate": 3.406726447071091e-05, "loss": 0.5203, "step": 4627 }, { "epoch": 0.8029146426092991, "grad_norm": 0.6316145062446594, "learning_rate": 3.40624205830496e-05, "loss": 0.5953, "step": 4628 }, { "epoch": 0.803088133240805, "grad_norm": 0.5899636745452881, "learning_rate": 3.405757506342799e-05, "loss": 0.6345, "step": 4629 }, { "epoch": 0.8032616238723109, "grad_norm": 0.681353747844696, "learning_rate": 3.4052727912408414e-05, "loss": 0.6948, "step": 4630 }, { "epoch": 0.8034351145038168, "grad_norm": 0.6250098943710327, "learning_rate": 3.404787913055339e-05, "loss": 0.598, "step": 4631 }, { "epoch": 0.8036086051353227, "grad_norm": 0.7942088842391968, "learning_rate": 3.404302871842563e-05, "loss": 0.6128, "step": 4632 }, { "epoch": 0.8037820957668286, "grad_norm": 0.765593945980072, "learning_rate": 3.4038176676588014e-05, "loss": 0.5671, "step": 4633 }, { "epoch": 0.8039555863983345, "grad_norm": 0.6088782548904419, "learning_rate": 3.403332300560364e-05, "loss": 0.6742, "step": 4634 }, { "epoch": 0.8041290770298404, "grad_norm": 1.0885931253433228, "learning_rate": 3.402846770603578e-05, "loss": 0.6034, "step": 4635 }, { "epoch": 0.8043025676613463, "grad_norm": 0.6098711490631104, "learning_rate": 3.4023610778447895e-05, "loss": 0.5867, "step": 4636 }, { "epoch": 0.8044760582928522, "grad_norm": 0.7530845403671265, "learning_rate": 3.4018752223403634e-05, "loss": 0.5793, "step": 4637 }, { "epoch": 0.8046495489243581, "grad_norm": 0.7427805662155151, "learning_rate": 3.401389204146685e-05, "loss": 0.6084, "step": 4638 }, { "epoch": 0.804823039555864, "grad_norm": 0.734031617641449, "learning_rate": 3.400903023320156e-05, "loss": 0.5615, "step": 4639 }, { "epoch": 0.8049965301873698, "grad_norm": 0.6565466523170471, "learning_rate": 3.4004166799172004e-05, "loss": 0.5858, "step": 4640 }, { "epoch": 0.8051700208188758, "grad_norm": 0.8570353984832764, "learning_rate": 3.399930173994255e-05, "loss": 0.7297, "step": 4641 }, { "epoch": 0.8053435114503816, "grad_norm": 0.7440707683563232, "learning_rate": 3.3994435056077827e-05, "loss": 0.5283, "step": 4642 }, { "epoch": 0.8055170020818876, "grad_norm": 0.7319448590278625, "learning_rate": 3.398956674814261e-05, "loss": 0.6826, "step": 4643 }, { "epoch": 0.8056904927133934, "grad_norm": 0.7692334651947021, "learning_rate": 3.3984696816701865e-05, "loss": 0.5645, "step": 4644 }, { "epoch": 0.8058639833448994, "grad_norm": 0.7705518007278442, "learning_rate": 3.397982526232077e-05, "loss": 0.5481, "step": 4645 }, { "epoch": 0.8060374739764052, "grad_norm": 0.7228432297706604, "learning_rate": 3.397495208556465e-05, "loss": 0.5607, "step": 4646 }, { "epoch": 0.8062109646079112, "grad_norm": 0.7670116424560547, "learning_rate": 3.397007728699907e-05, "loss": 0.6168, "step": 4647 }, { "epoch": 0.806384455239417, "grad_norm": 0.7331372499465942, "learning_rate": 3.3965200867189734e-05, "loss": 0.4906, "step": 4648 }, { "epoch": 0.806557945870923, "grad_norm": 0.5422827005386353, "learning_rate": 3.3960322826702565e-05, "loss": 0.5981, "step": 4649 }, { "epoch": 0.8067314365024288, "grad_norm": 0.6020506620407104, "learning_rate": 3.395544316610367e-05, "loss": 0.5762, "step": 4650 }, { "epoch": 0.8069049271339348, "grad_norm": 0.8605988025665283, "learning_rate": 3.395056188595933e-05, "loss": 0.5676, "step": 4651 }, { "epoch": 0.8070784177654406, "grad_norm": 0.6065489649772644, "learning_rate": 3.394567898683602e-05, "loss": 0.5508, "step": 4652 }, { "epoch": 0.8072519083969466, "grad_norm": 0.6497067213058472, "learning_rate": 3.394079446930043e-05, "loss": 0.5767, "step": 4653 }, { "epoch": 0.8074253990284525, "grad_norm": 0.7564646601676941, "learning_rate": 3.3935908333919385e-05, "loss": 0.6957, "step": 4654 }, { "epoch": 0.8075988896599584, "grad_norm": 0.6681103706359863, "learning_rate": 3.393102058125995e-05, "loss": 0.5319, "step": 4655 }, { "epoch": 0.8077723802914643, "grad_norm": 0.7304568886756897, "learning_rate": 3.3926131211889336e-05, "loss": 0.5475, "step": 4656 }, { "epoch": 0.8079458709229702, "grad_norm": 0.6326201558113098, "learning_rate": 3.392124022637497e-05, "loss": 0.6721, "step": 4657 }, { "epoch": 0.8081193615544761, "grad_norm": 0.622427225112915, "learning_rate": 3.391634762528445e-05, "loss": 0.5912, "step": 4658 }, { "epoch": 0.8082928521859819, "grad_norm": 0.850075900554657, "learning_rate": 3.391145340918557e-05, "loss": 0.5068, "step": 4659 }, { "epoch": 0.8084663428174879, "grad_norm": 0.8585726022720337, "learning_rate": 3.3906557578646317e-05, "loss": 0.5958, "step": 4660 }, { "epoch": 0.8086398334489937, "grad_norm": 0.7498711347579956, "learning_rate": 3.390166013423485e-05, "loss": 0.4838, "step": 4661 }, { "epoch": 0.8088133240804997, "grad_norm": 0.7035397887229919, "learning_rate": 3.389676107651953e-05, "loss": 0.6271, "step": 4662 }, { "epoch": 0.8089868147120055, "grad_norm": 0.642154335975647, "learning_rate": 3.38918604060689e-05, "loss": 0.6163, "step": 4663 }, { "epoch": 0.8091603053435115, "grad_norm": 0.7695229053497314, "learning_rate": 3.388695812345168e-05, "loss": 0.5443, "step": 4664 }, { "epoch": 0.8093337959750173, "grad_norm": 0.7467740178108215, "learning_rate": 3.388205422923678e-05, "loss": 0.6442, "step": 4665 }, { "epoch": 0.8095072866065233, "grad_norm": 0.5992782115936279, "learning_rate": 3.3877148723993306e-05, "loss": 0.5465, "step": 4666 }, { "epoch": 0.8096807772380291, "grad_norm": 0.7369151711463928, "learning_rate": 3.387224160829057e-05, "loss": 0.5757, "step": 4667 }, { "epoch": 0.8098542678695351, "grad_norm": 0.7979943156242371, "learning_rate": 3.3867332882698016e-05, "loss": 0.671, "step": 4668 }, { "epoch": 0.8100277585010409, "grad_norm": 0.5905467867851257, "learning_rate": 3.386242254778533e-05, "loss": 0.6077, "step": 4669 }, { "epoch": 0.8102012491325469, "grad_norm": 0.781458854675293, "learning_rate": 3.385751060412235e-05, "loss": 0.6287, "step": 4670 }, { "epoch": 0.8103747397640527, "grad_norm": 0.7113485336303711, "learning_rate": 3.3852597052279124e-05, "loss": 0.5155, "step": 4671 }, { "epoch": 0.8105482303955587, "grad_norm": 0.7800762057304382, "learning_rate": 3.3847681892825865e-05, "loss": 0.536, "step": 4672 }, { "epoch": 0.8107217210270645, "grad_norm": 0.6829761266708374, "learning_rate": 3.3842765126332984e-05, "loss": 0.6334, "step": 4673 }, { "epoch": 0.8108952116585705, "grad_norm": 0.6885767579078674, "learning_rate": 3.383784675337108e-05, "loss": 0.4915, "step": 4674 }, { "epoch": 0.8110687022900763, "grad_norm": 0.5690920352935791, "learning_rate": 3.383292677451094e-05, "loss": 0.5786, "step": 4675 }, { "epoch": 0.8112421929215823, "grad_norm": 0.6843216419219971, "learning_rate": 3.382800519032353e-05, "loss": 0.5209, "step": 4676 }, { "epoch": 0.8114156835530881, "grad_norm": 0.588300883769989, "learning_rate": 3.382308200138e-05, "loss": 0.6359, "step": 4677 }, { "epoch": 0.8115891741845941, "grad_norm": 0.682434618473053, "learning_rate": 3.38181572082517e-05, "loss": 0.6154, "step": 4678 }, { "epoch": 0.8117626648160999, "grad_norm": 1.069860577583313, "learning_rate": 3.381323081151015e-05, "loss": 0.5594, "step": 4679 }, { "epoch": 0.8119361554476058, "grad_norm": 0.8422788381576538, "learning_rate": 3.3808302811727074e-05, "loss": 0.516, "step": 4680 }, { "epoch": 0.8121096460791117, "grad_norm": 1.0733751058578491, "learning_rate": 3.380337320947437e-05, "loss": 0.498, "step": 4681 }, { "epoch": 0.8122831367106176, "grad_norm": 0.6904345750808716, "learning_rate": 3.379844200532411e-05, "loss": 0.6714, "step": 4682 }, { "epoch": 0.8124566273421235, "grad_norm": 0.8521090745925903, "learning_rate": 3.379350919984858e-05, "loss": 0.541, "step": 4683 }, { "epoch": 0.8126301179736294, "grad_norm": 0.7869723439216614, "learning_rate": 3.378857479362024e-05, "loss": 0.5409, "step": 4684 }, { "epoch": 0.8128036086051353, "grad_norm": 0.8046953678131104, "learning_rate": 3.378363878721173e-05, "loss": 0.5199, "step": 4685 }, { "epoch": 0.8129770992366412, "grad_norm": 0.5668148994445801, "learning_rate": 3.377870118119587e-05, "loss": 0.6606, "step": 4686 }, { "epoch": 0.8131505898681471, "grad_norm": 0.6504331231117249, "learning_rate": 3.377376197614568e-05, "loss": 0.631, "step": 4687 }, { "epoch": 0.813324080499653, "grad_norm": 0.6131774187088013, "learning_rate": 3.376882117263437e-05, "loss": 0.6415, "step": 4688 }, { "epoch": 0.813497571131159, "grad_norm": 0.905603289604187, "learning_rate": 3.376387877123533e-05, "loss": 0.5316, "step": 4689 }, { "epoch": 0.8136710617626648, "grad_norm": 0.8098279237747192, "learning_rate": 3.37589347725221e-05, "loss": 0.5935, "step": 4690 }, { "epoch": 0.8138445523941708, "grad_norm": 0.5495930314064026, "learning_rate": 3.375398917706847e-05, "loss": 0.7113, "step": 4691 }, { "epoch": 0.8140180430256766, "grad_norm": 0.8750534653663635, "learning_rate": 3.374904198544836e-05, "loss": 0.6096, "step": 4692 }, { "epoch": 0.8141915336571826, "grad_norm": 0.7372435331344604, "learning_rate": 3.374409319823592e-05, "loss": 0.6508, "step": 4693 }, { "epoch": 0.8143650242886884, "grad_norm": 0.7002584934234619, "learning_rate": 3.373914281600544e-05, "loss": 0.5582, "step": 4694 }, { "epoch": 0.8145385149201944, "grad_norm": 0.6547242403030396, "learning_rate": 3.3734190839331425e-05, "loss": 0.5031, "step": 4695 }, { "epoch": 0.8147120055517002, "grad_norm": 0.6443631649017334, "learning_rate": 3.372923726878856e-05, "loss": 0.6936, "step": 4696 }, { "epoch": 0.8148854961832062, "grad_norm": 0.5747844576835632, "learning_rate": 3.372428210495172e-05, "loss": 0.6733, "step": 4697 }, { "epoch": 0.815058986814712, "grad_norm": 0.6448113918304443, "learning_rate": 3.371932534839594e-05, "loss": 0.66, "step": 4698 }, { "epoch": 0.8152324774462179, "grad_norm": 0.8336846828460693, "learning_rate": 3.371436699969648e-05, "loss": 0.4602, "step": 4699 }, { "epoch": 0.8154059680777238, "grad_norm": 0.7222806215286255, "learning_rate": 3.370940705942874e-05, "loss": 0.5013, "step": 4700 }, { "epoch": 0.8155794587092297, "grad_norm": 1.2259315252304077, "learning_rate": 3.3704445528168335e-05, "loss": 0.6086, "step": 4701 }, { "epoch": 0.8157529493407356, "grad_norm": 0.8463292121887207, "learning_rate": 3.369948240649106e-05, "loss": 0.4943, "step": 4702 }, { "epoch": 0.8159264399722415, "grad_norm": 0.6418718695640564, "learning_rate": 3.369451769497289e-05, "loss": 0.5498, "step": 4703 }, { "epoch": 0.8160999306037474, "grad_norm": 0.634361207485199, "learning_rate": 3.368955139418998e-05, "loss": 0.5438, "step": 4704 }, { "epoch": 0.8162734212352533, "grad_norm": 0.6433486938476562, "learning_rate": 3.368458350471868e-05, "loss": 0.6433, "step": 4705 }, { "epoch": 0.8164469118667592, "grad_norm": 0.7039868235588074, "learning_rate": 3.367961402713553e-05, "loss": 0.6018, "step": 4706 }, { "epoch": 0.8166204024982651, "grad_norm": 2.6011016368865967, "learning_rate": 3.3674642962017215e-05, "loss": 0.5925, "step": 4707 }, { "epoch": 0.816793893129771, "grad_norm": 0.8203527331352234, "learning_rate": 3.3669670309940663e-05, "loss": 0.4731, "step": 4708 }, { "epoch": 0.8169673837612769, "grad_norm": 0.862680196762085, "learning_rate": 3.366469607148293e-05, "loss": 0.6145, "step": 4709 }, { "epoch": 0.8171408743927828, "grad_norm": 0.8152139782905579, "learning_rate": 3.365972024722131e-05, "loss": 0.6744, "step": 4710 }, { "epoch": 0.8173143650242887, "grad_norm": 0.7853912711143494, "learning_rate": 3.365474283773323e-05, "loss": 0.5402, "step": 4711 }, { "epoch": 0.8174878556557946, "grad_norm": 0.8514853715896606, "learning_rate": 3.3649763843596334e-05, "loss": 0.5544, "step": 4712 }, { "epoch": 0.8176613462873005, "grad_norm": 0.7401768565177917, "learning_rate": 3.364478326538844e-05, "loss": 0.5277, "step": 4713 }, { "epoch": 0.8178348369188064, "grad_norm": 0.9554262161254883, "learning_rate": 3.363980110368755e-05, "loss": 0.5295, "step": 4714 }, { "epoch": 0.8180083275503123, "grad_norm": 0.6825421452522278, "learning_rate": 3.363481735907185e-05, "loss": 0.6483, "step": 4715 }, { "epoch": 0.8181818181818182, "grad_norm": 0.6086795926094055, "learning_rate": 3.36298320321197e-05, "loss": 0.5706, "step": 4716 }, { "epoch": 0.8183553088133241, "grad_norm": 0.9270070791244507, "learning_rate": 3.3624845123409665e-05, "loss": 0.4869, "step": 4717 }, { "epoch": 0.8185287994448299, "grad_norm": 0.7197990417480469, "learning_rate": 3.361985663352048e-05, "loss": 0.5343, "step": 4718 }, { "epoch": 0.8187022900763359, "grad_norm": 0.7212938070297241, "learning_rate": 3.361486656303106e-05, "loss": 0.5411, "step": 4719 }, { "epoch": 0.8188757807078417, "grad_norm": 0.613197386264801, "learning_rate": 3.360987491252051e-05, "loss": 0.6571, "step": 4720 }, { "epoch": 0.8190492713393477, "grad_norm": 0.7823191285133362, "learning_rate": 3.3604881682568126e-05, "loss": 0.4731, "step": 4721 }, { "epoch": 0.8192227619708535, "grad_norm": 0.8566967844963074, "learning_rate": 3.359988687375336e-05, "loss": 0.4845, "step": 4722 }, { "epoch": 0.8193962526023595, "grad_norm": 0.6343022584915161, "learning_rate": 3.359489048665587e-05, "loss": 0.5663, "step": 4723 }, { "epoch": 0.8195697432338653, "grad_norm": 1.0479716062545776, "learning_rate": 3.3589892521855515e-05, "loss": 0.526, "step": 4724 }, { "epoch": 0.8197432338653713, "grad_norm": 1.333949327468872, "learning_rate": 3.3584892979932284e-05, "loss": 0.5077, "step": 4725 }, { "epoch": 0.8199167244968771, "grad_norm": 0.8642805814743042, "learning_rate": 3.35798918614664e-05, "loss": 0.5461, "step": 4726 }, { "epoch": 0.8200902151283831, "grad_norm": 1.1042691469192505, "learning_rate": 3.357488916703824e-05, "loss": 0.475, "step": 4727 }, { "epoch": 0.8202637057598889, "grad_norm": 0.8307474851608276, "learning_rate": 3.356988489722837e-05, "loss": 0.5586, "step": 4728 }, { "epoch": 0.8204371963913949, "grad_norm": 0.8251770734786987, "learning_rate": 3.3564879052617555e-05, "loss": 0.6156, "step": 4729 }, { "epoch": 0.8206106870229007, "grad_norm": 0.7211297750473022, "learning_rate": 3.355987163378671e-05, "loss": 0.5853, "step": 4730 }, { "epoch": 0.8207841776544067, "grad_norm": 0.8894070386886597, "learning_rate": 3.3554862641316965e-05, "loss": 0.5624, "step": 4731 }, { "epoch": 0.8209576682859125, "grad_norm": 0.6238900423049927, "learning_rate": 3.354985207578961e-05, "loss": 0.6147, "step": 4732 }, { "epoch": 0.8211311589174185, "grad_norm": 0.5919173359870911, "learning_rate": 3.354483993778614e-05, "loss": 0.6302, "step": 4733 }, { "epoch": 0.8213046495489243, "grad_norm": 0.5965079665184021, "learning_rate": 3.3539826227888216e-05, "loss": 0.7605, "step": 4734 }, { "epoch": 0.8214781401804303, "grad_norm": 0.6897684931755066, "learning_rate": 3.3534810946677676e-05, "loss": 0.6838, "step": 4735 }, { "epoch": 0.8216516308119362, "grad_norm": 0.7165369987487793, "learning_rate": 3.352979409473656e-05, "loss": 0.6315, "step": 4736 }, { "epoch": 0.8218251214434421, "grad_norm": 1.2224963903427124, "learning_rate": 3.3524775672647064e-05, "loss": 0.5433, "step": 4737 }, { "epoch": 0.821998612074948, "grad_norm": 0.7387986779212952, "learning_rate": 3.351975568099159e-05, "loss": 0.5634, "step": 4738 }, { "epoch": 0.8221721027064538, "grad_norm": 0.7433927059173584, "learning_rate": 3.3514734120352735e-05, "loss": 0.552, "step": 4739 }, { "epoch": 0.8223455933379598, "grad_norm": 0.7800529599189758, "learning_rate": 3.350971099131322e-05, "loss": 0.4694, "step": 4740 }, { "epoch": 0.8225190839694656, "grad_norm": 0.8435859084129333, "learning_rate": 3.350468629445601e-05, "loss": 0.5874, "step": 4741 }, { "epoch": 0.8226925746009716, "grad_norm": 0.8645241856575012, "learning_rate": 3.349966003036421e-05, "loss": 0.6349, "step": 4742 }, { "epoch": 0.8228660652324774, "grad_norm": 0.9837126135826111, "learning_rate": 3.3494632199621146e-05, "loss": 0.5375, "step": 4743 }, { "epoch": 0.8230395558639834, "grad_norm": 0.8240866661071777, "learning_rate": 3.3489602802810276e-05, "loss": 0.4889, "step": 4744 }, { "epoch": 0.8232130464954892, "grad_norm": 0.8092725276947021, "learning_rate": 3.3484571840515295e-05, "loss": 0.4625, "step": 4745 }, { "epoch": 0.8233865371269952, "grad_norm": 0.857721209526062, "learning_rate": 3.347953931332004e-05, "loss": 0.5581, "step": 4746 }, { "epoch": 0.823560027758501, "grad_norm": 4.3697943687438965, "learning_rate": 3.347450522180854e-05, "loss": 0.5386, "step": 4747 }, { "epoch": 0.823733518390007, "grad_norm": 0.7056412696838379, "learning_rate": 3.3469469566565e-05, "loss": 0.5598, "step": 4748 }, { "epoch": 0.8239070090215128, "grad_norm": 0.6662498116493225, "learning_rate": 3.3464432348173827e-05, "loss": 0.5824, "step": 4749 }, { "epoch": 0.8240804996530188, "grad_norm": 0.8417497873306274, "learning_rate": 3.345939356721959e-05, "loss": 0.5107, "step": 4750 }, { "epoch": 0.8242539902845246, "grad_norm": 0.5518035888671875, "learning_rate": 3.345435322428705e-05, "loss": 0.5029, "step": 4751 }, { "epoch": 0.8244274809160306, "grad_norm": 0.5892915725708008, "learning_rate": 3.3449311319961134e-05, "loss": 0.6656, "step": 4752 }, { "epoch": 0.8246009715475364, "grad_norm": 0.5732632279396057, "learning_rate": 3.344426785482697e-05, "loss": 0.6272, "step": 4753 }, { "epoch": 0.8247744621790424, "grad_norm": 0.6002928018569946, "learning_rate": 3.343922282946985e-05, "loss": 0.5825, "step": 4754 }, { "epoch": 0.8249479528105482, "grad_norm": 0.7098351120948792, "learning_rate": 3.343417624447527e-05, "loss": 0.5216, "step": 4755 }, { "epoch": 0.8251214434420542, "grad_norm": 0.5916649103164673, "learning_rate": 3.342912810042888e-05, "loss": 0.7095, "step": 4756 }, { "epoch": 0.82529493407356, "grad_norm": 0.5721748471260071, "learning_rate": 3.342407839791653e-05, "loss": 0.6305, "step": 4757 }, { "epoch": 0.8254684247050659, "grad_norm": 0.7761891484260559, "learning_rate": 3.3419027137524236e-05, "loss": 0.6144, "step": 4758 }, { "epoch": 0.8256419153365718, "grad_norm": 0.7536807060241699, "learning_rate": 3.34139743198382e-05, "loss": 0.5024, "step": 4759 }, { "epoch": 0.8258154059680777, "grad_norm": 0.8090083003044128, "learning_rate": 3.340891994544483e-05, "loss": 0.5054, "step": 4760 }, { "epoch": 0.8259888965995836, "grad_norm": 1.0500702857971191, "learning_rate": 3.3403864014930665e-05, "loss": 0.6921, "step": 4761 }, { "epoch": 0.8261623872310895, "grad_norm": 0.6257863640785217, "learning_rate": 3.339880652888246e-05, "loss": 0.7422, "step": 4762 }, { "epoch": 0.8263358778625954, "grad_norm": 0.6666653752326965, "learning_rate": 3.339374748788715e-05, "loss": 0.573, "step": 4763 }, { "epoch": 0.8265093684941013, "grad_norm": 0.8072382211685181, "learning_rate": 3.338868689253183e-05, "loss": 0.4584, "step": 4764 }, { "epoch": 0.8266828591256072, "grad_norm": 0.6549413800239563, "learning_rate": 3.338362474340381e-05, "loss": 0.5607, "step": 4765 }, { "epoch": 0.8268563497571131, "grad_norm": 0.6903956532478333, "learning_rate": 3.337856104109053e-05, "loss": 0.6548, "step": 4766 }, { "epoch": 0.827029840388619, "grad_norm": 0.9219315052032471, "learning_rate": 3.337349578617965e-05, "loss": 0.6171, "step": 4767 }, { "epoch": 0.8272033310201249, "grad_norm": 1.2927672863006592, "learning_rate": 3.3368428979259006e-05, "loss": 0.687, "step": 4768 }, { "epoch": 0.8273768216516308, "grad_norm": 1.2503191232681274, "learning_rate": 3.336336062091661e-05, "loss": 0.6322, "step": 4769 }, { "epoch": 0.8275503122831367, "grad_norm": 0.7192893028259277, "learning_rate": 3.335829071174063e-05, "loss": 0.5916, "step": 4770 }, { "epoch": 0.8277238029146426, "grad_norm": 0.8918823003768921, "learning_rate": 3.335321925231946e-05, "loss": 0.5858, "step": 4771 }, { "epoch": 0.8278972935461485, "grad_norm": 1.0696288347244263, "learning_rate": 3.334814624324163e-05, "loss": 0.5559, "step": 4772 }, { "epoch": 0.8280707841776545, "grad_norm": 2.7750024795532227, "learning_rate": 3.334307168509587e-05, "loss": 0.5511, "step": 4773 }, { "epoch": 0.8282442748091603, "grad_norm": 0.6778345704078674, "learning_rate": 3.333799557847109e-05, "loss": 0.656, "step": 4774 }, { "epoch": 0.8284177654406663, "grad_norm": 0.6657668948173523, "learning_rate": 3.3332917923956394e-05, "loss": 0.5795, "step": 4775 }, { "epoch": 0.8285912560721721, "grad_norm": 0.9768241047859192, "learning_rate": 3.332783872214103e-05, "loss": 0.5366, "step": 4776 }, { "epoch": 0.8287647467036781, "grad_norm": 0.6983128786087036, "learning_rate": 3.332275797361446e-05, "loss": 0.5173, "step": 4777 }, { "epoch": 0.8289382373351839, "grad_norm": 0.9408507943153381, "learning_rate": 3.331767567896629e-05, "loss": 0.6117, "step": 4778 }, { "epoch": 0.8291117279666897, "grad_norm": 0.7272319197654724, "learning_rate": 3.331259183878635e-05, "loss": 0.5349, "step": 4779 }, { "epoch": 0.8292852185981957, "grad_norm": 0.9621340036392212, "learning_rate": 3.330750645366461e-05, "loss": 0.7329, "step": 4780 }, { "epoch": 0.8294587092297016, "grad_norm": 1.641162633895874, "learning_rate": 3.330241952419123e-05, "loss": 0.552, "step": 4781 }, { "epoch": 0.8296321998612075, "grad_norm": 1.0812734365463257, "learning_rate": 3.3297331050956576e-05, "loss": 0.6604, "step": 4782 }, { "epoch": 0.8298056904927134, "grad_norm": 0.7657483816146851, "learning_rate": 3.329224103455116e-05, "loss": 0.5968, "step": 4783 }, { "epoch": 0.8299791811242193, "grad_norm": 0.7395285367965698, "learning_rate": 3.328714947556568e-05, "loss": 0.5719, "step": 4784 }, { "epoch": 0.8301526717557252, "grad_norm": 0.6676485538482666, "learning_rate": 3.328205637459102e-05, "loss": 0.6592, "step": 4785 }, { "epoch": 0.8303261623872311, "grad_norm": 0.6954163312911987, "learning_rate": 3.327696173221824e-05, "loss": 0.6749, "step": 4786 }, { "epoch": 0.830499653018737, "grad_norm": 0.6690672636032104, "learning_rate": 3.327186554903859e-05, "loss": 0.5436, "step": 4787 }, { "epoch": 0.8306731436502429, "grad_norm": 0.5956690311431885, "learning_rate": 3.326676782564347e-05, "loss": 0.6643, "step": 4788 }, { "epoch": 0.8308466342817488, "grad_norm": 0.6999792456626892, "learning_rate": 3.3261668562624484e-05, "loss": 0.6108, "step": 4789 }, { "epoch": 0.8310201249132547, "grad_norm": 0.7433859705924988, "learning_rate": 3.325656776057341e-05, "loss": 0.5802, "step": 4790 }, { "epoch": 0.8311936155447606, "grad_norm": 1.4846487045288086, "learning_rate": 3.32514654200822e-05, "loss": 0.5504, "step": 4791 }, { "epoch": 0.8313671061762665, "grad_norm": 0.6759796738624573, "learning_rate": 3.324636154174299e-05, "loss": 0.6409, "step": 4792 }, { "epoch": 0.8315405968077724, "grad_norm": 0.6552556753158569, "learning_rate": 3.3241256126148084e-05, "loss": 0.6464, "step": 4793 }, { "epoch": 0.8317140874392783, "grad_norm": 0.8450214862823486, "learning_rate": 3.3236149173889975e-05, "loss": 0.5894, "step": 4794 }, { "epoch": 0.8318875780707842, "grad_norm": 0.7100221514701843, "learning_rate": 3.323104068556133e-05, "loss": 0.6774, "step": 4795 }, { "epoch": 0.8320610687022901, "grad_norm": 0.6351094841957092, "learning_rate": 3.3225930661755005e-05, "loss": 0.5955, "step": 4796 }, { "epoch": 0.832234559333796, "grad_norm": 1.1856613159179688, "learning_rate": 3.322081910306401e-05, "loss": 0.585, "step": 4797 }, { "epoch": 0.8324080499653018, "grad_norm": 0.9470002055168152, "learning_rate": 3.321570601008155e-05, "loss": 0.5363, "step": 4798 }, { "epoch": 0.8325815405968078, "grad_norm": 0.7687327861785889, "learning_rate": 3.321059138340101e-05, "loss": 0.5703, "step": 4799 }, { "epoch": 0.8327550312283136, "grad_norm": 0.8524781465530396, "learning_rate": 3.320547522361595e-05, "loss": 0.4939, "step": 4800 }, { "epoch": 0.8329285218598196, "grad_norm": 1.0657740831375122, "learning_rate": 3.32003575313201e-05, "loss": 0.5284, "step": 4801 }, { "epoch": 0.8331020124913254, "grad_norm": 0.6151628494262695, "learning_rate": 3.3195238307107375e-05, "loss": 0.5389, "step": 4802 }, { "epoch": 0.8332755031228314, "grad_norm": 0.9582716226577759, "learning_rate": 3.3190117551571876e-05, "loss": 0.623, "step": 4803 }, { "epoch": 0.8334489937543372, "grad_norm": 1.1062990427017212, "learning_rate": 3.318499526530786e-05, "loss": 0.5428, "step": 4804 }, { "epoch": 0.8336224843858432, "grad_norm": 0.8115580081939697, "learning_rate": 3.317987144890978e-05, "loss": 0.6187, "step": 4805 }, { "epoch": 0.833795975017349, "grad_norm": 0.6153755784034729, "learning_rate": 3.317474610297226e-05, "loss": 0.5103, "step": 4806 }, { "epoch": 0.833969465648855, "grad_norm": 0.586530864238739, "learning_rate": 3.31696192280901e-05, "loss": 0.6226, "step": 4807 }, { "epoch": 0.8341429562803608, "grad_norm": 0.7791578769683838, "learning_rate": 3.316449082485829e-05, "loss": 0.5255, "step": 4808 }, { "epoch": 0.8343164469118668, "grad_norm": 0.6278956532478333, "learning_rate": 3.315936089387198e-05, "loss": 0.5488, "step": 4809 }, { "epoch": 0.8344899375433726, "grad_norm": 0.779247522354126, "learning_rate": 3.31542294357265e-05, "loss": 0.5624, "step": 4810 }, { "epoch": 0.8346634281748786, "grad_norm": 0.8130978345870972, "learning_rate": 3.314909645101737e-05, "loss": 0.5956, "step": 4811 }, { "epoch": 0.8348369188063844, "grad_norm": 0.5932303667068481, "learning_rate": 3.3143961940340274e-05, "loss": 0.4766, "step": 4812 }, { "epoch": 0.8350104094378904, "grad_norm": 0.8169277906417847, "learning_rate": 3.313882590429108e-05, "loss": 0.5641, "step": 4813 }, { "epoch": 0.8351839000693962, "grad_norm": 0.6386431455612183, "learning_rate": 3.313368834346583e-05, "loss": 0.5211, "step": 4814 }, { "epoch": 0.8353573907009022, "grad_norm": 0.9650004506111145, "learning_rate": 3.3128549258460734e-05, "loss": 0.6526, "step": 4815 }, { "epoch": 0.835530881332408, "grad_norm": 0.6431168913841248, "learning_rate": 3.312340864987221e-05, "loss": 0.6299, "step": 4816 }, { "epoch": 0.8357043719639139, "grad_norm": 0.5430099368095398, "learning_rate": 3.311826651829682e-05, "loss": 0.6063, "step": 4817 }, { "epoch": 0.8358778625954199, "grad_norm": 0.6411396265029907, "learning_rate": 3.311312286433131e-05, "loss": 0.6183, "step": 4818 }, { "epoch": 0.8360513532269257, "grad_norm": 0.5702820420265198, "learning_rate": 3.310797768857262e-05, "loss": 0.5219, "step": 4819 }, { "epoch": 0.8362248438584317, "grad_norm": 0.6720927953720093, "learning_rate": 3.310283099161783e-05, "loss": 0.6099, "step": 4820 }, { "epoch": 0.8363983344899375, "grad_norm": 0.7756640315055847, "learning_rate": 3.3097682774064255e-05, "loss": 0.5182, "step": 4821 }, { "epoch": 0.8365718251214435, "grad_norm": 0.7168863415718079, "learning_rate": 3.309253303650932e-05, "loss": 0.5314, "step": 4822 }, { "epoch": 0.8367453157529493, "grad_norm": 0.7782425880432129, "learning_rate": 3.308738177955067e-05, "loss": 0.6135, "step": 4823 }, { "epoch": 0.8369188063844553, "grad_norm": 0.6969422101974487, "learning_rate": 3.3082229003786114e-05, "loss": 0.5763, "step": 4824 }, { "epoch": 0.8370922970159611, "grad_norm": 0.8078770637512207, "learning_rate": 3.307707470981364e-05, "loss": 0.675, "step": 4825 }, { "epoch": 0.8372657876474671, "grad_norm": 0.9803861975669861, "learning_rate": 3.3071918898231413e-05, "loss": 0.5399, "step": 4826 }, { "epoch": 0.8374392782789729, "grad_norm": 0.6594573855400085, "learning_rate": 3.306676156963776e-05, "loss": 0.6039, "step": 4827 }, { "epoch": 0.8376127689104789, "grad_norm": 0.7573655843734741, "learning_rate": 3.3061602724631205e-05, "loss": 0.5188, "step": 4828 }, { "epoch": 0.8377862595419847, "grad_norm": 0.7867535352706909, "learning_rate": 3.3056442363810435e-05, "loss": 0.573, "step": 4829 }, { "epoch": 0.8379597501734907, "grad_norm": 0.9551301002502441, "learning_rate": 3.3051280487774316e-05, "loss": 0.4702, "step": 4830 }, { "epoch": 0.8381332408049965, "grad_norm": 0.8941601514816284, "learning_rate": 3.3046117097121884e-05, "loss": 0.517, "step": 4831 }, { "epoch": 0.8383067314365025, "grad_norm": 0.9159838557243347, "learning_rate": 3.304095219245236e-05, "loss": 0.507, "step": 4832 }, { "epoch": 0.8384802220680083, "grad_norm": 0.8108102083206177, "learning_rate": 3.3035785774365136e-05, "loss": 0.5586, "step": 4833 }, { "epoch": 0.8386537126995143, "grad_norm": 1.6117689609527588, "learning_rate": 3.303061784345979e-05, "loss": 0.5632, "step": 4834 }, { "epoch": 0.8388272033310201, "grad_norm": 0.6373217105865479, "learning_rate": 3.3025448400336064e-05, "loss": 0.5707, "step": 4835 }, { "epoch": 0.8390006939625261, "grad_norm": 0.6130152344703674, "learning_rate": 3.302027744559387e-05, "loss": 0.6497, "step": 4836 }, { "epoch": 0.8391741845940319, "grad_norm": 0.7247080206871033, "learning_rate": 3.30151049798333e-05, "loss": 0.4744, "step": 4837 }, { "epoch": 0.8393476752255378, "grad_norm": 0.9034555554389954, "learning_rate": 3.300993100365463e-05, "loss": 0.4786, "step": 4838 }, { "epoch": 0.8395211658570437, "grad_norm": 0.7661714553833008, "learning_rate": 3.3004755517658314e-05, "loss": 0.5989, "step": 4839 }, { "epoch": 0.8396946564885496, "grad_norm": 0.7545140385627747, "learning_rate": 3.299957852244496e-05, "loss": 0.6145, "step": 4840 }, { "epoch": 0.8398681471200555, "grad_norm": 0.705784261226654, "learning_rate": 3.299440001861538e-05, "loss": 0.5802, "step": 4841 }, { "epoch": 0.8400416377515614, "grad_norm": 0.6998658776283264, "learning_rate": 3.298922000677053e-05, "loss": 0.5647, "step": 4842 }, { "epoch": 0.8402151283830673, "grad_norm": 0.852400004863739, "learning_rate": 3.298403848751157e-05, "loss": 0.5278, "step": 4843 }, { "epoch": 0.8403886190145732, "grad_norm": 0.774298369884491, "learning_rate": 3.2978855461439806e-05, "loss": 0.5186, "step": 4844 }, { "epoch": 0.8405621096460791, "grad_norm": 1.1349924802780151, "learning_rate": 3.297367092915675e-05, "loss": 0.5896, "step": 4845 }, { "epoch": 0.840735600277585, "grad_norm": 1.0255142450332642, "learning_rate": 3.296848489126406e-05, "loss": 0.4928, "step": 4846 }, { "epoch": 0.8409090909090909, "grad_norm": 0.716189980506897, "learning_rate": 3.296329734836359e-05, "loss": 0.5927, "step": 4847 }, { "epoch": 0.8410825815405968, "grad_norm": 0.7085785269737244, "learning_rate": 3.295810830105736e-05, "loss": 0.4711, "step": 4848 }, { "epoch": 0.8412560721721027, "grad_norm": 0.7188493013381958, "learning_rate": 3.2952917749947556e-05, "loss": 0.6251, "step": 4849 }, { "epoch": 0.8414295628036086, "grad_norm": 0.7988995909690857, "learning_rate": 3.294772569563656e-05, "loss": 0.5269, "step": 4850 }, { "epoch": 0.8416030534351145, "grad_norm": 0.8568524122238159, "learning_rate": 3.2942532138726906e-05, "loss": 0.4526, "step": 4851 }, { "epoch": 0.8417765440666204, "grad_norm": 0.9650561809539795, "learning_rate": 3.293733707982132e-05, "loss": 0.6852, "step": 4852 }, { "epoch": 0.8419500346981263, "grad_norm": 0.7690786719322205, "learning_rate": 3.2932140519522676e-05, "loss": 0.7271, "step": 4853 }, { "epoch": 0.8421235253296322, "grad_norm": 2.4386205673217773, "learning_rate": 3.292694245843407e-05, "loss": 0.5551, "step": 4854 }, { "epoch": 0.8422970159611382, "grad_norm": 0.6074408292770386, "learning_rate": 3.2921742897158726e-05, "loss": 0.616, "step": 4855 }, { "epoch": 0.842470506592644, "grad_norm": 0.6939220428466797, "learning_rate": 3.2916541836300065e-05, "loss": 0.5975, "step": 4856 }, { "epoch": 0.8426439972241498, "grad_norm": 0.5517061352729797, "learning_rate": 3.2911339276461665e-05, "loss": 0.5438, "step": 4857 }, { "epoch": 0.8428174878556558, "grad_norm": 0.7376974821090698, "learning_rate": 3.290613521824731e-05, "loss": 0.5941, "step": 4858 }, { "epoch": 0.8429909784871616, "grad_norm": 0.7541755437850952, "learning_rate": 3.290092966226092e-05, "loss": 0.6024, "step": 4859 }, { "epoch": 0.8431644691186676, "grad_norm": 0.6830940246582031, "learning_rate": 3.28957226091066e-05, "loss": 0.6503, "step": 4860 }, { "epoch": 0.8433379597501734, "grad_norm": 0.683634877204895, "learning_rate": 3.289051405938865e-05, "loss": 0.7583, "step": 4861 }, { "epoch": 0.8435114503816794, "grad_norm": 0.5917184948921204, "learning_rate": 3.2885304013711525e-05, "loss": 0.6101, "step": 4862 }, { "epoch": 0.8436849410131853, "grad_norm": 0.7042409777641296, "learning_rate": 3.2880092472679854e-05, "loss": 0.5956, "step": 4863 }, { "epoch": 0.8438584316446912, "grad_norm": 0.6945961713790894, "learning_rate": 3.2874879436898444e-05, "loss": 0.6509, "step": 4864 }, { "epoch": 0.844031922276197, "grad_norm": 0.8403766751289368, "learning_rate": 3.286966490697227e-05, "loss": 0.5544, "step": 4865 }, { "epoch": 0.844205412907703, "grad_norm": 0.6023331880569458, "learning_rate": 3.286444888350649e-05, "loss": 0.7223, "step": 4866 }, { "epoch": 0.8443789035392089, "grad_norm": 0.7682532668113708, "learning_rate": 3.285923136710643e-05, "loss": 0.5638, "step": 4867 }, { "epoch": 0.8445523941707148, "grad_norm": 0.8390847444534302, "learning_rate": 3.285401235837758e-05, "loss": 0.5304, "step": 4868 }, { "epoch": 0.8447258848022207, "grad_norm": 0.837834894657135, "learning_rate": 3.284879185792562e-05, "loss": 0.5387, "step": 4869 }, { "epoch": 0.8448993754337266, "grad_norm": 0.8863024115562439, "learning_rate": 3.28435698663564e-05, "loss": 0.5472, "step": 4870 }, { "epoch": 0.8450728660652325, "grad_norm": 0.7301396727561951, "learning_rate": 3.2838346384275924e-05, "loss": 0.6384, "step": 4871 }, { "epoch": 0.8452463566967384, "grad_norm": 0.9071313142776489, "learning_rate": 3.283312141229039e-05, "loss": 0.64, "step": 4872 }, { "epoch": 0.8454198473282443, "grad_norm": 0.9178149104118347, "learning_rate": 3.282789495100616e-05, "loss": 0.4503, "step": 4873 }, { "epoch": 0.8455933379597502, "grad_norm": 0.6926846504211426, "learning_rate": 3.282266700102978e-05, "loss": 0.6289, "step": 4874 }, { "epoch": 0.8457668285912561, "grad_norm": 0.7270438075065613, "learning_rate": 3.281743756296795e-05, "loss": 0.5001, "step": 4875 }, { "epoch": 0.8459403192227619, "grad_norm": 0.7516143321990967, "learning_rate": 3.281220663742756e-05, "loss": 0.4733, "step": 4876 }, { "epoch": 0.8461138098542679, "grad_norm": 0.7802963256835938, "learning_rate": 3.280697422501565e-05, "loss": 0.475, "step": 4877 }, { "epoch": 0.8462873004857737, "grad_norm": 0.7416410446166992, "learning_rate": 3.280174032633947e-05, "loss": 0.5905, "step": 4878 }, { "epoch": 0.8464607911172797, "grad_norm": 1.9238371849060059, "learning_rate": 3.27965049420064e-05, "loss": 0.5709, "step": 4879 }, { "epoch": 0.8466342817487855, "grad_norm": 0.6889614462852478, "learning_rate": 3.279126807262403e-05, "loss": 0.5857, "step": 4880 }, { "epoch": 0.8468077723802915, "grad_norm": 0.8448830842971802, "learning_rate": 3.278602971880009e-05, "loss": 0.455, "step": 4881 }, { "epoch": 0.8469812630117973, "grad_norm": 1.147859811782837, "learning_rate": 3.27807898811425e-05, "loss": 0.4927, "step": 4882 }, { "epoch": 0.8471547536433033, "grad_norm": 0.738511860370636, "learning_rate": 3.2775548560259355e-05, "loss": 0.5715, "step": 4883 }, { "epoch": 0.8473282442748091, "grad_norm": 1.2310166358947754, "learning_rate": 3.277030575675891e-05, "loss": 0.5648, "step": 4884 }, { "epoch": 0.8475017349063151, "grad_norm": 0.9854730367660522, "learning_rate": 3.27650614712496e-05, "loss": 0.6177, "step": 4885 }, { "epoch": 0.8476752255378209, "grad_norm": 0.8511006832122803, "learning_rate": 3.2759815704340034e-05, "loss": 0.649, "step": 4886 }, { "epoch": 0.8478487161693269, "grad_norm": 0.7844124436378479, "learning_rate": 3.275456845663899e-05, "loss": 0.5099, "step": 4887 }, { "epoch": 0.8480222068008327, "grad_norm": 0.7616360783576965, "learning_rate": 3.2749319728755415e-05, "loss": 0.5283, "step": 4888 }, { "epoch": 0.8481956974323387, "grad_norm": 1.0567359924316406, "learning_rate": 3.2744069521298424e-05, "loss": 0.5382, "step": 4889 }, { "epoch": 0.8483691880638445, "grad_norm": 0.6821542978286743, "learning_rate": 3.273881783487732e-05, "loss": 0.6072, "step": 4890 }, { "epoch": 0.8485426786953505, "grad_norm": 0.6456993222236633, "learning_rate": 3.273356467010156e-05, "loss": 0.5413, "step": 4891 }, { "epoch": 0.8487161693268563, "grad_norm": 0.6628501415252686, "learning_rate": 3.2728310027580786e-05, "loss": 0.5604, "step": 4892 }, { "epoch": 0.8488896599583623, "grad_norm": 0.921438992023468, "learning_rate": 3.27230539079248e-05, "loss": 0.5522, "step": 4893 }, { "epoch": 0.8490631505898681, "grad_norm": 0.8634639978408813, "learning_rate": 3.271779631174358e-05, "loss": 0.4839, "step": 4894 }, { "epoch": 0.8492366412213741, "grad_norm": 0.6726203560829163, "learning_rate": 3.271253723964728e-05, "loss": 0.5292, "step": 4895 }, { "epoch": 0.84941013185288, "grad_norm": 1.0867524147033691, "learning_rate": 3.270727669224622e-05, "loss": 0.6208, "step": 4896 }, { "epoch": 0.8495836224843858, "grad_norm": 0.9534117579460144, "learning_rate": 3.2702014670150904e-05, "loss": 0.5372, "step": 4897 }, { "epoch": 0.8497571131158917, "grad_norm": 0.7047215700149536, "learning_rate": 3.269675117397196e-05, "loss": 0.4513, "step": 4898 }, { "epoch": 0.8499306037473976, "grad_norm": 0.722929835319519, "learning_rate": 3.269148620432027e-05, "loss": 0.5305, "step": 4899 }, { "epoch": 0.8501040943789036, "grad_norm": 0.7983660697937012, "learning_rate": 3.268621976180681e-05, "loss": 0.5753, "step": 4900 }, { "epoch": 0.8502775850104094, "grad_norm": 0.5249261856079102, "learning_rate": 3.2680951847042766e-05, "loss": 0.6467, "step": 4901 }, { "epoch": 0.8504510756419154, "grad_norm": 0.8097899556159973, "learning_rate": 3.267568246063948e-05, "loss": 0.5132, "step": 4902 }, { "epoch": 0.8506245662734212, "grad_norm": 0.8358068466186523, "learning_rate": 3.2670411603208484e-05, "loss": 0.4952, "step": 4903 }, { "epoch": 0.8507980569049272, "grad_norm": 1.2445707321166992, "learning_rate": 3.2665139275361446e-05, "loss": 0.6091, "step": 4904 }, { "epoch": 0.850971547536433, "grad_norm": 0.6666598916053772, "learning_rate": 3.2659865477710244e-05, "loss": 0.6417, "step": 4905 }, { "epoch": 0.851145038167939, "grad_norm": 0.7324227690696716, "learning_rate": 3.26545902108669e-05, "loss": 0.4935, "step": 4906 }, { "epoch": 0.8513185287994448, "grad_norm": 0.7859541177749634, "learning_rate": 3.264931347544361e-05, "loss": 0.5149, "step": 4907 }, { "epoch": 0.8514920194309508, "grad_norm": 0.5554445385932922, "learning_rate": 3.2644035272052756e-05, "loss": 0.6406, "step": 4908 }, { "epoch": 0.8516655100624566, "grad_norm": 0.7216631174087524, "learning_rate": 3.263875560130689e-05, "loss": 0.5939, "step": 4909 }, { "epoch": 0.8518390006939626, "grad_norm": 0.9014060497283936, "learning_rate": 3.263347446381869e-05, "loss": 0.5608, "step": 4910 }, { "epoch": 0.8520124913254684, "grad_norm": 0.6206343173980713, "learning_rate": 3.262819186020106e-05, "loss": 0.5126, "step": 4911 }, { "epoch": 0.8521859819569744, "grad_norm": 1.0631262063980103, "learning_rate": 3.2622907791067056e-05, "loss": 0.5747, "step": 4912 }, { "epoch": 0.8523594725884802, "grad_norm": 0.7039932012557983, "learning_rate": 3.261762225702989e-05, "loss": 0.6125, "step": 4913 }, { "epoch": 0.8525329632199862, "grad_norm": 3.6588799953460693, "learning_rate": 3.261233525870296e-05, "loss": 0.4834, "step": 4914 }, { "epoch": 0.852706453851492, "grad_norm": 0.9658022522926331, "learning_rate": 3.2607046796699824e-05, "loss": 0.6519, "step": 4915 }, { "epoch": 0.8528799444829979, "grad_norm": 0.6569541096687317, "learning_rate": 3.260175687163423e-05, "loss": 0.552, "step": 4916 }, { "epoch": 0.8530534351145038, "grad_norm": 0.8620007634162903, "learning_rate": 3.259646548412005e-05, "loss": 0.5516, "step": 4917 }, { "epoch": 0.8532269257460097, "grad_norm": 0.7383683919906616, "learning_rate": 3.259117263477138e-05, "loss": 0.5127, "step": 4918 }, { "epoch": 0.8534004163775156, "grad_norm": 0.8829156160354614, "learning_rate": 3.258587832420246e-05, "loss": 0.7289, "step": 4919 }, { "epoch": 0.8535739070090215, "grad_norm": 0.6492002606391907, "learning_rate": 3.2580582553027684e-05, "loss": 0.717, "step": 4920 }, { "epoch": 0.8537473976405274, "grad_norm": 0.7520597577095032, "learning_rate": 3.2575285321861656e-05, "loss": 0.5946, "step": 4921 }, { "epoch": 0.8539208882720333, "grad_norm": 0.6571438312530518, "learning_rate": 3.2569986631319104e-05, "loss": 0.6296, "step": 4922 }, { "epoch": 0.8540943789035392, "grad_norm": 0.831369161605835, "learning_rate": 3.256468648201496e-05, "loss": 0.5238, "step": 4923 }, { "epoch": 0.8542678695350451, "grad_norm": 0.7360967397689819, "learning_rate": 3.2559384874564305e-05, "loss": 0.556, "step": 4924 }, { "epoch": 0.854441360166551, "grad_norm": 0.8607320785522461, "learning_rate": 3.25540818095824e-05, "loss": 0.4917, "step": 4925 }, { "epoch": 0.8546148507980569, "grad_norm": 1.5490577220916748, "learning_rate": 3.254877728768468e-05, "loss": 0.509, "step": 4926 }, { "epoch": 0.8547883414295628, "grad_norm": 0.9422374963760376, "learning_rate": 3.254347130948673e-05, "loss": 0.6012, "step": 4927 }, { "epoch": 0.8549618320610687, "grad_norm": 1.096207857131958, "learning_rate": 3.2538163875604316e-05, "loss": 0.5375, "step": 4928 }, { "epoch": 0.8551353226925746, "grad_norm": 0.7442988157272339, "learning_rate": 3.2532854986653375e-05, "loss": 0.5046, "step": 4929 }, { "epoch": 0.8553088133240805, "grad_norm": 0.9817276000976562, "learning_rate": 3.252754464325001e-05, "loss": 0.491, "step": 4930 }, { "epoch": 0.8554823039555864, "grad_norm": 1.6339360475540161, "learning_rate": 3.2522232846010496e-05, "loss": 0.4955, "step": 4931 }, { "epoch": 0.8556557945870923, "grad_norm": 0.9660579562187195, "learning_rate": 3.251691959555126e-05, "loss": 0.4913, "step": 4932 }, { "epoch": 0.8558292852185982, "grad_norm": 0.8408375382423401, "learning_rate": 3.251160489248893e-05, "loss": 0.5836, "step": 4933 }, { "epoch": 0.8560027758501041, "grad_norm": 0.7440459132194519, "learning_rate": 3.2506288737440265e-05, "loss": 0.6317, "step": 4934 }, { "epoch": 0.8561762664816099, "grad_norm": 0.6120437383651733, "learning_rate": 3.250097113102222e-05, "loss": 0.6022, "step": 4935 }, { "epoch": 0.8563497571131159, "grad_norm": 0.9216663241386414, "learning_rate": 3.249565207385192e-05, "loss": 0.5349, "step": 4936 }, { "epoch": 0.8565232477446217, "grad_norm": 0.7753810882568359, "learning_rate": 3.249033156654663e-05, "loss": 0.5497, "step": 4937 }, { "epoch": 0.8566967383761277, "grad_norm": 1.2120603322982788, "learning_rate": 3.24850096097238e-05, "loss": 0.6997, "step": 4938 }, { "epoch": 0.8568702290076335, "grad_norm": 0.9648410677909851, "learning_rate": 3.247968620400106e-05, "loss": 0.527, "step": 4939 }, { "epoch": 0.8570437196391395, "grad_norm": 0.8988485336303711, "learning_rate": 3.2474361349996205e-05, "loss": 0.5538, "step": 4940 }, { "epoch": 0.8572172102706453, "grad_norm": 2.031445264816284, "learning_rate": 3.2469035048327166e-05, "loss": 0.6179, "step": 4941 }, { "epoch": 0.8573907009021513, "grad_norm": 0.8860834240913391, "learning_rate": 3.2463707299612086e-05, "loss": 0.5106, "step": 4942 }, { "epoch": 0.8575641915336571, "grad_norm": 0.7643118500709534, "learning_rate": 3.245837810446925e-05, "loss": 0.5621, "step": 4943 }, { "epoch": 0.8577376821651631, "grad_norm": 0.6657795906066895, "learning_rate": 3.245304746351712e-05, "loss": 0.5447, "step": 4944 }, { "epoch": 0.857911172796669, "grad_norm": 1.121260643005371, "learning_rate": 3.244771537737432e-05, "loss": 0.5657, "step": 4945 }, { "epoch": 0.8580846634281749, "grad_norm": 0.910545825958252, "learning_rate": 3.2442381846659644e-05, "loss": 0.6078, "step": 4946 }, { "epoch": 0.8582581540596808, "grad_norm": 0.6867709159851074, "learning_rate": 3.243704687199206e-05, "loss": 0.6078, "step": 4947 }, { "epoch": 0.8584316446911867, "grad_norm": 0.9394713044166565, "learning_rate": 3.243171045399069e-05, "loss": 0.5994, "step": 4948 }, { "epoch": 0.8586051353226926, "grad_norm": 0.8011114597320557, "learning_rate": 3.2426372593274834e-05, "loss": 0.6193, "step": 4949 }, { "epoch": 0.8587786259541985, "grad_norm": 1.425828456878662, "learning_rate": 3.2421033290463966e-05, "loss": 0.4648, "step": 4950 }, { "epoch": 0.8589521165857044, "grad_norm": 0.8111879229545593, "learning_rate": 3.2415692546177714e-05, "loss": 0.5406, "step": 4951 }, { "epoch": 0.8591256072172103, "grad_norm": 0.621471107006073, "learning_rate": 3.241035036103587e-05, "loss": 0.6207, "step": 4952 }, { "epoch": 0.8592990978487162, "grad_norm": 0.7024137377738953, "learning_rate": 3.24050067356584e-05, "loss": 0.6758, "step": 4953 }, { "epoch": 0.8594725884802221, "grad_norm": 0.7092933058738708, "learning_rate": 3.239966167066545e-05, "loss": 0.5184, "step": 4954 }, { "epoch": 0.859646079111728, "grad_norm": 0.6382533311843872, "learning_rate": 3.239431516667732e-05, "loss": 0.4784, "step": 4955 }, { "epoch": 0.8598195697432338, "grad_norm": 0.899437427520752, "learning_rate": 3.2388967224314464e-05, "loss": 0.6016, "step": 4956 }, { "epoch": 0.8599930603747398, "grad_norm": 0.6273519396781921, "learning_rate": 3.238361784419753e-05, "loss": 0.6202, "step": 4957 }, { "epoch": 0.8601665510062456, "grad_norm": 0.7345899939537048, "learning_rate": 3.2378267026947314e-05, "loss": 0.5728, "step": 4958 }, { "epoch": 0.8603400416377516, "grad_norm": 1.0373185873031616, "learning_rate": 3.237291477318478e-05, "loss": 0.577, "step": 4959 }, { "epoch": 0.8605135322692574, "grad_norm": 0.5629845261573792, "learning_rate": 3.2367561083531074e-05, "loss": 0.6731, "step": 4960 }, { "epoch": 0.8606870229007634, "grad_norm": 0.7160812020301819, "learning_rate": 3.2362205958607495e-05, "loss": 0.6404, "step": 4961 }, { "epoch": 0.8608605135322692, "grad_norm": 0.814015805721283, "learning_rate": 3.235684939903551e-05, "loss": 0.51, "step": 4962 }, { "epoch": 0.8610340041637752, "grad_norm": 0.7279803156852722, "learning_rate": 3.235149140543675e-05, "loss": 0.5006, "step": 4963 }, { "epoch": 0.861207494795281, "grad_norm": 0.5915574431419373, "learning_rate": 3.234613197843302e-05, "loss": 0.6266, "step": 4964 }, { "epoch": 0.861380985426787, "grad_norm": 0.649194598197937, "learning_rate": 3.234077111864629e-05, "loss": 0.5829, "step": 4965 }, { "epoch": 0.8615544760582928, "grad_norm": 0.8546715974807739, "learning_rate": 3.233540882669869e-05, "loss": 0.4796, "step": 4966 }, { "epoch": 0.8617279666897988, "grad_norm": 0.623928964138031, "learning_rate": 3.233004510321253e-05, "loss": 0.6228, "step": 4967 }, { "epoch": 0.8619014573213046, "grad_norm": 0.5498993396759033, "learning_rate": 3.232467994881026e-05, "loss": 0.5118, "step": 4968 }, { "epoch": 0.8620749479528106, "grad_norm": 0.5465213656425476, "learning_rate": 3.2319313364114524e-05, "loss": 0.6534, "step": 4969 }, { "epoch": 0.8622484385843164, "grad_norm": 0.6918518543243408, "learning_rate": 3.2313945349748116e-05, "loss": 0.5933, "step": 4970 }, { "epoch": 0.8624219292158224, "grad_norm": 0.7475275993347168, "learning_rate": 3.2308575906334004e-05, "loss": 0.6959, "step": 4971 }, { "epoch": 0.8625954198473282, "grad_norm": 0.9270092248916626, "learning_rate": 3.230320503449531e-05, "loss": 0.6343, "step": 4972 }, { "epoch": 0.8627689104788342, "grad_norm": 0.9913419485092163, "learning_rate": 3.229783273485534e-05, "loss": 0.5769, "step": 4973 }, { "epoch": 0.86294240111034, "grad_norm": 0.8261295557022095, "learning_rate": 3.2292459008037554e-05, "loss": 0.564, "step": 4974 }, { "epoch": 0.8631158917418459, "grad_norm": 0.8295417428016663, "learning_rate": 3.2287083854665566e-05, "loss": 0.5098, "step": 4975 }, { "epoch": 0.8632893823733518, "grad_norm": 0.6348004341125488, "learning_rate": 3.228170727536319e-05, "loss": 0.6146, "step": 4976 }, { "epoch": 0.8634628730048577, "grad_norm": 0.6621866822242737, "learning_rate": 3.227632927075437e-05, "loss": 0.5645, "step": 4977 }, { "epoch": 0.8636363636363636, "grad_norm": 0.821857213973999, "learning_rate": 3.227094984146323e-05, "loss": 0.5233, "step": 4978 }, { "epoch": 0.8638098542678695, "grad_norm": 0.7312668561935425, "learning_rate": 3.2265568988114075e-05, "loss": 0.6039, "step": 4979 }, { "epoch": 0.8639833448993754, "grad_norm": 0.576884388923645, "learning_rate": 3.226018671133134e-05, "loss": 0.6044, "step": 4980 }, { "epoch": 0.8641568355308813, "grad_norm": 1.4261698722839355, "learning_rate": 3.225480301173964e-05, "loss": 0.5365, "step": 4981 }, { "epoch": 0.8643303261623873, "grad_norm": 0.7893202304840088, "learning_rate": 3.224941788996378e-05, "loss": 0.4792, "step": 4982 }, { "epoch": 0.8645038167938931, "grad_norm": 0.7231020927429199, "learning_rate": 3.22440313466287e-05, "loss": 0.4773, "step": 4983 }, { "epoch": 0.864677307425399, "grad_norm": 0.8429858088493347, "learning_rate": 3.223864338235951e-05, "loss": 0.4828, "step": 4984 }, { "epoch": 0.8648507980569049, "grad_norm": 0.813918948173523, "learning_rate": 3.22332539977815e-05, "loss": 0.5181, "step": 4985 }, { "epoch": 0.8650242886884109, "grad_norm": 0.6264095306396484, "learning_rate": 3.2227863193520115e-05, "loss": 0.5846, "step": 4986 }, { "epoch": 0.8651977793199167, "grad_norm": 0.7643534541130066, "learning_rate": 3.222247097020095e-05, "loss": 0.5066, "step": 4987 }, { "epoch": 0.8653712699514227, "grad_norm": 0.8795880675315857, "learning_rate": 3.221707732844979e-05, "loss": 0.5547, "step": 4988 }, { "epoch": 0.8655447605829285, "grad_norm": 0.7697674632072449, "learning_rate": 3.221168226889257e-05, "loss": 0.692, "step": 4989 }, { "epoch": 0.8657182512144345, "grad_norm": 0.575239896774292, "learning_rate": 3.220628579215539e-05, "loss": 0.5803, "step": 4990 }, { "epoch": 0.8658917418459403, "grad_norm": 0.7778034806251526, "learning_rate": 3.220088789886452e-05, "loss": 0.6017, "step": 4991 }, { "epoch": 0.8660652324774463, "grad_norm": 0.7716045379638672, "learning_rate": 3.21954885896464e-05, "loss": 0.5767, "step": 4992 }, { "epoch": 0.8662387231089521, "grad_norm": 0.7348926663398743, "learning_rate": 3.219008786512762e-05, "loss": 0.5782, "step": 4993 }, { "epoch": 0.8664122137404581, "grad_norm": 0.9667244553565979, "learning_rate": 3.2184685725934926e-05, "loss": 0.5251, "step": 4994 }, { "epoch": 0.8665857043719639, "grad_norm": 0.9431669116020203, "learning_rate": 3.217928217269527e-05, "loss": 0.5309, "step": 4995 }, { "epoch": 0.8667591950034698, "grad_norm": 0.7136666774749756, "learning_rate": 3.2173877206035714e-05, "loss": 0.5741, "step": 4996 }, { "epoch": 0.8669326856349757, "grad_norm": 0.7457221150398254, "learning_rate": 3.2168470826583525e-05, "loss": 0.611, "step": 4997 }, { "epoch": 0.8671061762664816, "grad_norm": 0.7214698195457458, "learning_rate": 3.2163063034966126e-05, "loss": 0.5966, "step": 4998 }, { "epoch": 0.8672796668979875, "grad_norm": 0.6944583654403687, "learning_rate": 3.215765383181108e-05, "loss": 0.5519, "step": 4999 }, { "epoch": 0.8674531575294934, "grad_norm": 0.8013060092926025, "learning_rate": 3.215224321774614e-05, "loss": 0.5673, "step": 5000 }, { "epoch": 0.8676266481609993, "grad_norm": 1.4525957107543945, "learning_rate": 3.2146831193399225e-05, "loss": 0.5699, "step": 5001 }, { "epoch": 0.8678001387925052, "grad_norm": 0.9097924828529358, "learning_rate": 3.214141775939839e-05, "loss": 0.5128, "step": 5002 }, { "epoch": 0.8679736294240111, "grad_norm": 0.6187672019004822, "learning_rate": 3.213600291637187e-05, "loss": 0.542, "step": 5003 }, { "epoch": 0.868147120055517, "grad_norm": 0.7136866450309753, "learning_rate": 3.213058666494807e-05, "loss": 0.567, "step": 5004 }, { "epoch": 0.8683206106870229, "grad_norm": 0.7285737991333008, "learning_rate": 3.2125169005755566e-05, "loss": 0.5281, "step": 5005 }, { "epoch": 0.8684941013185288, "grad_norm": 0.6819010376930237, "learning_rate": 3.2119749939423063e-05, "loss": 0.5353, "step": 5006 }, { "epoch": 0.8686675919500347, "grad_norm": 0.632310688495636, "learning_rate": 3.211432946657946e-05, "loss": 0.7002, "step": 5007 }, { "epoch": 0.8688410825815406, "grad_norm": 0.5707098841667175, "learning_rate": 3.2108907587853794e-05, "loss": 0.6343, "step": 5008 }, { "epoch": 0.8690145732130465, "grad_norm": 0.6051770448684692, "learning_rate": 3.210348430387531e-05, "loss": 0.6757, "step": 5009 }, { "epoch": 0.8691880638445524, "grad_norm": 1.1853944063186646, "learning_rate": 3.2098059615273354e-05, "loss": 0.6776, "step": 5010 }, { "epoch": 0.8693615544760583, "grad_norm": 0.6617575287818909, "learning_rate": 3.209263352267749e-05, "loss": 0.6189, "step": 5011 }, { "epoch": 0.8695350451075642, "grad_norm": 0.8673063516616821, "learning_rate": 3.2087206026717415e-05, "loss": 0.6863, "step": 5012 }, { "epoch": 0.8697085357390701, "grad_norm": 0.602722704410553, "learning_rate": 3.2081777128023005e-05, "loss": 0.5613, "step": 5013 }, { "epoch": 0.869882026370576, "grad_norm": 0.6934711933135986, "learning_rate": 3.207634682722427e-05, "loss": 0.5569, "step": 5014 }, { "epoch": 0.8700555170020818, "grad_norm": 0.7566132545471191, "learning_rate": 3.2070915124951406e-05, "loss": 0.5197, "step": 5015 }, { "epoch": 0.8702290076335878, "grad_norm": 0.8444889783859253, "learning_rate": 3.206548202183479e-05, "loss": 0.4972, "step": 5016 }, { "epoch": 0.8704024982650936, "grad_norm": 0.7059224843978882, "learning_rate": 3.206004751850493e-05, "loss": 0.5042, "step": 5017 }, { "epoch": 0.8705759888965996, "grad_norm": 0.7606081366539001, "learning_rate": 3.20546116155925e-05, "loss": 0.5923, "step": 5018 }, { "epoch": 0.8707494795281054, "grad_norm": 0.6471020579338074, "learning_rate": 3.204917431372833e-05, "loss": 0.4932, "step": 5019 }, { "epoch": 0.8709229701596114, "grad_norm": 0.716760516166687, "learning_rate": 3.204373561354345e-05, "loss": 0.5991, "step": 5020 }, { "epoch": 0.8710964607911172, "grad_norm": 0.8604891300201416, "learning_rate": 3.2038295515669024e-05, "loss": 0.6353, "step": 5021 }, { "epoch": 0.8712699514226232, "grad_norm": 0.7452986836433411, "learning_rate": 3.203285402073637e-05, "loss": 0.5154, "step": 5022 }, { "epoch": 0.871443442054129, "grad_norm": 0.981040894985199, "learning_rate": 3.202741112937699e-05, "loss": 0.5051, "step": 5023 }, { "epoch": 0.871616932685635, "grad_norm": 1.1439461708068848, "learning_rate": 3.202196684222253e-05, "loss": 0.5151, "step": 5024 }, { "epoch": 0.8717904233171409, "grad_norm": 0.6975328326225281, "learning_rate": 3.201652115990481e-05, "loss": 0.6628, "step": 5025 }, { "epoch": 0.8719639139486468, "grad_norm": 0.7577537298202515, "learning_rate": 3.2011074083055814e-05, "loss": 0.5559, "step": 5026 }, { "epoch": 0.8721374045801527, "grad_norm": 0.6863942742347717, "learning_rate": 3.200562561230767e-05, "loss": 0.5581, "step": 5027 }, { "epoch": 0.8723108952116586, "grad_norm": 2.8689303398132324, "learning_rate": 3.200017574829268e-05, "loss": 0.6234, "step": 5028 }, { "epoch": 0.8724843858431645, "grad_norm": 0.6341760158538818, "learning_rate": 3.199472449164332e-05, "loss": 0.5452, "step": 5029 }, { "epoch": 0.8726578764746704, "grad_norm": 0.5848334431648254, "learning_rate": 3.19892718429922e-05, "loss": 0.6141, "step": 5030 }, { "epoch": 0.8728313671061763, "grad_norm": 0.8144198060035706, "learning_rate": 3.198381780297211e-05, "loss": 0.6423, "step": 5031 }, { "epoch": 0.8730048577376822, "grad_norm": 0.6849162578582764, "learning_rate": 3.1978362372216006e-05, "loss": 0.6716, "step": 5032 }, { "epoch": 0.8731783483691881, "grad_norm": 0.6544033885002136, "learning_rate": 3.1972905551356995e-05, "loss": 0.6765, "step": 5033 }, { "epoch": 0.8733518390006939, "grad_norm": 0.5681815147399902, "learning_rate": 3.196744734102833e-05, "loss": 0.6099, "step": 5034 }, { "epoch": 0.8735253296321999, "grad_norm": 0.7293042540550232, "learning_rate": 3.196198774186347e-05, "loss": 0.5864, "step": 5035 }, { "epoch": 0.8736988202637057, "grad_norm": 0.7678490281105042, "learning_rate": 3.195652675449599e-05, "loss": 0.4551, "step": 5036 }, { "epoch": 0.8738723108952117, "grad_norm": 0.6535952091217041, "learning_rate": 3.195106437955964e-05, "loss": 0.6561, "step": 5037 }, { "epoch": 0.8740458015267175, "grad_norm": 0.6233806610107422, "learning_rate": 3.194560061768835e-05, "loss": 0.5675, "step": 5038 }, { "epoch": 0.8742192921582235, "grad_norm": 0.580977201461792, "learning_rate": 3.194013546951619e-05, "loss": 0.5573, "step": 5039 }, { "epoch": 0.8743927827897293, "grad_norm": 0.5699653029441833, "learning_rate": 3.193466893567739e-05, "loss": 0.6335, "step": 5040 }, { "epoch": 0.8745662734212353, "grad_norm": 0.6607775688171387, "learning_rate": 3.1929201016806355e-05, "loss": 0.5901, "step": 5041 }, { "epoch": 0.8747397640527411, "grad_norm": 0.6944895386695862, "learning_rate": 3.192373171353765e-05, "loss": 0.6528, "step": 5042 }, { "epoch": 0.8749132546842471, "grad_norm": 0.6482436060905457, "learning_rate": 3.1918261026505974e-05, "loss": 0.6279, "step": 5043 }, { "epoch": 0.8750867453157529, "grad_norm": 1.018430471420288, "learning_rate": 3.1912788956346226e-05, "loss": 0.5323, "step": 5044 }, { "epoch": 0.8752602359472589, "grad_norm": 0.5560790300369263, "learning_rate": 3.1907315503693434e-05, "loss": 0.5754, "step": 5045 }, { "epoch": 0.8754337265787647, "grad_norm": 1.4722284078598022, "learning_rate": 3.190184066918281e-05, "loss": 0.748, "step": 5046 }, { "epoch": 0.8756072172102707, "grad_norm": 0.856460452079773, "learning_rate": 3.18963644534497e-05, "loss": 0.5693, "step": 5047 }, { "epoch": 0.8757807078417765, "grad_norm": 0.6080543398857117, "learning_rate": 3.189088685712964e-05, "loss": 0.6155, "step": 5048 }, { "epoch": 0.8759541984732825, "grad_norm": 0.8170772194862366, "learning_rate": 3.18854078808583e-05, "loss": 0.5282, "step": 5049 }, { "epoch": 0.8761276891047883, "grad_norm": 0.8213574886322021, "learning_rate": 3.187992752527153e-05, "loss": 0.5027, "step": 5050 }, { "epoch": 0.8763011797362943, "grad_norm": 0.8203826546669006, "learning_rate": 3.187444579100533e-05, "loss": 0.5299, "step": 5051 }, { "epoch": 0.8764746703678001, "grad_norm": 0.7871171236038208, "learning_rate": 3.186896267869585e-05, "loss": 0.4895, "step": 5052 }, { "epoch": 0.8766481609993061, "grad_norm": 0.7084567546844482, "learning_rate": 3.1863478188979424e-05, "loss": 0.4972, "step": 5053 }, { "epoch": 0.8768216516308119, "grad_norm": 1.004349946975708, "learning_rate": 3.1857992322492525e-05, "loss": 0.567, "step": 5054 }, { "epoch": 0.8769951422623178, "grad_norm": 0.5509437322616577, "learning_rate": 3.185250507987181e-05, "loss": 0.5276, "step": 5055 }, { "epoch": 0.8771686328938237, "grad_norm": 0.6013137698173523, "learning_rate": 3.184701646175407e-05, "loss": 0.5765, "step": 5056 }, { "epoch": 0.8773421235253296, "grad_norm": 0.759196937084198, "learning_rate": 3.184152646877626e-05, "loss": 0.6095, "step": 5057 }, { "epoch": 0.8775156141568355, "grad_norm": 0.6360790133476257, "learning_rate": 3.183603510157551e-05, "loss": 0.6708, "step": 5058 }, { "epoch": 0.8776891047883414, "grad_norm": 0.7660230994224548, "learning_rate": 3.183054236078909e-05, "loss": 0.4877, "step": 5059 }, { "epoch": 0.8778625954198473, "grad_norm": 0.7693997025489807, "learning_rate": 3.1825048247054444e-05, "loss": 0.4686, "step": 5060 }, { "epoch": 0.8780360860513532, "grad_norm": 0.8580908179283142, "learning_rate": 3.181955276100917e-05, "loss": 0.4996, "step": 5061 }, { "epoch": 0.8782095766828591, "grad_norm": 0.6530462503433228, "learning_rate": 3.181405590329102e-05, "loss": 0.5463, "step": 5062 }, { "epoch": 0.878383067314365, "grad_norm": 0.7426706552505493, "learning_rate": 3.180855767453793e-05, "loss": 0.63, "step": 5063 }, { "epoch": 0.878556557945871, "grad_norm": 0.7013078927993774, "learning_rate": 3.180305807538796e-05, "loss": 0.5508, "step": 5064 }, { "epoch": 0.8787300485773768, "grad_norm": 0.564922571182251, "learning_rate": 3.179755710647934e-05, "loss": 0.5793, "step": 5065 }, { "epoch": 0.8789035392088828, "grad_norm": 0.8199964761734009, "learning_rate": 3.1792054768450466e-05, "loss": 0.5695, "step": 5066 }, { "epoch": 0.8790770298403886, "grad_norm": 0.8828762173652649, "learning_rate": 3.1786551061939905e-05, "loss": 0.54, "step": 5067 }, { "epoch": 0.8792505204718946, "grad_norm": 0.6814677119255066, "learning_rate": 3.178104598758636e-05, "loss": 0.7408, "step": 5068 }, { "epoch": 0.8794240111034004, "grad_norm": 0.5756251215934753, "learning_rate": 3.17755395460287e-05, "loss": 0.5538, "step": 5069 }, { "epoch": 0.8795975017349064, "grad_norm": 0.7447443008422852, "learning_rate": 3.1770031737905946e-05, "loss": 0.6128, "step": 5070 }, { "epoch": 0.8797709923664122, "grad_norm": 0.7762458324432373, "learning_rate": 3.17645225638573e-05, "loss": 0.5146, "step": 5071 }, { "epoch": 0.8799444829979182, "grad_norm": 0.6583542823791504, "learning_rate": 3.1759012024522103e-05, "loss": 0.4659, "step": 5072 }, { "epoch": 0.880117973629424, "grad_norm": 1.224825143814087, "learning_rate": 3.1753500120539856e-05, "loss": 0.575, "step": 5073 }, { "epoch": 0.8802914642609299, "grad_norm": 1.3071458339691162, "learning_rate": 3.1747986852550225e-05, "loss": 0.4731, "step": 5074 }, { "epoch": 0.8804649548924358, "grad_norm": 0.7071096301078796, "learning_rate": 3.174247222119303e-05, "loss": 0.4604, "step": 5075 }, { "epoch": 0.8806384455239417, "grad_norm": 0.9300988912582397, "learning_rate": 3.173695622710826e-05, "loss": 0.54, "step": 5076 }, { "epoch": 0.8808119361554476, "grad_norm": 0.9893963932991028, "learning_rate": 3.173143887093603e-05, "loss": 0.6067, "step": 5077 }, { "epoch": 0.8809854267869535, "grad_norm": 0.5781874656677246, "learning_rate": 3.172592015331666e-05, "loss": 0.5546, "step": 5078 }, { "epoch": 0.8811589174184594, "grad_norm": 1.6471365690231323, "learning_rate": 3.172040007489058e-05, "loss": 0.6376, "step": 5079 }, { "epoch": 0.8813324080499653, "grad_norm": 0.8680630326271057, "learning_rate": 3.171487863629843e-05, "loss": 0.4967, "step": 5080 }, { "epoch": 0.8815058986814712, "grad_norm": 0.7255361676216125, "learning_rate": 3.1709355838180953e-05, "loss": 0.6582, "step": 5081 }, { "epoch": 0.8816793893129771, "grad_norm": 0.594342827796936, "learning_rate": 3.1703831681179096e-05, "loss": 0.7122, "step": 5082 }, { "epoch": 0.881852879944483, "grad_norm": 0.7831090688705444, "learning_rate": 3.169830616593392e-05, "loss": 0.5392, "step": 5083 }, { "epoch": 0.8820263705759889, "grad_norm": 0.9640688896179199, "learning_rate": 3.169277929308669e-05, "loss": 0.5665, "step": 5084 }, { "epoch": 0.8821998612074948, "grad_norm": 0.7106096744537354, "learning_rate": 3.1687251063278806e-05, "loss": 0.5957, "step": 5085 }, { "epoch": 0.8823733518390007, "grad_norm": 0.7489754557609558, "learning_rate": 3.168172147715181e-05, "loss": 0.5967, "step": 5086 }, { "epoch": 0.8825468424705066, "grad_norm": 0.893450915813446, "learning_rate": 3.167619053534742e-05, "loss": 0.5494, "step": 5087 }, { "epoch": 0.8827203331020125, "grad_norm": 0.6532497406005859, "learning_rate": 3.1670658238507524e-05, "loss": 0.546, "step": 5088 }, { "epoch": 0.8828938237335184, "grad_norm": 0.684929370880127, "learning_rate": 3.1665124587274134e-05, "loss": 0.5731, "step": 5089 }, { "epoch": 0.8830673143650243, "grad_norm": 0.8249834179878235, "learning_rate": 3.1659589582289446e-05, "loss": 0.5719, "step": 5090 }, { "epoch": 0.8832408049965302, "grad_norm": 0.6517502069473267, "learning_rate": 3.16540532241958e-05, "loss": 0.6447, "step": 5091 }, { "epoch": 0.8834142956280361, "grad_norm": 0.6734473705291748, "learning_rate": 3.16485155136357e-05, "loss": 0.5737, "step": 5092 }, { "epoch": 0.8835877862595419, "grad_norm": 0.6809418201446533, "learning_rate": 3.164297645125179e-05, "loss": 0.5717, "step": 5093 }, { "epoch": 0.8837612768910479, "grad_norm": 0.8024628162384033, "learning_rate": 3.16374360376869e-05, "loss": 0.5487, "step": 5094 }, { "epoch": 0.8839347675225537, "grad_norm": 1.124971866607666, "learning_rate": 3.1631894273584007e-05, "loss": 0.5197, "step": 5095 }, { "epoch": 0.8841082581540597, "grad_norm": 0.6369608044624329, "learning_rate": 3.1626351159586224e-05, "loss": 0.5383, "step": 5096 }, { "epoch": 0.8842817487855655, "grad_norm": 1.3009787797927856, "learning_rate": 3.1620806696336845e-05, "loss": 0.5071, "step": 5097 }, { "epoch": 0.8844552394170715, "grad_norm": 0.6499316692352295, "learning_rate": 3.1615260884479304e-05, "loss": 0.5649, "step": 5098 }, { "epoch": 0.8846287300485773, "grad_norm": 0.7309339046478271, "learning_rate": 3.16097137246572e-05, "loss": 0.6074, "step": 5099 }, { "epoch": 0.8848022206800833, "grad_norm": 0.7429511547088623, "learning_rate": 3.1604165217514296e-05, "loss": 0.5323, "step": 5100 }, { "epoch": 0.8849757113115891, "grad_norm": 1.0571507215499878, "learning_rate": 3.15986153636945e-05, "loss": 0.6587, "step": 5101 }, { "epoch": 0.8851492019430951, "grad_norm": 0.7839773893356323, "learning_rate": 3.159306416384187e-05, "loss": 0.6249, "step": 5102 }, { "epoch": 0.885322692574601, "grad_norm": 0.7556633353233337, "learning_rate": 3.158751161860063e-05, "loss": 0.527, "step": 5103 }, { "epoch": 0.8854961832061069, "grad_norm": 0.9725896120071411, "learning_rate": 3.158195772861517e-05, "loss": 0.5485, "step": 5104 }, { "epoch": 0.8856696738376127, "grad_norm": 0.8777176141738892, "learning_rate": 3.1576402494530025e-05, "loss": 0.5334, "step": 5105 }, { "epoch": 0.8858431644691187, "grad_norm": 0.7924165725708008, "learning_rate": 3.1570845916989875e-05, "loss": 0.5017, "step": 5106 }, { "epoch": 0.8860166551006246, "grad_norm": 0.8182539343833923, "learning_rate": 3.156528799663957e-05, "loss": 0.5979, "step": 5107 }, { "epoch": 0.8861901457321305, "grad_norm": 0.946678102016449, "learning_rate": 3.1559728734124125e-05, "loss": 0.5708, "step": 5108 }, { "epoch": 0.8863636363636364, "grad_norm": 1.404183268547058, "learning_rate": 3.155416813008869e-05, "loss": 0.5416, "step": 5109 }, { "epoch": 0.8865371269951423, "grad_norm": 0.7550862431526184, "learning_rate": 3.154860618517858e-05, "loss": 0.5841, "step": 5110 }, { "epoch": 0.8867106176266482, "grad_norm": 0.7476509213447571, "learning_rate": 3.154304290003926e-05, "loss": 0.5048, "step": 5111 }, { "epoch": 0.8868841082581541, "grad_norm": 0.7353691458702087, "learning_rate": 3.1537478275316364e-05, "loss": 0.5107, "step": 5112 }, { "epoch": 0.88705759888966, "grad_norm": 0.8673321008682251, "learning_rate": 3.153191231165568e-05, "loss": 0.5299, "step": 5113 }, { "epoch": 0.8872310895211658, "grad_norm": 0.8236722350120544, "learning_rate": 3.152634500970312e-05, "loss": 0.5636, "step": 5114 }, { "epoch": 0.8874045801526718, "grad_norm": 1.140663504600525, "learning_rate": 3.15207763701048e-05, "loss": 0.5178, "step": 5115 }, { "epoch": 0.8875780707841776, "grad_norm": 0.6625383496284485, "learning_rate": 3.151520639350695e-05, "loss": 0.5139, "step": 5116 }, { "epoch": 0.8877515614156836, "grad_norm": 0.7425408959388733, "learning_rate": 3.150963508055599e-05, "loss": 0.5006, "step": 5117 }, { "epoch": 0.8879250520471894, "grad_norm": 0.6644777059555054, "learning_rate": 3.150406243189846e-05, "loss": 0.5924, "step": 5118 }, { "epoch": 0.8880985426786954, "grad_norm": 0.8685725331306458, "learning_rate": 3.1498488448181074e-05, "loss": 0.6484, "step": 5119 }, { "epoch": 0.8882720333102012, "grad_norm": 0.6809495687484741, "learning_rate": 3.1492913130050715e-05, "loss": 0.6633, "step": 5120 }, { "epoch": 0.8884455239417072, "grad_norm": 0.7087438106536865, "learning_rate": 3.14873364781544e-05, "loss": 0.6554, "step": 5121 }, { "epoch": 0.888619014573213, "grad_norm": 0.7695276737213135, "learning_rate": 3.1481758493139295e-05, "loss": 0.6027, "step": 5122 }, { "epoch": 0.888792505204719, "grad_norm": 0.7297627329826355, "learning_rate": 3.147617917565273e-05, "loss": 0.6149, "step": 5123 }, { "epoch": 0.8889659958362248, "grad_norm": 0.9996668696403503, "learning_rate": 3.147059852634221e-05, "loss": 0.5967, "step": 5124 }, { "epoch": 0.8891394864677308, "grad_norm": 0.8751716613769531, "learning_rate": 3.146501654585537e-05, "loss": 0.5002, "step": 5125 }, { "epoch": 0.8893129770992366, "grad_norm": 0.7023032307624817, "learning_rate": 3.145943323483999e-05, "loss": 0.5453, "step": 5126 }, { "epoch": 0.8894864677307426, "grad_norm": 0.9588670134544373, "learning_rate": 3.145384859394403e-05, "loss": 0.5043, "step": 5127 }, { "epoch": 0.8896599583622484, "grad_norm": 0.957583487033844, "learning_rate": 3.144826262381559e-05, "loss": 0.4735, "step": 5128 }, { "epoch": 0.8898334489937544, "grad_norm": 1.4724335670471191, "learning_rate": 3.144267532510295e-05, "loss": 0.5228, "step": 5129 }, { "epoch": 0.8900069396252602, "grad_norm": 0.8515888452529907, "learning_rate": 3.143708669845449e-05, "loss": 0.5482, "step": 5130 }, { "epoch": 0.8901804302567662, "grad_norm": 0.7285219430923462, "learning_rate": 3.1431496744518794e-05, "loss": 0.6484, "step": 5131 }, { "epoch": 0.890353920888272, "grad_norm": 1.1804567575454712, "learning_rate": 3.142590546394459e-05, "loss": 0.5201, "step": 5132 }, { "epoch": 0.8905274115197779, "grad_norm": 0.6425312161445618, "learning_rate": 3.142031285738073e-05, "loss": 0.5853, "step": 5133 }, { "epoch": 0.8907009021512838, "grad_norm": 0.804465651512146, "learning_rate": 3.141471892547627e-05, "loss": 0.5792, "step": 5134 }, { "epoch": 0.8908743927827897, "grad_norm": 0.670696496963501, "learning_rate": 3.140912366888037e-05, "loss": 0.5276, "step": 5135 }, { "epoch": 0.8910478834142956, "grad_norm": 0.8929148316383362, "learning_rate": 3.140352708824237e-05, "loss": 0.4873, "step": 5136 }, { "epoch": 0.8912213740458015, "grad_norm": 0.965196967124939, "learning_rate": 3.139792918421177e-05, "loss": 0.4818, "step": 5137 }, { "epoch": 0.8913948646773074, "grad_norm": 1.160454273223877, "learning_rate": 3.1392329957438203e-05, "loss": 0.4968, "step": 5138 }, { "epoch": 0.8915683553088133, "grad_norm": 0.9319005608558655, "learning_rate": 3.1386729408571467e-05, "loss": 0.5962, "step": 5139 }, { "epoch": 0.8917418459403192, "grad_norm": 0.6847009658813477, "learning_rate": 3.1381127538261524e-05, "loss": 0.6313, "step": 5140 }, { "epoch": 0.8919153365718251, "grad_norm": 0.7343131303787231, "learning_rate": 3.137552434715846e-05, "loss": 0.5214, "step": 5141 }, { "epoch": 0.892088827203331, "grad_norm": 1.1229071617126465, "learning_rate": 3.136991983591255e-05, "loss": 0.5236, "step": 5142 }, { "epoch": 0.8922623178348369, "grad_norm": 0.7773802280426025, "learning_rate": 3.1364314005174184e-05, "loss": 0.6536, "step": 5143 }, { "epoch": 0.8924358084663429, "grad_norm": 0.7838901281356812, "learning_rate": 3.1358706855593935e-05, "loss": 0.5215, "step": 5144 }, { "epoch": 0.8926092990978487, "grad_norm": 0.7665859460830688, "learning_rate": 3.135309838782253e-05, "loss": 0.6309, "step": 5145 }, { "epoch": 0.8927827897293547, "grad_norm": 0.8027396202087402, "learning_rate": 3.1347488602510824e-05, "loss": 0.614, "step": 5146 }, { "epoch": 0.8929562803608605, "grad_norm": 0.8135119676589966, "learning_rate": 3.134187750030984e-05, "loss": 0.6162, "step": 5147 }, { "epoch": 0.8931297709923665, "grad_norm": 0.7477341890335083, "learning_rate": 3.133626508187076e-05, "loss": 0.5168, "step": 5148 }, { "epoch": 0.8933032616238723, "grad_norm": 1.039930820465088, "learning_rate": 3.133065134784491e-05, "loss": 0.4841, "step": 5149 }, { "epoch": 0.8934767522553783, "grad_norm": 0.8588441610336304, "learning_rate": 3.132503629888376e-05, "loss": 0.6981, "step": 5150 }, { "epoch": 0.8936502428868841, "grad_norm": 0.6859707236289978, "learning_rate": 3.131941993563896e-05, "loss": 0.6285, "step": 5151 }, { "epoch": 0.8938237335183901, "grad_norm": 1.0702961683273315, "learning_rate": 3.131380225876228e-05, "loss": 0.4751, "step": 5152 }, { "epoch": 0.8939972241498959, "grad_norm": 0.7685006260871887, "learning_rate": 3.130818326890567e-05, "loss": 0.5216, "step": 5153 }, { "epoch": 0.8941707147814018, "grad_norm": 1.5758066177368164, "learning_rate": 3.130256296672121e-05, "loss": 0.6899, "step": 5154 }, { "epoch": 0.8943442054129077, "grad_norm": 1.3756191730499268, "learning_rate": 3.1296941352861146e-05, "loss": 0.4948, "step": 5155 }, { "epoch": 0.8945176960444136, "grad_norm": 0.7226235270500183, "learning_rate": 3.1291318427977876e-05, "loss": 0.5996, "step": 5156 }, { "epoch": 0.8946911866759195, "grad_norm": 0.7024737596511841, "learning_rate": 3.128569419272395e-05, "loss": 0.5797, "step": 5157 }, { "epoch": 0.8948646773074254, "grad_norm": 0.9813096523284912, "learning_rate": 3.1280068647752066e-05, "loss": 0.5109, "step": 5158 }, { "epoch": 0.8950381679389313, "grad_norm": 0.7015733122825623, "learning_rate": 3.127444179371506e-05, "loss": 0.4841, "step": 5159 }, { "epoch": 0.8952116585704372, "grad_norm": 0.7428815960884094, "learning_rate": 3.126881363126595e-05, "loss": 0.6926, "step": 5160 }, { "epoch": 0.8953851492019431, "grad_norm": 0.5810912847518921, "learning_rate": 3.126318416105789e-05, "loss": 0.6013, "step": 5161 }, { "epoch": 0.895558639833449, "grad_norm": 0.7125701904296875, "learning_rate": 3.1257553383744186e-05, "loss": 0.5754, "step": 5162 }, { "epoch": 0.8957321304649549, "grad_norm": 0.6725031733512878, "learning_rate": 3.125192129997829e-05, "loss": 0.5127, "step": 5163 }, { "epoch": 0.8959056210964608, "grad_norm": 0.626672089099884, "learning_rate": 3.1246287910413824e-05, "loss": 0.6064, "step": 5164 }, { "epoch": 0.8960791117279667, "grad_norm": 0.8559987545013428, "learning_rate": 3.124065321570453e-05, "loss": 0.569, "step": 5165 }, { "epoch": 0.8962526023594726, "grad_norm": 0.7856817841529846, "learning_rate": 3.123501721650434e-05, "loss": 0.5181, "step": 5166 }, { "epoch": 0.8964260929909785, "grad_norm": 1.0046130418777466, "learning_rate": 3.1229379913467305e-05, "loss": 0.5107, "step": 5167 }, { "epoch": 0.8965995836224844, "grad_norm": 0.7973633408546448, "learning_rate": 3.122374130724765e-05, "loss": 0.5603, "step": 5168 }, { "epoch": 0.8967730742539903, "grad_norm": 1.8391568660736084, "learning_rate": 3.1218101398499736e-05, "loss": 0.522, "step": 5169 }, { "epoch": 0.8969465648854962, "grad_norm": 1.0152584314346313, "learning_rate": 3.1212460187878085e-05, "loss": 0.5153, "step": 5170 }, { "epoch": 0.8971200555170021, "grad_norm": 0.8015640377998352, "learning_rate": 3.1206817676037365e-05, "loss": 0.5974, "step": 5171 }, { "epoch": 0.897293546148508, "grad_norm": 0.6515125036239624, "learning_rate": 3.1201173863632396e-05, "loss": 0.6613, "step": 5172 }, { "epoch": 0.8974670367800138, "grad_norm": 0.8756774067878723, "learning_rate": 3.119552875131814e-05, "loss": 0.5544, "step": 5173 }, { "epoch": 0.8976405274115198, "grad_norm": 1.1011831760406494, "learning_rate": 3.1189882339749735e-05, "loss": 0.5172, "step": 5174 }, { "epoch": 0.8978140180430256, "grad_norm": 0.6310820579528809, "learning_rate": 3.1184234629582444e-05, "loss": 0.5692, "step": 5175 }, { "epoch": 0.8979875086745316, "grad_norm": 0.8873246312141418, "learning_rate": 3.117858562147169e-05, "loss": 0.5406, "step": 5176 }, { "epoch": 0.8981609993060374, "grad_norm": 0.6612831950187683, "learning_rate": 3.117293531607305e-05, "loss": 0.5819, "step": 5177 }, { "epoch": 0.8983344899375434, "grad_norm": 0.8151834011077881, "learning_rate": 3.116728371404225e-05, "loss": 0.5066, "step": 5178 }, { "epoch": 0.8985079805690492, "grad_norm": 0.9618954062461853, "learning_rate": 3.116163081603516e-05, "loss": 0.5043, "step": 5179 }, { "epoch": 0.8986814712005552, "grad_norm": 0.8373923301696777, "learning_rate": 3.115597662270781e-05, "loss": 0.5439, "step": 5180 }, { "epoch": 0.898854961832061, "grad_norm": 0.8901704549789429, "learning_rate": 3.115032113471637e-05, "loss": 0.6293, "step": 5181 }, { "epoch": 0.899028452463567, "grad_norm": 1.0294442176818848, "learning_rate": 3.114466435271717e-05, "loss": 0.6533, "step": 5182 }, { "epoch": 0.8992019430950728, "grad_norm": 0.6956663727760315, "learning_rate": 3.113900627736669e-05, "loss": 0.6963, "step": 5183 }, { "epoch": 0.8993754337265788, "grad_norm": 0.9930188655853271, "learning_rate": 3.113334690932155e-05, "loss": 0.5216, "step": 5184 }, { "epoch": 0.8995489243580846, "grad_norm": 1.244781494140625, "learning_rate": 3.112768624923853e-05, "loss": 0.5474, "step": 5185 }, { "epoch": 0.8997224149895906, "grad_norm": 0.908898651599884, "learning_rate": 3.1122024297774545e-05, "loss": 0.4548, "step": 5186 }, { "epoch": 0.8998959056210964, "grad_norm": 0.6766604781150818, "learning_rate": 3.111636105558669e-05, "loss": 0.6635, "step": 5187 }, { "epoch": 0.9000693962526024, "grad_norm": 0.8646974563598633, "learning_rate": 3.111069652333219e-05, "loss": 0.6058, "step": 5188 }, { "epoch": 0.9002428868841083, "grad_norm": 0.6690236926078796, "learning_rate": 3.1105030701668395e-05, "loss": 0.5758, "step": 5189 }, { "epoch": 0.9004163775156142, "grad_norm": 1.2098963260650635, "learning_rate": 3.1099363591252844e-05, "loss": 0.4812, "step": 5190 }, { "epoch": 0.90058986814712, "grad_norm": 0.7784439325332642, "learning_rate": 3.109369519274323e-05, "loss": 0.4951, "step": 5191 }, { "epoch": 0.9007633587786259, "grad_norm": 0.55345219373703, "learning_rate": 3.1088025506797356e-05, "loss": 0.6191, "step": 5192 }, { "epoch": 0.9009368494101319, "grad_norm": 0.8035297989845276, "learning_rate": 3.1082354534073206e-05, "loss": 0.5957, "step": 5193 }, { "epoch": 0.9011103400416377, "grad_norm": 0.7231811881065369, "learning_rate": 3.107668227522889e-05, "loss": 0.5792, "step": 5194 }, { "epoch": 0.9012838306731437, "grad_norm": 1.1240482330322266, "learning_rate": 3.107100873092269e-05, "loss": 0.5188, "step": 5195 }, { "epoch": 0.9014573213046495, "grad_norm": 0.7079700231552124, "learning_rate": 3.106533390181304e-05, "loss": 0.6602, "step": 5196 }, { "epoch": 0.9016308119361555, "grad_norm": 0.772443950176239, "learning_rate": 3.105965778855848e-05, "loss": 0.5796, "step": 5197 }, { "epoch": 0.9018043025676613, "grad_norm": 0.6135613322257996, "learning_rate": 3.105398039181775e-05, "loss": 0.6349, "step": 5198 }, { "epoch": 0.9019777931991673, "grad_norm": 0.7034465670585632, "learning_rate": 3.1048301712249726e-05, "loss": 0.7047, "step": 5199 }, { "epoch": 0.9021512838306731, "grad_norm": 0.9171649813652039, "learning_rate": 3.1042621750513405e-05, "loss": 0.4851, "step": 5200 }, { "epoch": 0.9023247744621791, "grad_norm": 0.836614727973938, "learning_rate": 3.103694050726797e-05, "loss": 0.462, "step": 5201 }, { "epoch": 0.9024982650936849, "grad_norm": 0.726462721824646, "learning_rate": 3.103125798317272e-05, "loss": 0.4623, "step": 5202 }, { "epoch": 0.9026717557251909, "grad_norm": 0.9646469950675964, "learning_rate": 3.102557417888713e-05, "loss": 0.5901, "step": 5203 }, { "epoch": 0.9028452463566967, "grad_norm": 0.6644760966300964, "learning_rate": 3.101988909507081e-05, "loss": 0.5316, "step": 5204 }, { "epoch": 0.9030187369882027, "grad_norm": 0.7346537113189697, "learning_rate": 3.1014202732383525e-05, "loss": 0.6246, "step": 5205 }, { "epoch": 0.9031922276197085, "grad_norm": 0.6510661840438843, "learning_rate": 3.100851509148517e-05, "loss": 0.5361, "step": 5206 }, { "epoch": 0.9033657182512145, "grad_norm": 1.276292324066162, "learning_rate": 3.100282617303581e-05, "loss": 0.6156, "step": 5207 }, { "epoch": 0.9035392088827203, "grad_norm": 0.8766646385192871, "learning_rate": 3.099713597769566e-05, "loss": 0.51, "step": 5208 }, { "epoch": 0.9037126995142263, "grad_norm": 0.7327955365180969, "learning_rate": 3.0991444506125066e-05, "loss": 0.5787, "step": 5209 }, { "epoch": 0.9038861901457321, "grad_norm": 0.9742059111595154, "learning_rate": 3.098575175898452e-05, "loss": 0.5641, "step": 5210 }, { "epoch": 0.9040596807772381, "grad_norm": 0.7599835991859436, "learning_rate": 3.098005773693469e-05, "loss": 0.5162, "step": 5211 }, { "epoch": 0.9042331714087439, "grad_norm": 0.8517898321151733, "learning_rate": 3.097436244063636e-05, "loss": 0.6124, "step": 5212 }, { "epoch": 0.9044066620402498, "grad_norm": 0.7128757834434509, "learning_rate": 3.0968665870750484e-05, "loss": 0.4829, "step": 5213 }, { "epoch": 0.9045801526717557, "grad_norm": 0.6965377330780029, "learning_rate": 3.0962968027938156e-05, "loss": 0.5793, "step": 5214 }, { "epoch": 0.9047536433032616, "grad_norm": 0.9639315009117126, "learning_rate": 3.09572689128606e-05, "loss": 0.5276, "step": 5215 }, { "epoch": 0.9049271339347675, "grad_norm": 0.8930005431175232, "learning_rate": 3.0951568526179235e-05, "loss": 0.5297, "step": 5216 }, { "epoch": 0.9051006245662734, "grad_norm": 0.8374587893486023, "learning_rate": 3.094586686855558e-05, "loss": 0.64, "step": 5217 }, { "epoch": 0.9052741151977793, "grad_norm": 0.5651605725288391, "learning_rate": 3.094016394065131e-05, "loss": 0.5555, "step": 5218 }, { "epoch": 0.9054476058292852, "grad_norm": 0.7753821611404419, "learning_rate": 3.093445974312828e-05, "loss": 0.5393, "step": 5219 }, { "epoch": 0.9056210964607911, "grad_norm": 1.0455563068389893, "learning_rate": 3.0928754276648443e-05, "loss": 0.5767, "step": 5220 }, { "epoch": 0.905794587092297, "grad_norm": 0.8705766201019287, "learning_rate": 3.092304754187394e-05, "loss": 0.6367, "step": 5221 }, { "epoch": 0.905968077723803, "grad_norm": 0.8444671630859375, "learning_rate": 3.091733953946705e-05, "loss": 0.4584, "step": 5222 }, { "epoch": 0.9061415683553088, "grad_norm": 0.722236156463623, "learning_rate": 3.0911630270090177e-05, "loss": 0.5236, "step": 5223 }, { "epoch": 0.9063150589868147, "grad_norm": 0.7913243770599365, "learning_rate": 3.09059197344059e-05, "loss": 0.541, "step": 5224 }, { "epoch": 0.9064885496183206, "grad_norm": 0.8392624258995056, "learning_rate": 3.090020793307693e-05, "loss": 0.6556, "step": 5225 }, { "epoch": 0.9066620402498266, "grad_norm": 0.7073938846588135, "learning_rate": 3.089449486676613e-05, "loss": 0.5879, "step": 5226 }, { "epoch": 0.9068355308813324, "grad_norm": 0.5899890661239624, "learning_rate": 3.08887805361365e-05, "loss": 0.6183, "step": 5227 }, { "epoch": 0.9070090215128384, "grad_norm": 0.8052083849906921, "learning_rate": 3.08830649418512e-05, "loss": 0.5619, "step": 5228 }, { "epoch": 0.9071825121443442, "grad_norm": 0.8192055225372314, "learning_rate": 3.087734808457354e-05, "loss": 0.5341, "step": 5229 }, { "epoch": 0.9073560027758502, "grad_norm": 0.5747373700141907, "learning_rate": 3.087162996496696e-05, "loss": 0.6653, "step": 5230 }, { "epoch": 0.907529493407356, "grad_norm": 0.9766119122505188, "learning_rate": 3.086591058369505e-05, "loss": 0.5201, "step": 5231 }, { "epoch": 0.9077029840388618, "grad_norm": 0.8069945573806763, "learning_rate": 3.086018994142156e-05, "loss": 0.5443, "step": 5232 }, { "epoch": 0.9078764746703678, "grad_norm": 0.829116940498352, "learning_rate": 3.0854468038810365e-05, "loss": 0.4589, "step": 5233 }, { "epoch": 0.9080499653018737, "grad_norm": 0.6612994074821472, "learning_rate": 3.084874487652551e-05, "loss": 0.5837, "step": 5234 }, { "epoch": 0.9082234559333796, "grad_norm": 0.7349043488502502, "learning_rate": 3.0843020455231173e-05, "loss": 0.5742, "step": 5235 }, { "epoch": 0.9083969465648855, "grad_norm": 0.6258236765861511, "learning_rate": 3.0837294775591675e-05, "loss": 0.5515, "step": 5236 }, { "epoch": 0.9085704371963914, "grad_norm": 0.6086591482162476, "learning_rate": 3.08315678382715e-05, "loss": 0.5005, "step": 5237 }, { "epoch": 0.9087439278278973, "grad_norm": 0.6610521078109741, "learning_rate": 3.082583964393524e-05, "loss": 0.5731, "step": 5238 }, { "epoch": 0.9089174184594032, "grad_norm": 0.6950920820236206, "learning_rate": 3.082011019324768e-05, "loss": 0.6082, "step": 5239 }, { "epoch": 0.9090909090909091, "grad_norm": 0.6199288964271545, "learning_rate": 3.081437948687373e-05, "loss": 0.5906, "step": 5240 }, { "epoch": 0.909264399722415, "grad_norm": 0.6856377124786377, "learning_rate": 3.0808647525478434e-05, "loss": 0.502, "step": 5241 }, { "epoch": 0.9094378903539209, "grad_norm": 0.829742431640625, "learning_rate": 3.0802914309727004e-05, "loss": 0.6536, "step": 5242 }, { "epoch": 0.9096113809854268, "grad_norm": 0.7659375667572021, "learning_rate": 3.079717984028478e-05, "loss": 0.5575, "step": 5243 }, { "epoch": 0.9097848716169327, "grad_norm": 0.6697655916213989, "learning_rate": 3.0791444117817247e-05, "loss": 0.6537, "step": 5244 }, { "epoch": 0.9099583622484386, "grad_norm": 0.6022835373878479, "learning_rate": 3.078570714299005e-05, "loss": 0.5839, "step": 5245 }, { "epoch": 0.9101318528799445, "grad_norm": 0.6736202239990234, "learning_rate": 3.0779968916468974e-05, "loss": 0.6902, "step": 5246 }, { "epoch": 0.9103053435114504, "grad_norm": 0.8430968523025513, "learning_rate": 3.0774229438919944e-05, "loss": 0.5851, "step": 5247 }, { "epoch": 0.9104788341429563, "grad_norm": 1.3374613523483276, "learning_rate": 3.076848871100904e-05, "loss": 0.5179, "step": 5248 }, { "epoch": 0.9106523247744622, "grad_norm": 0.73630291223526, "learning_rate": 3.0762746733402456e-05, "loss": 0.561, "step": 5249 }, { "epoch": 0.9108258154059681, "grad_norm": 0.6396110653877258, "learning_rate": 3.075700350676659e-05, "loss": 0.5751, "step": 5250 }, { "epoch": 0.9109993060374739, "grad_norm": 0.7337170839309692, "learning_rate": 3.075125903176792e-05, "loss": 0.6136, "step": 5251 }, { "epoch": 0.9111727966689799, "grad_norm": 1.0147831439971924, "learning_rate": 3.074551330907312e-05, "loss": 0.5428, "step": 5252 }, { "epoch": 0.9113462873004857, "grad_norm": 0.7059173583984375, "learning_rate": 3.073976633934898e-05, "loss": 0.6399, "step": 5253 }, { "epoch": 0.9115197779319917, "grad_norm": 0.7515703439712524, "learning_rate": 3.073401812326244e-05, "loss": 0.6021, "step": 5254 }, { "epoch": 0.9116932685634975, "grad_norm": 0.9002457857131958, "learning_rate": 3.072826866148058e-05, "loss": 0.5867, "step": 5255 }, { "epoch": 0.9118667591950035, "grad_norm": 0.7314658164978027, "learning_rate": 3.072251795467065e-05, "loss": 0.516, "step": 5256 }, { "epoch": 0.9120402498265093, "grad_norm": 0.9362332224845886, "learning_rate": 3.071676600350002e-05, "loss": 0.52, "step": 5257 }, { "epoch": 0.9122137404580153, "grad_norm": 0.8564382195472717, "learning_rate": 3.071101280863621e-05, "loss": 0.5596, "step": 5258 }, { "epoch": 0.9123872310895211, "grad_norm": 0.974568784236908, "learning_rate": 3.0705258370746874e-05, "loss": 0.5802, "step": 5259 }, { "epoch": 0.9125607217210271, "grad_norm": 0.6499694585800171, "learning_rate": 3.069950269049983e-05, "loss": 0.5847, "step": 5260 }, { "epoch": 0.9127342123525329, "grad_norm": 0.7674784660339355, "learning_rate": 3.069374576856304e-05, "loss": 0.4797, "step": 5261 }, { "epoch": 0.9129077029840389, "grad_norm": 0.6690618991851807, "learning_rate": 3.068798760560458e-05, "loss": 0.5447, "step": 5262 }, { "epoch": 0.9130811936155447, "grad_norm": 0.6676790118217468, "learning_rate": 3.068222820229272e-05, "loss": 0.5741, "step": 5263 }, { "epoch": 0.9132546842470507, "grad_norm": 0.7896708250045776, "learning_rate": 3.067646755929582e-05, "loss": 0.488, "step": 5264 }, { "epoch": 0.9134281748785565, "grad_norm": 0.7204405665397644, "learning_rate": 3.067070567728242e-05, "loss": 0.563, "step": 5265 }, { "epoch": 0.9136016655100625, "grad_norm": 0.7507115006446838, "learning_rate": 3.066494255692119e-05, "loss": 0.5887, "step": 5266 }, { "epoch": 0.9137751561415683, "grad_norm": 0.6853576302528381, "learning_rate": 3.065917819888095e-05, "loss": 0.6367, "step": 5267 }, { "epoch": 0.9139486467730743, "grad_norm": 0.6107186675071716, "learning_rate": 3.0653412603830665e-05, "loss": 0.6069, "step": 5268 }, { "epoch": 0.9141221374045801, "grad_norm": 1.0904207229614258, "learning_rate": 3.064764577243943e-05, "loss": 0.5908, "step": 5269 }, { "epoch": 0.9142956280360861, "grad_norm": 1.1321725845336914, "learning_rate": 3.06418777053765e-05, "loss": 0.5096, "step": 5270 }, { "epoch": 0.914469118667592, "grad_norm": 0.7792344093322754, "learning_rate": 3.063610840331125e-05, "loss": 0.5742, "step": 5271 }, { "epoch": 0.9146426092990978, "grad_norm": 0.6540992259979248, "learning_rate": 3.0630337866913236e-05, "loss": 0.665, "step": 5272 }, { "epoch": 0.9148160999306038, "grad_norm": 0.7271228432655334, "learning_rate": 3.0624566096852124e-05, "loss": 0.4648, "step": 5273 }, { "epoch": 0.9149895905621096, "grad_norm": 0.6793175339698792, "learning_rate": 3.061879309379774e-05, "loss": 0.5343, "step": 5274 }, { "epoch": 0.9151630811936156, "grad_norm": 0.8328024744987488, "learning_rate": 3.061301885842004e-05, "loss": 0.5597, "step": 5275 }, { "epoch": 0.9153365718251214, "grad_norm": 1.870966911315918, "learning_rate": 3.060724339138913e-05, "loss": 0.5872, "step": 5276 }, { "epoch": 0.9155100624566274, "grad_norm": 0.8721336722373962, "learning_rate": 3.060146669337528e-05, "loss": 0.601, "step": 5277 }, { "epoch": 0.9156835530881332, "grad_norm": 0.8151071667671204, "learning_rate": 3.0595688765048855e-05, "loss": 0.5375, "step": 5278 }, { "epoch": 0.9158570437196392, "grad_norm": 0.8064154982566833, "learning_rate": 3.05899096070804e-05, "loss": 0.6492, "step": 5279 }, { "epoch": 0.916030534351145, "grad_norm": 0.7032491564750671, "learning_rate": 3.058412922014061e-05, "loss": 0.6439, "step": 5280 }, { "epoch": 0.916204024982651, "grad_norm": 0.7188166379928589, "learning_rate": 3.057834760490027e-05, "loss": 0.59, "step": 5281 }, { "epoch": 0.9163775156141568, "grad_norm": 0.8670480847358704, "learning_rate": 3.057256476203038e-05, "loss": 0.5725, "step": 5282 }, { "epoch": 0.9165510062456628, "grad_norm": 0.822883129119873, "learning_rate": 3.056678069220203e-05, "loss": 0.686, "step": 5283 }, { "epoch": 0.9167244968771686, "grad_norm": 0.8245817422866821, "learning_rate": 3.056099539608646e-05, "loss": 0.6072, "step": 5284 }, { "epoch": 0.9168979875086746, "grad_norm": 0.7782118916511536, "learning_rate": 3.055520887435507e-05, "loss": 0.5344, "step": 5285 }, { "epoch": 0.9170714781401804, "grad_norm": 0.7215374112129211, "learning_rate": 3.0549421127679395e-05, "loss": 0.5449, "step": 5286 }, { "epoch": 0.9172449687716864, "grad_norm": 0.8895929455757141, "learning_rate": 3.0543632156731105e-05, "loss": 0.5015, "step": 5287 }, { "epoch": 0.9174184594031922, "grad_norm": 0.8760278820991516, "learning_rate": 3.053784196218201e-05, "loss": 0.5188, "step": 5288 }, { "epoch": 0.9175919500346982, "grad_norm": 0.7011651992797852, "learning_rate": 3.053205054470408e-05, "loss": 0.6781, "step": 5289 }, { "epoch": 0.917765440666204, "grad_norm": 0.9101936221122742, "learning_rate": 3.052625790496942e-05, "loss": 0.5417, "step": 5290 }, { "epoch": 0.9179389312977099, "grad_norm": 0.796743631362915, "learning_rate": 3.052046404365025e-05, "loss": 0.5387, "step": 5291 }, { "epoch": 0.9181124219292158, "grad_norm": 0.8621236085891724, "learning_rate": 3.0514668961418984e-05, "loss": 0.6255, "step": 5292 }, { "epoch": 0.9182859125607217, "grad_norm": 0.7842460870742798, "learning_rate": 3.0508872658948125e-05, "loss": 0.5413, "step": 5293 }, { "epoch": 0.9184594031922276, "grad_norm": 0.7441717386245728, "learning_rate": 3.050307513691035e-05, "loss": 0.5526, "step": 5294 }, { "epoch": 0.9186328938237335, "grad_norm": 0.6309664249420166, "learning_rate": 3.0497276395978468e-05, "loss": 0.6365, "step": 5295 }, { "epoch": 0.9188063844552394, "grad_norm": 0.7494913935661316, "learning_rate": 3.0491476436825427e-05, "loss": 0.5692, "step": 5296 }, { "epoch": 0.9189798750867453, "grad_norm": 0.650999128818512, "learning_rate": 3.048567526012432e-05, "loss": 0.6404, "step": 5297 }, { "epoch": 0.9191533657182512, "grad_norm": 0.604469358921051, "learning_rate": 3.047987286654838e-05, "loss": 0.6122, "step": 5298 }, { "epoch": 0.9193268563497571, "grad_norm": 0.5945268273353577, "learning_rate": 3.0474069256770983e-05, "loss": 0.6539, "step": 5299 }, { "epoch": 0.919500346981263, "grad_norm": 0.5838721394538879, "learning_rate": 3.0468264431465643e-05, "loss": 0.7271, "step": 5300 }, { "epoch": 0.9196738376127689, "grad_norm": 0.8042100667953491, "learning_rate": 3.0462458391306023e-05, "loss": 0.5717, "step": 5301 }, { "epoch": 0.9198473282442748, "grad_norm": 1.0784622430801392, "learning_rate": 3.045665113696591e-05, "loss": 0.4805, "step": 5302 }, { "epoch": 0.9200208188757807, "grad_norm": 0.7842101454734802, "learning_rate": 3.0450842669119255e-05, "loss": 0.5531, "step": 5303 }, { "epoch": 0.9201943095072866, "grad_norm": 0.760572612285614, "learning_rate": 3.0445032988440126e-05, "loss": 0.5067, "step": 5304 }, { "epoch": 0.9203678001387925, "grad_norm": 0.7464740872383118, "learning_rate": 3.0439222095602744e-05, "loss": 0.5059, "step": 5305 }, { "epoch": 0.9205412907702984, "grad_norm": 1.0330287218093872, "learning_rate": 3.0433409991281483e-05, "loss": 0.4354, "step": 5306 }, { "epoch": 0.9207147814018043, "grad_norm": 1.0916588306427002, "learning_rate": 3.042759667615083e-05, "loss": 0.5347, "step": 5307 }, { "epoch": 0.9208882720333103, "grad_norm": 0.7047819495201111, "learning_rate": 3.042178215088543e-05, "loss": 0.6464, "step": 5308 }, { "epoch": 0.9210617626648161, "grad_norm": 1.1781091690063477, "learning_rate": 3.041596641616007e-05, "loss": 0.4905, "step": 5309 }, { "epoch": 0.9212352532963219, "grad_norm": 0.8421317338943481, "learning_rate": 3.041014947264967e-05, "loss": 0.6125, "step": 5310 }, { "epoch": 0.9214087439278279, "grad_norm": 0.7191337943077087, "learning_rate": 3.0404331321029293e-05, "loss": 0.6671, "step": 5311 }, { "epoch": 0.9215822345593337, "grad_norm": 0.6631479859352112, "learning_rate": 3.0398511961974143e-05, "loss": 0.5836, "step": 5312 }, { "epoch": 0.9217557251908397, "grad_norm": 0.5192256569862366, "learning_rate": 3.0392691396159562e-05, "loss": 0.6151, "step": 5313 }, { "epoch": 0.9219292158223455, "grad_norm": 0.9425125122070312, "learning_rate": 3.0386869624261036e-05, "loss": 0.495, "step": 5314 }, { "epoch": 0.9221027064538515, "grad_norm": 0.7365497946739197, "learning_rate": 3.0381046646954185e-05, "loss": 0.5093, "step": 5315 }, { "epoch": 0.9222761970853574, "grad_norm": 0.7596287131309509, "learning_rate": 3.0375222464914782e-05, "loss": 0.4493, "step": 5316 }, { "epoch": 0.9224496877168633, "grad_norm": 0.7143353223800659, "learning_rate": 3.036939707881871e-05, "loss": 0.5822, "step": 5317 }, { "epoch": 0.9226231783483692, "grad_norm": 0.9538487195968628, "learning_rate": 3.0363570489342033e-05, "loss": 0.5338, "step": 5318 }, { "epoch": 0.9227966689798751, "grad_norm": 0.79997718334198, "learning_rate": 3.0357742697160924e-05, "loss": 0.6605, "step": 5319 }, { "epoch": 0.922970159611381, "grad_norm": 0.7166290283203125, "learning_rate": 3.03519137029517e-05, "loss": 0.5365, "step": 5320 }, { "epoch": 0.9231436502428869, "grad_norm": 0.6760879158973694, "learning_rate": 3.034608350739084e-05, "loss": 0.5872, "step": 5321 }, { "epoch": 0.9233171408743928, "grad_norm": 0.6162000894546509, "learning_rate": 3.034025211115492e-05, "loss": 0.6427, "step": 5322 }, { "epoch": 0.9234906315058987, "grad_norm": 0.7952109575271606, "learning_rate": 3.03344195149207e-05, "loss": 0.567, "step": 5323 }, { "epoch": 0.9236641221374046, "grad_norm": 0.6535466313362122, "learning_rate": 3.0328585719365057e-05, "loss": 0.5482, "step": 5324 }, { "epoch": 0.9238376127689105, "grad_norm": 0.8834538459777832, "learning_rate": 3.0322750725165e-05, "loss": 0.5892, "step": 5325 }, { "epoch": 0.9240111034004164, "grad_norm": 0.8313627243041992, "learning_rate": 3.0316914532997694e-05, "loss": 0.4695, "step": 5326 }, { "epoch": 0.9241845940319223, "grad_norm": 0.7623519897460938, "learning_rate": 3.031107714354044e-05, "loss": 0.5778, "step": 5327 }, { "epoch": 0.9243580846634282, "grad_norm": 0.53902268409729, "learning_rate": 3.030523855747066e-05, "loss": 0.6056, "step": 5328 }, { "epoch": 0.9245315752949341, "grad_norm": 0.9953489303588867, "learning_rate": 3.0299398775465945e-05, "loss": 0.5792, "step": 5329 }, { "epoch": 0.92470506592644, "grad_norm": 0.88249671459198, "learning_rate": 3.0293557798203998e-05, "loss": 0.5087, "step": 5330 }, { "epoch": 0.9248785565579458, "grad_norm": 0.7026335597038269, "learning_rate": 3.0287715626362676e-05, "loss": 0.5618, "step": 5331 }, { "epoch": 0.9250520471894518, "grad_norm": 0.6941031813621521, "learning_rate": 3.0281872260619965e-05, "loss": 0.5142, "step": 5332 }, { "epoch": 0.9252255378209576, "grad_norm": 1.039933204650879, "learning_rate": 3.0276027701654e-05, "loss": 0.4713, "step": 5333 }, { "epoch": 0.9253990284524636, "grad_norm": 0.6781054735183716, "learning_rate": 3.0270181950143045e-05, "loss": 0.6986, "step": 5334 }, { "epoch": 0.9255725190839694, "grad_norm": 0.7049393653869629, "learning_rate": 3.0264335006765506e-05, "loss": 0.5482, "step": 5335 }, { "epoch": 0.9257460097154754, "grad_norm": 0.8201839923858643, "learning_rate": 3.025848687219993e-05, "loss": 0.6687, "step": 5336 }, { "epoch": 0.9259195003469812, "grad_norm": 0.9204058647155762, "learning_rate": 3.0252637547125e-05, "loss": 0.5258, "step": 5337 }, { "epoch": 0.9260929909784872, "grad_norm": 0.86858731508255, "learning_rate": 3.0246787032219535e-05, "loss": 0.4951, "step": 5338 }, { "epoch": 0.926266481609993, "grad_norm": 0.9537758231163025, "learning_rate": 3.0240935328162498e-05, "loss": 0.5529, "step": 5339 }, { "epoch": 0.926439972241499, "grad_norm": 0.8528037071228027, "learning_rate": 3.0235082435632984e-05, "loss": 0.65, "step": 5340 }, { "epoch": 0.9266134628730048, "grad_norm": 1.1834839582443237, "learning_rate": 3.0229228355310218e-05, "loss": 0.5521, "step": 5341 }, { "epoch": 0.9267869535045108, "grad_norm": 0.7489802837371826, "learning_rate": 3.022337308787359e-05, "loss": 0.5699, "step": 5342 }, { "epoch": 0.9269604441360166, "grad_norm": 0.9030627608299255, "learning_rate": 3.0217516634002596e-05, "loss": 0.5197, "step": 5343 }, { "epoch": 0.9271339347675226, "grad_norm": 0.6515082716941833, "learning_rate": 3.02116589943769e-05, "loss": 0.6346, "step": 5344 }, { "epoch": 0.9273074253990284, "grad_norm": 0.791456937789917, "learning_rate": 3.020580016967627e-05, "loss": 0.6565, "step": 5345 }, { "epoch": 0.9274809160305344, "grad_norm": 0.8554321527481079, "learning_rate": 3.019994016058064e-05, "loss": 0.5292, "step": 5346 }, { "epoch": 0.9276544066620402, "grad_norm": 0.7764866948127747, "learning_rate": 3.019407896777007e-05, "loss": 0.5233, "step": 5347 }, { "epoch": 0.9278278972935462, "grad_norm": 0.7593770623207092, "learning_rate": 3.018821659192476e-05, "loss": 0.5081, "step": 5348 }, { "epoch": 0.928001387925052, "grad_norm": 0.6704859733581543, "learning_rate": 3.018235303372504e-05, "loss": 0.5856, "step": 5349 }, { "epoch": 0.9281748785565579, "grad_norm": 0.6950547099113464, "learning_rate": 3.0176488293851388e-05, "loss": 0.511, "step": 5350 }, { "epoch": 0.9283483691880638, "grad_norm": 0.5945006012916565, "learning_rate": 3.017062237298441e-05, "loss": 0.6611, "step": 5351 }, { "epoch": 0.9285218598195697, "grad_norm": 0.925314724445343, "learning_rate": 3.0164755271804856e-05, "loss": 0.6042, "step": 5352 }, { "epoch": 0.9286953504510757, "grad_norm": 0.9462495446205139, "learning_rate": 3.0158886990993612e-05, "loss": 0.5857, "step": 5353 }, { "epoch": 0.9288688410825815, "grad_norm": 0.7837413549423218, "learning_rate": 3.015301753123169e-05, "loss": 0.6163, "step": 5354 }, { "epoch": 0.9290423317140875, "grad_norm": 0.8495484590530396, "learning_rate": 3.0147146893200248e-05, "loss": 0.6177, "step": 5355 }, { "epoch": 0.9292158223455933, "grad_norm": 0.8009089827537537, "learning_rate": 3.0141275077580592e-05, "loss": 0.5399, "step": 5356 }, { "epoch": 0.9293893129770993, "grad_norm": 1.9470219612121582, "learning_rate": 3.0135402085054148e-05, "loss": 0.5857, "step": 5357 }, { "epoch": 0.9295628036086051, "grad_norm": 0.6537600159645081, "learning_rate": 3.0129527916302482e-05, "loss": 0.7056, "step": 5358 }, { "epoch": 0.9297362942401111, "grad_norm": 0.8429149985313416, "learning_rate": 3.0123652572007295e-05, "loss": 0.6906, "step": 5359 }, { "epoch": 0.9299097848716169, "grad_norm": 0.8222671747207642, "learning_rate": 3.0117776052850427e-05, "loss": 0.6055, "step": 5360 }, { "epoch": 0.9300832755031229, "grad_norm": 0.5313491225242615, "learning_rate": 3.0111898359513865e-05, "loss": 0.6307, "step": 5361 }, { "epoch": 0.9302567661346287, "grad_norm": 0.6201162934303284, "learning_rate": 3.0106019492679714e-05, "loss": 0.6304, "step": 5362 }, { "epoch": 0.9304302567661347, "grad_norm": 0.9020854234695435, "learning_rate": 3.0100139453030222e-05, "loss": 0.4982, "step": 5363 }, { "epoch": 0.9306037473976405, "grad_norm": 0.7810084223747253, "learning_rate": 3.009425824124778e-05, "loss": 0.6063, "step": 5364 }, { "epoch": 0.9307772380291465, "grad_norm": 0.7074047923088074, "learning_rate": 3.0088375858014905e-05, "loss": 0.6418, "step": 5365 }, { "epoch": 0.9309507286606523, "grad_norm": 0.7246010899543762, "learning_rate": 3.008249230401426e-05, "loss": 0.6698, "step": 5366 }, { "epoch": 0.9311242192921583, "grad_norm": 0.7391355037689209, "learning_rate": 3.007660757992863e-05, "loss": 0.6702, "step": 5367 }, { "epoch": 0.9312977099236641, "grad_norm": 0.7674587368965149, "learning_rate": 3.0070721686440953e-05, "loss": 0.5809, "step": 5368 }, { "epoch": 0.9314712005551701, "grad_norm": 0.7705550193786621, "learning_rate": 3.0064834624234283e-05, "loss": 0.6299, "step": 5369 }, { "epoch": 0.9316446911866759, "grad_norm": 1.0173293352127075, "learning_rate": 3.0058946393991833e-05, "loss": 0.4689, "step": 5370 }, { "epoch": 0.9318181818181818, "grad_norm": 0.5605577230453491, "learning_rate": 3.005305699639693e-05, "loss": 0.6779, "step": 5371 }, { "epoch": 0.9319916724496877, "grad_norm": 0.6631309986114502, "learning_rate": 3.004716643213305e-05, "loss": 0.6848, "step": 5372 }, { "epoch": 0.9321651630811936, "grad_norm": 0.7009269595146179, "learning_rate": 3.0041274701883794e-05, "loss": 0.4752, "step": 5373 }, { "epoch": 0.9323386537126995, "grad_norm": 0.7363549470901489, "learning_rate": 3.003538180633292e-05, "loss": 0.5668, "step": 5374 }, { "epoch": 0.9325121443442054, "grad_norm": 2.7398874759674072, "learning_rate": 3.002948774616429e-05, "loss": 0.6873, "step": 5375 }, { "epoch": 0.9326856349757113, "grad_norm": 0.7422850131988525, "learning_rate": 3.0023592522061916e-05, "loss": 0.6235, "step": 5376 }, { "epoch": 0.9328591256072172, "grad_norm": 0.642112672328949, "learning_rate": 3.0017696134709946e-05, "loss": 0.6691, "step": 5377 }, { "epoch": 0.9330326162387231, "grad_norm": 0.731460690498352, "learning_rate": 3.0011798584792672e-05, "loss": 0.5582, "step": 5378 }, { "epoch": 0.933206106870229, "grad_norm": 1.0289613008499146, "learning_rate": 3.000589987299451e-05, "loss": 0.5909, "step": 5379 }, { "epoch": 0.9333795975017349, "grad_norm": 0.6110485792160034, "learning_rate": 3.0000000000000004e-05, "loss": 0.6539, "step": 5380 }, { "epoch": 0.9335530881332408, "grad_norm": 0.7026828527450562, "learning_rate": 2.9994098966493842e-05, "loss": 0.5867, "step": 5381 }, { "epoch": 0.9337265787647467, "grad_norm": 0.675056517124176, "learning_rate": 2.9988196773160857e-05, "loss": 0.5767, "step": 5382 }, { "epoch": 0.9339000693962526, "grad_norm": 0.8703925013542175, "learning_rate": 2.9982293420685998e-05, "loss": 0.6873, "step": 5383 }, { "epoch": 0.9340735600277585, "grad_norm": 0.7216558456420898, "learning_rate": 2.9976388909754348e-05, "loss": 0.5837, "step": 5384 }, { "epoch": 0.9342470506592644, "grad_norm": 1.0611991882324219, "learning_rate": 2.997048324105115e-05, "loss": 0.5745, "step": 5385 }, { "epoch": 0.9344205412907703, "grad_norm": 0.6006755828857422, "learning_rate": 2.996457641526174e-05, "loss": 0.4956, "step": 5386 }, { "epoch": 0.9345940319222762, "grad_norm": 0.713334858417511, "learning_rate": 2.995866843307164e-05, "loss": 0.6038, "step": 5387 }, { "epoch": 0.9347675225537821, "grad_norm": 0.6840294599533081, "learning_rate": 2.995275929516646e-05, "loss": 0.5118, "step": 5388 }, { "epoch": 0.934941013185288, "grad_norm": 0.853123128414154, "learning_rate": 2.9946849002231962e-05, "loss": 0.4916, "step": 5389 }, { "epoch": 0.9351145038167938, "grad_norm": 0.750608503818512, "learning_rate": 2.9940937554954053e-05, "loss": 0.4921, "step": 5390 }, { "epoch": 0.9352879944482998, "grad_norm": 0.769507110118866, "learning_rate": 2.993502495401875e-05, "loss": 0.6527, "step": 5391 }, { "epoch": 0.9354614850798056, "grad_norm": 0.6452317237854004, "learning_rate": 2.9929111200112233e-05, "loss": 0.5686, "step": 5392 }, { "epoch": 0.9356349757113116, "grad_norm": 0.6807578802108765, "learning_rate": 2.9923196293920786e-05, "loss": 0.6285, "step": 5393 }, { "epoch": 0.9358084663428174, "grad_norm": 0.6604334712028503, "learning_rate": 2.991728023613085e-05, "loss": 0.6748, "step": 5394 }, { "epoch": 0.9359819569743234, "grad_norm": 0.6205240488052368, "learning_rate": 2.9911363027428982e-05, "loss": 0.662, "step": 5395 }, { "epoch": 0.9361554476058292, "grad_norm": 0.8346847891807556, "learning_rate": 2.990544466850189e-05, "loss": 0.6627, "step": 5396 }, { "epoch": 0.9363289382373352, "grad_norm": 0.9589856863021851, "learning_rate": 2.9899525160036398e-05, "loss": 0.675, "step": 5397 }, { "epoch": 0.936502428868841, "grad_norm": 0.6498943567276001, "learning_rate": 2.9893604502719474e-05, "loss": 0.6348, "step": 5398 }, { "epoch": 0.936675919500347, "grad_norm": 1.012377142906189, "learning_rate": 2.9887682697238226e-05, "loss": 0.5652, "step": 5399 }, { "epoch": 0.9368494101318529, "grad_norm": 0.6812121868133545, "learning_rate": 2.9881759744279875e-05, "loss": 0.6731, "step": 5400 }, { "epoch": 0.9370229007633588, "grad_norm": 0.7409417629241943, "learning_rate": 2.9875835644531793e-05, "loss": 0.5255, "step": 5401 }, { "epoch": 0.9371963913948647, "grad_norm": 0.7330713272094727, "learning_rate": 2.986991039868148e-05, "loss": 0.5776, "step": 5402 }, { "epoch": 0.9373698820263706, "grad_norm": 0.6653503775596619, "learning_rate": 2.986398400741656e-05, "loss": 0.5612, "step": 5403 }, { "epoch": 0.9375433726578765, "grad_norm": 0.6653831601142883, "learning_rate": 2.9858056471424804e-05, "loss": 0.6423, "step": 5404 }, { "epoch": 0.9377168632893824, "grad_norm": 0.7596984505653381, "learning_rate": 2.9852127791394106e-05, "loss": 0.5795, "step": 5405 }, { "epoch": 0.9378903539208883, "grad_norm": 0.9415115714073181, "learning_rate": 2.98461979680125e-05, "loss": 0.5104, "step": 5406 }, { "epoch": 0.9380638445523942, "grad_norm": 0.8615516424179077, "learning_rate": 2.984026700196814e-05, "loss": 0.4844, "step": 5407 }, { "epoch": 0.9382373351839001, "grad_norm": 0.7577638626098633, "learning_rate": 2.983433489394934e-05, "loss": 0.4819, "step": 5408 }, { "epoch": 0.9384108258154059, "grad_norm": 1.619036316871643, "learning_rate": 2.982840164464451e-05, "loss": 0.5795, "step": 5409 }, { "epoch": 0.9385843164469119, "grad_norm": 0.6858264207839966, "learning_rate": 2.9822467254742212e-05, "loss": 0.4938, "step": 5410 }, { "epoch": 0.9387578070784177, "grad_norm": 0.8433195352554321, "learning_rate": 2.9816531724931152e-05, "loss": 0.4526, "step": 5411 }, { "epoch": 0.9389312977099237, "grad_norm": 0.7492510676383972, "learning_rate": 2.9810595055900148e-05, "loss": 0.5171, "step": 5412 }, { "epoch": 0.9391047883414295, "grad_norm": 1.151039958000183, "learning_rate": 2.9804657248338146e-05, "loss": 0.4989, "step": 5413 }, { "epoch": 0.9392782789729355, "grad_norm": 0.6459309458732605, "learning_rate": 2.9798718302934255e-05, "loss": 0.5631, "step": 5414 }, { "epoch": 0.9394517696044413, "grad_norm": 0.7519208788871765, "learning_rate": 2.9792778220377675e-05, "loss": 0.5808, "step": 5415 }, { "epoch": 0.9396252602359473, "grad_norm": 0.8628186583518982, "learning_rate": 2.9786837001357782e-05, "loss": 0.6256, "step": 5416 }, { "epoch": 0.9397987508674531, "grad_norm": 0.5924263000488281, "learning_rate": 2.978089464656405e-05, "loss": 0.6926, "step": 5417 }, { "epoch": 0.9399722414989591, "grad_norm": 0.7776699662208557, "learning_rate": 2.9774951156686094e-05, "loss": 0.5999, "step": 5418 }, { "epoch": 0.9401457321304649, "grad_norm": 0.7869418263435364, "learning_rate": 2.9769006532413667e-05, "loss": 0.5306, "step": 5419 }, { "epoch": 0.9403192227619709, "grad_norm": 0.9118232727050781, "learning_rate": 2.976306077443665e-05, "loss": 0.5358, "step": 5420 }, { "epoch": 0.9404927133934767, "grad_norm": 0.5323087573051453, "learning_rate": 2.9757113883445056e-05, "loss": 0.4699, "step": 5421 }, { "epoch": 0.9406662040249827, "grad_norm": 1.0102770328521729, "learning_rate": 2.9751165860129024e-05, "loss": 0.4867, "step": 5422 }, { "epoch": 0.9408396946564885, "grad_norm": 0.6074740886688232, "learning_rate": 2.974521670517883e-05, "loss": 0.6996, "step": 5423 }, { "epoch": 0.9410131852879945, "grad_norm": 0.9579439163208008, "learning_rate": 2.973926641928489e-05, "loss": 0.5344, "step": 5424 }, { "epoch": 0.9411866759195003, "grad_norm": 0.7391860485076904, "learning_rate": 2.9733315003137725e-05, "loss": 0.4725, "step": 5425 }, { "epoch": 0.9413601665510063, "grad_norm": 1.3167799711227417, "learning_rate": 2.9727362457428012e-05, "loss": 0.4675, "step": 5426 }, { "epoch": 0.9415336571825121, "grad_norm": 1.0091986656188965, "learning_rate": 2.9721408782846554e-05, "loss": 0.5354, "step": 5427 }, { "epoch": 0.9417071478140181, "grad_norm": 0.6604625582695007, "learning_rate": 2.971545398008428e-05, "loss": 0.5947, "step": 5428 }, { "epoch": 0.9418806384455239, "grad_norm": 0.723139226436615, "learning_rate": 2.970949804983225e-05, "loss": 0.6659, "step": 5429 }, { "epoch": 0.9420541290770298, "grad_norm": 0.6684121489524841, "learning_rate": 2.970354099278166e-05, "loss": 0.5742, "step": 5430 }, { "epoch": 0.9422276197085357, "grad_norm": 0.7406533360481262, "learning_rate": 2.9697582809623828e-05, "loss": 0.4503, "step": 5431 }, { "epoch": 0.9424011103400416, "grad_norm": 1.013691782951355, "learning_rate": 2.9691623501050212e-05, "loss": 0.6056, "step": 5432 }, { "epoch": 0.9425746009715475, "grad_norm": 0.9413995146751404, "learning_rate": 2.968566306775239e-05, "loss": 0.6575, "step": 5433 }, { "epoch": 0.9427480916030534, "grad_norm": 0.6486615538597107, "learning_rate": 2.967970151042209e-05, "loss": 0.5552, "step": 5434 }, { "epoch": 0.9429215822345594, "grad_norm": 0.7300331592559814, "learning_rate": 2.9673738829751148e-05, "loss": 0.6271, "step": 5435 }, { "epoch": 0.9430950728660652, "grad_norm": 0.7458223700523376, "learning_rate": 2.9667775026431544e-05, "loss": 0.4535, "step": 5436 }, { "epoch": 0.9432685634975712, "grad_norm": 0.8116676807403564, "learning_rate": 2.9661810101155387e-05, "loss": 0.5339, "step": 5437 }, { "epoch": 0.943442054129077, "grad_norm": 0.9047504663467407, "learning_rate": 2.96558440546149e-05, "loss": 0.5934, "step": 5438 }, { "epoch": 0.943615544760583, "grad_norm": 0.8103256225585938, "learning_rate": 2.9649876887502467e-05, "loss": 0.5372, "step": 5439 }, { "epoch": 0.9437890353920888, "grad_norm": 0.9196052551269531, "learning_rate": 2.9643908600510572e-05, "loss": 0.5477, "step": 5440 }, { "epoch": 0.9439625260235948, "grad_norm": 0.8215181827545166, "learning_rate": 2.9637939194331848e-05, "loss": 0.5109, "step": 5441 }, { "epoch": 0.9441360166551006, "grad_norm": 0.7650948166847229, "learning_rate": 2.9631968669659047e-05, "loss": 0.5592, "step": 5442 }, { "epoch": 0.9443095072866066, "grad_norm": 0.8600346446037292, "learning_rate": 2.9625997027185064e-05, "loss": 0.5566, "step": 5443 }, { "epoch": 0.9444829979181124, "grad_norm": 0.6058077216148376, "learning_rate": 2.9620024267602906e-05, "loss": 0.6108, "step": 5444 }, { "epoch": 0.9446564885496184, "grad_norm": 0.6449906229972839, "learning_rate": 2.9614050391605725e-05, "loss": 0.6129, "step": 5445 }, { "epoch": 0.9448299791811242, "grad_norm": 0.6964638829231262, "learning_rate": 2.9608075399886792e-05, "loss": 0.5753, "step": 5446 }, { "epoch": 0.9450034698126302, "grad_norm": 0.840380072593689, "learning_rate": 2.9602099293139512e-05, "loss": 0.5975, "step": 5447 }, { "epoch": 0.945176960444136, "grad_norm": 0.8918290734291077, "learning_rate": 2.9596122072057424e-05, "loss": 0.5198, "step": 5448 }, { "epoch": 0.9453504510756419, "grad_norm": 0.6563118100166321, "learning_rate": 2.9590143737334182e-05, "loss": 0.5398, "step": 5449 }, { "epoch": 0.9455239417071478, "grad_norm": 0.7787469625473022, "learning_rate": 2.958416428966359e-05, "loss": 0.6144, "step": 5450 }, { "epoch": 0.9456974323386537, "grad_norm": 0.6563782691955566, "learning_rate": 2.9578183729739566e-05, "loss": 0.5747, "step": 5451 }, { "epoch": 0.9458709229701596, "grad_norm": 1.1232762336730957, "learning_rate": 2.957220205825615e-05, "loss": 0.5459, "step": 5452 }, { "epoch": 0.9460444136016655, "grad_norm": 1.3598390817642212, "learning_rate": 2.956621927590754e-05, "loss": 0.5029, "step": 5453 }, { "epoch": 0.9462179042331714, "grad_norm": 0.5797144174575806, "learning_rate": 2.956023538338803e-05, "loss": 0.5321, "step": 5454 }, { "epoch": 0.9463913948646773, "grad_norm": 0.8988631367683411, "learning_rate": 2.9554250381392067e-05, "loss": 0.5005, "step": 5455 }, { "epoch": 0.9465648854961832, "grad_norm": 0.6104587316513062, "learning_rate": 2.9548264270614217e-05, "loss": 0.5681, "step": 5456 }, { "epoch": 0.9467383761276891, "grad_norm": 0.6378024816513062, "learning_rate": 2.954227705174917e-05, "loss": 0.5446, "step": 5457 }, { "epoch": 0.946911866759195, "grad_norm": 0.6203073859214783, "learning_rate": 2.953628872549175e-05, "loss": 0.5626, "step": 5458 }, { "epoch": 0.9470853573907009, "grad_norm": 1.1577155590057373, "learning_rate": 2.9530299292536918e-05, "loss": 0.6069, "step": 5459 }, { "epoch": 0.9472588480222068, "grad_norm": 0.7365251779556274, "learning_rate": 2.952430875357974e-05, "loss": 0.6189, "step": 5460 }, { "epoch": 0.9474323386537127, "grad_norm": 0.8239716291427612, "learning_rate": 2.951831710931544e-05, "loss": 0.5234, "step": 5461 }, { "epoch": 0.9476058292852186, "grad_norm": 0.8461446166038513, "learning_rate": 2.9512324360439347e-05, "loss": 0.4756, "step": 5462 }, { "epoch": 0.9477793199167245, "grad_norm": 0.7904476523399353, "learning_rate": 2.950633050764692e-05, "loss": 0.635, "step": 5463 }, { "epoch": 0.9479528105482304, "grad_norm": 0.792469322681427, "learning_rate": 2.9500335551633773e-05, "loss": 0.5447, "step": 5464 }, { "epoch": 0.9481263011797363, "grad_norm": 0.7433012127876282, "learning_rate": 2.9494339493095605e-05, "loss": 0.4449, "step": 5465 }, { "epoch": 0.9482997918112422, "grad_norm": 0.8306467533111572, "learning_rate": 2.9488342332728276e-05, "loss": 0.4879, "step": 5466 }, { "epoch": 0.9484732824427481, "grad_norm": 0.7154919505119324, "learning_rate": 2.9482344071227767e-05, "loss": 0.4698, "step": 5467 }, { "epoch": 0.9486467730742539, "grad_norm": 0.6896546483039856, "learning_rate": 2.9476344709290175e-05, "loss": 0.6769, "step": 5468 }, { "epoch": 0.9488202637057599, "grad_norm": 0.977916955947876, "learning_rate": 2.9470344247611744e-05, "loss": 0.5424, "step": 5469 }, { "epoch": 0.9489937543372657, "grad_norm": 0.7709428071975708, "learning_rate": 2.9464342686888826e-05, "loss": 0.6493, "step": 5470 }, { "epoch": 0.9491672449687717, "grad_norm": 0.7862691879272461, "learning_rate": 2.9458340027817912e-05, "loss": 0.6556, "step": 5471 }, { "epoch": 0.9493407356002775, "grad_norm": 0.9022845029830933, "learning_rate": 2.9452336271095613e-05, "loss": 0.6194, "step": 5472 }, { "epoch": 0.9495142262317835, "grad_norm": 0.6442118883132935, "learning_rate": 2.9446331417418678e-05, "loss": 0.5963, "step": 5473 }, { "epoch": 0.9496877168632893, "grad_norm": 0.9894945621490479, "learning_rate": 2.9440325467483974e-05, "loss": 0.5223, "step": 5474 }, { "epoch": 0.9498612074947953, "grad_norm": 0.7461522817611694, "learning_rate": 2.94343184219885e-05, "loss": 0.6187, "step": 5475 }, { "epoch": 0.9500346981263011, "grad_norm": 0.615246057510376, "learning_rate": 2.942831028162938e-05, "loss": 0.6678, "step": 5476 }, { "epoch": 0.9502081887578071, "grad_norm": 0.7718769311904907, "learning_rate": 2.942230104710387e-05, "loss": 0.5106, "step": 5477 }, { "epoch": 0.950381679389313, "grad_norm": 0.45673322677612305, "learning_rate": 2.9416290719109333e-05, "loss": 0.6527, "step": 5478 }, { "epoch": 0.9505551700208189, "grad_norm": 0.7970420122146606, "learning_rate": 2.9410279298343294e-05, "loss": 0.6426, "step": 5479 }, { "epoch": 0.9507286606523248, "grad_norm": 0.7532060146331787, "learning_rate": 2.9404266785503376e-05, "loss": 0.4949, "step": 5480 }, { "epoch": 0.9509021512838307, "grad_norm": 0.6640486717224121, "learning_rate": 2.939825318128734e-05, "loss": 0.6591, "step": 5481 }, { "epoch": 0.9510756419153366, "grad_norm": 0.5723349452018738, "learning_rate": 2.9392238486393068e-05, "loss": 0.7288, "step": 5482 }, { "epoch": 0.9512491325468425, "grad_norm": 0.6749547123908997, "learning_rate": 2.9386222701518582e-05, "loss": 0.5728, "step": 5483 }, { "epoch": 0.9514226231783484, "grad_norm": 1.8638474941253662, "learning_rate": 2.9380205827362007e-05, "loss": 0.519, "step": 5484 }, { "epoch": 0.9515961138098543, "grad_norm": 0.7419898509979248, "learning_rate": 2.937418786462162e-05, "loss": 0.6128, "step": 5485 }, { "epoch": 0.9517696044413602, "grad_norm": 1.0060269832611084, "learning_rate": 2.9368168813995806e-05, "loss": 0.5758, "step": 5486 }, { "epoch": 0.9519430950728661, "grad_norm": 0.7519690990447998, "learning_rate": 2.9362148676183087e-05, "loss": 0.5039, "step": 5487 }, { "epoch": 0.952116585704372, "grad_norm": 0.8661027550697327, "learning_rate": 2.9356127451882105e-05, "loss": 0.6177, "step": 5488 }, { "epoch": 0.9522900763358778, "grad_norm": 0.7929277420043945, "learning_rate": 2.9350105141791627e-05, "loss": 0.5514, "step": 5489 }, { "epoch": 0.9524635669673838, "grad_norm": 1.2426139116287231, "learning_rate": 2.934408174661055e-05, "loss": 0.6614, "step": 5490 }, { "epoch": 0.9526370575988896, "grad_norm": 1.2336061000823975, "learning_rate": 2.9338057267037906e-05, "loss": 0.5786, "step": 5491 }, { "epoch": 0.9528105482303956, "grad_norm": 0.7504399418830872, "learning_rate": 2.933203170377283e-05, "loss": 0.5699, "step": 5492 }, { "epoch": 0.9529840388619014, "grad_norm": 0.7243566513061523, "learning_rate": 2.9326005057514605e-05, "loss": 0.5646, "step": 5493 }, { "epoch": 0.9531575294934074, "grad_norm": 0.7563011050224304, "learning_rate": 2.931997732896262e-05, "loss": 0.5378, "step": 5494 }, { "epoch": 0.9533310201249132, "grad_norm": 0.727756917476654, "learning_rate": 2.9313948518816417e-05, "loss": 0.5839, "step": 5495 }, { "epoch": 0.9535045107564192, "grad_norm": 0.7686708569526672, "learning_rate": 2.9307918627775627e-05, "loss": 0.6272, "step": 5496 }, { "epoch": 0.953678001387925, "grad_norm": 0.8600760698318481, "learning_rate": 2.9301887656540034e-05, "loss": 0.4978, "step": 5497 }, { "epoch": 0.953851492019431, "grad_norm": 1.7401413917541504, "learning_rate": 2.9295855605809543e-05, "loss": 0.521, "step": 5498 }, { "epoch": 0.9540249826509368, "grad_norm": 1.0921040773391724, "learning_rate": 2.9289822476284172e-05, "loss": 0.6127, "step": 5499 }, { "epoch": 0.9541984732824428, "grad_norm": 0.7612084746360779, "learning_rate": 2.9283788268664085e-05, "loss": 0.6558, "step": 5500 }, { "epoch": 0.9543719639139486, "grad_norm": 0.7520211338996887, "learning_rate": 2.9277752983649548e-05, "loss": 0.5397, "step": 5501 }, { "epoch": 0.9545454545454546, "grad_norm": 0.8717694878578186, "learning_rate": 2.9271716621940965e-05, "loss": 0.6041, "step": 5502 }, { "epoch": 0.9547189451769604, "grad_norm": 0.8843144774436951, "learning_rate": 2.9265679184238867e-05, "loss": 0.6799, "step": 5503 }, { "epoch": 0.9548924358084664, "grad_norm": 0.555732250213623, "learning_rate": 2.9259640671243903e-05, "loss": 0.658, "step": 5504 }, { "epoch": 0.9550659264399722, "grad_norm": 0.6608411073684692, "learning_rate": 2.9253601083656846e-05, "loss": 0.5549, "step": 5505 }, { "epoch": 0.9552394170714782, "grad_norm": 0.6673864722251892, "learning_rate": 2.9247560422178604e-05, "loss": 0.6238, "step": 5506 }, { "epoch": 0.955412907702984, "grad_norm": 0.8510689735412598, "learning_rate": 2.92415186875102e-05, "loss": 0.6591, "step": 5507 }, { "epoch": 0.9555863983344899, "grad_norm": 0.6071183681488037, "learning_rate": 2.923547588035278e-05, "loss": 0.6448, "step": 5508 }, { "epoch": 0.9557598889659958, "grad_norm": 0.6564860939979553, "learning_rate": 2.922943200140763e-05, "loss": 0.609, "step": 5509 }, { "epoch": 0.9559333795975017, "grad_norm": 0.9711827635765076, "learning_rate": 2.9223387051376133e-05, "loss": 0.6007, "step": 5510 }, { "epoch": 0.9561068702290076, "grad_norm": 0.6167823076248169, "learning_rate": 2.9217341030959824e-05, "loss": 0.6223, "step": 5511 }, { "epoch": 0.9562803608605135, "grad_norm": 0.6490203738212585, "learning_rate": 2.921129394086035e-05, "loss": 0.5486, "step": 5512 }, { "epoch": 0.9564538514920194, "grad_norm": 0.8874254822731018, "learning_rate": 2.920524578177948e-05, "loss": 0.5619, "step": 5513 }, { "epoch": 0.9566273421235253, "grad_norm": 0.8154889941215515, "learning_rate": 2.919919655441911e-05, "loss": 0.6151, "step": 5514 }, { "epoch": 0.9568008327550312, "grad_norm": 0.6881371736526489, "learning_rate": 2.9193146259481265e-05, "loss": 0.6265, "step": 5515 }, { "epoch": 0.9569743233865371, "grad_norm": 0.6467137336730957, "learning_rate": 2.9187094897668076e-05, "loss": 0.7117, "step": 5516 }, { "epoch": 0.957147814018043, "grad_norm": 0.7240439057350159, "learning_rate": 2.9181042469681825e-05, "loss": 0.7471, "step": 5517 }, { "epoch": 0.9573213046495489, "grad_norm": 0.6998037099838257, "learning_rate": 2.9174988976224897e-05, "loss": 0.5426, "step": 5518 }, { "epoch": 0.9574947952810549, "grad_norm": 0.7013698816299438, "learning_rate": 2.9168934417999803e-05, "loss": 0.6407, "step": 5519 }, { "epoch": 0.9576682859125607, "grad_norm": 0.5440288782119751, "learning_rate": 2.916287879570919e-05, "loss": 0.5219, "step": 5520 }, { "epoch": 0.9578417765440667, "grad_norm": 0.8371912837028503, "learning_rate": 2.9156822110055816e-05, "loss": 0.5396, "step": 5521 }, { "epoch": 0.9580152671755725, "grad_norm": 0.7916197180747986, "learning_rate": 2.9150764361742564e-05, "loss": 0.4921, "step": 5522 }, { "epoch": 0.9581887578070785, "grad_norm": 0.7627412676811218, "learning_rate": 2.9144705551472445e-05, "loss": 0.5939, "step": 5523 }, { "epoch": 0.9583622484385843, "grad_norm": 0.6743812561035156, "learning_rate": 2.9138645679948596e-05, "loss": 0.5223, "step": 5524 }, { "epoch": 0.9585357390700903, "grad_norm": 0.8483150601387024, "learning_rate": 2.9132584747874265e-05, "loss": 0.5143, "step": 5525 }, { "epoch": 0.9587092297015961, "grad_norm": 0.8951861262321472, "learning_rate": 2.912652275595283e-05, "loss": 0.502, "step": 5526 }, { "epoch": 0.9588827203331021, "grad_norm": 0.8937050104141235, "learning_rate": 2.91204597048878e-05, "loss": 0.5006, "step": 5527 }, { "epoch": 0.9590562109646079, "grad_norm": 0.7550032138824463, "learning_rate": 2.9114395595382786e-05, "loss": 0.5442, "step": 5528 }, { "epoch": 0.9592297015961138, "grad_norm": 0.800572395324707, "learning_rate": 2.9108330428141544e-05, "loss": 0.5387, "step": 5529 }, { "epoch": 0.9594031922276197, "grad_norm": 0.8911494612693787, "learning_rate": 2.9102264203867948e-05, "loss": 0.6061, "step": 5530 }, { "epoch": 0.9595766828591256, "grad_norm": 0.7251107096672058, "learning_rate": 2.9096196923265984e-05, "loss": 0.5394, "step": 5531 }, { "epoch": 0.9597501734906315, "grad_norm": 0.8301741480827332, "learning_rate": 2.9090128587039763e-05, "loss": 0.5825, "step": 5532 }, { "epoch": 0.9599236641221374, "grad_norm": 0.7158095836639404, "learning_rate": 2.9084059195893536e-05, "loss": 0.5177, "step": 5533 }, { "epoch": 0.9600971547536433, "grad_norm": 1.2218570709228516, "learning_rate": 2.9077988750531645e-05, "loss": 0.4814, "step": 5534 }, { "epoch": 0.9602706453851492, "grad_norm": 0.881252110004425, "learning_rate": 2.9071917251658583e-05, "loss": 0.6406, "step": 5535 }, { "epoch": 0.9604441360166551, "grad_norm": 0.8924074172973633, "learning_rate": 2.9065844699978957e-05, "loss": 0.5591, "step": 5536 }, { "epoch": 0.960617626648161, "grad_norm": 0.8034957051277161, "learning_rate": 2.9059771096197488e-05, "loss": 0.4939, "step": 5537 }, { "epoch": 0.9607911172796669, "grad_norm": 0.9034562706947327, "learning_rate": 2.9053696441019022e-05, "loss": 0.5133, "step": 5538 }, { "epoch": 0.9609646079111728, "grad_norm": 0.6369368433952332, "learning_rate": 2.9047620735148542e-05, "loss": 0.5203, "step": 5539 }, { "epoch": 0.9611380985426787, "grad_norm": 0.6362287402153015, "learning_rate": 2.9041543979291125e-05, "loss": 0.558, "step": 5540 }, { "epoch": 0.9613115891741846, "grad_norm": 0.7860467433929443, "learning_rate": 2.9035466174152004e-05, "loss": 0.6414, "step": 5541 }, { "epoch": 0.9614850798056905, "grad_norm": 0.8414846062660217, "learning_rate": 2.90293873204365e-05, "loss": 0.5997, "step": 5542 }, { "epoch": 0.9616585704371964, "grad_norm": 0.5543221831321716, "learning_rate": 2.9023307418850074e-05, "loss": 0.6477, "step": 5543 }, { "epoch": 0.9618320610687023, "grad_norm": 0.8663203120231628, "learning_rate": 2.9017226470098307e-05, "loss": 0.6692, "step": 5544 }, { "epoch": 0.9620055517002082, "grad_norm": 0.7039609551429749, "learning_rate": 2.9011144474886905e-05, "loss": 0.5739, "step": 5545 }, { "epoch": 0.9621790423317141, "grad_norm": 0.7383800148963928, "learning_rate": 2.9005061433921685e-05, "loss": 0.5753, "step": 5546 }, { "epoch": 0.96235253296322, "grad_norm": 1.215936303138733, "learning_rate": 2.8998977347908593e-05, "loss": 0.6697, "step": 5547 }, { "epoch": 0.9625260235947258, "grad_norm": 0.8985759615898132, "learning_rate": 2.8992892217553693e-05, "loss": 0.5801, "step": 5548 }, { "epoch": 0.9626995142262318, "grad_norm": 0.7822864055633545, "learning_rate": 2.8986806043563174e-05, "loss": 0.5975, "step": 5549 }, { "epoch": 0.9628730048577376, "grad_norm": 0.5498910546302795, "learning_rate": 2.898071882664334e-05, "loss": 0.5688, "step": 5550 }, { "epoch": 0.9630464954892436, "grad_norm": 0.6806402802467346, "learning_rate": 2.8974630567500627e-05, "loss": 0.5627, "step": 5551 }, { "epoch": 0.9632199861207494, "grad_norm": 0.7067112326622009, "learning_rate": 2.896854126684157e-05, "loss": 0.6111, "step": 5552 }, { "epoch": 0.9633934767522554, "grad_norm": 0.7096337080001831, "learning_rate": 2.8962450925372855e-05, "loss": 0.5426, "step": 5553 }, { "epoch": 0.9635669673837612, "grad_norm": 0.6538836359977722, "learning_rate": 2.895635954380127e-05, "loss": 0.6331, "step": 5554 }, { "epoch": 0.9637404580152672, "grad_norm": 0.6250468492507935, "learning_rate": 2.8950267122833712e-05, "loss": 0.5671, "step": 5555 }, { "epoch": 0.963913948646773, "grad_norm": 0.6868553161621094, "learning_rate": 2.894417366317724e-05, "loss": 0.5541, "step": 5556 }, { "epoch": 0.964087439278279, "grad_norm": 1.0072047710418701, "learning_rate": 2.893807916553898e-05, "loss": 0.5559, "step": 5557 }, { "epoch": 0.9642609299097848, "grad_norm": 0.5556061267852783, "learning_rate": 2.8931983630626218e-05, "loss": 0.6761, "step": 5558 }, { "epoch": 0.9644344205412908, "grad_norm": 0.7940955758094788, "learning_rate": 2.8925887059146357e-05, "loss": 0.647, "step": 5559 }, { "epoch": 0.9646079111727967, "grad_norm": 0.6780730485916138, "learning_rate": 2.8919789451806893e-05, "loss": 0.5903, "step": 5560 }, { "epoch": 0.9647814018043026, "grad_norm": 0.7893752455711365, "learning_rate": 2.891369080931548e-05, "loss": 0.625, "step": 5561 }, { "epoch": 0.9649548924358085, "grad_norm": 0.701627254486084, "learning_rate": 2.890759113237985e-05, "loss": 0.5433, "step": 5562 }, { "epoch": 0.9651283830673144, "grad_norm": 0.5994738340377808, "learning_rate": 2.89014904217079e-05, "loss": 0.5035, "step": 5563 }, { "epoch": 0.9653018736988203, "grad_norm": 1.1059223413467407, "learning_rate": 2.8895388678007602e-05, "loss": 0.4935, "step": 5564 }, { "epoch": 0.9654753643303262, "grad_norm": 0.6889150142669678, "learning_rate": 2.8889285901987097e-05, "loss": 0.502, "step": 5565 }, { "epoch": 0.9656488549618321, "grad_norm": 0.5731109380722046, "learning_rate": 2.8883182094354594e-05, "loss": 0.5271, "step": 5566 }, { "epoch": 0.9658223455933379, "grad_norm": 0.678790271282196, "learning_rate": 2.8877077255818464e-05, "loss": 0.6252, "step": 5567 }, { "epoch": 0.9659958362248439, "grad_norm": 0.6979020237922668, "learning_rate": 2.887097138708717e-05, "loss": 0.5166, "step": 5568 }, { "epoch": 0.9661693268563497, "grad_norm": 0.8345228433609009, "learning_rate": 2.8864864488869314e-05, "loss": 0.5016, "step": 5569 }, { "epoch": 0.9663428174878557, "grad_norm": 0.6399042010307312, "learning_rate": 2.8858756561873605e-05, "loss": 0.5341, "step": 5570 }, { "epoch": 0.9665163081193615, "grad_norm": 0.7701651453971863, "learning_rate": 2.885264760680887e-05, "loss": 0.6294, "step": 5571 }, { "epoch": 0.9666897987508675, "grad_norm": 0.8324815630912781, "learning_rate": 2.884653762438407e-05, "loss": 0.5715, "step": 5572 }, { "epoch": 0.9668632893823733, "grad_norm": 1.1969701051712036, "learning_rate": 2.8840426615308267e-05, "loss": 0.6501, "step": 5573 }, { "epoch": 0.9670367800138793, "grad_norm": 0.6024868488311768, "learning_rate": 2.8834314580290655e-05, "loss": 0.5708, "step": 5574 }, { "epoch": 0.9672102706453851, "grad_norm": 0.8180156946182251, "learning_rate": 2.8828201520040546e-05, "loss": 0.5875, "step": 5575 }, { "epoch": 0.9673837612768911, "grad_norm": 0.7437762022018433, "learning_rate": 2.882208743526736e-05, "loss": 0.6156, "step": 5576 }, { "epoch": 0.9675572519083969, "grad_norm": 0.721993625164032, "learning_rate": 2.8815972326680648e-05, "loss": 0.4769, "step": 5577 }, { "epoch": 0.9677307425399029, "grad_norm": 0.56771320104599, "learning_rate": 2.8809856194990067e-05, "loss": 0.6461, "step": 5578 }, { "epoch": 0.9679042331714087, "grad_norm": 0.8299348950386047, "learning_rate": 2.880373904090542e-05, "loss": 0.4725, "step": 5579 }, { "epoch": 0.9680777238029147, "grad_norm": 0.789548933506012, "learning_rate": 2.8797620865136594e-05, "loss": 0.4939, "step": 5580 }, { "epoch": 0.9682512144344205, "grad_norm": 0.7346118688583374, "learning_rate": 2.8791501668393614e-05, "loss": 0.5712, "step": 5581 }, { "epoch": 0.9684247050659265, "grad_norm": 0.6299523711204529, "learning_rate": 2.8785381451386628e-05, "loss": 0.6372, "step": 5582 }, { "epoch": 0.9685981956974323, "grad_norm": 0.5474387407302856, "learning_rate": 2.8779260214825883e-05, "loss": 0.5952, "step": 5583 }, { "epoch": 0.9687716863289383, "grad_norm": 0.9798077940940857, "learning_rate": 2.877313795942176e-05, "loss": 0.5024, "step": 5584 }, { "epoch": 0.9689451769604441, "grad_norm": 0.5727953314781189, "learning_rate": 2.8767014685884755e-05, "loss": 0.6917, "step": 5585 }, { "epoch": 0.9691186675919501, "grad_norm": 0.7917117476463318, "learning_rate": 2.8760890394925477e-05, "loss": 0.5623, "step": 5586 }, { "epoch": 0.9692921582234559, "grad_norm": 0.8574097156524658, "learning_rate": 2.8754765087254665e-05, "loss": 0.4679, "step": 5587 }, { "epoch": 0.9694656488549618, "grad_norm": 3.6565754413604736, "learning_rate": 2.8748638763583158e-05, "loss": 0.6477, "step": 5588 }, { "epoch": 0.9696391394864677, "grad_norm": 0.7693340182304382, "learning_rate": 2.874251142462193e-05, "loss": 0.5785, "step": 5589 }, { "epoch": 0.9698126301179736, "grad_norm": 0.8621879816055298, "learning_rate": 2.8736383071082065e-05, "loss": 0.492, "step": 5590 }, { "epoch": 0.9699861207494795, "grad_norm": 0.7890018820762634, "learning_rate": 2.8730253703674765e-05, "loss": 0.5876, "step": 5591 }, { "epoch": 0.9701596113809854, "grad_norm": 0.5805262923240662, "learning_rate": 2.872412332311135e-05, "loss": 0.7037, "step": 5592 }, { "epoch": 0.9703331020124913, "grad_norm": 0.784298837184906, "learning_rate": 2.8717991930103258e-05, "loss": 0.5427, "step": 5593 }, { "epoch": 0.9705065926439972, "grad_norm": 0.8179328441619873, "learning_rate": 2.8711859525362045e-05, "loss": 0.469, "step": 5594 }, { "epoch": 0.9706800832755031, "grad_norm": 0.7602160573005676, "learning_rate": 2.8705726109599382e-05, "loss": 0.4801, "step": 5595 }, { "epoch": 0.970853573907009, "grad_norm": 1.113956332206726, "learning_rate": 2.8699591683527058e-05, "loss": 0.5945, "step": 5596 }, { "epoch": 0.971027064538515, "grad_norm": 1.1886241436004639, "learning_rate": 2.869345624785698e-05, "loss": 0.4926, "step": 5597 }, { "epoch": 0.9712005551700208, "grad_norm": 0.6510448455810547, "learning_rate": 2.8687319803301183e-05, "loss": 0.6802, "step": 5598 }, { "epoch": 0.9713740458015268, "grad_norm": 0.7982162237167358, "learning_rate": 2.8681182350571795e-05, "loss": 0.5955, "step": 5599 }, { "epoch": 0.9715475364330326, "grad_norm": 0.6104753017425537, "learning_rate": 2.867504389038108e-05, "loss": 0.5809, "step": 5600 }, { "epoch": 0.9717210270645386, "grad_norm": 0.6703321933746338, "learning_rate": 2.8668904423441413e-05, "loss": 0.5819, "step": 5601 }, { "epoch": 0.9718945176960444, "grad_norm": 0.7472213506698608, "learning_rate": 2.8662763950465284e-05, "loss": 0.4871, "step": 5602 }, { "epoch": 0.9720680083275504, "grad_norm": 0.7371354699134827, "learning_rate": 2.865662247216531e-05, "loss": 0.4873, "step": 5603 }, { "epoch": 0.9722414989590562, "grad_norm": 0.8960732817649841, "learning_rate": 2.8650479989254206e-05, "loss": 0.5057, "step": 5604 }, { "epoch": 0.9724149895905622, "grad_norm": 0.9224213361740112, "learning_rate": 2.864433650244482e-05, "loss": 0.4885, "step": 5605 }, { "epoch": 0.972588480222068, "grad_norm": 0.6924038529396057, "learning_rate": 2.8638192012450115e-05, "loss": 0.6155, "step": 5606 }, { "epoch": 0.9727619708535739, "grad_norm": 1.2647117376327515, "learning_rate": 2.8632046519983157e-05, "loss": 0.5897, "step": 5607 }, { "epoch": 0.9729354614850798, "grad_norm": 0.5417555570602417, "learning_rate": 2.862590002575714e-05, "loss": 0.5942, "step": 5608 }, { "epoch": 0.9731089521165857, "grad_norm": 1.6354655027389526, "learning_rate": 2.861975253048538e-05, "loss": 0.4634, "step": 5609 }, { "epoch": 0.9732824427480916, "grad_norm": 0.838060736656189, "learning_rate": 2.861360403488129e-05, "loss": 0.618, "step": 5610 }, { "epoch": 0.9734559333795975, "grad_norm": 0.8327847719192505, "learning_rate": 2.8607454539658417e-05, "loss": 0.4602, "step": 5611 }, { "epoch": 0.9736294240111034, "grad_norm": 0.6787186861038208, "learning_rate": 2.8601304045530414e-05, "loss": 0.5684, "step": 5612 }, { "epoch": 0.9738029146426093, "grad_norm": 0.8590366840362549, "learning_rate": 2.859515255321105e-05, "loss": 0.562, "step": 5613 }, { "epoch": 0.9739764052741152, "grad_norm": 0.8135871291160583, "learning_rate": 2.858900006341422e-05, "loss": 0.4685, "step": 5614 }, { "epoch": 0.9741498959056211, "grad_norm": 1.042456865310669, "learning_rate": 2.858284657685393e-05, "loss": 0.4919, "step": 5615 }, { "epoch": 0.974323386537127, "grad_norm": 0.6399260759353638, "learning_rate": 2.8576692094244286e-05, "loss": 0.5925, "step": 5616 }, { "epoch": 0.9744968771686329, "grad_norm": 0.6510165333747864, "learning_rate": 2.8570536616299538e-05, "loss": 0.5336, "step": 5617 }, { "epoch": 0.9746703678001388, "grad_norm": 0.6991555094718933, "learning_rate": 2.856438014373402e-05, "loss": 0.4651, "step": 5618 }, { "epoch": 0.9748438584316447, "grad_norm": 0.7268310189247131, "learning_rate": 2.855822267726222e-05, "loss": 0.5339, "step": 5619 }, { "epoch": 0.9750173490631506, "grad_norm": 0.6740028858184814, "learning_rate": 2.8552064217598694e-05, "loss": 0.524, "step": 5620 }, { "epoch": 0.9751908396946565, "grad_norm": 0.7472015023231506, "learning_rate": 2.8545904765458153e-05, "loss": 0.5776, "step": 5621 }, { "epoch": 0.9753643303261624, "grad_norm": 0.7645635604858398, "learning_rate": 2.853974432155541e-05, "loss": 0.5522, "step": 5622 }, { "epoch": 0.9755378209576683, "grad_norm": 0.8446869254112244, "learning_rate": 2.8533582886605383e-05, "loss": 0.6638, "step": 5623 }, { "epoch": 0.9757113115891742, "grad_norm": 0.760636568069458, "learning_rate": 2.852742046132312e-05, "loss": 0.5383, "step": 5624 }, { "epoch": 0.9758848022206801, "grad_norm": 0.7456691265106201, "learning_rate": 2.8521257046423782e-05, "loss": 0.6187, "step": 5625 }, { "epoch": 0.9760582928521859, "grad_norm": 0.6363016963005066, "learning_rate": 2.8515092642622625e-05, "loss": 0.6128, "step": 5626 }, { "epoch": 0.9762317834836919, "grad_norm": 0.6683642864227295, "learning_rate": 2.850892725063505e-05, "loss": 0.532, "step": 5627 }, { "epoch": 0.9764052741151977, "grad_norm": 1.0434778928756714, "learning_rate": 2.8502760871176546e-05, "loss": 0.6349, "step": 5628 }, { "epoch": 0.9765787647467037, "grad_norm": 0.871713399887085, "learning_rate": 2.849659350496275e-05, "loss": 0.5149, "step": 5629 }, { "epoch": 0.9767522553782095, "grad_norm": 0.6335962414741516, "learning_rate": 2.8490425152709367e-05, "loss": 0.6923, "step": 5630 }, { "epoch": 0.9769257460097155, "grad_norm": 0.8314769268035889, "learning_rate": 2.848425581513225e-05, "loss": 0.5887, "step": 5631 }, { "epoch": 0.9770992366412213, "grad_norm": 0.5725652575492859, "learning_rate": 2.847808549294736e-05, "loss": 0.6077, "step": 5632 }, { "epoch": 0.9772727272727273, "grad_norm": 1.0846760272979736, "learning_rate": 2.847191418687078e-05, "loss": 0.6786, "step": 5633 }, { "epoch": 0.9774462179042331, "grad_norm": 0.6418253779411316, "learning_rate": 2.8465741897618673e-05, "loss": 0.6667, "step": 5634 }, { "epoch": 0.9776197085357391, "grad_norm": 0.5402447581291199, "learning_rate": 2.845956862590736e-05, "loss": 0.5862, "step": 5635 }, { "epoch": 0.9777931991672449, "grad_norm": 0.6820518970489502, "learning_rate": 2.8453394372453253e-05, "loss": 0.589, "step": 5636 }, { "epoch": 0.9779666897987509, "grad_norm": 0.9091362357139587, "learning_rate": 2.844721913797287e-05, "loss": 0.6671, "step": 5637 }, { "epoch": 0.9781401804302567, "grad_norm": 0.7631445527076721, "learning_rate": 2.8441042923182872e-05, "loss": 0.5135, "step": 5638 }, { "epoch": 0.9783136710617627, "grad_norm": 1.4382708072662354, "learning_rate": 2.84348657288e-05, "loss": 0.5564, "step": 5639 }, { "epoch": 0.9784871616932685, "grad_norm": 0.7542216181755066, "learning_rate": 2.842868755554114e-05, "loss": 0.4821, "step": 5640 }, { "epoch": 0.9786606523247745, "grad_norm": 0.823925256729126, "learning_rate": 2.8422508404123264e-05, "loss": 0.46, "step": 5641 }, { "epoch": 0.9788341429562804, "grad_norm": 0.7728753089904785, "learning_rate": 2.8416328275263472e-05, "loss": 0.5385, "step": 5642 }, { "epoch": 0.9790076335877863, "grad_norm": 0.7813199758529663, "learning_rate": 2.841014716967898e-05, "loss": 0.6272, "step": 5643 }, { "epoch": 0.9791811242192922, "grad_norm": 0.7330827713012695, "learning_rate": 2.8403965088087105e-05, "loss": 0.6547, "step": 5644 }, { "epoch": 0.9793546148507981, "grad_norm": 0.7045038342475891, "learning_rate": 2.8397782031205295e-05, "loss": 0.4763, "step": 5645 }, { "epoch": 0.979528105482304, "grad_norm": 1.028846263885498, "learning_rate": 2.839159799975109e-05, "loss": 0.6293, "step": 5646 }, { "epoch": 0.9797015961138098, "grad_norm": 0.7624039649963379, "learning_rate": 2.838541299444216e-05, "loss": 0.5862, "step": 5647 }, { "epoch": 0.9798750867453158, "grad_norm": 0.6337740421295166, "learning_rate": 2.8379227015996283e-05, "loss": 0.5353, "step": 5648 }, { "epoch": 0.9800485773768216, "grad_norm": 0.6328892111778259, "learning_rate": 2.837304006513135e-05, "loss": 0.6538, "step": 5649 }, { "epoch": 0.9802220680083276, "grad_norm": 1.041323184967041, "learning_rate": 2.8366852142565352e-05, "loss": 0.575, "step": 5650 }, { "epoch": 0.9803955586398334, "grad_norm": 0.763387143611908, "learning_rate": 2.8360663249016417e-05, "loss": 0.5221, "step": 5651 }, { "epoch": 0.9805690492713394, "grad_norm": 0.7254363298416138, "learning_rate": 2.8354473385202772e-05, "loss": 0.584, "step": 5652 }, { "epoch": 0.9807425399028452, "grad_norm": 0.6395940780639648, "learning_rate": 2.8348282551842756e-05, "loss": 0.6965, "step": 5653 }, { "epoch": 0.9809160305343512, "grad_norm": 1.0305383205413818, "learning_rate": 2.834209074965482e-05, "loss": 0.5254, "step": 5654 }, { "epoch": 0.981089521165857, "grad_norm": 1.0242565870285034, "learning_rate": 2.833589797935753e-05, "loss": 0.4504, "step": 5655 }, { "epoch": 0.981263011797363, "grad_norm": 0.6797073483467102, "learning_rate": 2.8329704241669574e-05, "loss": 0.6068, "step": 5656 }, { "epoch": 0.9814365024288688, "grad_norm": 0.5994985699653625, "learning_rate": 2.8323509537309725e-05, "loss": 0.5944, "step": 5657 }, { "epoch": 0.9816099930603748, "grad_norm": 0.935546338558197, "learning_rate": 2.8317313866996897e-05, "loss": 0.4546, "step": 5658 }, { "epoch": 0.9817834836918806, "grad_norm": 0.7284477353096008, "learning_rate": 2.831111723145011e-05, "loss": 0.5553, "step": 5659 }, { "epoch": 0.9819569743233866, "grad_norm": 0.919700026512146, "learning_rate": 2.830491963138848e-05, "loss": 0.491, "step": 5660 }, { "epoch": 0.9821304649548924, "grad_norm": 0.7011570334434509, "learning_rate": 2.8298721067531248e-05, "loss": 0.5303, "step": 5661 }, { "epoch": 0.9823039555863984, "grad_norm": 1.7850123643875122, "learning_rate": 2.8292521540597767e-05, "loss": 0.5337, "step": 5662 }, { "epoch": 0.9824774462179042, "grad_norm": 0.6571459770202637, "learning_rate": 2.8286321051307494e-05, "loss": 0.4933, "step": 5663 }, { "epoch": 0.9826509368494102, "grad_norm": 0.6553524136543274, "learning_rate": 2.828011960038002e-05, "loss": 0.5347, "step": 5664 }, { "epoch": 0.982824427480916, "grad_norm": 0.7700760960578918, "learning_rate": 2.827391718853501e-05, "loss": 0.4912, "step": 5665 }, { "epoch": 0.9829979181124219, "grad_norm": 1.0490473508834839, "learning_rate": 2.826771381649227e-05, "loss": 0.5784, "step": 5666 }, { "epoch": 0.9831714087439278, "grad_norm": 0.749843418598175, "learning_rate": 2.826150948497171e-05, "loss": 0.4961, "step": 5667 }, { "epoch": 0.9833448993754337, "grad_norm": 0.8119151592254639, "learning_rate": 2.8255304194693343e-05, "loss": 0.5551, "step": 5668 }, { "epoch": 0.9835183900069396, "grad_norm": 0.6026073098182678, "learning_rate": 2.8249097946377307e-05, "loss": 0.5271, "step": 5669 }, { "epoch": 0.9836918806384455, "grad_norm": 0.6521915793418884, "learning_rate": 2.8242890740743844e-05, "loss": 0.5321, "step": 5670 }, { "epoch": 0.9838653712699514, "grad_norm": 0.8184699416160583, "learning_rate": 2.8236682578513302e-05, "loss": 0.6404, "step": 5671 }, { "epoch": 0.9840388619014573, "grad_norm": 0.5834811925888062, "learning_rate": 2.8230473460406154e-05, "loss": 0.6033, "step": 5672 }, { "epoch": 0.9842123525329632, "grad_norm": 1.45811128616333, "learning_rate": 2.8224263387142963e-05, "loss": 0.5653, "step": 5673 }, { "epoch": 0.9843858431644691, "grad_norm": 0.9198524355888367, "learning_rate": 2.8218052359444434e-05, "loss": 0.556, "step": 5674 }, { "epoch": 0.984559333795975, "grad_norm": 0.6337687969207764, "learning_rate": 2.821184037803135e-05, "loss": 0.5148, "step": 5675 }, { "epoch": 0.9847328244274809, "grad_norm": 1.02034330368042, "learning_rate": 2.8205627443624616e-05, "loss": 0.4585, "step": 5676 }, { "epoch": 0.9849063150589868, "grad_norm": 0.5336766839027405, "learning_rate": 2.8199413556945256e-05, "loss": 0.6122, "step": 5677 }, { "epoch": 0.9850798056904927, "grad_norm": 0.7545201182365417, "learning_rate": 2.8193198718714402e-05, "loss": 0.52, "step": 5678 }, { "epoch": 0.9852532963219987, "grad_norm": 0.6611053943634033, "learning_rate": 2.8186982929653287e-05, "loss": 0.5245, "step": 5679 }, { "epoch": 0.9854267869535045, "grad_norm": 0.827336311340332, "learning_rate": 2.8180766190483263e-05, "loss": 0.535, "step": 5680 }, { "epoch": 0.9856002775850105, "grad_norm": 0.975527286529541, "learning_rate": 2.817454850192579e-05, "loss": 0.5465, "step": 5681 }, { "epoch": 0.9857737682165163, "grad_norm": 0.9336392879486084, "learning_rate": 2.8168329864702443e-05, "loss": 0.5498, "step": 5682 }, { "epoch": 0.9859472588480223, "grad_norm": 0.7438031435012817, "learning_rate": 2.8162110279534893e-05, "loss": 0.6061, "step": 5683 }, { "epoch": 0.9861207494795281, "grad_norm": 0.5993937253952026, "learning_rate": 2.8155889747144933e-05, "loss": 0.6034, "step": 5684 }, { "epoch": 0.9862942401110341, "grad_norm": 0.5549498200416565, "learning_rate": 2.8149668268254465e-05, "loss": 0.6147, "step": 5685 }, { "epoch": 0.9864677307425399, "grad_norm": 1.1180832386016846, "learning_rate": 2.8143445843585498e-05, "loss": 0.5997, "step": 5686 }, { "epoch": 0.9866412213740458, "grad_norm": 0.8614696860313416, "learning_rate": 2.8137222473860154e-05, "loss": 0.6512, "step": 5687 }, { "epoch": 0.9868147120055517, "grad_norm": 0.5003150105476379, "learning_rate": 2.8130998159800663e-05, "loss": 0.7042, "step": 5688 }, { "epoch": 0.9869882026370576, "grad_norm": 0.699743390083313, "learning_rate": 2.8124772902129353e-05, "loss": 0.6189, "step": 5689 }, { "epoch": 0.9871616932685635, "grad_norm": 0.646152913570404, "learning_rate": 2.8118546701568687e-05, "loss": 0.5361, "step": 5690 }, { "epoch": 0.9873351839000694, "grad_norm": 0.6917030811309814, "learning_rate": 2.8112319558841216e-05, "loss": 0.6029, "step": 5691 }, { "epoch": 0.9875086745315753, "grad_norm": 0.9531429409980774, "learning_rate": 2.81060914746696e-05, "loss": 0.4884, "step": 5692 }, { "epoch": 0.9876821651630812, "grad_norm": 0.6426189541816711, "learning_rate": 2.8099862449776637e-05, "loss": 0.5273, "step": 5693 }, { "epoch": 0.9878556557945871, "grad_norm": 0.6073868274688721, "learning_rate": 2.8093632484885182e-05, "loss": 0.576, "step": 5694 }, { "epoch": 0.988029146426093, "grad_norm": 0.6698435544967651, "learning_rate": 2.8087401580718258e-05, "loss": 0.4551, "step": 5695 }, { "epoch": 0.9882026370575989, "grad_norm": 0.7084513306617737, "learning_rate": 2.8081169737998956e-05, "loss": 0.5062, "step": 5696 }, { "epoch": 0.9883761276891048, "grad_norm": 0.8095421195030212, "learning_rate": 2.8074936957450485e-05, "loss": 0.5396, "step": 5697 }, { "epoch": 0.9885496183206107, "grad_norm": 0.8599531650543213, "learning_rate": 2.8068703239796175e-05, "loss": 0.5001, "step": 5698 }, { "epoch": 0.9887231089521166, "grad_norm": 0.5939335823059082, "learning_rate": 2.806246858575945e-05, "loss": 0.5999, "step": 5699 }, { "epoch": 0.9888965995836225, "grad_norm": 0.7356471419334412, "learning_rate": 2.805623299606385e-05, "loss": 0.5591, "step": 5700 }, { "epoch": 0.9890700902151284, "grad_norm": 0.6846799850463867, "learning_rate": 2.8049996471433022e-05, "loss": 0.5365, "step": 5701 }, { "epoch": 0.9892435808466343, "grad_norm": 1.5366616249084473, "learning_rate": 2.8043759012590723e-05, "loss": 0.603, "step": 5702 }, { "epoch": 0.9894170714781402, "grad_norm": 0.6540420055389404, "learning_rate": 2.8037520620260826e-05, "loss": 0.6267, "step": 5703 }, { "epoch": 0.9895905621096461, "grad_norm": 0.7610870599746704, "learning_rate": 2.803128129516729e-05, "loss": 0.4932, "step": 5704 }, { "epoch": 0.989764052741152, "grad_norm": 0.8002392053604126, "learning_rate": 2.8025041038034197e-05, "loss": 0.5995, "step": 5705 }, { "epoch": 0.9899375433726578, "grad_norm": 0.940723180770874, "learning_rate": 2.801879984958575e-05, "loss": 0.4882, "step": 5706 }, { "epoch": 0.9901110340041638, "grad_norm": 0.7501986026763916, "learning_rate": 2.8012557730546224e-05, "loss": 0.5312, "step": 5707 }, { "epoch": 0.9902845246356696, "grad_norm": 0.745395839214325, "learning_rate": 2.800631468164005e-05, "loss": 0.5092, "step": 5708 }, { "epoch": 0.9904580152671756, "grad_norm": 0.9255587458610535, "learning_rate": 2.800007070359172e-05, "loss": 0.5626, "step": 5709 }, { "epoch": 0.9906315058986814, "grad_norm": 0.6927536725997925, "learning_rate": 2.7993825797125866e-05, "loss": 0.5692, "step": 5710 }, { "epoch": 0.9908049965301874, "grad_norm": 0.8264254927635193, "learning_rate": 2.798757996296721e-05, "loss": 0.5314, "step": 5711 }, { "epoch": 0.9909784871616932, "grad_norm": 0.7039380073547363, "learning_rate": 2.7981333201840595e-05, "loss": 0.547, "step": 5712 }, { "epoch": 0.9911519777931992, "grad_norm": 0.7396357655525208, "learning_rate": 2.7975085514470958e-05, "loss": 0.5565, "step": 5713 }, { "epoch": 0.991325468424705, "grad_norm": 0.8510541319847107, "learning_rate": 2.7968836901583364e-05, "loss": 0.5774, "step": 5714 }, { "epoch": 0.991498959056211, "grad_norm": 0.5891192555427551, "learning_rate": 2.7962587363902952e-05, "loss": 0.6868, "step": 5715 }, { "epoch": 0.9916724496877168, "grad_norm": 0.6883437633514404, "learning_rate": 2.7956336902155003e-05, "loss": 0.6849, "step": 5716 }, { "epoch": 0.9918459403192228, "grad_norm": 0.8984211683273315, "learning_rate": 2.7950085517064884e-05, "loss": 0.5027, "step": 5717 }, { "epoch": 0.9920194309507286, "grad_norm": 0.796939492225647, "learning_rate": 2.7943833209358076e-05, "loss": 0.644, "step": 5718 }, { "epoch": 0.9921929215822346, "grad_norm": 0.7189422845840454, "learning_rate": 2.793757997976017e-05, "loss": 0.5895, "step": 5719 }, { "epoch": 0.9923664122137404, "grad_norm": 0.780312180519104, "learning_rate": 2.793132582899686e-05, "loss": 0.5587, "step": 5720 }, { "epoch": 0.9925399028452464, "grad_norm": 0.8351185321807861, "learning_rate": 2.7925070757793943e-05, "loss": 0.5417, "step": 5721 }, { "epoch": 0.9927133934767522, "grad_norm": 4.6577019691467285, "learning_rate": 2.791881476687733e-05, "loss": 0.5024, "step": 5722 }, { "epoch": 0.9928868841082582, "grad_norm": 0.6500504016876221, "learning_rate": 2.7912557856973035e-05, "loss": 0.5898, "step": 5723 }, { "epoch": 0.993060374739764, "grad_norm": 0.8028048276901245, "learning_rate": 2.790630002880718e-05, "loss": 0.67, "step": 5724 }, { "epoch": 0.9932338653712699, "grad_norm": 0.9770735502243042, "learning_rate": 2.7900041283106e-05, "loss": 0.5446, "step": 5725 }, { "epoch": 0.9934073560027759, "grad_norm": 0.9949390888214111, "learning_rate": 2.7893781620595818e-05, "loss": 0.5074, "step": 5726 }, { "epoch": 0.9935808466342817, "grad_norm": 0.6422823667526245, "learning_rate": 2.7887521042003084e-05, "loss": 0.588, "step": 5727 }, { "epoch": 0.9937543372657877, "grad_norm": 0.697289764881134, "learning_rate": 2.788125954805434e-05, "loss": 0.5432, "step": 5728 }, { "epoch": 0.9939278278972935, "grad_norm": 0.6521013975143433, "learning_rate": 2.787499713947624e-05, "loss": 0.559, "step": 5729 }, { "epoch": 0.9941013185287995, "grad_norm": 0.7985821962356567, "learning_rate": 2.7868733816995553e-05, "loss": 0.6085, "step": 5730 }, { "epoch": 0.9942748091603053, "grad_norm": 0.817812979221344, "learning_rate": 2.786246958133913e-05, "loss": 0.5981, "step": 5731 }, { "epoch": 0.9944482997918113, "grad_norm": 0.7825731635093689, "learning_rate": 2.7856204433233954e-05, "loss": 0.4961, "step": 5732 }, { "epoch": 0.9946217904233171, "grad_norm": 0.6842442154884338, "learning_rate": 2.7849938373407095e-05, "loss": 0.5099, "step": 5733 }, { "epoch": 0.9947952810548231, "grad_norm": 1.0025458335876465, "learning_rate": 2.7843671402585747e-05, "loss": 0.5641, "step": 5734 }, { "epoch": 0.9949687716863289, "grad_norm": 0.9568683505058289, "learning_rate": 2.783740352149719e-05, "loss": 0.5083, "step": 5735 }, { "epoch": 0.9951422623178349, "grad_norm": 0.6607463955879211, "learning_rate": 2.783113473086882e-05, "loss": 0.499, "step": 5736 }, { "epoch": 0.9953157529493407, "grad_norm": 0.6710206866264343, "learning_rate": 2.7824865031428144e-05, "loss": 0.5931, "step": 5737 }, { "epoch": 0.9954892435808467, "grad_norm": 0.637427806854248, "learning_rate": 2.781859442390276e-05, "loss": 0.4478, "step": 5738 }, { "epoch": 0.9956627342123525, "grad_norm": 1.1940019130706787, "learning_rate": 2.7812322909020385e-05, "loss": 0.5146, "step": 5739 }, { "epoch": 0.9958362248438585, "grad_norm": 0.8191688060760498, "learning_rate": 2.780605048750883e-05, "loss": 0.5422, "step": 5740 }, { "epoch": 0.9960097154753643, "grad_norm": 0.7413262724876404, "learning_rate": 2.7799777160096025e-05, "loss": 0.5463, "step": 5741 }, { "epoch": 0.9961832061068703, "grad_norm": 0.672318160533905, "learning_rate": 2.7793502927509988e-05, "loss": 0.6199, "step": 5742 }, { "epoch": 0.9963566967383761, "grad_norm": 0.687721848487854, "learning_rate": 2.7787227790478856e-05, "loss": 0.5553, "step": 5743 }, { "epoch": 0.9965301873698821, "grad_norm": 0.7222127318382263, "learning_rate": 2.7780951749730864e-05, "loss": 0.589, "step": 5744 }, { "epoch": 0.9967036780013879, "grad_norm": 0.6709451079368591, "learning_rate": 2.7774674805994356e-05, "loss": 0.5746, "step": 5745 }, { "epoch": 0.9968771686328938, "grad_norm": 0.7386717200279236, "learning_rate": 2.7768396959997783e-05, "loss": 0.5798, "step": 5746 }, { "epoch": 0.9970506592643997, "grad_norm": 0.6941803097724915, "learning_rate": 2.7762118212469686e-05, "loss": 0.5363, "step": 5747 }, { "epoch": 0.9972241498959056, "grad_norm": 0.6951861381530762, "learning_rate": 2.7755838564138722e-05, "loss": 0.6049, "step": 5748 }, { "epoch": 0.9973976405274115, "grad_norm": 0.812441349029541, "learning_rate": 2.774955801573366e-05, "loss": 0.5265, "step": 5749 }, { "epoch": 0.9975711311589174, "grad_norm": 0.7692736983299255, "learning_rate": 2.7743276567983354e-05, "loss": 0.5095, "step": 5750 }, { "epoch": 0.9977446217904233, "grad_norm": 0.8522500395774841, "learning_rate": 2.7736994221616788e-05, "loss": 0.4911, "step": 5751 }, { "epoch": 0.9979181124219292, "grad_norm": 0.6917999386787415, "learning_rate": 2.7730710977363023e-05, "loss": 0.4574, "step": 5752 }, { "epoch": 0.9980916030534351, "grad_norm": 0.7634033560752869, "learning_rate": 2.7724426835951242e-05, "loss": 0.5029, "step": 5753 }, { "epoch": 0.998265093684941, "grad_norm": 0.9081072807312012, "learning_rate": 2.771814179811073e-05, "loss": 0.5377, "step": 5754 }, { "epoch": 0.9984385843164469, "grad_norm": 1.0557156801223755, "learning_rate": 2.7711855864570858e-05, "loss": 0.543, "step": 5755 }, { "epoch": 0.9986120749479528, "grad_norm": 1.009839415550232, "learning_rate": 2.7705569036061137e-05, "loss": 0.535, "step": 5756 }, { "epoch": 0.9987855655794587, "grad_norm": 0.6140355467796326, "learning_rate": 2.7699281313311144e-05, "loss": 0.4337, "step": 5757 }, { "epoch": 0.9989590562109646, "grad_norm": 0.7017039060592651, "learning_rate": 2.7692992697050587e-05, "loss": 0.5054, "step": 5758 }, { "epoch": 0.9991325468424705, "grad_norm": 0.6694011688232422, "learning_rate": 2.768670318800926e-05, "loss": 0.674, "step": 5759 }, { "epoch": 0.9993060374739764, "grad_norm": 0.9655817151069641, "learning_rate": 2.7680412786917074e-05, "loss": 0.5253, "step": 5760 }, { "epoch": 0.9994795281054824, "grad_norm": 0.8425588607788086, "learning_rate": 2.7674121494504032e-05, "loss": 0.5544, "step": 5761 }, { "epoch": 0.9996530187369882, "grad_norm": 0.7655010223388672, "learning_rate": 2.7667829311500255e-05, "loss": 0.6002, "step": 5762 }, { "epoch": 0.9998265093684942, "grad_norm": 0.8534713983535767, "learning_rate": 2.766153623863594e-05, "loss": 0.5909, "step": 5763 }, { "epoch": 1.0, "grad_norm": 0.6403183937072754, "learning_rate": 2.765524227664143e-05, "loss": 0.4885, "step": 5764 }, { "epoch": 1.0001734906315058, "grad_norm": 0.7058083415031433, "learning_rate": 2.7648947426247122e-05, "loss": 0.6553, "step": 5765 }, { "epoch": 1.0003469812630117, "grad_norm": 0.8651044368743896, "learning_rate": 2.7642651688183558e-05, "loss": 0.5359, "step": 5766 }, { "epoch": 1.0005204718945178, "grad_norm": 0.7307851910591125, "learning_rate": 2.763635506318137e-05, "loss": 0.4159, "step": 5767 }, { "epoch": 1.0006939625260236, "grad_norm": 0.5823991298675537, "learning_rate": 2.763005755197126e-05, "loss": 0.4789, "step": 5768 }, { "epoch": 1.0008674531575295, "grad_norm": 0.9836352467536926, "learning_rate": 2.7623759155284093e-05, "loss": 0.463, "step": 5769 }, { "epoch": 1.0010409437890353, "grad_norm": 1.1837478876113892, "learning_rate": 2.7617459873850792e-05, "loss": 0.6111, "step": 5770 }, { "epoch": 1.0012144344205414, "grad_norm": 0.6720978617668152, "learning_rate": 2.7611159708402387e-05, "loss": 0.5699, "step": 5771 }, { "epoch": 1.0013879250520472, "grad_norm": 0.6235537528991699, "learning_rate": 2.760485865967004e-05, "loss": 0.6168, "step": 5772 }, { "epoch": 1.001561415683553, "grad_norm": 0.8067814111709595, "learning_rate": 2.759855672838498e-05, "loss": 0.3896, "step": 5773 }, { "epoch": 1.001734906315059, "grad_norm": 1.0587890148162842, "learning_rate": 2.7592253915278556e-05, "loss": 0.5634, "step": 5774 }, { "epoch": 1.001908396946565, "grad_norm": 0.7729614973068237, "learning_rate": 2.7585950221082223e-05, "loss": 0.5593, "step": 5775 }, { "epoch": 1.0020818875780708, "grad_norm": 0.7301870584487915, "learning_rate": 2.7579645646527522e-05, "loss": 0.4178, "step": 5776 }, { "epoch": 1.0022553782095767, "grad_norm": 0.9195185303688049, "learning_rate": 2.7573340192346117e-05, "loss": 0.5452, "step": 5777 }, { "epoch": 1.0024288688410825, "grad_norm": 1.3216475248336792, "learning_rate": 2.7567033859269754e-05, "loss": 0.4032, "step": 5778 }, { "epoch": 1.0026023594725886, "grad_norm": 0.8470260500907898, "learning_rate": 2.7560726648030294e-05, "loss": 0.448, "step": 5779 }, { "epoch": 1.0027758501040944, "grad_norm": 0.721225917339325, "learning_rate": 2.75544185593597e-05, "loss": 0.4741, "step": 5780 }, { "epoch": 1.0029493407356003, "grad_norm": 0.7026661038398743, "learning_rate": 2.7548109593990022e-05, "loss": 0.5828, "step": 5781 }, { "epoch": 1.0031228313671061, "grad_norm": 1.6201528310775757, "learning_rate": 2.754179975265344e-05, "loss": 0.6151, "step": 5782 }, { "epoch": 1.0032963219986122, "grad_norm": 0.8122080564498901, "learning_rate": 2.7535489036082198e-05, "loss": 0.4321, "step": 5783 }, { "epoch": 1.003469812630118, "grad_norm": 1.0058845281600952, "learning_rate": 2.752917744500868e-05, "loss": 0.5387, "step": 5784 }, { "epoch": 1.0036433032616239, "grad_norm": 0.8528434634208679, "learning_rate": 2.7522864980165346e-05, "loss": 0.6672, "step": 5785 }, { "epoch": 1.0038167938931297, "grad_norm": 1.0794541835784912, "learning_rate": 2.7516551642284765e-05, "loss": 0.5281, "step": 5786 }, { "epoch": 1.0039902845246356, "grad_norm": 1.606688380241394, "learning_rate": 2.7510237432099605e-05, "loss": 0.5308, "step": 5787 }, { "epoch": 1.0041637751561416, "grad_norm": 0.6467757225036621, "learning_rate": 2.7503922350342645e-05, "loss": 0.5367, "step": 5788 }, { "epoch": 1.0043372657876475, "grad_norm": 0.7525832056999207, "learning_rate": 2.7497606397746745e-05, "loss": 0.5387, "step": 5789 }, { "epoch": 1.0045107564191533, "grad_norm": 0.8825986981391907, "learning_rate": 2.7491289575044893e-05, "loss": 0.514, "step": 5790 }, { "epoch": 1.0046842470506592, "grad_norm": 0.8700129389762878, "learning_rate": 2.7484971882970156e-05, "loss": 0.521, "step": 5791 }, { "epoch": 1.0048577376821652, "grad_norm": 0.7932398319244385, "learning_rate": 2.7478653322255707e-05, "loss": 0.54, "step": 5792 }, { "epoch": 1.005031228313671, "grad_norm": 0.6790376305580139, "learning_rate": 2.7472333893634824e-05, "loss": 0.4625, "step": 5793 }, { "epoch": 1.005204718945177, "grad_norm": 0.6960176229476929, "learning_rate": 2.746601359784089e-05, "loss": 0.5845, "step": 5794 }, { "epoch": 1.0053782095766828, "grad_norm": 0.7734279036521912, "learning_rate": 2.7459692435607376e-05, "loss": 0.5856, "step": 5795 }, { "epoch": 1.0055517002081888, "grad_norm": 0.7826294302940369, "learning_rate": 2.745337040766787e-05, "loss": 0.4456, "step": 5796 }, { "epoch": 1.0057251908396947, "grad_norm": 0.6511348485946655, "learning_rate": 2.7447047514756032e-05, "loss": 0.5326, "step": 5797 }, { "epoch": 1.0058986814712005, "grad_norm": 0.9289817214012146, "learning_rate": 2.744072375760566e-05, "loss": 0.406, "step": 5798 }, { "epoch": 1.0060721721027064, "grad_norm": 0.8878532648086548, "learning_rate": 2.7434399136950625e-05, "loss": 0.4139, "step": 5799 }, { "epoch": 1.0062456627342125, "grad_norm": 0.8880317807197571, "learning_rate": 2.74280736535249e-05, "loss": 0.4412, "step": 5800 }, { "epoch": 1.0064191533657183, "grad_norm": 0.7840941548347473, "learning_rate": 2.742174730806258e-05, "loss": 0.4554, "step": 5801 }, { "epoch": 1.0065926439972241, "grad_norm": 0.8049899339675903, "learning_rate": 2.7415420101297836e-05, "loss": 0.4913, "step": 5802 }, { "epoch": 1.00676613462873, "grad_norm": 0.8128389716148376, "learning_rate": 2.7409092033964943e-05, "loss": 0.6445, "step": 5803 }, { "epoch": 1.0069396252602358, "grad_norm": 0.9518760442733765, "learning_rate": 2.7402763106798295e-05, "loss": 0.5773, "step": 5804 }, { "epoch": 1.007113115891742, "grad_norm": 0.845658004283905, "learning_rate": 2.7396433320532356e-05, "loss": 0.5273, "step": 5805 }, { "epoch": 1.0072866065232478, "grad_norm": 0.7681678533554077, "learning_rate": 2.739010267590171e-05, "loss": 0.5714, "step": 5806 }, { "epoch": 1.0074600971547536, "grad_norm": 1.1148523092269897, "learning_rate": 2.7383771173641037e-05, "loss": 0.4296, "step": 5807 }, { "epoch": 1.0076335877862594, "grad_norm": 0.7010675072669983, "learning_rate": 2.7377438814485117e-05, "loss": 0.4215, "step": 5808 }, { "epoch": 1.0078070784177655, "grad_norm": 0.8457129001617432, "learning_rate": 2.7371105599168833e-05, "loss": 0.5698, "step": 5809 }, { "epoch": 1.0079805690492714, "grad_norm": 0.8173154592514038, "learning_rate": 2.7364771528427145e-05, "loss": 0.5417, "step": 5810 }, { "epoch": 1.0081540596807772, "grad_norm": 0.8678355813026428, "learning_rate": 2.735843660299515e-05, "loss": 0.5249, "step": 5811 }, { "epoch": 1.008327550312283, "grad_norm": 0.7418676614761353, "learning_rate": 2.7352100823608006e-05, "loss": 0.5223, "step": 5812 }, { "epoch": 1.0085010409437891, "grad_norm": 1.0119794607162476, "learning_rate": 2.7345764191000993e-05, "loss": 0.3997, "step": 5813 }, { "epoch": 1.008674531575295, "grad_norm": 0.6230090856552124, "learning_rate": 2.733942670590949e-05, "loss": 0.6511, "step": 5814 }, { "epoch": 1.0088480222068008, "grad_norm": 0.6754621863365173, "learning_rate": 2.7333088369068967e-05, "loss": 0.5382, "step": 5815 }, { "epoch": 1.0090215128383067, "grad_norm": 1.1556576490402222, "learning_rate": 2.7326749181214992e-05, "loss": 0.3966, "step": 5816 }, { "epoch": 1.0091950034698127, "grad_norm": 0.7548868656158447, "learning_rate": 2.732040914308324e-05, "loss": 0.4733, "step": 5817 }, { "epoch": 1.0093684941013186, "grad_norm": 0.690839409828186, "learning_rate": 2.7314068255409466e-05, "loss": 0.5928, "step": 5818 }, { "epoch": 1.0095419847328244, "grad_norm": 0.7861557006835938, "learning_rate": 2.7307726518929562e-05, "loss": 0.4268, "step": 5819 }, { "epoch": 1.0097154753643303, "grad_norm": 0.936643660068512, "learning_rate": 2.7301383934379475e-05, "loss": 0.4392, "step": 5820 }, { "epoch": 1.0098889659958363, "grad_norm": 1.1157642602920532, "learning_rate": 2.7295040502495274e-05, "loss": 0.5464, "step": 5821 }, { "epoch": 1.0100624566273422, "grad_norm": 0.893825113773346, "learning_rate": 2.7288696224013124e-05, "loss": 0.4768, "step": 5822 }, { "epoch": 1.010235947258848, "grad_norm": 0.7682893872261047, "learning_rate": 2.728235109966928e-05, "loss": 0.4808, "step": 5823 }, { "epoch": 1.0104094378903539, "grad_norm": 0.7515265941619873, "learning_rate": 2.727600513020011e-05, "loss": 0.4105, "step": 5824 }, { "epoch": 1.0105829285218597, "grad_norm": 0.788699209690094, "learning_rate": 2.7269658316342065e-05, "loss": 0.4908, "step": 5825 }, { "epoch": 1.0107564191533658, "grad_norm": 0.8162208199501038, "learning_rate": 2.7263310658831697e-05, "loss": 0.6309, "step": 5826 }, { "epoch": 1.0109299097848716, "grad_norm": 0.7742594480514526, "learning_rate": 2.725696215840567e-05, "loss": 0.517, "step": 5827 }, { "epoch": 1.0111034004163775, "grad_norm": 0.7700182795524597, "learning_rate": 2.725061281580073e-05, "loss": 0.5311, "step": 5828 }, { "epoch": 1.0112768910478833, "grad_norm": 0.7160996794700623, "learning_rate": 2.724426263175372e-05, "loss": 0.5254, "step": 5829 }, { "epoch": 1.0114503816793894, "grad_norm": 0.7404236793518066, "learning_rate": 2.7237911607001586e-05, "loss": 0.4312, "step": 5830 }, { "epoch": 1.0116238723108952, "grad_norm": 1.0733016729354858, "learning_rate": 2.7231559742281382e-05, "loss": 0.6321, "step": 5831 }, { "epoch": 1.011797362942401, "grad_norm": 0.6563270688056946, "learning_rate": 2.722520703833024e-05, "loss": 0.6345, "step": 5832 }, { "epoch": 1.011970853573907, "grad_norm": 0.7572863698005676, "learning_rate": 2.7218853495885406e-05, "loss": 0.5138, "step": 5833 }, { "epoch": 1.012144344205413, "grad_norm": 0.7853530645370483, "learning_rate": 2.7212499115684204e-05, "loss": 0.6611, "step": 5834 }, { "epoch": 1.0123178348369188, "grad_norm": 0.724513053894043, "learning_rate": 2.7206143898464084e-05, "loss": 0.6832, "step": 5835 }, { "epoch": 1.0124913254684247, "grad_norm": 0.826076328754425, "learning_rate": 2.719978784496257e-05, "loss": 0.3856, "step": 5836 }, { "epoch": 1.0126648160999305, "grad_norm": 1.0083286762237549, "learning_rate": 2.719343095591728e-05, "loss": 0.5342, "step": 5837 }, { "epoch": 1.0128383067314366, "grad_norm": 0.6915223002433777, "learning_rate": 2.718707323206595e-05, "loss": 0.4821, "step": 5838 }, { "epoch": 1.0130117973629424, "grad_norm": 0.7911536693572998, "learning_rate": 2.7180714674146388e-05, "loss": 0.449, "step": 5839 }, { "epoch": 1.0131852879944483, "grad_norm": 0.8074101209640503, "learning_rate": 2.717435528289653e-05, "loss": 0.5811, "step": 5840 }, { "epoch": 1.0133587786259541, "grad_norm": 0.7548245787620544, "learning_rate": 2.7167995059054386e-05, "loss": 0.589, "step": 5841 }, { "epoch": 1.0135322692574602, "grad_norm": 0.8580533266067505, "learning_rate": 2.7161634003358056e-05, "loss": 0.3796, "step": 5842 }, { "epoch": 1.013705759888966, "grad_norm": 1.4111472368240356, "learning_rate": 2.715527211654575e-05, "loss": 0.5515, "step": 5843 }, { "epoch": 1.013879250520472, "grad_norm": 1.0080534219741821, "learning_rate": 2.7148909399355785e-05, "loss": 0.4456, "step": 5844 }, { "epoch": 1.0140527411519777, "grad_norm": 0.6560617685317993, "learning_rate": 2.7142545852526555e-05, "loss": 0.6492, "step": 5845 }, { "epoch": 1.0142262317834836, "grad_norm": 0.8145388960838318, "learning_rate": 2.713618147679655e-05, "loss": 0.371, "step": 5846 }, { "epoch": 1.0143997224149897, "grad_norm": 0.7898300290107727, "learning_rate": 2.7129816272904372e-05, "loss": 0.5088, "step": 5847 }, { "epoch": 1.0145732130464955, "grad_norm": 0.8385614156723022, "learning_rate": 2.712345024158871e-05, "loss": 0.5608, "step": 5848 }, { "epoch": 1.0147467036780013, "grad_norm": 0.7383617758750916, "learning_rate": 2.711708338358835e-05, "loss": 0.5056, "step": 5849 }, { "epoch": 1.0149201943095072, "grad_norm": 0.8515306115150452, "learning_rate": 2.711071569964216e-05, "loss": 0.5203, "step": 5850 }, { "epoch": 1.0150936849410133, "grad_norm": 0.7870065569877625, "learning_rate": 2.7104347190489134e-05, "loss": 0.5743, "step": 5851 }, { "epoch": 1.015267175572519, "grad_norm": 0.8708816766738892, "learning_rate": 2.7097977856868336e-05, "loss": 0.4529, "step": 5852 }, { "epoch": 1.015440666204025, "grad_norm": 1.0223897695541382, "learning_rate": 2.7091607699518936e-05, "loss": 0.3867, "step": 5853 }, { "epoch": 1.0156141568355308, "grad_norm": 0.8871798515319824, "learning_rate": 2.70852367191802e-05, "loss": 0.528, "step": 5854 }, { "epoch": 1.0157876474670369, "grad_norm": 0.9174553751945496, "learning_rate": 2.707886491659149e-05, "loss": 0.495, "step": 5855 }, { "epoch": 1.0159611380985427, "grad_norm": 0.9138659238815308, "learning_rate": 2.707249229249225e-05, "loss": 0.408, "step": 5856 }, { "epoch": 1.0161346287300486, "grad_norm": 0.7654126882553101, "learning_rate": 2.7066118847622053e-05, "loss": 0.545, "step": 5857 }, { "epoch": 1.0163081193615544, "grad_norm": 0.9373764991760254, "learning_rate": 2.7059744582720515e-05, "loss": 0.4532, "step": 5858 }, { "epoch": 1.0164816099930605, "grad_norm": 0.8717721104621887, "learning_rate": 2.7053369498527404e-05, "loss": 0.4827, "step": 5859 }, { "epoch": 1.0166551006245663, "grad_norm": 0.6174218654632568, "learning_rate": 2.7046993595782532e-05, "loss": 0.4834, "step": 5860 }, { "epoch": 1.0168285912560722, "grad_norm": 0.5347632765769958, "learning_rate": 2.704061687522585e-05, "loss": 0.6573, "step": 5861 }, { "epoch": 1.017002081887578, "grad_norm": 0.7912558913230896, "learning_rate": 2.7034239337597378e-05, "loss": 0.4384, "step": 5862 }, { "epoch": 1.017175572519084, "grad_norm": 0.6055047512054443, "learning_rate": 2.7027860983637223e-05, "loss": 0.6099, "step": 5863 }, { "epoch": 1.01734906315059, "grad_norm": 0.7508504986763, "learning_rate": 2.7021481814085622e-05, "loss": 0.446, "step": 5864 }, { "epoch": 1.0175225537820958, "grad_norm": 0.8576595783233643, "learning_rate": 2.7015101829682867e-05, "loss": 0.5245, "step": 5865 }, { "epoch": 1.0176960444136016, "grad_norm": 0.8401842713356018, "learning_rate": 2.7008721031169378e-05, "loss": 0.5066, "step": 5866 }, { "epoch": 1.0178695350451075, "grad_norm": 1.3381179571151733, "learning_rate": 2.7002339419285646e-05, "loss": 0.4844, "step": 5867 }, { "epoch": 1.0180430256766135, "grad_norm": 1.0883938074111938, "learning_rate": 2.699595699477226e-05, "loss": 0.5787, "step": 5868 }, { "epoch": 1.0182165163081194, "grad_norm": 0.7692278623580933, "learning_rate": 2.6989573758369915e-05, "loss": 0.4121, "step": 5869 }, { "epoch": 1.0183900069396252, "grad_norm": 0.6606525182723999, "learning_rate": 2.6983189710819396e-05, "loss": 0.4698, "step": 5870 }, { "epoch": 1.018563497571131, "grad_norm": 0.838937520980835, "learning_rate": 2.6976804852861564e-05, "loss": 0.3987, "step": 5871 }, { "epoch": 1.0187369882026371, "grad_norm": 0.7664431929588318, "learning_rate": 2.697041918523741e-05, "loss": 0.4128, "step": 5872 }, { "epoch": 1.018910478834143, "grad_norm": 0.7541807889938354, "learning_rate": 2.696403270868798e-05, "loss": 0.5994, "step": 5873 }, { "epoch": 1.0190839694656488, "grad_norm": 2.5131638050079346, "learning_rate": 2.6957645423954438e-05, "loss": 0.5707, "step": 5874 }, { "epoch": 1.0192574600971547, "grad_norm": 0.7696883082389832, "learning_rate": 2.6951257331778045e-05, "loss": 0.6599, "step": 5875 }, { "epoch": 1.0194309507286607, "grad_norm": 0.8057942390441895, "learning_rate": 2.694486843290013e-05, "loss": 0.3791, "step": 5876 }, { "epoch": 1.0196044413601666, "grad_norm": 0.7075010538101196, "learning_rate": 2.6938478728062148e-05, "loss": 0.4614, "step": 5877 }, { "epoch": 1.0197779319916724, "grad_norm": 0.8975362777709961, "learning_rate": 2.6932088218005623e-05, "loss": 0.4589, "step": 5878 }, { "epoch": 1.0199514226231783, "grad_norm": 0.728349506855011, "learning_rate": 2.692569690347218e-05, "loss": 0.5753, "step": 5879 }, { "epoch": 1.0201249132546844, "grad_norm": 0.7772476077079773, "learning_rate": 2.6919304785203543e-05, "loss": 0.4753, "step": 5880 }, { "epoch": 1.0202984038861902, "grad_norm": 0.7649878263473511, "learning_rate": 2.6912911863941525e-05, "loss": 0.4459, "step": 5881 }, { "epoch": 1.020471894517696, "grad_norm": 0.8078417181968689, "learning_rate": 2.6906518140428027e-05, "loss": 0.5367, "step": 5882 }, { "epoch": 1.020645385149202, "grad_norm": 0.7875314950942993, "learning_rate": 2.6900123615405052e-05, "loss": 0.3754, "step": 5883 }, { "epoch": 1.0208188757807077, "grad_norm": 0.8608262538909912, "learning_rate": 2.6893728289614693e-05, "loss": 0.3492, "step": 5884 }, { "epoch": 1.0209923664122138, "grad_norm": 0.7267404794692993, "learning_rate": 2.6887332163799133e-05, "loss": 0.6483, "step": 5885 }, { "epoch": 1.0211658570437196, "grad_norm": 0.8437882661819458, "learning_rate": 2.688093523870065e-05, "loss": 0.5643, "step": 5886 }, { "epoch": 1.0213393476752255, "grad_norm": 0.8284854292869568, "learning_rate": 2.6874537515061612e-05, "loss": 0.5295, "step": 5887 }, { "epoch": 1.0215128383067313, "grad_norm": 0.9316041469573975, "learning_rate": 2.6868138993624486e-05, "loss": 0.563, "step": 5888 }, { "epoch": 1.0216863289382374, "grad_norm": 3.2630345821380615, "learning_rate": 2.6861739675131823e-05, "loss": 0.4152, "step": 5889 }, { "epoch": 1.0218598195697433, "grad_norm": 0.5239765644073486, "learning_rate": 2.6855339560326284e-05, "loss": 0.5642, "step": 5890 }, { "epoch": 1.022033310201249, "grad_norm": 1.1041979789733887, "learning_rate": 2.6848938649950597e-05, "loss": 0.4003, "step": 5891 }, { "epoch": 1.022206800832755, "grad_norm": 0.6967913508415222, "learning_rate": 2.6842536944747597e-05, "loss": 0.5873, "step": 5892 }, { "epoch": 1.022380291464261, "grad_norm": 0.9682864546775818, "learning_rate": 2.6836134445460212e-05, "loss": 0.5067, "step": 5893 }, { "epoch": 1.0225537820957669, "grad_norm": 0.7582650780677795, "learning_rate": 2.682973115283146e-05, "loss": 0.3793, "step": 5894 }, { "epoch": 1.0227272727272727, "grad_norm": 0.7745401263237, "learning_rate": 2.6823327067604452e-05, "loss": 0.449, "step": 5895 }, { "epoch": 1.0229007633587786, "grad_norm": 0.7303228378295898, "learning_rate": 2.6816922190522386e-05, "loss": 0.5547, "step": 5896 }, { "epoch": 1.0230742539902846, "grad_norm": 0.7065463066101074, "learning_rate": 2.6810516522328553e-05, "loss": 0.3846, "step": 5897 }, { "epoch": 1.0232477446217905, "grad_norm": 0.5702760219573975, "learning_rate": 2.6804110063766345e-05, "loss": 0.5317, "step": 5898 }, { "epoch": 1.0234212352532963, "grad_norm": 0.6798083186149597, "learning_rate": 2.6797702815579234e-05, "loss": 0.5187, "step": 5899 }, { "epoch": 1.0235947258848022, "grad_norm": 0.8473564982414246, "learning_rate": 2.679129477851079e-05, "loss": 0.5884, "step": 5900 }, { "epoch": 1.0237682165163082, "grad_norm": 0.783953845500946, "learning_rate": 2.6784885953304676e-05, "loss": 0.4545, "step": 5901 }, { "epoch": 1.023941707147814, "grad_norm": 0.6012303829193115, "learning_rate": 2.6778476340704636e-05, "loss": 0.6201, "step": 5902 }, { "epoch": 1.02411519777932, "grad_norm": 1.1695358753204346, "learning_rate": 2.6772065941454527e-05, "loss": 0.5081, "step": 5903 }, { "epoch": 1.0242886884108258, "grad_norm": 0.6857214570045471, "learning_rate": 2.6765654756298264e-05, "loss": 0.5099, "step": 5904 }, { "epoch": 1.0244621790423316, "grad_norm": 0.6966187357902527, "learning_rate": 2.675924278597989e-05, "loss": 0.5002, "step": 5905 }, { "epoch": 1.0246356696738377, "grad_norm": 0.613068699836731, "learning_rate": 2.675283003124351e-05, "loss": 0.5436, "step": 5906 }, { "epoch": 1.0248091603053435, "grad_norm": 0.8523713946342468, "learning_rate": 2.6746416492833343e-05, "loss": 0.5601, "step": 5907 }, { "epoch": 1.0249826509368494, "grad_norm": 0.6661941409111023, "learning_rate": 2.6740002171493676e-05, "loss": 0.48, "step": 5908 }, { "epoch": 1.0251561415683552, "grad_norm": 0.6891001462936401, "learning_rate": 2.67335870679689e-05, "loss": 0.4889, "step": 5909 }, { "epoch": 1.0253296321998613, "grad_norm": 0.8027567863464355, "learning_rate": 2.6727171183003502e-05, "loss": 0.475, "step": 5910 }, { "epoch": 1.0255031228313671, "grad_norm": 1.59710693359375, "learning_rate": 2.6720754517342053e-05, "loss": 0.3439, "step": 5911 }, { "epoch": 1.025676613462873, "grad_norm": 0.9039287567138672, "learning_rate": 2.6714337071729207e-05, "loss": 0.4475, "step": 5912 }, { "epoch": 1.0258501040943788, "grad_norm": 0.7993444204330444, "learning_rate": 2.6707918846909722e-05, "loss": 0.4545, "step": 5913 }, { "epoch": 1.026023594725885, "grad_norm": 0.6327459216117859, "learning_rate": 2.6701499843628443e-05, "loss": 0.6672, "step": 5914 }, { "epoch": 1.0261970853573907, "grad_norm": 0.7319054007530212, "learning_rate": 2.669508006263029e-05, "loss": 0.4263, "step": 5915 }, { "epoch": 1.0263705759888966, "grad_norm": 0.7239307761192322, "learning_rate": 2.66886595046603e-05, "loss": 0.5853, "step": 5916 }, { "epoch": 1.0265440666204024, "grad_norm": 0.8016424179077148, "learning_rate": 2.6682238170463575e-05, "loss": 0.4938, "step": 5917 }, { "epoch": 1.0267175572519085, "grad_norm": 0.8668724298477173, "learning_rate": 2.6675816060785327e-05, "loss": 0.5533, "step": 5918 }, { "epoch": 1.0268910478834143, "grad_norm": 0.5603392720222473, "learning_rate": 2.666939317637085e-05, "loss": 0.4508, "step": 5919 }, { "epoch": 1.0270645385149202, "grad_norm": 0.5357174277305603, "learning_rate": 2.666296951796552e-05, "loss": 0.6528, "step": 5920 }, { "epoch": 1.027238029146426, "grad_norm": 0.7446817755699158, "learning_rate": 2.665654508631481e-05, "loss": 0.5292, "step": 5921 }, { "epoch": 1.027411519777932, "grad_norm": 0.7444019913673401, "learning_rate": 2.6650119882164292e-05, "loss": 0.5618, "step": 5922 }, { "epoch": 1.027585010409438, "grad_norm": 1.1233359575271606, "learning_rate": 2.664369390625961e-05, "loss": 0.5344, "step": 5923 }, { "epoch": 1.0277585010409438, "grad_norm": 0.8197610974311829, "learning_rate": 2.663726715934651e-05, "loss": 0.5358, "step": 5924 }, { "epoch": 1.0279319916724496, "grad_norm": 0.7967637777328491, "learning_rate": 2.663083964217082e-05, "loss": 0.5045, "step": 5925 }, { "epoch": 1.0281054823039555, "grad_norm": 0.8068925738334656, "learning_rate": 2.6624411355478463e-05, "loss": 0.5197, "step": 5926 }, { "epoch": 1.0282789729354616, "grad_norm": 0.9581280946731567, "learning_rate": 2.6617982300015457e-05, "loss": 0.5475, "step": 5927 }, { "epoch": 1.0284524635669674, "grad_norm": 0.6548795104026794, "learning_rate": 2.661155247652788e-05, "loss": 0.4966, "step": 5928 }, { "epoch": 1.0286259541984732, "grad_norm": 0.8756265044212341, "learning_rate": 2.6605121885761948e-05, "loss": 0.4913, "step": 5929 }, { "epoch": 1.028799444829979, "grad_norm": 0.7330228090286255, "learning_rate": 2.6598690528463916e-05, "loss": 0.4917, "step": 5930 }, { "epoch": 1.0289729354614852, "grad_norm": 0.7564222812652588, "learning_rate": 2.659225840538016e-05, "loss": 0.5046, "step": 5931 }, { "epoch": 1.029146426092991, "grad_norm": 1.3399449586868286, "learning_rate": 2.6585825517257133e-05, "loss": 0.5187, "step": 5932 }, { "epoch": 1.0293199167244969, "grad_norm": 0.6582682728767395, "learning_rate": 2.657939186484139e-05, "loss": 0.5457, "step": 5933 }, { "epoch": 1.0294934073560027, "grad_norm": 1.13554048538208, "learning_rate": 2.6572957448879547e-05, "loss": 0.4081, "step": 5934 }, { "epoch": 1.0296668979875088, "grad_norm": 1.3798998594284058, "learning_rate": 2.6566522270118333e-05, "loss": 0.4091, "step": 5935 }, { "epoch": 1.0298403886190146, "grad_norm": 1.0581423044204712, "learning_rate": 2.656008632930456e-05, "loss": 0.4448, "step": 5936 }, { "epoch": 1.0300138792505205, "grad_norm": 0.7870919108390808, "learning_rate": 2.6553649627185122e-05, "loss": 0.6226, "step": 5937 }, { "epoch": 1.0301873698820263, "grad_norm": 0.8297151923179626, "learning_rate": 2.654721216450701e-05, "loss": 0.6178, "step": 5938 }, { "epoch": 1.0303608605135324, "grad_norm": 1.201669454574585, "learning_rate": 2.65407739420173e-05, "loss": 0.5902, "step": 5939 }, { "epoch": 1.0305343511450382, "grad_norm": 0.7206531167030334, "learning_rate": 2.653433496046315e-05, "loss": 0.5454, "step": 5940 }, { "epoch": 1.030707841776544, "grad_norm": 0.9494001269340515, "learning_rate": 2.652789522059181e-05, "loss": 0.397, "step": 5941 }, { "epoch": 1.03088133240805, "grad_norm": 0.6626607179641724, "learning_rate": 2.652145472315063e-05, "loss": 0.6467, "step": 5942 }, { "epoch": 1.0310548230395558, "grad_norm": 0.6038725972175598, "learning_rate": 2.6515013468887026e-05, "loss": 0.5157, "step": 5943 }, { "epoch": 1.0312283136710618, "grad_norm": 0.885932445526123, "learning_rate": 2.650857145854852e-05, "loss": 0.5641, "step": 5944 }, { "epoch": 1.0314018043025677, "grad_norm": 0.7481269240379333, "learning_rate": 2.650212869288271e-05, "loss": 0.5497, "step": 5945 }, { "epoch": 1.0315752949340735, "grad_norm": 1.073958396911621, "learning_rate": 2.6495685172637292e-05, "loss": 0.4203, "step": 5946 }, { "epoch": 1.0317487855655794, "grad_norm": 1.1243047714233398, "learning_rate": 2.6489240898560035e-05, "loss": 0.3448, "step": 5947 }, { "epoch": 1.0319222761970854, "grad_norm": 0.5776740312576294, "learning_rate": 2.6482795871398815e-05, "loss": 0.4563, "step": 5948 }, { "epoch": 1.0320957668285913, "grad_norm": 0.9867730736732483, "learning_rate": 2.6476350091901583e-05, "loss": 0.5099, "step": 5949 }, { "epoch": 1.0322692574600971, "grad_norm": 0.8928215503692627, "learning_rate": 2.646990356081637e-05, "loss": 0.4133, "step": 5950 }, { "epoch": 1.032442748091603, "grad_norm": 0.6463452577590942, "learning_rate": 2.646345627889131e-05, "loss": 0.511, "step": 5951 }, { "epoch": 1.032616238723109, "grad_norm": 0.768272340297699, "learning_rate": 2.645700824687462e-05, "loss": 0.5146, "step": 5952 }, { "epoch": 1.0327897293546149, "grad_norm": 1.2430983781814575, "learning_rate": 2.6450559465514598e-05, "loss": 0.5798, "step": 5953 }, { "epoch": 1.0329632199861207, "grad_norm": 1.2361890077590942, "learning_rate": 2.644410993555963e-05, "loss": 0.541, "step": 5954 }, { "epoch": 1.0331367106176266, "grad_norm": 0.8147668838500977, "learning_rate": 2.6437659657758198e-05, "loss": 0.4257, "step": 5955 }, { "epoch": 1.0333102012491326, "grad_norm": 0.721660315990448, "learning_rate": 2.643120863285886e-05, "loss": 0.5403, "step": 5956 }, { "epoch": 1.0334836918806385, "grad_norm": 0.8418735861778259, "learning_rate": 2.6424756861610274e-05, "loss": 0.4564, "step": 5957 }, { "epoch": 1.0336571825121443, "grad_norm": 0.9149510860443115, "learning_rate": 2.6418304344761165e-05, "loss": 0.4342, "step": 5958 }, { "epoch": 1.0338306731436502, "grad_norm": 0.6601338982582092, "learning_rate": 2.6411851083060355e-05, "loss": 0.4678, "step": 5959 }, { "epoch": 1.0340041637751562, "grad_norm": 0.6818693280220032, "learning_rate": 2.6405397077256752e-05, "loss": 0.4143, "step": 5960 }, { "epoch": 1.034177654406662, "grad_norm": 0.9768335223197937, "learning_rate": 2.639894232809936e-05, "loss": 0.5117, "step": 5961 }, { "epoch": 1.034351145038168, "grad_norm": 0.6836830377578735, "learning_rate": 2.6392486836337256e-05, "loss": 0.5513, "step": 5962 }, { "epoch": 1.0345246356696738, "grad_norm": 0.8871609568595886, "learning_rate": 2.63860306027196e-05, "loss": 0.3868, "step": 5963 }, { "epoch": 1.0346981263011796, "grad_norm": 0.7213243842124939, "learning_rate": 2.637957362799566e-05, "loss": 0.4501, "step": 5964 }, { "epoch": 1.0348716169326857, "grad_norm": 1.380795955657959, "learning_rate": 2.637311591291476e-05, "loss": 0.3999, "step": 5965 }, { "epoch": 1.0350451075641915, "grad_norm": 0.9298617839813232, "learning_rate": 2.636665745822633e-05, "loss": 0.4683, "step": 5966 }, { "epoch": 1.0352185981956974, "grad_norm": 0.6938121914863586, "learning_rate": 2.636019826467989e-05, "loss": 0.5135, "step": 5967 }, { "epoch": 1.0353920888272032, "grad_norm": 1.094827651977539, "learning_rate": 2.6353738333025022e-05, "loss": 0.5461, "step": 5968 }, { "epoch": 1.0355655794587093, "grad_norm": 1.1556035280227661, "learning_rate": 2.6347277664011426e-05, "loss": 0.4418, "step": 5969 }, { "epoch": 1.0357390700902152, "grad_norm": 0.8316487669944763, "learning_rate": 2.6340816258388858e-05, "loss": 0.5878, "step": 5970 }, { "epoch": 1.035912560721721, "grad_norm": 0.8427453637123108, "learning_rate": 2.6334354116907173e-05, "loss": 0.512, "step": 5971 }, { "epoch": 1.0360860513532268, "grad_norm": 1.3676400184631348, "learning_rate": 2.6327891240316313e-05, "loss": 0.5244, "step": 5972 }, { "epoch": 1.036259541984733, "grad_norm": 1.054203987121582, "learning_rate": 2.6321427629366295e-05, "loss": 0.5144, "step": 5973 }, { "epoch": 1.0364330326162388, "grad_norm": 1.2002469301223755, "learning_rate": 2.6314963284807246e-05, "loss": 0.5376, "step": 5974 }, { "epoch": 1.0366065232477446, "grad_norm": 0.6875637769699097, "learning_rate": 2.6308498207389344e-05, "loss": 0.4641, "step": 5975 }, { "epoch": 1.0367800138792505, "grad_norm": 0.7891087532043457, "learning_rate": 2.630203239786287e-05, "loss": 0.4939, "step": 5976 }, { "epoch": 1.0369535045107565, "grad_norm": 1.255618929862976, "learning_rate": 2.6295565856978202e-05, "loss": 0.4891, "step": 5977 }, { "epoch": 1.0371269951422624, "grad_norm": 0.8953392505645752, "learning_rate": 2.628909858548577e-05, "loss": 0.4438, "step": 5978 }, { "epoch": 1.0373004857737682, "grad_norm": 0.8011636137962341, "learning_rate": 2.6282630584136123e-05, "loss": 0.4788, "step": 5979 }, { "epoch": 1.037473976405274, "grad_norm": 1.016404628753662, "learning_rate": 2.6276161853679877e-05, "loss": 0.5754, "step": 5980 }, { "epoch": 1.0376474670367801, "grad_norm": 0.8635426759719849, "learning_rate": 2.626969239486773e-05, "loss": 0.5616, "step": 5981 }, { "epoch": 1.037820957668286, "grad_norm": 0.9815588593482971, "learning_rate": 2.626322220845048e-05, "loss": 0.499, "step": 5982 }, { "epoch": 1.0379944482997918, "grad_norm": 1.184328317642212, "learning_rate": 2.6256751295179e-05, "loss": 0.4153, "step": 5983 }, { "epoch": 1.0381679389312977, "grad_norm": 0.6641515493392944, "learning_rate": 2.6250279655804232e-05, "loss": 0.505, "step": 5984 }, { "epoch": 1.0383414295628035, "grad_norm": 2.3295421600341797, "learning_rate": 2.624380729107723e-05, "loss": 0.4332, "step": 5985 }, { "epoch": 1.0385149201943096, "grad_norm": 0.8141412138938904, "learning_rate": 2.6237334201749126e-05, "loss": 0.5208, "step": 5986 }, { "epoch": 1.0386884108258154, "grad_norm": 0.6626212000846863, "learning_rate": 2.6230860388571103e-05, "loss": 0.4482, "step": 5987 }, { "epoch": 1.0388619014573213, "grad_norm": 3.283660650253296, "learning_rate": 2.6224385852294484e-05, "loss": 0.488, "step": 5988 }, { "epoch": 1.0390353920888271, "grad_norm": 0.806022584438324, "learning_rate": 2.6217910593670626e-05, "loss": 0.5385, "step": 5989 }, { "epoch": 1.0392088827203332, "grad_norm": 0.8127049207687378, "learning_rate": 2.6211434613451006e-05, "loss": 0.5229, "step": 5990 }, { "epoch": 1.039382373351839, "grad_norm": 0.7175759673118591, "learning_rate": 2.6204957912387156e-05, "loss": 0.5304, "step": 5991 }, { "epoch": 1.0395558639833449, "grad_norm": 0.7946687936782837, "learning_rate": 2.6198480491230712e-05, "loss": 0.4648, "step": 5992 }, { "epoch": 1.0397293546148507, "grad_norm": 1.0387405157089233, "learning_rate": 2.6192002350733387e-05, "loss": 0.5369, "step": 5993 }, { "epoch": 1.0399028452463568, "grad_norm": 0.8700816631317139, "learning_rate": 2.618552349164697e-05, "loss": 0.4854, "step": 5994 }, { "epoch": 1.0400763358778626, "grad_norm": 0.8339025378227234, "learning_rate": 2.617904391472334e-05, "loss": 0.4008, "step": 5995 }, { "epoch": 1.0402498265093685, "grad_norm": 1.0166280269622803, "learning_rate": 2.6172563620714475e-05, "loss": 0.5493, "step": 5996 }, { "epoch": 1.0404233171408743, "grad_norm": 0.6536795496940613, "learning_rate": 2.61660826103724e-05, "loss": 0.5028, "step": 5997 }, { "epoch": 1.0405968077723804, "grad_norm": 0.661294162273407, "learning_rate": 2.6159600884449258e-05, "loss": 0.63, "step": 5998 }, { "epoch": 1.0407702984038862, "grad_norm": 0.7142971158027649, "learning_rate": 2.6153118443697255e-05, "loss": 0.4796, "step": 5999 }, { "epoch": 1.040943789035392, "grad_norm": 0.8321624994277954, "learning_rate": 2.6146635288868685e-05, "loss": 0.5194, "step": 6000 }, { "epoch": 1.041117279666898, "grad_norm": 0.7722504138946533, "learning_rate": 2.6140151420715932e-05, "loss": 0.4021, "step": 6001 }, { "epoch": 1.0412907702984038, "grad_norm": 1.652472972869873, "learning_rate": 2.6133666839991444e-05, "loss": 0.4716, "step": 6002 }, { "epoch": 1.0414642609299098, "grad_norm": 0.7369078397750854, "learning_rate": 2.6127181547447773e-05, "loss": 0.4482, "step": 6003 }, { "epoch": 1.0416377515614157, "grad_norm": 0.7246720790863037, "learning_rate": 2.612069554383755e-05, "loss": 0.4106, "step": 6004 }, { "epoch": 1.0418112421929215, "grad_norm": 1.4080967903137207, "learning_rate": 2.6114208829913473e-05, "loss": 0.5305, "step": 6005 }, { "epoch": 1.0419847328244274, "grad_norm": 1.1792443990707397, "learning_rate": 2.6107721406428338e-05, "loss": 0.5676, "step": 6006 }, { "epoch": 1.0421582234559335, "grad_norm": 1.539556860923767, "learning_rate": 2.6101233274135017e-05, "loss": 0.3627, "step": 6007 }, { "epoch": 1.0423317140874393, "grad_norm": 0.6607788801193237, "learning_rate": 2.6094744433786467e-05, "loss": 0.5356, "step": 6008 }, { "epoch": 1.0425052047189451, "grad_norm": 1.1622593402862549, "learning_rate": 2.608825488613572e-05, "loss": 0.4337, "step": 6009 }, { "epoch": 1.042678695350451, "grad_norm": 0.9463575482368469, "learning_rate": 2.6081764631935896e-05, "loss": 0.441, "step": 6010 }, { "epoch": 1.042852185981957, "grad_norm": 0.7776453495025635, "learning_rate": 2.6075273671940212e-05, "loss": 0.5146, "step": 6011 }, { "epoch": 1.043025676613463, "grad_norm": 0.8027953505516052, "learning_rate": 2.606878200690193e-05, "loss": 0.4731, "step": 6012 }, { "epoch": 1.0431991672449688, "grad_norm": 0.708922266960144, "learning_rate": 2.6062289637574428e-05, "loss": 0.3625, "step": 6013 }, { "epoch": 1.0433726578764746, "grad_norm": 0.7351964712142944, "learning_rate": 2.605579656471115e-05, "loss": 0.5157, "step": 6014 }, { "epoch": 1.0435461485079807, "grad_norm": 0.7151260375976562, "learning_rate": 2.6049302789065624e-05, "loss": 0.5406, "step": 6015 }, { "epoch": 1.0437196391394865, "grad_norm": 0.673349142074585, "learning_rate": 2.6042808311391456e-05, "loss": 0.5458, "step": 6016 }, { "epoch": 1.0438931297709924, "grad_norm": 1.6523752212524414, "learning_rate": 2.603631313244235e-05, "loss": 0.4021, "step": 6017 }, { "epoch": 1.0440666204024982, "grad_norm": 0.9373065233230591, "learning_rate": 2.6029817252972064e-05, "loss": 0.6046, "step": 6018 }, { "epoch": 1.0442401110340043, "grad_norm": 1.176289677619934, "learning_rate": 2.6023320673734462e-05, "loss": 0.4153, "step": 6019 }, { "epoch": 1.0444136016655101, "grad_norm": 0.937972366809845, "learning_rate": 2.6016823395483482e-05, "loss": 0.4359, "step": 6020 }, { "epoch": 1.044587092297016, "grad_norm": 0.8680937886238098, "learning_rate": 2.6010325418973127e-05, "loss": 0.4891, "step": 6021 }, { "epoch": 1.0447605829285218, "grad_norm": 0.8315525650978088, "learning_rate": 2.600382674495751e-05, "loss": 0.4292, "step": 6022 }, { "epoch": 1.0449340735600277, "grad_norm": 0.7118164896965027, "learning_rate": 2.5997327374190797e-05, "loss": 0.4984, "step": 6023 }, { "epoch": 1.0451075641915337, "grad_norm": 0.8397243618965149, "learning_rate": 2.5990827307427263e-05, "loss": 0.511, "step": 6024 }, { "epoch": 1.0452810548230396, "grad_norm": 0.7386336326599121, "learning_rate": 2.5984326545421238e-05, "loss": 0.5754, "step": 6025 }, { "epoch": 1.0454545454545454, "grad_norm": 0.6508328914642334, "learning_rate": 2.5977825088927135e-05, "loss": 0.6221, "step": 6026 }, { "epoch": 1.0456280360860513, "grad_norm": 0.9424730539321899, "learning_rate": 2.597132293869947e-05, "loss": 0.4105, "step": 6027 }, { "epoch": 1.0458015267175573, "grad_norm": 0.8315467238426208, "learning_rate": 2.5964820095492825e-05, "loss": 0.391, "step": 6028 }, { "epoch": 1.0459750173490632, "grad_norm": 1.1879899501800537, "learning_rate": 2.5958316560061853e-05, "loss": 0.5172, "step": 6029 }, { "epoch": 1.046148507980569, "grad_norm": 1.000832438468933, "learning_rate": 2.5951812333161298e-05, "loss": 0.5194, "step": 6030 }, { "epoch": 1.0463219986120749, "grad_norm": 1.8499864339828491, "learning_rate": 2.594530741554599e-05, "loss": 0.3949, "step": 6031 }, { "epoch": 1.046495489243581, "grad_norm": 2.039126396179199, "learning_rate": 2.593880180797083e-05, "loss": 0.5708, "step": 6032 }, { "epoch": 1.0466689798750868, "grad_norm": 1.0549595355987549, "learning_rate": 2.5932295511190803e-05, "loss": 0.4266, "step": 6033 }, { "epoch": 1.0468424705065926, "grad_norm": 1.0007039308547974, "learning_rate": 2.5925788525960964e-05, "loss": 0.519, "step": 6034 }, { "epoch": 1.0470159611380985, "grad_norm": 0.7571946382522583, "learning_rate": 2.5919280853036462e-05, "loss": 0.5032, "step": 6035 }, { "epoch": 1.0471894517696045, "grad_norm": 0.6391426920890808, "learning_rate": 2.5912772493172523e-05, "loss": 0.5856, "step": 6036 }, { "epoch": 1.0473629424011104, "grad_norm": 1.0349048376083374, "learning_rate": 2.590626344712444e-05, "loss": 0.4343, "step": 6037 }, { "epoch": 1.0475364330326162, "grad_norm": 0.7470468878746033, "learning_rate": 2.5899753715647614e-05, "loss": 0.4257, "step": 6038 }, { "epoch": 1.047709923664122, "grad_norm": 0.7566617727279663, "learning_rate": 2.5893243299497483e-05, "loss": 0.4045, "step": 6039 }, { "epoch": 1.0478834142956281, "grad_norm": 0.7339170575141907, "learning_rate": 2.5886732199429606e-05, "loss": 0.5164, "step": 6040 }, { "epoch": 1.048056904927134, "grad_norm": 1.0630372762680054, "learning_rate": 2.5880220416199598e-05, "loss": 0.4393, "step": 6041 }, { "epoch": 1.0482303955586398, "grad_norm": 0.6280874609947205, "learning_rate": 2.587370795056315e-05, "loss": 0.4855, "step": 6042 }, { "epoch": 1.0484038861901457, "grad_norm": 0.7635196447372437, "learning_rate": 2.5867194803276058e-05, "loss": 0.4342, "step": 6043 }, { "epoch": 1.0485773768216515, "grad_norm": 1.8262170553207397, "learning_rate": 2.5860680975094178e-05, "loss": 0.5427, "step": 6044 }, { "epoch": 1.0487508674531576, "grad_norm": 1.0261775255203247, "learning_rate": 2.585416646677343e-05, "loss": 0.462, "step": 6045 }, { "epoch": 1.0489243580846634, "grad_norm": 0.8317374587059021, "learning_rate": 2.5847651279069847e-05, "loss": 0.5562, "step": 6046 }, { "epoch": 1.0490978487161693, "grad_norm": 1.0046920776367188, "learning_rate": 2.584113541273952e-05, "loss": 0.4017, "step": 6047 }, { "epoch": 1.0492713393476751, "grad_norm": 1.0864875316619873, "learning_rate": 2.5834618868538623e-05, "loss": 0.4116, "step": 6048 }, { "epoch": 1.0494448299791812, "grad_norm": 0.9202406406402588, "learning_rate": 2.58281016472234e-05, "loss": 0.5406, "step": 6049 }, { "epoch": 1.049618320610687, "grad_norm": 0.7802914381027222, "learning_rate": 2.58215837495502e-05, "loss": 0.5032, "step": 6050 }, { "epoch": 1.049791811242193, "grad_norm": 0.687981128692627, "learning_rate": 2.5815065176275417e-05, "loss": 0.47, "step": 6051 }, { "epoch": 1.0499653018736987, "grad_norm": 0.8128355145454407, "learning_rate": 2.5808545928155547e-05, "loss": 0.4601, "step": 6052 }, { "epoch": 1.0501387925052048, "grad_norm": 0.8574787974357605, "learning_rate": 2.580202600594715e-05, "loss": 0.3673, "step": 6053 }, { "epoch": 1.0503122831367107, "grad_norm": 0.6471509337425232, "learning_rate": 2.5795505410406878e-05, "loss": 0.6451, "step": 6054 }, { "epoch": 1.0504857737682165, "grad_norm": 0.7630587816238403, "learning_rate": 2.5788984142291448e-05, "loss": 0.6399, "step": 6055 }, { "epoch": 1.0506592643997223, "grad_norm": 1.43451988697052, "learning_rate": 2.5782462202357664e-05, "loss": 0.4636, "step": 6056 }, { "epoch": 1.0508327550312284, "grad_norm": 0.7638126015663147, "learning_rate": 2.5775939591362403e-05, "loss": 0.5337, "step": 6057 }, { "epoch": 1.0510062456627343, "grad_norm": 0.7070881724357605, "learning_rate": 2.5769416310062622e-05, "loss": 0.3953, "step": 6058 }, { "epoch": 1.05117973629424, "grad_norm": 0.7772389054298401, "learning_rate": 2.576289235921536e-05, "loss": 0.6108, "step": 6059 }, { "epoch": 1.051353226925746, "grad_norm": 0.9139055609703064, "learning_rate": 2.5756367739577713e-05, "loss": 0.4846, "step": 6060 }, { "epoch": 1.0515267175572518, "grad_norm": 0.8612098097801208, "learning_rate": 2.574984245190689e-05, "loss": 0.5166, "step": 6061 }, { "epoch": 1.0517002081887579, "grad_norm": 0.5893256068229675, "learning_rate": 2.5743316496960154e-05, "loss": 0.6597, "step": 6062 }, { "epoch": 1.0518736988202637, "grad_norm": 1.2996785640716553, "learning_rate": 2.5736789875494844e-05, "loss": 0.4073, "step": 6063 }, { "epoch": 1.0520471894517696, "grad_norm": 1.8747106790542603, "learning_rate": 2.573026258826838e-05, "loss": 0.4684, "step": 6064 }, { "epoch": 1.0522206800832754, "grad_norm": 1.3142502307891846, "learning_rate": 2.5723734636038272e-05, "loss": 0.4611, "step": 6065 }, { "epoch": 1.0523941707147815, "grad_norm": 0.8775864839553833, "learning_rate": 2.571720601956208e-05, "loss": 0.465, "step": 6066 }, { "epoch": 1.0525676613462873, "grad_norm": 0.7700573205947876, "learning_rate": 2.571067673959748e-05, "loss": 0.4945, "step": 6067 }, { "epoch": 1.0527411519777932, "grad_norm": 0.9080457091331482, "learning_rate": 2.570414679690218e-05, "loss": 0.6381, "step": 6068 }, { "epoch": 1.052914642609299, "grad_norm": 0.7340862154960632, "learning_rate": 2.5697616192234005e-05, "loss": 0.4708, "step": 6069 }, { "epoch": 1.053088133240805, "grad_norm": 0.7591330409049988, "learning_rate": 2.5691084926350825e-05, "loss": 0.5574, "step": 6070 }, { "epoch": 1.053261623872311, "grad_norm": 0.7035334706306458, "learning_rate": 2.568455300001061e-05, "loss": 0.5569, "step": 6071 }, { "epoch": 1.0534351145038168, "grad_norm": 0.6817179918289185, "learning_rate": 2.56780204139714e-05, "loss": 0.391, "step": 6072 }, { "epoch": 1.0536086051353226, "grad_norm": 0.6882315278053284, "learning_rate": 2.56714871689913e-05, "loss": 0.402, "step": 6073 }, { "epoch": 1.0537820957668287, "grad_norm": 1.1013617515563965, "learning_rate": 2.5664953265828504e-05, "loss": 0.6382, "step": 6074 }, { "epoch": 1.0539555863983345, "grad_norm": 0.8133484125137329, "learning_rate": 2.5658418705241283e-05, "loss": 0.3861, "step": 6075 }, { "epoch": 1.0541290770298404, "grad_norm": 0.8575695157051086, "learning_rate": 2.565188348798798e-05, "loss": 0.6594, "step": 6076 }, { "epoch": 1.0543025676613462, "grad_norm": 0.928375780582428, "learning_rate": 2.5645347614827008e-05, "loss": 0.5104, "step": 6077 }, { "epoch": 1.0544760582928523, "grad_norm": 0.7670015692710876, "learning_rate": 2.5638811086516873e-05, "loss": 0.4182, "step": 6078 }, { "epoch": 1.0546495489243581, "grad_norm": 1.106449842453003, "learning_rate": 2.5632273903816133e-05, "loss": 0.5043, "step": 6079 }, { "epoch": 1.054823039555864, "grad_norm": 0.8924499154090881, "learning_rate": 2.562573606748345e-05, "loss": 0.4215, "step": 6080 }, { "epoch": 1.0549965301873698, "grad_norm": 0.6301459074020386, "learning_rate": 2.561919757827754e-05, "loss": 0.5708, "step": 6081 }, { "epoch": 1.0551700208188757, "grad_norm": 0.9665195345878601, "learning_rate": 2.5612658436957204e-05, "loss": 0.5227, "step": 6082 }, { "epoch": 1.0553435114503817, "grad_norm": 0.7565470337867737, "learning_rate": 2.5606118644281318e-05, "loss": 0.4502, "step": 6083 }, { "epoch": 1.0555170020818876, "grad_norm": 0.7726820707321167, "learning_rate": 2.5599578201008824e-05, "loss": 0.4883, "step": 6084 }, { "epoch": 1.0556904927133934, "grad_norm": 0.8903718590736389, "learning_rate": 2.559303710789876e-05, "loss": 0.3729, "step": 6085 }, { "epoch": 1.0558639833448993, "grad_norm": 1.2035996913909912, "learning_rate": 2.5586495365710225e-05, "loss": 0.538, "step": 6086 }, { "epoch": 1.0560374739764053, "grad_norm": 0.8257406949996948, "learning_rate": 2.557995297520239e-05, "loss": 0.4719, "step": 6087 }, { "epoch": 1.0562109646079112, "grad_norm": 0.6504576206207275, "learning_rate": 2.5573409937134508e-05, "loss": 0.4062, "step": 6088 }, { "epoch": 1.056384455239417, "grad_norm": 0.8960192203521729, "learning_rate": 2.5566866252265908e-05, "loss": 0.5537, "step": 6089 }, { "epoch": 1.0565579458709229, "grad_norm": 0.8303012251853943, "learning_rate": 2.5560321921355996e-05, "loss": 0.3818, "step": 6090 }, { "epoch": 1.056731436502429, "grad_norm": 0.6931776404380798, "learning_rate": 2.555377694516425e-05, "loss": 0.4065, "step": 6091 }, { "epoch": 1.0569049271339348, "grad_norm": 2.857333183288574, "learning_rate": 2.554723132445021e-05, "loss": 0.6019, "step": 6092 }, { "epoch": 1.0570784177654406, "grad_norm": 0.8212158679962158, "learning_rate": 2.5540685059973514e-05, "loss": 0.5443, "step": 6093 }, { "epoch": 1.0572519083969465, "grad_norm": 0.8178073763847351, "learning_rate": 2.5534138152493863e-05, "loss": 0.5602, "step": 6094 }, { "epoch": 1.0574253990284526, "grad_norm": 0.9276249408721924, "learning_rate": 2.5527590602771026e-05, "loss": 0.4221, "step": 6095 }, { "epoch": 1.0575988896599584, "grad_norm": 0.6286492347717285, "learning_rate": 2.5521042411564866e-05, "loss": 0.498, "step": 6096 }, { "epoch": 1.0577723802914643, "grad_norm": 0.7513982653617859, "learning_rate": 2.551449357963529e-05, "loss": 0.3982, "step": 6097 }, { "epoch": 1.05794587092297, "grad_norm": 0.6628233790397644, "learning_rate": 2.5507944107742314e-05, "loss": 0.5955, "step": 6098 }, { "epoch": 1.0581193615544762, "grad_norm": 1.5238407850265503, "learning_rate": 2.550139399664601e-05, "loss": 0.3729, "step": 6099 }, { "epoch": 1.058292852185982, "grad_norm": 0.517874538898468, "learning_rate": 2.549484324710652e-05, "loss": 0.5261, "step": 6100 }, { "epoch": 1.0584663428174879, "grad_norm": 0.6734583377838135, "learning_rate": 2.5488291859884067e-05, "loss": 0.402, "step": 6101 }, { "epoch": 1.0586398334489937, "grad_norm": 0.9803544878959656, "learning_rate": 2.548173983573895e-05, "loss": 0.4781, "step": 6102 }, { "epoch": 1.0588133240804996, "grad_norm": 0.7337921857833862, "learning_rate": 2.5475187175431532e-05, "loss": 0.4492, "step": 6103 }, { "epoch": 1.0589868147120056, "grad_norm": 0.8741667866706848, "learning_rate": 2.5468633879722272e-05, "loss": 0.4561, "step": 6104 }, { "epoch": 1.0591603053435115, "grad_norm": 0.7236998677253723, "learning_rate": 2.5462079949371665e-05, "loss": 0.6299, "step": 6105 }, { "epoch": 1.0593337959750173, "grad_norm": 0.7462474703788757, "learning_rate": 2.545552538514033e-05, "loss": 0.5455, "step": 6106 }, { "epoch": 1.0595072866065232, "grad_norm": 0.7738128304481506, "learning_rate": 2.5448970187788913e-05, "loss": 0.5048, "step": 6107 }, { "epoch": 1.0596807772380292, "grad_norm": 0.6994773149490356, "learning_rate": 2.5442414358078148e-05, "loss": 0.5266, "step": 6108 }, { "epoch": 1.059854267869535, "grad_norm": 0.85459965467453, "learning_rate": 2.5435857896768862e-05, "loss": 0.421, "step": 6109 }, { "epoch": 1.060027758501041, "grad_norm": 0.7872832417488098, "learning_rate": 2.5429300804621934e-05, "loss": 0.483, "step": 6110 }, { "epoch": 1.0602012491325468, "grad_norm": 0.6063489317893982, "learning_rate": 2.542274308239832e-05, "loss": 0.5454, "step": 6111 }, { "epoch": 1.0603747397640528, "grad_norm": 0.9173235893249512, "learning_rate": 2.541618473085905e-05, "loss": 0.5068, "step": 6112 }, { "epoch": 1.0605482303955587, "grad_norm": 0.8748695850372314, "learning_rate": 2.540962575076523e-05, "loss": 0.5232, "step": 6113 }, { "epoch": 1.0607217210270645, "grad_norm": 0.7439556121826172, "learning_rate": 2.5403066142878047e-05, "loss": 0.5273, "step": 6114 }, { "epoch": 1.0608952116585704, "grad_norm": 0.6065833568572998, "learning_rate": 2.5396505907958736e-05, "loss": 0.6476, "step": 6115 }, { "epoch": 1.0610687022900764, "grad_norm": 0.7149546146392822, "learning_rate": 2.538994504676862e-05, "loss": 0.519, "step": 6116 }, { "epoch": 1.0612421929215823, "grad_norm": 1.0197675228118896, "learning_rate": 2.5383383560069113e-05, "loss": 0.4003, "step": 6117 }, { "epoch": 1.0614156835530881, "grad_norm": 0.8125647902488708, "learning_rate": 2.537682144862166e-05, "loss": 0.6007, "step": 6118 }, { "epoch": 1.061589174184594, "grad_norm": 0.702014148235321, "learning_rate": 2.537025871318782e-05, "loss": 0.5608, "step": 6119 }, { "epoch": 1.0617626648160998, "grad_norm": 0.9729758501052856, "learning_rate": 2.53636953545292e-05, "loss": 0.3749, "step": 6120 }, { "epoch": 1.061936155447606, "grad_norm": 0.6251813173294067, "learning_rate": 2.5357131373407478e-05, "loss": 0.5125, "step": 6121 }, { "epoch": 1.0621096460791117, "grad_norm": 0.6916963458061218, "learning_rate": 2.5350566770584423e-05, "loss": 0.4899, "step": 6122 }, { "epoch": 1.0622831367106176, "grad_norm": 0.8575353622436523, "learning_rate": 2.534400154682185e-05, "loss": 0.5601, "step": 6123 }, { "epoch": 1.0624566273421234, "grad_norm": 0.8720142245292664, "learning_rate": 2.5337435702881683e-05, "loss": 0.425, "step": 6124 }, { "epoch": 1.0626301179736295, "grad_norm": 0.7958077192306519, "learning_rate": 2.5330869239525874e-05, "loss": 0.5974, "step": 6125 }, { "epoch": 1.0628036086051353, "grad_norm": 0.8834172487258911, "learning_rate": 2.5324302157516486e-05, "loss": 0.3673, "step": 6126 }, { "epoch": 1.0629770992366412, "grad_norm": 0.8853662610054016, "learning_rate": 2.531773445761562e-05, "loss": 0.3999, "step": 6127 }, { "epoch": 1.063150589868147, "grad_norm": 1.272106647491455, "learning_rate": 2.531116614058548e-05, "loss": 0.657, "step": 6128 }, { "epoch": 1.063324080499653, "grad_norm": 0.8048542141914368, "learning_rate": 2.5304597207188318e-05, "loss": 0.5995, "step": 6129 }, { "epoch": 1.063497571131159, "grad_norm": 0.7750091552734375, "learning_rate": 2.5298027658186472e-05, "loss": 0.4662, "step": 6130 }, { "epoch": 1.0636710617626648, "grad_norm": 1.1804490089416504, "learning_rate": 2.529145749434234e-05, "loss": 0.4143, "step": 6131 }, { "epoch": 1.0638445523941706, "grad_norm": 1.0123077630996704, "learning_rate": 2.52848867164184e-05, "loss": 0.5084, "step": 6132 }, { "epoch": 1.0640180430256767, "grad_norm": 0.7592995762825012, "learning_rate": 2.52783153251772e-05, "loss": 0.5007, "step": 6133 }, { "epoch": 1.0641915336571826, "grad_norm": 0.8198844790458679, "learning_rate": 2.5271743321381354e-05, "loss": 0.4343, "step": 6134 }, { "epoch": 1.0643650242886884, "grad_norm": 0.7294592261314392, "learning_rate": 2.5265170705793555e-05, "loss": 0.4335, "step": 6135 }, { "epoch": 1.0645385149201942, "grad_norm": 0.7923845648765564, "learning_rate": 2.525859747917656e-05, "loss": 0.5364, "step": 6136 }, { "epoch": 1.0647120055517, "grad_norm": 0.8322098851203918, "learning_rate": 2.52520236422932e-05, "loss": 0.6079, "step": 6137 }, { "epoch": 1.0648854961832062, "grad_norm": 0.8434830904006958, "learning_rate": 2.524544919590638e-05, "loss": 0.4244, "step": 6138 }, { "epoch": 1.065058986814712, "grad_norm": 0.5925361514091492, "learning_rate": 2.5238874140779057e-05, "loss": 0.6212, "step": 6139 }, { "epoch": 1.0652324774462179, "grad_norm": 0.8045483231544495, "learning_rate": 2.5232298477674297e-05, "loss": 0.554, "step": 6140 }, { "epoch": 1.065405968077724, "grad_norm": 0.843867301940918, "learning_rate": 2.5225722207355202e-05, "loss": 0.4384, "step": 6141 }, { "epoch": 1.0655794587092298, "grad_norm": 0.8407973051071167, "learning_rate": 2.5219145330584945e-05, "loss": 0.4971, "step": 6142 }, { "epoch": 1.0657529493407356, "grad_norm": 0.8980678915977478, "learning_rate": 2.5212567848126802e-05, "loss": 0.4048, "step": 6143 }, { "epoch": 1.0659264399722415, "grad_norm": 1.1637353897094727, "learning_rate": 2.5205989760744084e-05, "loss": 0.5775, "step": 6144 }, { "epoch": 1.0660999306037473, "grad_norm": 0.9472494721412659, "learning_rate": 2.519941106920018e-05, "loss": 0.5118, "step": 6145 }, { "epoch": 1.0662734212352534, "grad_norm": 0.69376540184021, "learning_rate": 2.5192831774258575e-05, "loss": 0.5542, "step": 6146 }, { "epoch": 1.0664469118667592, "grad_norm": 0.6257830262184143, "learning_rate": 2.5186251876682782e-05, "loss": 0.4934, "step": 6147 }, { "epoch": 1.066620402498265, "grad_norm": 0.7980381846427917, "learning_rate": 2.5179671377236422e-05, "loss": 0.5593, "step": 6148 }, { "epoch": 1.066793893129771, "grad_norm": 0.8958628177642822, "learning_rate": 2.5173090276683157e-05, "loss": 0.4069, "step": 6149 }, { "epoch": 1.066967383761277, "grad_norm": 0.6968737840652466, "learning_rate": 2.516650857578674e-05, "loss": 0.5197, "step": 6150 }, { "epoch": 1.0671408743927828, "grad_norm": 0.893977165222168, "learning_rate": 2.515992627531098e-05, "loss": 0.4059, "step": 6151 }, { "epoch": 1.0673143650242887, "grad_norm": 1.103964924812317, "learning_rate": 2.515334337601977e-05, "loss": 0.4072, "step": 6152 }, { "epoch": 1.0674878556557945, "grad_norm": 0.8834951519966125, "learning_rate": 2.514675987867705e-05, "loss": 0.5861, "step": 6153 }, { "epoch": 1.0676613462873006, "grad_norm": 0.5498542189598083, "learning_rate": 2.5140175784046858e-05, "loss": 0.6534, "step": 6154 }, { "epoch": 1.0678348369188064, "grad_norm": 0.9770853519439697, "learning_rate": 2.5133591092893265e-05, "loss": 0.5331, "step": 6155 }, { "epoch": 1.0680083275503123, "grad_norm": 0.7064255475997925, "learning_rate": 2.512700580598045e-05, "loss": 0.516, "step": 6156 }, { "epoch": 1.0681818181818181, "grad_norm": 0.8084623217582703, "learning_rate": 2.512041992407264e-05, "loss": 0.4167, "step": 6157 }, { "epoch": 1.0683553088133242, "grad_norm": 1.165953516960144, "learning_rate": 2.5113833447934126e-05, "loss": 0.3626, "step": 6158 }, { "epoch": 1.06852879944483, "grad_norm": 1.1164823770523071, "learning_rate": 2.5107246378329287e-05, "loss": 0.5253, "step": 6159 }, { "epoch": 1.0687022900763359, "grad_norm": 0.7830794453620911, "learning_rate": 2.510065871602255e-05, "loss": 0.4572, "step": 6160 }, { "epoch": 1.0688757807078417, "grad_norm": 1.0383309125900269, "learning_rate": 2.5094070461778424e-05, "loss": 0.4138, "step": 6161 }, { "epoch": 1.0690492713393476, "grad_norm": 0.8762059807777405, "learning_rate": 2.5087481616361493e-05, "loss": 0.4138, "step": 6162 }, { "epoch": 1.0692227619708536, "grad_norm": 0.7487605214118958, "learning_rate": 2.5080892180536386e-05, "loss": 0.4791, "step": 6163 }, { "epoch": 1.0693962526023595, "grad_norm": 0.8426170349121094, "learning_rate": 2.5074302155067823e-05, "loss": 0.5573, "step": 6164 }, { "epoch": 1.0695697432338653, "grad_norm": 0.9255799651145935, "learning_rate": 2.506771154072058e-05, "loss": 0.5547, "step": 6165 }, { "epoch": 1.0697432338653712, "grad_norm": 0.8196790218353271, "learning_rate": 2.5061120338259512e-05, "loss": 0.5496, "step": 6166 }, { "epoch": 1.0699167244968772, "grad_norm": 0.6261298060417175, "learning_rate": 2.505452854844953e-05, "loss": 0.6918, "step": 6167 }, { "epoch": 1.070090215128383, "grad_norm": 1.5703450441360474, "learning_rate": 2.5047936172055613e-05, "loss": 0.3839, "step": 6168 }, { "epoch": 1.070263705759889, "grad_norm": 0.6691255569458008, "learning_rate": 2.504134320984283e-05, "loss": 0.5, "step": 6169 }, { "epoch": 1.0704371963913948, "grad_norm": 1.0396575927734375, "learning_rate": 2.5034749662576293e-05, "loss": 0.3956, "step": 6170 }, { "epoch": 1.0706106870229009, "grad_norm": 0.9529989957809448, "learning_rate": 2.5028155531021186e-05, "loss": 0.4274, "step": 6171 }, { "epoch": 1.0707841776544067, "grad_norm": 1.0187219381332397, "learning_rate": 2.5021560815942777e-05, "loss": 0.3755, "step": 6172 }, { "epoch": 1.0709576682859125, "grad_norm": 0.7338615655899048, "learning_rate": 2.5014965518106372e-05, "loss": 0.5005, "step": 6173 }, { "epoch": 1.0711311589174184, "grad_norm": 1.0722684860229492, "learning_rate": 2.5008369638277382e-05, "loss": 0.4725, "step": 6174 }, { "epoch": 1.0713046495489245, "grad_norm": 0.5729461908340454, "learning_rate": 2.500177317722126e-05, "loss": 0.62, "step": 6175 }, { "epoch": 1.0714781401804303, "grad_norm": 1.2102477550506592, "learning_rate": 2.4995176135703533e-05, "loss": 0.5601, "step": 6176 }, { "epoch": 1.0716516308119362, "grad_norm": 0.7282514572143555, "learning_rate": 2.4988578514489797e-05, "loss": 0.408, "step": 6177 }, { "epoch": 1.071825121443442, "grad_norm": 0.8264929056167603, "learning_rate": 2.498198031434571e-05, "loss": 0.5389, "step": 6178 }, { "epoch": 1.0719986120749478, "grad_norm": 0.5981985926628113, "learning_rate": 2.4975381536037e-05, "loss": 0.5933, "step": 6179 }, { "epoch": 1.072172102706454, "grad_norm": 0.8617196083068848, "learning_rate": 2.496878218032947e-05, "loss": 0.4402, "step": 6180 }, { "epoch": 1.0723455933379598, "grad_norm": 1.0304713249206543, "learning_rate": 2.4962182247988974e-05, "loss": 0.5398, "step": 6181 }, { "epoch": 1.0725190839694656, "grad_norm": 0.7486497163772583, "learning_rate": 2.495558173978145e-05, "loss": 0.5283, "step": 6182 }, { "epoch": 1.0726925746009714, "grad_norm": 0.7354799509048462, "learning_rate": 2.494898065647289e-05, "loss": 0.5323, "step": 6183 }, { "epoch": 1.0728660652324775, "grad_norm": 0.7985234260559082, "learning_rate": 2.494237899882935e-05, "loss": 0.4429, "step": 6184 }, { "epoch": 1.0730395558639834, "grad_norm": 1.1000069379806519, "learning_rate": 2.4935776767616978e-05, "loss": 0.3912, "step": 6185 }, { "epoch": 1.0732130464954892, "grad_norm": 0.9294086694717407, "learning_rate": 2.4929173963601958e-05, "loss": 0.4817, "step": 6186 }, { "epoch": 1.073386537126995, "grad_norm": 0.6434595584869385, "learning_rate": 2.4922570587550552e-05, "loss": 0.5696, "step": 6187 }, { "epoch": 1.0735600277585011, "grad_norm": 0.7728206515312195, "learning_rate": 2.4915966640229098e-05, "loss": 0.5353, "step": 6188 }, { "epoch": 1.073733518390007, "grad_norm": 0.5889047980308533, "learning_rate": 2.4909362122403984e-05, "loss": 0.6283, "step": 6189 }, { "epoch": 1.0739070090215128, "grad_norm": 0.8491992354393005, "learning_rate": 2.4902757034841674e-05, "loss": 0.4575, "step": 6190 }, { "epoch": 1.0740804996530187, "grad_norm": 0.7175102233886719, "learning_rate": 2.4896151378308706e-05, "loss": 0.5492, "step": 6191 }, { "epoch": 1.0742539902845247, "grad_norm": 0.6997820138931274, "learning_rate": 2.4889545153571657e-05, "loss": 0.4293, "step": 6192 }, { "epoch": 1.0744274809160306, "grad_norm": 0.9219583868980408, "learning_rate": 2.48829383613972e-05, "loss": 0.6298, "step": 6193 }, { "epoch": 1.0746009715475364, "grad_norm": 0.6821068525314331, "learning_rate": 2.4876331002552055e-05, "loss": 0.5533, "step": 6194 }, { "epoch": 1.0747744621790423, "grad_norm": 0.7255200147628784, "learning_rate": 2.486972307780301e-05, "loss": 0.4609, "step": 6195 }, { "epoch": 1.0749479528105481, "grad_norm": 0.6879765391349792, "learning_rate": 2.4863114587916933e-05, "loss": 0.3905, "step": 6196 }, { "epoch": 1.0751214434420542, "grad_norm": 0.6694192886352539, "learning_rate": 2.485650553366074e-05, "loss": 0.6221, "step": 6197 }, { "epoch": 1.07529493407356, "grad_norm": 0.8467466235160828, "learning_rate": 2.484989591580142e-05, "loss": 0.5143, "step": 6198 }, { "epoch": 1.0754684247050659, "grad_norm": 0.7441421747207642, "learning_rate": 2.484328573510603e-05, "loss": 0.4595, "step": 6199 }, { "epoch": 1.075641915336572, "grad_norm": 1.1482595205307007, "learning_rate": 2.4836674992341684e-05, "loss": 0.5396, "step": 6200 }, { "epoch": 1.0758154059680778, "grad_norm": 0.7369665503501892, "learning_rate": 2.483006368827557e-05, "loss": 0.4896, "step": 6201 }, { "epoch": 1.0759888965995836, "grad_norm": 0.897925078868866, "learning_rate": 2.4823451823674943e-05, "loss": 0.4695, "step": 6202 }, { "epoch": 1.0761623872310895, "grad_norm": 0.7122481465339661, "learning_rate": 2.4816839399307102e-05, "loss": 0.5557, "step": 6203 }, { "epoch": 1.0763358778625953, "grad_norm": 1.171522617340088, "learning_rate": 2.481022641593944e-05, "loss": 0.3496, "step": 6204 }, { "epoch": 1.0765093684941014, "grad_norm": 0.8862089514732361, "learning_rate": 2.48036128743394e-05, "loss": 0.4128, "step": 6205 }, { "epoch": 1.0766828591256072, "grad_norm": 0.7411527037620544, "learning_rate": 2.4796998775274482e-05, "loss": 0.4863, "step": 6206 }, { "epoch": 1.076856349757113, "grad_norm": 0.9385290145874023, "learning_rate": 2.4790384119512275e-05, "loss": 0.4824, "step": 6207 }, { "epoch": 1.077029840388619, "grad_norm": 0.7270717024803162, "learning_rate": 2.4783768907820403e-05, "loss": 0.5505, "step": 6208 }, { "epoch": 1.077203331020125, "grad_norm": 2.0359649658203125, "learning_rate": 2.4777153140966583e-05, "loss": 0.4534, "step": 6209 }, { "epoch": 1.0773768216516308, "grad_norm": 0.7701866626739502, "learning_rate": 2.4770536819718562e-05, "loss": 0.3992, "step": 6210 }, { "epoch": 1.0775503122831367, "grad_norm": 0.6886849999427795, "learning_rate": 2.476391994484419e-05, "loss": 0.6028, "step": 6211 }, { "epoch": 1.0777238029146425, "grad_norm": 2.4191465377807617, "learning_rate": 2.475730251711136e-05, "loss": 0.4359, "step": 6212 }, { "epoch": 1.0778972935461486, "grad_norm": 0.8629088997840881, "learning_rate": 2.4750684537288024e-05, "loss": 0.582, "step": 6213 }, { "epoch": 1.0780707841776545, "grad_norm": 0.7598599791526794, "learning_rate": 2.4744066006142218e-05, "loss": 0.4397, "step": 6214 }, { "epoch": 1.0782442748091603, "grad_norm": 0.9446561932563782, "learning_rate": 2.4737446924442025e-05, "loss": 0.4485, "step": 6215 }, { "epoch": 1.0784177654406661, "grad_norm": 0.7072758674621582, "learning_rate": 2.4730827292955592e-05, "loss": 0.4698, "step": 6216 }, { "epoch": 1.0785912560721722, "grad_norm": 0.8763805627822876, "learning_rate": 2.472420711245114e-05, "loss": 0.4739, "step": 6217 }, { "epoch": 1.078764746703678, "grad_norm": 0.7658042311668396, "learning_rate": 2.4717586383696947e-05, "loss": 0.7263, "step": 6218 }, { "epoch": 1.078938237335184, "grad_norm": 0.678760826587677, "learning_rate": 2.4710965107461354e-05, "loss": 0.5195, "step": 6219 }, { "epoch": 1.0791117279666897, "grad_norm": 2.6144840717315674, "learning_rate": 2.470434328451278e-05, "loss": 0.5101, "step": 6220 }, { "epoch": 1.0792852185981956, "grad_norm": 0.6626024842262268, "learning_rate": 2.469772091561968e-05, "loss": 0.5975, "step": 6221 }, { "epoch": 1.0794587092297017, "grad_norm": 1.007093071937561, "learning_rate": 2.4691098001550588e-05, "loss": 0.5303, "step": 6222 }, { "epoch": 1.0796321998612075, "grad_norm": 0.8530629873275757, "learning_rate": 2.4684474543074116e-05, "loss": 0.5444, "step": 6223 }, { "epoch": 1.0798056904927134, "grad_norm": 0.6781681180000305, "learning_rate": 2.4677850540958906e-05, "loss": 0.53, "step": 6224 }, { "epoch": 1.0799791811242192, "grad_norm": 0.6077238321304321, "learning_rate": 2.46712259959737e-05, "loss": 0.5985, "step": 6225 }, { "epoch": 1.0801526717557253, "grad_norm": 2.4894297122955322, "learning_rate": 2.4664600908887272e-05, "loss": 0.6018, "step": 6226 }, { "epoch": 1.0803261623872311, "grad_norm": 0.8022773265838623, "learning_rate": 2.465797528046847e-05, "loss": 0.4695, "step": 6227 }, { "epoch": 1.080499653018737, "grad_norm": 0.6410969495773315, "learning_rate": 2.4651349111486212e-05, "loss": 0.4201, "step": 6228 }, { "epoch": 1.0806731436502428, "grad_norm": 0.9513944387435913, "learning_rate": 2.4644722402709467e-05, "loss": 0.6313, "step": 6229 }, { "epoch": 1.0808466342817489, "grad_norm": 0.890672504901886, "learning_rate": 2.4638095154907276e-05, "loss": 0.4587, "step": 6230 }, { "epoch": 1.0810201249132547, "grad_norm": 1.2217200994491577, "learning_rate": 2.463146736884874e-05, "loss": 0.5266, "step": 6231 }, { "epoch": 1.0811936155447606, "grad_norm": 0.7415736317634583, "learning_rate": 2.4624839045303014e-05, "loss": 0.5656, "step": 6232 }, { "epoch": 1.0813671061762664, "grad_norm": 1.1575895547866821, "learning_rate": 2.4618210185039333e-05, "loss": 0.4934, "step": 6233 }, { "epoch": 1.0815405968077725, "grad_norm": 1.0706515312194824, "learning_rate": 2.4611580788826973e-05, "loss": 0.4232, "step": 6234 }, { "epoch": 1.0817140874392783, "grad_norm": 2.1923775672912598, "learning_rate": 2.4604950857435297e-05, "loss": 0.5995, "step": 6235 }, { "epoch": 1.0818875780707842, "grad_norm": 0.788577139377594, "learning_rate": 2.4598320391633702e-05, "loss": 0.5975, "step": 6236 }, { "epoch": 1.08206106870229, "grad_norm": 1.1212619543075562, "learning_rate": 2.4591689392191667e-05, "loss": 0.4346, "step": 6237 }, { "epoch": 1.0822345593337959, "grad_norm": 0.6778693795204163, "learning_rate": 2.4585057859878732e-05, "loss": 0.4893, "step": 6238 }, { "epoch": 1.082408049965302, "grad_norm": 0.6982914805412292, "learning_rate": 2.4578425795464487e-05, "loss": 0.5394, "step": 6239 }, { "epoch": 1.0825815405968078, "grad_norm": 0.6827831864356995, "learning_rate": 2.4571793199718593e-05, "loss": 0.53, "step": 6240 }, { "epoch": 1.0827550312283136, "grad_norm": 0.6521167159080505, "learning_rate": 2.4565160073410774e-05, "loss": 0.5121, "step": 6241 }, { "epoch": 1.0829285218598195, "grad_norm": 1.1949217319488525, "learning_rate": 2.4558526417310805e-05, "loss": 0.4103, "step": 6242 }, { "epoch": 1.0831020124913255, "grad_norm": 0.7318608164787292, "learning_rate": 2.4551892232188535e-05, "loss": 0.4344, "step": 6243 }, { "epoch": 1.0832755031228314, "grad_norm": 0.9500438570976257, "learning_rate": 2.4545257518813866e-05, "loss": 0.4824, "step": 6244 }, { "epoch": 1.0834489937543372, "grad_norm": 0.7673509120941162, "learning_rate": 2.453862227795677e-05, "loss": 0.5067, "step": 6245 }, { "epoch": 1.083622484385843, "grad_norm": 0.9132089614868164, "learning_rate": 2.4531986510387268e-05, "loss": 0.5088, "step": 6246 }, { "epoch": 1.0837959750173491, "grad_norm": 1.1912236213684082, "learning_rate": 2.452535021687545e-05, "loss": 0.5366, "step": 6247 }, { "epoch": 1.083969465648855, "grad_norm": 0.8719744086265564, "learning_rate": 2.4518713398191464e-05, "loss": 0.5454, "step": 6248 }, { "epoch": 1.0841429562803608, "grad_norm": 0.7627516388893127, "learning_rate": 2.4512076055105527e-05, "loss": 0.5272, "step": 6249 }, { "epoch": 1.0843164469118667, "grad_norm": 0.7611238956451416, "learning_rate": 2.45054381883879e-05, "loss": 0.4019, "step": 6250 }, { "epoch": 1.0844899375433728, "grad_norm": 0.6928776502609253, "learning_rate": 2.4498799798808926e-05, "loss": 0.5763, "step": 6251 }, { "epoch": 1.0846634281748786, "grad_norm": 1.986975908279419, "learning_rate": 2.4492160887138998e-05, "loss": 0.4725, "step": 6252 }, { "epoch": 1.0848369188063844, "grad_norm": 0.8200432658195496, "learning_rate": 2.4485521454148558e-05, "loss": 0.4753, "step": 6253 }, { "epoch": 1.0850104094378903, "grad_norm": 0.7308650612831116, "learning_rate": 2.447888150060813e-05, "loss": 0.577, "step": 6254 }, { "epoch": 1.0851839000693964, "grad_norm": 0.8015735149383545, "learning_rate": 2.4472241027288276e-05, "loss": 0.5227, "step": 6255 }, { "epoch": 1.0853573907009022, "grad_norm": 1.1953891515731812, "learning_rate": 2.4465600034959654e-05, "loss": 0.4601, "step": 6256 }, { "epoch": 1.085530881332408, "grad_norm": 0.8995559215545654, "learning_rate": 2.4458958524392937e-05, "loss": 0.4825, "step": 6257 }, { "epoch": 1.085704371963914, "grad_norm": 1.450422763824463, "learning_rate": 2.4452316496358885e-05, "loss": 0.4043, "step": 6258 }, { "epoch": 1.08587786259542, "grad_norm": 0.616809070110321, "learning_rate": 2.444567395162832e-05, "loss": 0.5177, "step": 6259 }, { "epoch": 1.0860513532269258, "grad_norm": 0.7927566766738892, "learning_rate": 2.443903089097211e-05, "loss": 0.4994, "step": 6260 }, { "epoch": 1.0862248438584317, "grad_norm": 0.7003375887870789, "learning_rate": 2.4432387315161194e-05, "loss": 0.5319, "step": 6261 }, { "epoch": 1.0863983344899375, "grad_norm": 0.7656951546669006, "learning_rate": 2.4425743224966567e-05, "loss": 0.5613, "step": 6262 }, { "epoch": 1.0865718251214433, "grad_norm": 0.8551862835884094, "learning_rate": 2.4419098621159275e-05, "loss": 0.5496, "step": 6263 }, { "epoch": 1.0867453157529494, "grad_norm": 0.6719685792922974, "learning_rate": 2.4412453504510447e-05, "loss": 0.3932, "step": 6264 }, { "epoch": 1.0869188063844553, "grad_norm": 0.7628917098045349, "learning_rate": 2.4405807875791246e-05, "loss": 0.5336, "step": 6265 }, { "epoch": 1.087092297015961, "grad_norm": 0.5696899890899658, "learning_rate": 2.43991617357729e-05, "loss": 0.477, "step": 6266 }, { "epoch": 1.087265787647467, "grad_norm": 1.0026742219924927, "learning_rate": 2.4392515085226722e-05, "loss": 0.4136, "step": 6267 }, { "epoch": 1.087439278278973, "grad_norm": 1.2803168296813965, "learning_rate": 2.4385867924924037e-05, "loss": 0.4993, "step": 6268 }, { "epoch": 1.0876127689104789, "grad_norm": 1.0770628452301025, "learning_rate": 2.4379220255636278e-05, "loss": 0.5979, "step": 6269 }, { "epoch": 1.0877862595419847, "grad_norm": 0.5608989596366882, "learning_rate": 2.43725720781349e-05, "loss": 0.5822, "step": 6270 }, { "epoch": 1.0879597501734906, "grad_norm": 3.370060920715332, "learning_rate": 2.4365923393191443e-05, "loss": 0.4653, "step": 6271 }, { "epoch": 1.0881332408049966, "grad_norm": 0.8479523658752441, "learning_rate": 2.4359274201577478e-05, "loss": 0.4254, "step": 6272 }, { "epoch": 1.0883067314365025, "grad_norm": 0.6120976209640503, "learning_rate": 2.4352624504064672e-05, "loss": 0.5345, "step": 6273 }, { "epoch": 1.0884802220680083, "grad_norm": 1.6839667558670044, "learning_rate": 2.4345974301424717e-05, "loss": 0.4978, "step": 6274 }, { "epoch": 1.0886537126995142, "grad_norm": 0.5557832717895508, "learning_rate": 2.433932359442938e-05, "loss": 0.5186, "step": 6275 }, { "epoch": 1.0888272033310202, "grad_norm": 0.9949855208396912, "learning_rate": 2.433267238385048e-05, "loss": 0.4625, "step": 6276 }, { "epoch": 1.089000693962526, "grad_norm": 0.7671464681625366, "learning_rate": 2.4326020670459912e-05, "loss": 0.4021, "step": 6277 }, { "epoch": 1.089174184594032, "grad_norm": 0.6967581510543823, "learning_rate": 2.4319368455029598e-05, "loss": 0.4733, "step": 6278 }, { "epoch": 1.0893476752255378, "grad_norm": 0.8932548761367798, "learning_rate": 2.4312715738331542e-05, "loss": 0.4131, "step": 6279 }, { "epoch": 1.0895211658570436, "grad_norm": 0.8050430417060852, "learning_rate": 2.43060625211378e-05, "loss": 0.4819, "step": 6280 }, { "epoch": 1.0896946564885497, "grad_norm": 0.7218684554100037, "learning_rate": 2.4299408804220485e-05, "loss": 0.4854, "step": 6281 }, { "epoch": 1.0898681471200555, "grad_norm": 1.1423853635787964, "learning_rate": 2.4292754588351768e-05, "loss": 0.4004, "step": 6282 }, { "epoch": 1.0900416377515614, "grad_norm": 0.8457189202308655, "learning_rate": 2.4286099874303876e-05, "loss": 0.495, "step": 6283 }, { "epoch": 1.0902151283830672, "grad_norm": 1.2640057802200317, "learning_rate": 2.42794446628491e-05, "loss": 0.5071, "step": 6284 }, { "epoch": 1.0903886190145733, "grad_norm": 1.1674327850341797, "learning_rate": 2.4272788954759793e-05, "loss": 0.4744, "step": 6285 }, { "epoch": 1.0905621096460791, "grad_norm": 0.755543053150177, "learning_rate": 2.426613275080834e-05, "loss": 0.4208, "step": 6286 }, { "epoch": 1.090735600277585, "grad_norm": 0.7371096611022949, "learning_rate": 2.4259476051767213e-05, "loss": 0.3998, "step": 6287 }, { "epoch": 1.0909090909090908, "grad_norm": 1.3511090278625488, "learning_rate": 2.4252818858408923e-05, "loss": 0.3707, "step": 6288 }, { "epoch": 1.091082581540597, "grad_norm": 0.7861598134040833, "learning_rate": 2.4246161171506054e-05, "loss": 0.5133, "step": 6289 }, { "epoch": 1.0912560721721027, "grad_norm": 1.5742945671081543, "learning_rate": 2.4239502991831233e-05, "loss": 0.3892, "step": 6290 }, { "epoch": 1.0914295628036086, "grad_norm": 0.6407701969146729, "learning_rate": 2.4232844320157146e-05, "loss": 0.572, "step": 6291 }, { "epoch": 1.0916030534351144, "grad_norm": 0.5764026045799255, "learning_rate": 2.4226185157256546e-05, "loss": 0.6453, "step": 6292 }, { "epoch": 1.0917765440666205, "grad_norm": 1.245748519897461, "learning_rate": 2.4219525503902234e-05, "loss": 0.3743, "step": 6293 }, { "epoch": 1.0919500346981263, "grad_norm": 0.7878282070159912, "learning_rate": 2.421286536086707e-05, "loss": 0.4815, "step": 6294 }, { "epoch": 1.0921235253296322, "grad_norm": 0.9979248046875, "learning_rate": 2.4206204728923974e-05, "loss": 0.4969, "step": 6295 }, { "epoch": 1.092297015961138, "grad_norm": 0.8535788655281067, "learning_rate": 2.4199543608845916e-05, "loss": 0.4816, "step": 6296 }, { "epoch": 1.0924705065926439, "grad_norm": 1.433529257774353, "learning_rate": 2.419288200140593e-05, "loss": 0.386, "step": 6297 }, { "epoch": 1.09264399722415, "grad_norm": 0.6729404926300049, "learning_rate": 2.4186219907377097e-05, "loss": 0.4941, "step": 6298 }, { "epoch": 1.0928174878556558, "grad_norm": 0.5830033421516418, "learning_rate": 2.4179557327532574e-05, "loss": 0.4778, "step": 6299 }, { "epoch": 1.0929909784871616, "grad_norm": 0.6071484684944153, "learning_rate": 2.4172894262645544e-05, "loss": 0.4766, "step": 6300 }, { "epoch": 1.0931644691186675, "grad_norm": 0.9821061491966248, "learning_rate": 2.4166230713489277e-05, "loss": 0.4204, "step": 6301 }, { "epoch": 1.0933379597501736, "grad_norm": 0.8359472155570984, "learning_rate": 2.4159566680837086e-05, "loss": 0.5692, "step": 6302 }, { "epoch": 1.0935114503816794, "grad_norm": 0.5361720323562622, "learning_rate": 2.415290216546233e-05, "loss": 0.6711, "step": 6303 }, { "epoch": 1.0936849410131853, "grad_norm": 0.7677605152130127, "learning_rate": 2.414623716813844e-05, "loss": 0.3818, "step": 6304 }, { "epoch": 1.093858431644691, "grad_norm": 0.8098879456520081, "learning_rate": 2.4139571689638893e-05, "loss": 0.4827, "step": 6305 }, { "epoch": 1.0940319222761972, "grad_norm": 0.8161999583244324, "learning_rate": 2.413290573073723e-05, "loss": 0.3973, "step": 6306 }, { "epoch": 1.094205412907703, "grad_norm": 0.6727495789527893, "learning_rate": 2.412623929220704e-05, "loss": 0.5421, "step": 6307 }, { "epoch": 1.0943789035392089, "grad_norm": 0.7811001539230347, "learning_rate": 2.4119572374821968e-05, "loss": 0.405, "step": 6308 }, { "epoch": 1.0945523941707147, "grad_norm": 0.706080973148346, "learning_rate": 2.411290497935573e-05, "loss": 0.5844, "step": 6309 }, { "epoch": 1.0947258848022208, "grad_norm": 0.9299414753913879, "learning_rate": 2.4106237106582072e-05, "loss": 0.4591, "step": 6310 }, { "epoch": 1.0948993754337266, "grad_norm": 2.2729573249816895, "learning_rate": 2.4099568757274812e-05, "loss": 0.3805, "step": 6311 }, { "epoch": 1.0950728660652325, "grad_norm": 0.7673837542533875, "learning_rate": 2.4092899932207824e-05, "loss": 0.4224, "step": 6312 }, { "epoch": 1.0952463566967383, "grad_norm": 0.6698319315910339, "learning_rate": 2.408623063215503e-05, "loss": 0.4814, "step": 6313 }, { "epoch": 1.0954198473282444, "grad_norm": 1.160279631614685, "learning_rate": 2.4079560857890405e-05, "loss": 0.5238, "step": 6314 }, { "epoch": 1.0955933379597502, "grad_norm": 0.9155153036117554, "learning_rate": 2.4072890610187997e-05, "loss": 0.4193, "step": 6315 }, { "epoch": 1.095766828591256, "grad_norm": 0.9269014596939087, "learning_rate": 2.406621988982188e-05, "loss": 0.4788, "step": 6316 }, { "epoch": 1.095940319222762, "grad_norm": 0.8309975862503052, "learning_rate": 2.405954869756621e-05, "loss": 0.5477, "step": 6317 }, { "epoch": 1.096113809854268, "grad_norm": 0.7591610550880432, "learning_rate": 2.405287703419518e-05, "loss": 0.4873, "step": 6318 }, { "epoch": 1.0962873004857738, "grad_norm": 1.0823493003845215, "learning_rate": 2.4046204900483052e-05, "loss": 0.5029, "step": 6319 }, { "epoch": 1.0964607911172797, "grad_norm": 0.8195936679840088, "learning_rate": 2.4039532297204125e-05, "loss": 0.3933, "step": 6320 }, { "epoch": 1.0966342817487855, "grad_norm": 0.8526312112808228, "learning_rate": 2.403285922513277e-05, "loss": 0.3937, "step": 6321 }, { "epoch": 1.0968077723802914, "grad_norm": 2.5832276344299316, "learning_rate": 2.4026185685043405e-05, "loss": 0.5116, "step": 6322 }, { "epoch": 1.0969812630117974, "grad_norm": 0.7093821167945862, "learning_rate": 2.40195116777105e-05, "loss": 0.6206, "step": 6323 }, { "epoch": 1.0971547536433033, "grad_norm": 0.8102952241897583, "learning_rate": 2.4012837203908582e-05, "loss": 0.5405, "step": 6324 }, { "epoch": 1.0973282442748091, "grad_norm": 0.992218554019928, "learning_rate": 2.4006162264412227e-05, "loss": 0.5253, "step": 6325 }, { "epoch": 1.097501734906315, "grad_norm": 0.6748207211494446, "learning_rate": 2.3999486859996073e-05, "loss": 0.4137, "step": 6326 }, { "epoch": 1.097675225537821, "grad_norm": 0.8782357573509216, "learning_rate": 2.3992810991434815e-05, "loss": 0.5492, "step": 6327 }, { "epoch": 1.0978487161693269, "grad_norm": 0.6080440282821655, "learning_rate": 2.3986134659503187e-05, "loss": 0.4718, "step": 6328 }, { "epoch": 1.0980222068008327, "grad_norm": 0.7995212078094482, "learning_rate": 2.397945786497598e-05, "loss": 0.4375, "step": 6329 }, { "epoch": 1.0981956974323386, "grad_norm": 1.1713985204696655, "learning_rate": 2.3972780608628057e-05, "loss": 0.5001, "step": 6330 }, { "epoch": 1.0983691880638446, "grad_norm": 0.8009462952613831, "learning_rate": 2.3966102891234314e-05, "loss": 0.5101, "step": 6331 }, { "epoch": 1.0985426786953505, "grad_norm": 0.7381086349487305, "learning_rate": 2.3959424713569708e-05, "loss": 0.5708, "step": 6332 }, { "epoch": 1.0987161693268563, "grad_norm": 0.7604125738143921, "learning_rate": 2.395274607640925e-05, "loss": 0.5179, "step": 6333 }, { "epoch": 1.0988896599583622, "grad_norm": 0.8270940184593201, "learning_rate": 2.3946066980528e-05, "loss": 0.546, "step": 6334 }, { "epoch": 1.0990631505898683, "grad_norm": 0.7609905004501343, "learning_rate": 2.393938742670109e-05, "loss": 0.4622, "step": 6335 }, { "epoch": 1.099236641221374, "grad_norm": 0.6999890208244324, "learning_rate": 2.3932707415703673e-05, "loss": 0.557, "step": 6336 }, { "epoch": 1.09941013185288, "grad_norm": 0.7146739363670349, "learning_rate": 2.3926026948310975e-05, "loss": 0.631, "step": 6337 }, { "epoch": 1.0995836224843858, "grad_norm": 0.9941717386245728, "learning_rate": 2.391934602529828e-05, "loss": 0.5343, "step": 6338 }, { "epoch": 1.0997571131158916, "grad_norm": 1.3625564575195312, "learning_rate": 2.3912664647440903e-05, "loss": 0.5497, "step": 6339 }, { "epoch": 1.0999306037473977, "grad_norm": 0.8814844489097595, "learning_rate": 2.3905982815514243e-05, "loss": 0.5535, "step": 6340 }, { "epoch": 1.1001040943789036, "grad_norm": 1.0550538301467896, "learning_rate": 2.3899300530293728e-05, "loss": 0.4424, "step": 6341 }, { "epoch": 1.1002775850104094, "grad_norm": 0.6645036935806274, "learning_rate": 2.3892617792554833e-05, "loss": 0.4606, "step": 6342 }, { "epoch": 1.1004510756419152, "grad_norm": 0.6874741911888123, "learning_rate": 2.3885934603073117e-05, "loss": 0.6188, "step": 6343 }, { "epoch": 1.1006245662734213, "grad_norm": 0.8363273739814758, "learning_rate": 2.3879250962624152e-05, "loss": 0.4849, "step": 6344 }, { "epoch": 1.1007980569049272, "grad_norm": 0.7517639994621277, "learning_rate": 2.38725668719836e-05, "loss": 0.4653, "step": 6345 }, { "epoch": 1.100971547536433, "grad_norm": 0.671183168888092, "learning_rate": 2.386588233192715e-05, "loss": 0.4808, "step": 6346 }, { "epoch": 1.1011450381679388, "grad_norm": 0.7739551067352295, "learning_rate": 2.3859197343230546e-05, "loss": 0.5072, "step": 6347 }, { "epoch": 1.101318528799445, "grad_norm": 0.5653104782104492, "learning_rate": 2.38525119066696e-05, "loss": 0.5321, "step": 6348 }, { "epoch": 1.1014920194309508, "grad_norm": 0.7437562942504883, "learning_rate": 2.3845826023020156e-05, "loss": 0.5099, "step": 6349 }, { "epoch": 1.1016655100624566, "grad_norm": 1.2061591148376465, "learning_rate": 2.3839139693058116e-05, "loss": 0.5231, "step": 6350 }, { "epoch": 1.1018390006939625, "grad_norm": 0.8140386343002319, "learning_rate": 2.3832452917559446e-05, "loss": 0.516, "step": 6351 }, { "epoch": 1.1020124913254685, "grad_norm": 0.7298310995101929, "learning_rate": 2.382576569730015e-05, "loss": 0.4113, "step": 6352 }, { "epoch": 1.1021859819569744, "grad_norm": 1.2516953945159912, "learning_rate": 2.3819078033056284e-05, "loss": 0.5157, "step": 6353 }, { "epoch": 1.1023594725884802, "grad_norm": 0.9569164514541626, "learning_rate": 2.3812389925603963e-05, "loss": 0.4233, "step": 6354 }, { "epoch": 1.102532963219986, "grad_norm": 0.7385008335113525, "learning_rate": 2.380570137571935e-05, "loss": 0.5309, "step": 6355 }, { "epoch": 1.102706453851492, "grad_norm": 0.854784369468689, "learning_rate": 2.3799012384178654e-05, "loss": 0.4039, "step": 6356 }, { "epoch": 1.102879944482998, "grad_norm": 0.8272437453269958, "learning_rate": 2.3792322951758152e-05, "loss": 0.4584, "step": 6357 }, { "epoch": 1.1030534351145038, "grad_norm": 0.6209151148796082, "learning_rate": 2.3785633079234144e-05, "loss": 0.5188, "step": 6358 }, { "epoch": 1.1032269257460097, "grad_norm": 0.6605640649795532, "learning_rate": 2.3778942767383012e-05, "loss": 0.6174, "step": 6359 }, { "epoch": 1.1034004163775155, "grad_norm": 0.8699027299880981, "learning_rate": 2.377225201698117e-05, "loss": 0.4495, "step": 6360 }, { "epoch": 1.1035739070090216, "grad_norm": 1.1163221597671509, "learning_rate": 2.3765560828805075e-05, "loss": 0.5166, "step": 6361 }, { "epoch": 1.1037473976405274, "grad_norm": 0.8650309443473816, "learning_rate": 2.3758869203631266e-05, "loss": 0.418, "step": 6362 }, { "epoch": 1.1039208882720333, "grad_norm": 0.6874659061431885, "learning_rate": 2.3752177142236303e-05, "loss": 0.453, "step": 6363 }, { "epoch": 1.1040943789035391, "grad_norm": 0.7917651534080505, "learning_rate": 2.3745484645396816e-05, "loss": 0.3976, "step": 6364 }, { "epoch": 1.1042678695350452, "grad_norm": 0.704910397529602, "learning_rate": 2.3738791713889467e-05, "loss": 0.5693, "step": 6365 }, { "epoch": 1.104441360166551, "grad_norm": 1.8388170003890991, "learning_rate": 2.373209834849098e-05, "loss": 0.5923, "step": 6366 }, { "epoch": 1.1046148507980569, "grad_norm": 0.5804650783538818, "learning_rate": 2.3725404549978143e-05, "loss": 0.5958, "step": 6367 }, { "epoch": 1.1047883414295627, "grad_norm": 0.6623818278312683, "learning_rate": 2.3718710319127755e-05, "loss": 0.4172, "step": 6368 }, { "epoch": 1.1049618320610688, "grad_norm": 1.1011728048324585, "learning_rate": 2.3712015656716703e-05, "loss": 0.48, "step": 6369 }, { "epoch": 1.1051353226925746, "grad_norm": 1.700384259223938, "learning_rate": 2.370532056352191e-05, "loss": 0.37, "step": 6370 }, { "epoch": 1.1053088133240805, "grad_norm": 0.6900466680526733, "learning_rate": 2.3698625040320346e-05, "loss": 0.4216, "step": 6371 }, { "epoch": 1.1054823039555863, "grad_norm": 1.1002362966537476, "learning_rate": 2.3691929087889042e-05, "loss": 0.5177, "step": 6372 }, { "epoch": 1.1056557945870924, "grad_norm": 0.7985047698020935, "learning_rate": 2.3685232707005064e-05, "loss": 0.4059, "step": 6373 }, { "epoch": 1.1058292852185982, "grad_norm": 0.8414343595504761, "learning_rate": 2.3678535898445533e-05, "loss": 0.4829, "step": 6374 }, { "epoch": 1.106002775850104, "grad_norm": 0.9657633304595947, "learning_rate": 2.367183866298763e-05, "loss": 0.5266, "step": 6375 }, { "epoch": 1.10617626648161, "grad_norm": 0.7242559194564819, "learning_rate": 2.3665141001408562e-05, "loss": 0.5453, "step": 6376 }, { "epoch": 1.106349757113116, "grad_norm": 0.6628023982048035, "learning_rate": 2.365844291448562e-05, "loss": 0.5432, "step": 6377 }, { "epoch": 1.1065232477446219, "grad_norm": 0.8673304915428162, "learning_rate": 2.3651744402996114e-05, "loss": 0.4769, "step": 6378 }, { "epoch": 1.1066967383761277, "grad_norm": 0.6545118093490601, "learning_rate": 2.3645045467717405e-05, "loss": 0.5405, "step": 6379 }, { "epoch": 1.1068702290076335, "grad_norm": 0.8671982884407043, "learning_rate": 2.3638346109426932e-05, "loss": 0.4698, "step": 6380 }, { "epoch": 1.1070437196391394, "grad_norm": 0.8583928942680359, "learning_rate": 2.3631646328902153e-05, "loss": 0.3796, "step": 6381 }, { "epoch": 1.1072172102706455, "grad_norm": 1.0254278182983398, "learning_rate": 2.362494612692058e-05, "loss": 0.3856, "step": 6382 }, { "epoch": 1.1073907009021513, "grad_norm": 0.7909040451049805, "learning_rate": 2.361824550425979e-05, "loss": 0.3735, "step": 6383 }, { "epoch": 1.1075641915336571, "grad_norm": 0.7472656965255737, "learning_rate": 2.361154446169739e-05, "loss": 0.385, "step": 6384 }, { "epoch": 1.107737682165163, "grad_norm": 0.8990941643714905, "learning_rate": 2.360484300001105e-05, "loss": 0.5165, "step": 6385 }, { "epoch": 1.107911172796669, "grad_norm": 0.8338322639465332, "learning_rate": 2.3598141119978482e-05, "loss": 0.4694, "step": 6386 }, { "epoch": 1.108084663428175, "grad_norm": 0.9645411968231201, "learning_rate": 2.3591438822377434e-05, "loss": 0.4554, "step": 6387 }, { "epoch": 1.1082581540596808, "grad_norm": 1.1773382425308228, "learning_rate": 2.3584736107985737e-05, "loss": 0.5756, "step": 6388 }, { "epoch": 1.1084316446911866, "grad_norm": 0.9414104223251343, "learning_rate": 2.3578032977581234e-05, "loss": 0.4437, "step": 6389 }, { "epoch": 1.1086051353226927, "grad_norm": 0.7613778114318848, "learning_rate": 2.357132943194183e-05, "loss": 0.3824, "step": 6390 }, { "epoch": 1.1087786259541985, "grad_norm": 0.6773701906204224, "learning_rate": 2.356462547184549e-05, "loss": 0.5557, "step": 6391 }, { "epoch": 1.1089521165857044, "grad_norm": 0.7761004567146301, "learning_rate": 2.35579210980702e-05, "loss": 0.502, "step": 6392 }, { "epoch": 1.1091256072172102, "grad_norm": 0.8524529933929443, "learning_rate": 2.355121631139403e-05, "loss": 0.4213, "step": 6393 }, { "epoch": 1.1092990978487163, "grad_norm": 1.0654125213623047, "learning_rate": 2.3544511112595068e-05, "loss": 0.3893, "step": 6394 }, { "epoch": 1.1094725884802221, "grad_norm": 0.7196467518806458, "learning_rate": 2.353780550245146e-05, "loss": 0.4558, "step": 6395 }, { "epoch": 1.109646079111728, "grad_norm": 0.6411495208740234, "learning_rate": 2.3531099481741403e-05, "loss": 0.4595, "step": 6396 }, { "epoch": 1.1098195697432338, "grad_norm": 0.68552166223526, "learning_rate": 2.352439305124313e-05, "loss": 0.4657, "step": 6397 }, { "epoch": 1.1099930603747397, "grad_norm": 0.7618826031684875, "learning_rate": 2.351768621173495e-05, "loss": 0.4104, "step": 6398 }, { "epoch": 1.1101665510062457, "grad_norm": 0.6081966161727905, "learning_rate": 2.3510978963995176e-05, "loss": 0.6011, "step": 6399 }, { "epoch": 1.1103400416377516, "grad_norm": 0.8520535826683044, "learning_rate": 2.3504271308802204e-05, "loss": 0.438, "step": 6400 }, { "epoch": 1.1105135322692574, "grad_norm": 0.724348783493042, "learning_rate": 2.3497563246934464e-05, "loss": 0.6761, "step": 6401 }, { "epoch": 1.1106870229007633, "grad_norm": 0.5886250138282776, "learning_rate": 2.3490854779170436e-05, "loss": 0.6243, "step": 6402 }, { "epoch": 1.1108605135322693, "grad_norm": 0.6331730484962463, "learning_rate": 2.348414590628864e-05, "loss": 0.5045, "step": 6403 }, { "epoch": 1.1110340041637752, "grad_norm": 0.7192178964614868, "learning_rate": 2.347743662906765e-05, "loss": 0.3782, "step": 6404 }, { "epoch": 1.111207494795281, "grad_norm": 1.171298623085022, "learning_rate": 2.347072694828609e-05, "loss": 0.3865, "step": 6405 }, { "epoch": 1.1113809854267869, "grad_norm": 0.6927591562271118, "learning_rate": 2.3464016864722625e-05, "loss": 0.4514, "step": 6406 }, { "epoch": 1.111554476058293, "grad_norm": 1.1788617372512817, "learning_rate": 2.3457306379155965e-05, "loss": 0.5404, "step": 6407 }, { "epoch": 1.1117279666897988, "grad_norm": 2.0231029987335205, "learning_rate": 2.345059549236487e-05, "loss": 0.3983, "step": 6408 }, { "epoch": 1.1119014573213046, "grad_norm": 0.7832633852958679, "learning_rate": 2.3443884205128148e-05, "loss": 0.4203, "step": 6409 }, { "epoch": 1.1120749479528105, "grad_norm": 0.8718054890632629, "learning_rate": 2.343717251822465e-05, "loss": 0.4708, "step": 6410 }, { "epoch": 1.1122484385843165, "grad_norm": 0.8739398717880249, "learning_rate": 2.343046043243328e-05, "loss": 0.506, "step": 6411 }, { "epoch": 1.1124219292158224, "grad_norm": 0.8444788455963135, "learning_rate": 2.3423747948532976e-05, "loss": 0.5104, "step": 6412 }, { "epoch": 1.1125954198473282, "grad_norm": 0.9693549275398254, "learning_rate": 2.3417035067302733e-05, "loss": 0.473, "step": 6413 }, { "epoch": 1.112768910478834, "grad_norm": 0.640600323677063, "learning_rate": 2.341032178952159e-05, "loss": 0.4905, "step": 6414 }, { "epoch": 1.11294240111034, "grad_norm": 0.6085249781608582, "learning_rate": 2.340360811596863e-05, "loss": 0.5171, "step": 6415 }, { "epoch": 1.113115891741846, "grad_norm": 0.8247771263122559, "learning_rate": 2.339689404742298e-05, "loss": 0.4896, "step": 6416 }, { "epoch": 1.1132893823733518, "grad_norm": 0.7656151652336121, "learning_rate": 2.3390179584663815e-05, "loss": 0.5387, "step": 6417 }, { "epoch": 1.1134628730048577, "grad_norm": 0.899502158164978, "learning_rate": 2.338346472847037e-05, "loss": 0.4705, "step": 6418 }, { "epoch": 1.1136363636363635, "grad_norm": 0.7397540807723999, "learning_rate": 2.3376749479621886e-05, "loss": 0.4369, "step": 6419 }, { "epoch": 1.1138098542678696, "grad_norm": 0.9227269291877747, "learning_rate": 2.3370033838897702e-05, "loss": 0.3817, "step": 6420 }, { "epoch": 1.1139833448993754, "grad_norm": 0.7966392636299133, "learning_rate": 2.3363317807077157e-05, "loss": 0.4976, "step": 6421 }, { "epoch": 1.1141568355308813, "grad_norm": 0.6516497731208801, "learning_rate": 2.3356601384939665e-05, "loss": 0.5044, "step": 6422 }, { "epoch": 1.1143303261623871, "grad_norm": 0.8058405518531799, "learning_rate": 2.3349884573264673e-05, "loss": 0.4901, "step": 6423 }, { "epoch": 1.1145038167938932, "grad_norm": 0.6853688955307007, "learning_rate": 2.3343167372831665e-05, "loss": 0.428, "step": 6424 }, { "epoch": 1.114677307425399, "grad_norm": 0.7880139946937561, "learning_rate": 2.3336449784420197e-05, "loss": 0.5093, "step": 6425 }, { "epoch": 1.114850798056905, "grad_norm": 0.7561163306236267, "learning_rate": 2.3329731808809836e-05, "loss": 0.4244, "step": 6426 }, { "epoch": 1.1150242886884107, "grad_norm": 1.0141123533248901, "learning_rate": 2.3323013446780226e-05, "loss": 0.4185, "step": 6427 }, { "epoch": 1.1151977793199168, "grad_norm": 0.9526662826538086, "learning_rate": 2.331629469911103e-05, "loss": 0.4655, "step": 6428 }, { "epoch": 1.1153712699514227, "grad_norm": 0.9247124791145325, "learning_rate": 2.3309575566581968e-05, "loss": 0.4409, "step": 6429 }, { "epoch": 1.1155447605829285, "grad_norm": 0.7481706738471985, "learning_rate": 2.330285604997281e-05, "loss": 0.5492, "step": 6430 }, { "epoch": 1.1157182512144344, "grad_norm": 1.311652660369873, "learning_rate": 2.329613615006336e-05, "loss": 0.4746, "step": 6431 }, { "epoch": 1.1158917418459404, "grad_norm": 0.8068498969078064, "learning_rate": 2.328941586763346e-05, "loss": 0.4562, "step": 6432 }, { "epoch": 1.1160652324774463, "grad_norm": 0.7765712141990662, "learning_rate": 2.3282695203463022e-05, "loss": 0.5546, "step": 6433 }, { "epoch": 1.1162387231089521, "grad_norm": 0.5867044925689697, "learning_rate": 2.3275974158331977e-05, "loss": 0.5376, "step": 6434 }, { "epoch": 1.116412213740458, "grad_norm": 0.7233651876449585, "learning_rate": 2.326925273302032e-05, "loss": 0.5057, "step": 6435 }, { "epoch": 1.116585704371964, "grad_norm": 0.7924695014953613, "learning_rate": 2.3262530928308068e-05, "loss": 0.4043, "step": 6436 }, { "epoch": 1.1167591950034699, "grad_norm": 0.843597948551178, "learning_rate": 2.32558087449753e-05, "loss": 0.5204, "step": 6437 }, { "epoch": 1.1169326856349757, "grad_norm": 0.6833871603012085, "learning_rate": 2.3249086183802137e-05, "loss": 0.4555, "step": 6438 }, { "epoch": 1.1171061762664816, "grad_norm": 0.8030936121940613, "learning_rate": 2.324236324556873e-05, "loss": 0.5671, "step": 6439 }, { "epoch": 1.1172796668979874, "grad_norm": 1.6954410076141357, "learning_rate": 2.32356399310553e-05, "loss": 0.5236, "step": 6440 }, { "epoch": 1.1174531575294935, "grad_norm": 0.8243252635002136, "learning_rate": 2.3228916241042078e-05, "loss": 0.4556, "step": 6441 }, { "epoch": 1.1176266481609993, "grad_norm": 0.6916935443878174, "learning_rate": 2.3222192176309367e-05, "loss": 0.5424, "step": 6442 }, { "epoch": 1.1178001387925052, "grad_norm": 0.9195966124534607, "learning_rate": 2.3215467737637498e-05, "loss": 0.3984, "step": 6443 }, { "epoch": 1.117973629424011, "grad_norm": 0.6890717148780823, "learning_rate": 2.320874292580685e-05, "loss": 0.6116, "step": 6444 }, { "epoch": 1.118147120055517, "grad_norm": 1.1159943342208862, "learning_rate": 2.320201774159785e-05, "loss": 0.5779, "step": 6445 }, { "epoch": 1.118320610687023, "grad_norm": 0.8788090348243713, "learning_rate": 2.3195292185790957e-05, "loss": 0.4939, "step": 6446 }, { "epoch": 1.1184941013185288, "grad_norm": 0.8379570841789246, "learning_rate": 2.318856625916668e-05, "loss": 0.509, "step": 6447 }, { "epoch": 1.1186675919500346, "grad_norm": 0.9516486525535583, "learning_rate": 2.318183996250558e-05, "loss": 0.3376, "step": 6448 }, { "epoch": 1.1188410825815407, "grad_norm": 0.7245095372200012, "learning_rate": 2.3175113296588244e-05, "loss": 0.6335, "step": 6449 }, { "epoch": 1.1190145732130465, "grad_norm": 0.6819574236869812, "learning_rate": 2.3168386262195307e-05, "loss": 0.5155, "step": 6450 }, { "epoch": 1.1191880638445524, "grad_norm": 0.7670549154281616, "learning_rate": 2.3161658860107457e-05, "loss": 0.5471, "step": 6451 }, { "epoch": 1.1193615544760582, "grad_norm": 0.7976487874984741, "learning_rate": 2.315493109110541e-05, "loss": 0.4602, "step": 6452 }, { "epoch": 1.1195350451075643, "grad_norm": 0.9008978009223938, "learning_rate": 2.314820295596993e-05, "loss": 0.4371, "step": 6453 }, { "epoch": 1.1197085357390701, "grad_norm": 0.8811214566230774, "learning_rate": 2.314147445548183e-05, "loss": 0.52, "step": 6454 }, { "epoch": 1.119882026370576, "grad_norm": 0.7722797393798828, "learning_rate": 2.313474559042196e-05, "loss": 0.5618, "step": 6455 }, { "epoch": 1.1200555170020818, "grad_norm": 0.7748006582260132, "learning_rate": 2.3128016361571213e-05, "loss": 0.5454, "step": 6456 }, { "epoch": 1.1202290076335877, "grad_norm": 0.789991021156311, "learning_rate": 2.312128676971052e-05, "loss": 0.4666, "step": 6457 }, { "epoch": 1.1204024982650937, "grad_norm": 0.7222610712051392, "learning_rate": 2.3114556815620863e-05, "loss": 0.562, "step": 6458 }, { "epoch": 1.1205759888965996, "grad_norm": 0.8120276927947998, "learning_rate": 2.310782650008326e-05, "loss": 0.5955, "step": 6459 }, { "epoch": 1.1207494795281054, "grad_norm": 0.70384281873703, "learning_rate": 2.3101095823878764e-05, "loss": 0.5282, "step": 6460 }, { "epoch": 1.1209229701596113, "grad_norm": 0.7047963738441467, "learning_rate": 2.3094364787788487e-05, "loss": 0.6172, "step": 6461 }, { "epoch": 1.1210964607911174, "grad_norm": 0.7741065621376038, "learning_rate": 2.308763339259357e-05, "loss": 0.6201, "step": 6462 }, { "epoch": 1.1212699514226232, "grad_norm": 0.6245689392089844, "learning_rate": 2.30809016390752e-05, "loss": 0.6821, "step": 6463 }, { "epoch": 1.121443442054129, "grad_norm": 1.0588994026184082, "learning_rate": 2.3074169528014605e-05, "loss": 0.5713, "step": 6464 }, { "epoch": 1.121616932685635, "grad_norm": 0.6639510989189148, "learning_rate": 2.3067437060193055e-05, "loss": 0.5079, "step": 6465 }, { "epoch": 1.121790423317141, "grad_norm": 0.6362642645835876, "learning_rate": 2.306070423639186e-05, "loss": 0.4519, "step": 6466 }, { "epoch": 1.1219639139486468, "grad_norm": 0.654573917388916, "learning_rate": 2.3053971057392368e-05, "loss": 0.5739, "step": 6467 }, { "epoch": 1.1221374045801527, "grad_norm": 0.9201018810272217, "learning_rate": 2.3047237523975984e-05, "loss": 0.5176, "step": 6468 }, { "epoch": 1.1223108952116585, "grad_norm": 0.6924317479133606, "learning_rate": 2.3040503636924126e-05, "loss": 0.5597, "step": 6469 }, { "epoch": 1.1224843858431646, "grad_norm": 0.6017991900444031, "learning_rate": 2.3033769397018286e-05, "loss": 0.6281, "step": 6470 }, { "epoch": 1.1226578764746704, "grad_norm": 0.8645722270011902, "learning_rate": 2.3027034805039965e-05, "loss": 0.4135, "step": 6471 }, { "epoch": 1.1228313671061763, "grad_norm": 0.7092148661613464, "learning_rate": 2.3020299861770732e-05, "loss": 0.4896, "step": 6472 }, { "epoch": 1.123004857737682, "grad_norm": 1.2765874862670898, "learning_rate": 2.3013564567992184e-05, "loss": 0.4609, "step": 6473 }, { "epoch": 1.123178348369188, "grad_norm": 0.8498753905296326, "learning_rate": 2.300682892448595e-05, "loss": 0.5022, "step": 6474 }, { "epoch": 1.123351839000694, "grad_norm": 0.6794124245643616, "learning_rate": 2.3000092932033718e-05, "loss": 0.5402, "step": 6475 }, { "epoch": 1.1235253296321999, "grad_norm": 0.8482364416122437, "learning_rate": 2.2993356591417203e-05, "loss": 0.5017, "step": 6476 }, { "epoch": 1.1236988202637057, "grad_norm": 0.9400708675384521, "learning_rate": 2.2986619903418172e-05, "loss": 0.373, "step": 6477 }, { "epoch": 1.1238723108952116, "grad_norm": 0.742933452129364, "learning_rate": 2.2979882868818422e-05, "loss": 0.5658, "step": 6478 }, { "epoch": 1.1240458015267176, "grad_norm": 1.7342190742492676, "learning_rate": 2.2973145488399792e-05, "loss": 0.437, "step": 6479 }, { "epoch": 1.1242192921582235, "grad_norm": 0.7767460942268372, "learning_rate": 2.296640776294416e-05, "loss": 0.5402, "step": 6480 }, { "epoch": 1.1243927827897293, "grad_norm": 0.9296532273292542, "learning_rate": 2.2959669693233453e-05, "loss": 0.6931, "step": 6481 }, { "epoch": 1.1245662734212352, "grad_norm": 0.8746662735939026, "learning_rate": 2.2952931280049628e-05, "loss": 0.5812, "step": 6482 }, { "epoch": 1.1247397640527412, "grad_norm": 1.2094800472259521, "learning_rate": 2.294619252417469e-05, "loss": 0.4949, "step": 6483 }, { "epoch": 1.124913254684247, "grad_norm": 0.6965013742446899, "learning_rate": 2.293945342639067e-05, "loss": 0.4502, "step": 6484 }, { "epoch": 1.125086745315753, "grad_norm": 1.1303836107254028, "learning_rate": 2.2932713987479664e-05, "loss": 0.5649, "step": 6485 }, { "epoch": 1.1252602359472588, "grad_norm": 0.8138877749443054, "learning_rate": 2.2925974208223778e-05, "loss": 0.5535, "step": 6486 }, { "epoch": 1.1254337265787648, "grad_norm": 0.7776005268096924, "learning_rate": 2.2919234089405173e-05, "loss": 0.5015, "step": 6487 }, { "epoch": 1.1256072172102707, "grad_norm": 0.7546865940093994, "learning_rate": 2.2912493631806055e-05, "loss": 0.4994, "step": 6488 }, { "epoch": 1.1257807078417765, "grad_norm": 0.8697831034660339, "learning_rate": 2.290575283620865e-05, "loss": 0.3845, "step": 6489 }, { "epoch": 1.1259541984732824, "grad_norm": 0.6860870122909546, "learning_rate": 2.2899011703395254e-05, "loss": 0.51, "step": 6490 }, { "epoch": 1.1261276891047882, "grad_norm": 1.552304983139038, "learning_rate": 2.289227023414816e-05, "loss": 0.5311, "step": 6491 }, { "epoch": 1.1263011797362943, "grad_norm": 1.3038265705108643, "learning_rate": 2.288552842924974e-05, "loss": 0.5414, "step": 6492 }, { "epoch": 1.1264746703678001, "grad_norm": 0.7931041121482849, "learning_rate": 2.2878786289482384e-05, "loss": 0.4524, "step": 6493 }, { "epoch": 1.126648160999306, "grad_norm": 0.7245680093765259, "learning_rate": 2.2872043815628525e-05, "loss": 0.5699, "step": 6494 }, { "epoch": 1.126821651630812, "grad_norm": 1.0535913705825806, "learning_rate": 2.2865301008470633e-05, "loss": 0.4155, "step": 6495 }, { "epoch": 1.126995142262318, "grad_norm": 0.80494225025177, "learning_rate": 2.2858557868791222e-05, "loss": 0.45, "step": 6496 }, { "epoch": 1.1271686328938237, "grad_norm": 0.7536813020706177, "learning_rate": 2.2851814397372838e-05, "loss": 0.5144, "step": 6497 }, { "epoch": 1.1273421235253296, "grad_norm": 1.258178949356079, "learning_rate": 2.284507059499807e-05, "loss": 0.5242, "step": 6498 }, { "epoch": 1.1275156141568354, "grad_norm": 0.7766262292861938, "learning_rate": 2.283832646244955e-05, "loss": 0.5414, "step": 6499 }, { "epoch": 1.1276891047883415, "grad_norm": 0.6833403706550598, "learning_rate": 2.283158200050993e-05, "loss": 0.5, "step": 6500 }, { "epoch": 1.1278625954198473, "grad_norm": 0.7784548401832581, "learning_rate": 2.2824837209961924e-05, "loss": 0.4131, "step": 6501 }, { "epoch": 1.1280360860513532, "grad_norm": 0.8495446443557739, "learning_rate": 2.2818092091588266e-05, "loss": 0.4895, "step": 6502 }, { "epoch": 1.128209576682859, "grad_norm": 0.7297409176826477, "learning_rate": 2.2811346646171734e-05, "loss": 0.5499, "step": 6503 }, { "epoch": 1.128383067314365, "grad_norm": 1.2626932859420776, "learning_rate": 2.280460087449515e-05, "loss": 0.6128, "step": 6504 }, { "epoch": 1.128556557945871, "grad_norm": 0.6915184259414673, "learning_rate": 2.2797854777341368e-05, "loss": 0.5267, "step": 6505 }, { "epoch": 1.1287300485773768, "grad_norm": 0.9704550504684448, "learning_rate": 2.2791108355493278e-05, "loss": 0.3778, "step": 6506 }, { "epoch": 1.1289035392088826, "grad_norm": 0.6383293271064758, "learning_rate": 2.2784361609733812e-05, "loss": 0.6055, "step": 6507 }, { "epoch": 1.1290770298403887, "grad_norm": 0.7976787686347961, "learning_rate": 2.2777614540845934e-05, "loss": 0.4866, "step": 6508 }, { "epoch": 1.1292505204718946, "grad_norm": 1.0124448537826538, "learning_rate": 2.2770867149612658e-05, "loss": 0.4137, "step": 6509 }, { "epoch": 1.1294240111034004, "grad_norm": 1.5829224586486816, "learning_rate": 2.2764119436817015e-05, "loss": 0.3836, "step": 6510 }, { "epoch": 1.1295975017349063, "grad_norm": 0.8856865167617798, "learning_rate": 2.2757371403242094e-05, "loss": 0.5831, "step": 6511 }, { "epoch": 1.1297709923664123, "grad_norm": 0.6782200932502747, "learning_rate": 2.2750623049671003e-05, "loss": 0.5133, "step": 6512 }, { "epoch": 1.1299444829979182, "grad_norm": 0.9485509991645813, "learning_rate": 2.2743874376886903e-05, "loss": 0.5314, "step": 6513 }, { "epoch": 1.130117973629424, "grad_norm": 1.1593315601348877, "learning_rate": 2.273712538567299e-05, "loss": 0.4221, "step": 6514 }, { "epoch": 1.1302914642609299, "grad_norm": 0.564075767993927, "learning_rate": 2.273037607681248e-05, "loss": 0.6469, "step": 6515 }, { "epoch": 1.1304649548924357, "grad_norm": 1.0049749612808228, "learning_rate": 2.2723626451088644e-05, "loss": 0.462, "step": 6516 }, { "epoch": 1.1306384455239418, "grad_norm": 3.31482195854187, "learning_rate": 2.2716876509284794e-05, "loss": 0.5275, "step": 6517 }, { "epoch": 1.1308119361554476, "grad_norm": 0.814273476600647, "learning_rate": 2.2710126252184255e-05, "loss": 0.4277, "step": 6518 }, { "epoch": 1.1309854267869535, "grad_norm": 3.442901134490967, "learning_rate": 2.2703375680570402e-05, "loss": 0.5223, "step": 6519 }, { "epoch": 1.1311589174184593, "grad_norm": 1.0197612047195435, "learning_rate": 2.2696624795226662e-05, "loss": 0.4286, "step": 6520 }, { "epoch": 1.1313324080499654, "grad_norm": 0.8209486603736877, "learning_rate": 2.2689873596936458e-05, "loss": 0.4031, "step": 6521 }, { "epoch": 1.1315058986814712, "grad_norm": 0.786411464214325, "learning_rate": 2.2683122086483297e-05, "loss": 0.4977, "step": 6522 }, { "epoch": 1.131679389312977, "grad_norm": 0.7514742016792297, "learning_rate": 2.2676370264650694e-05, "loss": 0.4406, "step": 6523 }, { "epoch": 1.131852879944483, "grad_norm": 0.9746724367141724, "learning_rate": 2.26696181322222e-05, "loss": 0.4857, "step": 6524 }, { "epoch": 1.132026370575989, "grad_norm": 0.7940140962600708, "learning_rate": 2.266286568998141e-05, "loss": 0.4613, "step": 6525 }, { "epoch": 1.1321998612074948, "grad_norm": 1.2118453979492188, "learning_rate": 2.2656112938711952e-05, "loss": 0.4551, "step": 6526 }, { "epoch": 1.1323733518390007, "grad_norm": 1.165552020072937, "learning_rate": 2.2649359879197497e-05, "loss": 0.5624, "step": 6527 }, { "epoch": 1.1325468424705065, "grad_norm": 0.8620389699935913, "learning_rate": 2.264260651222174e-05, "loss": 0.4746, "step": 6528 }, { "epoch": 1.1327203331020126, "grad_norm": 0.7156081795692444, "learning_rate": 2.263585283856841e-05, "loss": 0.5294, "step": 6529 }, { "epoch": 1.1328938237335184, "grad_norm": 0.8914456963539124, "learning_rate": 2.26290988590213e-05, "loss": 0.5168, "step": 6530 }, { "epoch": 1.1330673143650243, "grad_norm": 1.0415455102920532, "learning_rate": 2.2622344574364197e-05, "loss": 0.4011, "step": 6531 }, { "epoch": 1.1332408049965301, "grad_norm": 0.8060207366943359, "learning_rate": 2.261558998538095e-05, "loss": 0.4276, "step": 6532 }, { "epoch": 1.133414295628036, "grad_norm": 0.8099369406700134, "learning_rate": 2.2608835092855443e-05, "loss": 0.5209, "step": 6533 }, { "epoch": 1.133587786259542, "grad_norm": 0.6553003191947937, "learning_rate": 2.2602079897571576e-05, "loss": 0.5065, "step": 6534 }, { "epoch": 1.1337612768910479, "grad_norm": 1.8057334423065186, "learning_rate": 2.2595324400313306e-05, "loss": 0.4036, "step": 6535 }, { "epoch": 1.1339347675225537, "grad_norm": 0.6745595335960388, "learning_rate": 2.258856860186462e-05, "loss": 0.675, "step": 6536 }, { "epoch": 1.1341082581540598, "grad_norm": 0.9904066920280457, "learning_rate": 2.2581812503009527e-05, "loss": 0.5397, "step": 6537 }, { "epoch": 1.1342817487855656, "grad_norm": 0.7607759833335876, "learning_rate": 2.257505610453209e-05, "loss": 0.5304, "step": 6538 }, { "epoch": 1.1344552394170715, "grad_norm": 1.4457356929779053, "learning_rate": 2.2568299407216384e-05, "loss": 0.5128, "step": 6539 }, { "epoch": 1.1346287300485773, "grad_norm": 0.9140667915344238, "learning_rate": 2.2561542411846537e-05, "loss": 0.4609, "step": 6540 }, { "epoch": 1.1348022206800832, "grad_norm": 0.7092764377593994, "learning_rate": 2.255478511920672e-05, "loss": 0.49, "step": 6541 }, { "epoch": 1.1349757113115893, "grad_norm": 0.6851636171340942, "learning_rate": 2.25480275300811e-05, "loss": 0.4692, "step": 6542 }, { "epoch": 1.135149201943095, "grad_norm": 1.046521544456482, "learning_rate": 2.254126964525393e-05, "loss": 0.4111, "step": 6543 }, { "epoch": 1.135322692574601, "grad_norm": 0.7946211695671082, "learning_rate": 2.253451146550945e-05, "loss": 0.5338, "step": 6544 }, { "epoch": 1.1354961832061068, "grad_norm": 0.7515681982040405, "learning_rate": 2.2527752991631958e-05, "loss": 0.5461, "step": 6545 }, { "epoch": 1.1356696738376129, "grad_norm": 0.5754083395004272, "learning_rate": 2.252099422440579e-05, "loss": 0.5704, "step": 6546 }, { "epoch": 1.1358431644691187, "grad_norm": 0.8206495046615601, "learning_rate": 2.2514235164615305e-05, "loss": 0.6459, "step": 6547 }, { "epoch": 1.1360166551006246, "grad_norm": 0.8565614819526672, "learning_rate": 2.2507475813044896e-05, "loss": 0.4427, "step": 6548 }, { "epoch": 1.1361901457321304, "grad_norm": 0.9310941696166992, "learning_rate": 2.2500716170479e-05, "loss": 0.5603, "step": 6549 }, { "epoch": 1.1363636363636362, "grad_norm": 0.9128825068473816, "learning_rate": 2.2493956237702075e-05, "loss": 0.4984, "step": 6550 }, { "epoch": 1.1365371269951423, "grad_norm": 0.7900086045265198, "learning_rate": 2.248719601549863e-05, "loss": 0.379, "step": 6551 }, { "epoch": 1.1367106176266482, "grad_norm": 0.7916425466537476, "learning_rate": 2.2480435504653185e-05, "loss": 0.468, "step": 6552 }, { "epoch": 1.136884108258154, "grad_norm": 0.6726658940315247, "learning_rate": 2.2473674705950303e-05, "loss": 0.4086, "step": 6553 }, { "epoch": 1.13705759888966, "grad_norm": 0.7393019199371338, "learning_rate": 2.24669136201746e-05, "loss": 0.4841, "step": 6554 }, { "epoch": 1.137231089521166, "grad_norm": 0.7832701802253723, "learning_rate": 2.246015224811069e-05, "loss": 0.6074, "step": 6555 }, { "epoch": 1.1374045801526718, "grad_norm": 0.8238747715950012, "learning_rate": 2.2453390590543246e-05, "loss": 0.5914, "step": 6556 }, { "epoch": 1.1375780707841776, "grad_norm": 1.0960816144943237, "learning_rate": 2.2446628648256964e-05, "loss": 0.6648, "step": 6557 }, { "epoch": 1.1377515614156835, "grad_norm": 0.8119200468063354, "learning_rate": 2.243986642203658e-05, "loss": 0.4757, "step": 6558 }, { "epoch": 1.1379250520471895, "grad_norm": 1.2356538772583008, "learning_rate": 2.2433103912666852e-05, "loss": 0.433, "step": 6559 }, { "epoch": 1.1380985426786954, "grad_norm": 0.7967992424964905, "learning_rate": 2.2426341120932582e-05, "loss": 0.5477, "step": 6560 }, { "epoch": 1.1382720333102012, "grad_norm": 0.8230440616607666, "learning_rate": 2.2419578047618598e-05, "loss": 0.4103, "step": 6561 }, { "epoch": 1.138445523941707, "grad_norm": 1.0610239505767822, "learning_rate": 2.241281469350976e-05, "loss": 0.4462, "step": 6562 }, { "epoch": 1.1386190145732131, "grad_norm": 0.8043401837348938, "learning_rate": 2.2406051059390963e-05, "loss": 0.4216, "step": 6563 }, { "epoch": 1.138792505204719, "grad_norm": 0.8361603021621704, "learning_rate": 2.2399287146047137e-05, "loss": 0.4207, "step": 6564 }, { "epoch": 1.1389659958362248, "grad_norm": 0.9205870628356934, "learning_rate": 2.239252295426325e-05, "loss": 0.6172, "step": 6565 }, { "epoch": 1.1391394864677307, "grad_norm": 0.8399495482444763, "learning_rate": 2.2385758484824275e-05, "loss": 0.4995, "step": 6566 }, { "epoch": 1.1393129770992367, "grad_norm": 0.5843160152435303, "learning_rate": 2.2378993738515255e-05, "loss": 0.6514, "step": 6567 }, { "epoch": 1.1394864677307426, "grad_norm": 0.7770143747329712, "learning_rate": 2.2372228716121246e-05, "loss": 0.3884, "step": 6568 }, { "epoch": 1.1396599583622484, "grad_norm": 0.6687589883804321, "learning_rate": 2.2365463418427317e-05, "loss": 0.5896, "step": 6569 }, { "epoch": 1.1398334489937543, "grad_norm": 0.7951526641845703, "learning_rate": 2.235869784621861e-05, "loss": 0.5137, "step": 6570 }, { "epoch": 1.1400069396252603, "grad_norm": 0.7361968755722046, "learning_rate": 2.2351932000280266e-05, "loss": 0.5129, "step": 6571 }, { "epoch": 1.1401804302567662, "grad_norm": 0.8000061511993408, "learning_rate": 2.2345165881397475e-05, "loss": 0.3605, "step": 6572 }, { "epoch": 1.140353920888272, "grad_norm": 0.7454240322113037, "learning_rate": 2.233839949035545e-05, "loss": 0.4493, "step": 6573 }, { "epoch": 1.1405274115197779, "grad_norm": 0.924987256526947, "learning_rate": 2.2331632827939438e-05, "loss": 0.4083, "step": 6574 }, { "epoch": 1.1407009021512837, "grad_norm": 0.7753768563270569, "learning_rate": 2.232486589493472e-05, "loss": 0.5869, "step": 6575 }, { "epoch": 1.1408743927827898, "grad_norm": 0.604107141494751, "learning_rate": 2.231809869212661e-05, "loss": 0.4639, "step": 6576 }, { "epoch": 1.1410478834142956, "grad_norm": 2.0864806175231934, "learning_rate": 2.231133122030044e-05, "loss": 0.417, "step": 6577 }, { "epoch": 1.1412213740458015, "grad_norm": 0.7135872840881348, "learning_rate": 2.230456348024159e-05, "loss": 0.5907, "step": 6578 }, { "epoch": 1.1413948646773073, "grad_norm": 0.9648030996322632, "learning_rate": 2.2297795472735462e-05, "loss": 0.496, "step": 6579 }, { "epoch": 1.1415683553088134, "grad_norm": 0.7891243100166321, "learning_rate": 2.2291027198567502e-05, "loss": 0.4845, "step": 6580 }, { "epoch": 1.1417418459403192, "grad_norm": 1.6100218296051025, "learning_rate": 2.228425865852316e-05, "loss": 0.4495, "step": 6581 }, { "epoch": 1.141915336571825, "grad_norm": 0.6381610631942749, "learning_rate": 2.2277489853387932e-05, "loss": 0.5763, "step": 6582 }, { "epoch": 1.142088827203331, "grad_norm": 0.6607668995857239, "learning_rate": 2.2270720783947358e-05, "loss": 0.5686, "step": 6583 }, { "epoch": 1.142262317834837, "grad_norm": 0.9227593541145325, "learning_rate": 2.2263951450986987e-05, "loss": 0.5004, "step": 6584 }, { "epoch": 1.1424358084663429, "grad_norm": 0.7867388129234314, "learning_rate": 2.225718185529242e-05, "loss": 0.4375, "step": 6585 }, { "epoch": 1.1426092990978487, "grad_norm": 1.0895323753356934, "learning_rate": 2.2250411997649266e-05, "loss": 0.4457, "step": 6586 }, { "epoch": 1.1427827897293545, "grad_norm": 0.8809779286384583, "learning_rate": 2.2243641878843172e-05, "loss": 0.4347, "step": 6587 }, { "epoch": 1.1429562803608606, "grad_norm": 0.9329822659492493, "learning_rate": 2.2236871499659824e-05, "loss": 0.3826, "step": 6588 }, { "epoch": 1.1431297709923665, "grad_norm": 0.9002099633216858, "learning_rate": 2.2230100860884937e-05, "loss": 0.6514, "step": 6589 }, { "epoch": 1.1433032616238723, "grad_norm": 0.7116411328315735, "learning_rate": 2.2223329963304242e-05, "loss": 0.5087, "step": 6590 }, { "epoch": 1.1434767522553781, "grad_norm": 0.8289925456047058, "learning_rate": 2.2216558807703522e-05, "loss": 0.418, "step": 6591 }, { "epoch": 1.143650242886884, "grad_norm": 1.378301978111267, "learning_rate": 2.2209787394868562e-05, "loss": 0.3944, "step": 6592 }, { "epoch": 1.14382373351839, "grad_norm": 0.5482950210571289, "learning_rate": 2.22030157255852e-05, "loss": 0.486, "step": 6593 }, { "epoch": 1.143997224149896, "grad_norm": 1.0869117975234985, "learning_rate": 2.2196243800639303e-05, "loss": 0.4205, "step": 6594 }, { "epoch": 1.1441707147814018, "grad_norm": 0.9532647132873535, "learning_rate": 2.2189471620816745e-05, "loss": 0.4965, "step": 6595 }, { "epoch": 1.1443442054129078, "grad_norm": 0.9237571358680725, "learning_rate": 2.2182699186903462e-05, "loss": 0.4149, "step": 6596 }, { "epoch": 1.1445176960444137, "grad_norm": 0.6246567964553833, "learning_rate": 2.217592649968539e-05, "loss": 0.5579, "step": 6597 }, { "epoch": 1.1446911866759195, "grad_norm": 0.9275211095809937, "learning_rate": 2.2169153559948513e-05, "loss": 0.5593, "step": 6598 }, { "epoch": 1.1448646773074254, "grad_norm": 0.7967746257781982, "learning_rate": 2.2162380368478836e-05, "loss": 0.5905, "step": 6599 }, { "epoch": 1.1450381679389312, "grad_norm": 0.6600043177604675, "learning_rate": 2.21556069260624e-05, "loss": 0.4712, "step": 6600 }, { "epoch": 1.1452116585704373, "grad_norm": 0.8281075954437256, "learning_rate": 2.2148833233485273e-05, "loss": 0.5726, "step": 6601 }, { "epoch": 1.1453851492019431, "grad_norm": 0.8647617697715759, "learning_rate": 2.2142059291533542e-05, "loss": 0.4822, "step": 6602 }, { "epoch": 1.145558639833449, "grad_norm": 0.9288738965988159, "learning_rate": 2.2135285100993328e-05, "loss": 0.5446, "step": 6603 }, { "epoch": 1.1457321304649548, "grad_norm": 0.8471190929412842, "learning_rate": 2.2128510662650796e-05, "loss": 0.476, "step": 6604 }, { "epoch": 1.1459056210964609, "grad_norm": 1.2163231372833252, "learning_rate": 2.212173597729212e-05, "loss": 0.4095, "step": 6605 }, { "epoch": 1.1460791117279667, "grad_norm": 0.9635229706764221, "learning_rate": 2.211496104570351e-05, "loss": 0.5013, "step": 6606 }, { "epoch": 1.1462526023594726, "grad_norm": 0.6926249861717224, "learning_rate": 2.210818586867121e-05, "loss": 0.5568, "step": 6607 }, { "epoch": 1.1464260929909784, "grad_norm": 1.0889391899108887, "learning_rate": 2.210141044698148e-05, "loss": 0.4818, "step": 6608 }, { "epoch": 1.1465995836224843, "grad_norm": 0.873817503452301, "learning_rate": 2.2094634781420626e-05, "loss": 0.4248, "step": 6609 }, { "epoch": 1.1467730742539903, "grad_norm": 0.7545918822288513, "learning_rate": 2.2087858872774954e-05, "loss": 0.4852, "step": 6610 }, { "epoch": 1.1469465648854962, "grad_norm": 0.6246511936187744, "learning_rate": 2.2081082721830834e-05, "loss": 0.5958, "step": 6611 }, { "epoch": 1.147120055517002, "grad_norm": 0.6936851739883423, "learning_rate": 2.2074306329374636e-05, "loss": 0.5459, "step": 6612 }, { "epoch": 1.147293546148508, "grad_norm": 0.6763320565223694, "learning_rate": 2.2067529696192772e-05, "loss": 0.5985, "step": 6613 }, { "epoch": 1.147467036780014, "grad_norm": 0.9481201767921448, "learning_rate": 2.206075282307168e-05, "loss": 0.543, "step": 6614 }, { "epoch": 1.1476405274115198, "grad_norm": 1.1455981731414795, "learning_rate": 2.205397571079782e-05, "loss": 0.4767, "step": 6615 }, { "epoch": 1.1478140180430256, "grad_norm": 0.8845289945602417, "learning_rate": 2.2047198360157683e-05, "loss": 0.4875, "step": 6616 }, { "epoch": 1.1479875086745315, "grad_norm": 1.9621926546096802, "learning_rate": 2.2040420771937793e-05, "loss": 0.4832, "step": 6617 }, { "epoch": 1.1481609993060375, "grad_norm": 0.9349886775016785, "learning_rate": 2.2033642946924698e-05, "loss": 0.4557, "step": 6618 }, { "epoch": 1.1483344899375434, "grad_norm": 0.5708053708076477, "learning_rate": 2.2026864885904965e-05, "loss": 0.5789, "step": 6619 }, { "epoch": 1.1485079805690492, "grad_norm": 0.6791675686836243, "learning_rate": 2.2020086589665203e-05, "loss": 0.464, "step": 6620 }, { "epoch": 1.148681471200555, "grad_norm": 0.9479997754096985, "learning_rate": 2.2013308058992037e-05, "loss": 0.4625, "step": 6621 }, { "epoch": 1.1488549618320612, "grad_norm": 0.6508433222770691, "learning_rate": 2.2006529294672126e-05, "loss": 0.7183, "step": 6622 }, { "epoch": 1.149028452463567, "grad_norm": 0.7447717785835266, "learning_rate": 2.199975029749215e-05, "loss": 0.4012, "step": 6623 }, { "epoch": 1.1492019430950728, "grad_norm": 2.0179295539855957, "learning_rate": 2.1992971068238826e-05, "loss": 0.4464, "step": 6624 }, { "epoch": 1.1493754337265787, "grad_norm": 0.6852936148643494, "learning_rate": 2.198619160769888e-05, "loss": 0.5293, "step": 6625 }, { "epoch": 1.1495489243580848, "grad_norm": 0.7375458478927612, "learning_rate": 2.197941191665909e-05, "loss": 0.5673, "step": 6626 }, { "epoch": 1.1497224149895906, "grad_norm": 1.103060245513916, "learning_rate": 2.1972631995906237e-05, "loss": 0.4238, "step": 6627 }, { "epoch": 1.1498959056210964, "grad_norm": 0.7563209533691406, "learning_rate": 2.196585184622715e-05, "loss": 0.5815, "step": 6628 }, { "epoch": 1.1500693962526023, "grad_norm": 0.9774815440177917, "learning_rate": 2.1959071468408656e-05, "loss": 0.4817, "step": 6629 }, { "epoch": 1.1502428868841084, "grad_norm": 1.1103121042251587, "learning_rate": 2.195229086323764e-05, "loss": 0.6536, "step": 6630 }, { "epoch": 1.1504163775156142, "grad_norm": 0.6559248566627502, "learning_rate": 2.1945510031500992e-05, "loss": 0.589, "step": 6631 }, { "epoch": 1.15058986814712, "grad_norm": 0.7209063172340393, "learning_rate": 2.193872897398564e-05, "loss": 0.5403, "step": 6632 }, { "epoch": 1.150763358778626, "grad_norm": 0.6752347946166992, "learning_rate": 2.193194769147853e-05, "loss": 0.4971, "step": 6633 }, { "epoch": 1.1509368494101317, "grad_norm": 0.8687592148780823, "learning_rate": 2.1925166184766636e-05, "loss": 0.5104, "step": 6634 }, { "epoch": 1.1511103400416378, "grad_norm": 0.8973918557167053, "learning_rate": 2.191838445463697e-05, "loss": 0.5557, "step": 6635 }, { "epoch": 1.1512838306731437, "grad_norm": 1.0971548557281494, "learning_rate": 2.1911602501876546e-05, "loss": 0.5561, "step": 6636 }, { "epoch": 1.1514573213046495, "grad_norm": 0.8915349841117859, "learning_rate": 2.190482032727243e-05, "loss": 0.4652, "step": 6637 }, { "epoch": 1.1516308119361554, "grad_norm": 1.8851386308670044, "learning_rate": 2.1898037931611688e-05, "loss": 0.488, "step": 6638 }, { "epoch": 1.1518043025676614, "grad_norm": 0.885982871055603, "learning_rate": 2.1891255315681443e-05, "loss": 0.5558, "step": 6639 }, { "epoch": 1.1519777931991673, "grad_norm": 0.9724069833755493, "learning_rate": 2.1884472480268806e-05, "loss": 0.4536, "step": 6640 }, { "epoch": 1.1521512838306731, "grad_norm": 0.7551723122596741, "learning_rate": 2.1877689426160943e-05, "loss": 0.538, "step": 6641 }, { "epoch": 1.152324774462179, "grad_norm": 0.8001475930213928, "learning_rate": 2.1870906154145035e-05, "loss": 0.5338, "step": 6642 }, { "epoch": 1.152498265093685, "grad_norm": 1.3312873840332031, "learning_rate": 2.1864122665008294e-05, "loss": 0.5228, "step": 6643 }, { "epoch": 1.1526717557251909, "grad_norm": 0.7069399356842041, "learning_rate": 2.185733895953794e-05, "loss": 0.6171, "step": 6644 }, { "epoch": 1.1528452463566967, "grad_norm": 0.9802401661872864, "learning_rate": 2.1850555038521236e-05, "loss": 0.5704, "step": 6645 }, { "epoch": 1.1530187369882026, "grad_norm": 0.6348333954811096, "learning_rate": 2.1843770902745462e-05, "loss": 0.585, "step": 6646 }, { "epoch": 1.1531922276197086, "grad_norm": 0.8841515183448792, "learning_rate": 2.183698655299793e-05, "loss": 0.5117, "step": 6647 }, { "epoch": 1.1533657182512145, "grad_norm": 0.8299418091773987, "learning_rate": 2.1830201990065966e-05, "loss": 0.5485, "step": 6648 }, { "epoch": 1.1535392088827203, "grad_norm": 0.7538990378379822, "learning_rate": 2.182341721473693e-05, "loss": 0.5186, "step": 6649 }, { "epoch": 1.1537126995142262, "grad_norm": 2.092827796936035, "learning_rate": 2.1816632227798196e-05, "loss": 0.6028, "step": 6650 }, { "epoch": 1.153886190145732, "grad_norm": 0.8751448392868042, "learning_rate": 2.1809847030037182e-05, "loss": 0.4607, "step": 6651 }, { "epoch": 1.154059680777238, "grad_norm": 2.1474010944366455, "learning_rate": 2.180306162224131e-05, "loss": 0.5428, "step": 6652 }, { "epoch": 1.154233171408744, "grad_norm": 0.8482744097709656, "learning_rate": 2.179627600519803e-05, "loss": 0.4346, "step": 6653 }, { "epoch": 1.1544066620402498, "grad_norm": 0.900211751461029, "learning_rate": 2.1789490179694833e-05, "loss": 0.5604, "step": 6654 }, { "epoch": 1.1545801526717558, "grad_norm": 0.6769447326660156, "learning_rate": 2.1782704146519212e-05, "loss": 0.6034, "step": 6655 }, { "epoch": 1.1547536433032617, "grad_norm": 0.7091245651245117, "learning_rate": 2.1775917906458698e-05, "loss": 0.4375, "step": 6656 }, { "epoch": 1.1549271339347675, "grad_norm": 0.8113880157470703, "learning_rate": 2.1769131460300844e-05, "loss": 0.6483, "step": 6657 }, { "epoch": 1.1551006245662734, "grad_norm": 0.8065187931060791, "learning_rate": 2.176234480883322e-05, "loss": 0.5001, "step": 6658 }, { "epoch": 1.1552741151977792, "grad_norm": 0.6611795425415039, "learning_rate": 2.175555795284343e-05, "loss": 0.5692, "step": 6659 }, { "epoch": 1.1554476058292853, "grad_norm": 0.6740517020225525, "learning_rate": 2.174877089311909e-05, "loss": 0.626, "step": 6660 }, { "epoch": 1.1556210964607911, "grad_norm": 1.0208956003189087, "learning_rate": 2.1741983630447852e-05, "loss": 0.5341, "step": 6661 }, { "epoch": 1.155794587092297, "grad_norm": 0.5938760042190552, "learning_rate": 2.1735196165617385e-05, "loss": 0.6029, "step": 6662 }, { "epoch": 1.1559680777238028, "grad_norm": 0.6635843515396118, "learning_rate": 2.172840849941538e-05, "loss": 0.52, "step": 6663 }, { "epoch": 1.156141568355309, "grad_norm": 0.7051963210105896, "learning_rate": 2.1721620632629552e-05, "loss": 0.6143, "step": 6664 }, { "epoch": 1.1563150589868147, "grad_norm": 0.934929370880127, "learning_rate": 2.171483256604765e-05, "loss": 0.5151, "step": 6665 }, { "epoch": 1.1564885496183206, "grad_norm": 0.6479410529136658, "learning_rate": 2.1708044300457423e-05, "loss": 0.5288, "step": 6666 }, { "epoch": 1.1566620402498264, "grad_norm": 0.6074390411376953, "learning_rate": 2.1701255836646672e-05, "loss": 0.5096, "step": 6667 }, { "epoch": 1.1568355308813323, "grad_norm": 1.3985155820846558, "learning_rate": 2.1694467175403197e-05, "loss": 0.5046, "step": 6668 }, { "epoch": 1.1570090215128384, "grad_norm": 0.6436254978179932, "learning_rate": 2.168767831751483e-05, "loss": 0.611, "step": 6669 }, { "epoch": 1.1571825121443442, "grad_norm": 0.7783954739570618, "learning_rate": 2.1680889263769425e-05, "loss": 0.5181, "step": 6670 }, { "epoch": 1.15735600277585, "grad_norm": 0.5539189577102661, "learning_rate": 2.1674100014954864e-05, "loss": 0.686, "step": 6671 }, { "epoch": 1.1575294934073561, "grad_norm": 1.0195469856262207, "learning_rate": 2.166731057185905e-05, "loss": 0.548, "step": 6672 }, { "epoch": 1.157702984038862, "grad_norm": 0.6438109278678894, "learning_rate": 2.16605209352699e-05, "loss": 0.5165, "step": 6673 }, { "epoch": 1.1578764746703678, "grad_norm": 1.053868293762207, "learning_rate": 2.1653731105975355e-05, "loss": 0.549, "step": 6674 }, { "epoch": 1.1580499653018737, "grad_norm": 0.8601235747337341, "learning_rate": 2.1646941084763397e-05, "loss": 0.546, "step": 6675 }, { "epoch": 1.1582234559333795, "grad_norm": 1.214060664176941, "learning_rate": 2.1640150872421997e-05, "loss": 0.5416, "step": 6676 }, { "epoch": 1.1583969465648856, "grad_norm": 0.7202631235122681, "learning_rate": 2.1633360469739183e-05, "loss": 0.5417, "step": 6677 }, { "epoch": 1.1585704371963914, "grad_norm": 0.8196054697036743, "learning_rate": 2.1626569877502985e-05, "loss": 0.5106, "step": 6678 }, { "epoch": 1.1587439278278973, "grad_norm": 0.7475643157958984, "learning_rate": 2.161977909650145e-05, "loss": 0.5076, "step": 6679 }, { "epoch": 1.158917418459403, "grad_norm": 0.7019733190536499, "learning_rate": 2.161298812752267e-05, "loss": 0.4905, "step": 6680 }, { "epoch": 1.1590909090909092, "grad_norm": 0.6765922904014587, "learning_rate": 2.160619697135474e-05, "loss": 0.6139, "step": 6681 }, { "epoch": 1.159264399722415, "grad_norm": 0.7036373615264893, "learning_rate": 2.1599405628785773e-05, "loss": 0.5338, "step": 6682 }, { "epoch": 1.1594378903539209, "grad_norm": 0.7694693803787231, "learning_rate": 2.1592614100603925e-05, "loss": 0.53, "step": 6683 }, { "epoch": 1.1596113809854267, "grad_norm": 1.6063934564590454, "learning_rate": 2.158582238759735e-05, "loss": 0.4412, "step": 6684 }, { "epoch": 1.1597848716169328, "grad_norm": 0.7708662152290344, "learning_rate": 2.157903049055424e-05, "loss": 0.6633, "step": 6685 }, { "epoch": 1.1599583622484386, "grad_norm": 0.7185639142990112, "learning_rate": 2.15722384102628e-05, "loss": 0.4767, "step": 6686 }, { "epoch": 1.1601318528799445, "grad_norm": 0.8182277083396912, "learning_rate": 2.156544614751127e-05, "loss": 0.5151, "step": 6687 }, { "epoch": 1.1603053435114503, "grad_norm": 0.7145664691925049, "learning_rate": 2.1558653703087876e-05, "loss": 0.4597, "step": 6688 }, { "epoch": 1.1604788341429564, "grad_norm": 0.8788052201271057, "learning_rate": 2.1551861077780914e-05, "loss": 0.4357, "step": 6689 }, { "epoch": 1.1606523247744622, "grad_norm": 0.693592369556427, "learning_rate": 2.1545068272378664e-05, "loss": 0.6262, "step": 6690 }, { "epoch": 1.160825815405968, "grad_norm": 0.5431566834449768, "learning_rate": 2.153827528766944e-05, "loss": 0.5736, "step": 6691 }, { "epoch": 1.160999306037474, "grad_norm": 0.7231242060661316, "learning_rate": 2.1531482124441574e-05, "loss": 0.4352, "step": 6692 }, { "epoch": 1.1611727966689798, "grad_norm": 0.6315079927444458, "learning_rate": 2.1524688783483424e-05, "loss": 0.6288, "step": 6693 }, { "epoch": 1.1613462873004858, "grad_norm": 0.7496607899665833, "learning_rate": 2.151789526558337e-05, "loss": 0.551, "step": 6694 }, { "epoch": 1.1615197779319917, "grad_norm": 0.9412398934364319, "learning_rate": 2.1511101571529793e-05, "loss": 0.447, "step": 6695 }, { "epoch": 1.1616932685634975, "grad_norm": 0.7098371386528015, "learning_rate": 2.1504307702111125e-05, "loss": 0.4487, "step": 6696 }, { "epoch": 1.1618667591950034, "grad_norm": 0.915355920791626, "learning_rate": 2.1497513658115792e-05, "loss": 0.6139, "step": 6697 }, { "epoch": 1.1620402498265094, "grad_norm": 0.858681857585907, "learning_rate": 2.1490719440332252e-05, "loss": 0.5276, "step": 6698 }, { "epoch": 1.1622137404580153, "grad_norm": 0.674950897693634, "learning_rate": 2.148392504954899e-05, "loss": 0.5796, "step": 6699 }, { "epoch": 1.1623872310895211, "grad_norm": 0.7628039717674255, "learning_rate": 2.147713048655449e-05, "loss": 0.505, "step": 6700 }, { "epoch": 1.162560721721027, "grad_norm": 0.8838892579078674, "learning_rate": 2.147033575213728e-05, "loss": 0.5066, "step": 6701 }, { "epoch": 1.162734212352533, "grad_norm": 0.7454177141189575, "learning_rate": 2.1463540847085892e-05, "loss": 0.5548, "step": 6702 }, { "epoch": 1.162907702984039, "grad_norm": 0.6347874402999878, "learning_rate": 2.145674577218888e-05, "loss": 0.5387, "step": 6703 }, { "epoch": 1.1630811936155447, "grad_norm": 0.9110173583030701, "learning_rate": 2.1449950528234828e-05, "loss": 0.4803, "step": 6704 }, { "epoch": 1.1632546842470506, "grad_norm": 0.7709625363349915, "learning_rate": 2.1443155116012328e-05, "loss": 0.5662, "step": 6705 }, { "epoch": 1.1634281748785567, "grad_norm": 1.081397294998169, "learning_rate": 2.143635953630999e-05, "loss": 0.5341, "step": 6706 }, { "epoch": 1.1636016655100625, "grad_norm": 0.9578310251235962, "learning_rate": 2.142956378991646e-05, "loss": 0.5344, "step": 6707 }, { "epoch": 1.1637751561415683, "grad_norm": 0.7448020577430725, "learning_rate": 2.1422767877620382e-05, "loss": 0.5674, "step": 6708 }, { "epoch": 1.1639486467730742, "grad_norm": 1.105573058128357, "learning_rate": 2.1415971800210437e-05, "loss": 0.4822, "step": 6709 }, { "epoch": 1.16412213740458, "grad_norm": 1.0320541858673096, "learning_rate": 2.1409175558475307e-05, "loss": 0.6262, "step": 6710 }, { "epoch": 1.164295628036086, "grad_norm": 0.6695195436477661, "learning_rate": 2.1402379153203716e-05, "loss": 0.4943, "step": 6711 }, { "epoch": 1.164469118667592, "grad_norm": 0.8333204984664917, "learning_rate": 2.1395582585184397e-05, "loss": 0.4807, "step": 6712 }, { "epoch": 1.1646426092990978, "grad_norm": 0.642285943031311, "learning_rate": 2.1388785855206083e-05, "loss": 0.538, "step": 6713 }, { "epoch": 1.1648160999306039, "grad_norm": 1.5361217260360718, "learning_rate": 2.138198896405756e-05, "loss": 0.5182, "step": 6714 }, { "epoch": 1.1649895905621097, "grad_norm": 0.8022282719612122, "learning_rate": 2.1375191912527605e-05, "loss": 0.4966, "step": 6715 }, { "epoch": 1.1651630811936156, "grad_norm": 0.7354987859725952, "learning_rate": 2.1368394701405023e-05, "loss": 0.531, "step": 6716 }, { "epoch": 1.1653365718251214, "grad_norm": 0.6895592212677002, "learning_rate": 2.1361597331478647e-05, "loss": 0.6543, "step": 6717 }, { "epoch": 1.1655100624566272, "grad_norm": 1.0040481090545654, "learning_rate": 2.1354799803537312e-05, "loss": 0.504, "step": 6718 }, { "epoch": 1.1656835530881333, "grad_norm": 1.0474437475204468, "learning_rate": 2.1348002118369878e-05, "loss": 0.5848, "step": 6719 }, { "epoch": 1.1658570437196392, "grad_norm": 0.9867828488349915, "learning_rate": 2.134120427676523e-05, "loss": 0.5018, "step": 6720 }, { "epoch": 1.166030534351145, "grad_norm": 0.7619190216064453, "learning_rate": 2.133440627951226e-05, "loss": 0.5389, "step": 6721 }, { "epoch": 1.1662040249826509, "grad_norm": 0.8639862537384033, "learning_rate": 2.1327608127399895e-05, "loss": 0.467, "step": 6722 }, { "epoch": 1.166377515614157, "grad_norm": 0.8269357681274414, "learning_rate": 2.1320809821217052e-05, "loss": 0.5723, "step": 6723 }, { "epoch": 1.1665510062456628, "grad_norm": 0.6355014443397522, "learning_rate": 2.1314011361752687e-05, "loss": 0.52, "step": 6724 }, { "epoch": 1.1667244968771686, "grad_norm": 1.0886286497116089, "learning_rate": 2.1307212749795782e-05, "loss": 0.4779, "step": 6725 }, { "epoch": 1.1668979875086745, "grad_norm": 0.6519858837127686, "learning_rate": 2.1300413986135313e-05, "loss": 0.6099, "step": 6726 }, { "epoch": 1.1670714781401803, "grad_norm": 0.614009439945221, "learning_rate": 2.1293615071560277e-05, "loss": 0.5667, "step": 6727 }, { "epoch": 1.1672449687716864, "grad_norm": 1.0716733932495117, "learning_rate": 2.128681600685971e-05, "loss": 0.4767, "step": 6728 }, { "epoch": 1.1674184594031922, "grad_norm": 0.8412541747093201, "learning_rate": 2.1280016792822645e-05, "loss": 0.5222, "step": 6729 }, { "epoch": 1.167591950034698, "grad_norm": 0.8214019536972046, "learning_rate": 2.1273217430238146e-05, "loss": 0.5557, "step": 6730 }, { "epoch": 1.1677654406662041, "grad_norm": 0.6152772903442383, "learning_rate": 2.1266417919895274e-05, "loss": 0.5397, "step": 6731 }, { "epoch": 1.16793893129771, "grad_norm": 0.9900790452957153, "learning_rate": 2.1259618262583122e-05, "loss": 0.5312, "step": 6732 }, { "epoch": 1.1681124219292158, "grad_norm": 0.837279736995697, "learning_rate": 2.1252818459090814e-05, "loss": 0.4365, "step": 6733 }, { "epoch": 1.1682859125607217, "grad_norm": 1.2113510370254517, "learning_rate": 2.1246018510207452e-05, "loss": 0.5173, "step": 6734 }, { "epoch": 1.1684594031922275, "grad_norm": 0.5946635007858276, "learning_rate": 2.12392184167222e-05, "loss": 0.4924, "step": 6735 }, { "epoch": 1.1686328938237336, "grad_norm": 0.6552371382713318, "learning_rate": 2.1232418179424204e-05, "loss": 0.6875, "step": 6736 }, { "epoch": 1.1688063844552394, "grad_norm": 0.6151350736618042, "learning_rate": 2.1225617799102638e-05, "loss": 0.6597, "step": 6737 }, { "epoch": 1.1689798750867453, "grad_norm": 0.6632144451141357, "learning_rate": 2.12188172765467e-05, "loss": 0.6016, "step": 6738 }, { "epoch": 1.1691533657182511, "grad_norm": 1.4138222932815552, "learning_rate": 2.1212016612545604e-05, "loss": 0.4871, "step": 6739 }, { "epoch": 1.1693268563497572, "grad_norm": 0.7806059718132019, "learning_rate": 2.120521580788856e-05, "loss": 0.5596, "step": 6740 }, { "epoch": 1.169500346981263, "grad_norm": 0.7281691431999207, "learning_rate": 2.1198414863364822e-05, "loss": 0.5558, "step": 6741 }, { "epoch": 1.1696738376127689, "grad_norm": 0.6963688135147095, "learning_rate": 2.1191613779763635e-05, "loss": 0.5559, "step": 6742 }, { "epoch": 1.1698473282442747, "grad_norm": 0.8871251940727234, "learning_rate": 2.1184812557874287e-05, "loss": 0.6001, "step": 6743 }, { "epoch": 1.1700208188757808, "grad_norm": 0.7859398722648621, "learning_rate": 2.1178011198486064e-05, "loss": 0.5193, "step": 6744 }, { "epoch": 1.1701943095072866, "grad_norm": 0.736751139163971, "learning_rate": 2.117120970238826e-05, "loss": 0.4547, "step": 6745 }, { "epoch": 1.1703678001387925, "grad_norm": 0.8680247664451599, "learning_rate": 2.1164408070370212e-05, "loss": 0.4491, "step": 6746 }, { "epoch": 1.1705412907702983, "grad_norm": 0.9516239166259766, "learning_rate": 2.1157606303221253e-05, "loss": 0.4543, "step": 6747 }, { "epoch": 1.1707147814018044, "grad_norm": 0.6905824542045593, "learning_rate": 2.1150804401730724e-05, "loss": 0.6207, "step": 6748 }, { "epoch": 1.1708882720333103, "grad_norm": 0.7213749289512634, "learning_rate": 2.114400236668801e-05, "loss": 0.5787, "step": 6749 }, { "epoch": 1.171061762664816, "grad_norm": 0.8580946922302246, "learning_rate": 2.1137200198882484e-05, "loss": 0.5155, "step": 6750 }, { "epoch": 1.171235253296322, "grad_norm": 0.9151411652565002, "learning_rate": 2.113039789910355e-05, "loss": 0.5774, "step": 6751 }, { "epoch": 1.1714087439278278, "grad_norm": 1.038617730140686, "learning_rate": 2.112359546814063e-05, "loss": 0.373, "step": 6752 }, { "epoch": 1.1715822345593339, "grad_norm": 0.8604259490966797, "learning_rate": 2.111679290678314e-05, "loss": 0.5082, "step": 6753 }, { "epoch": 1.1717557251908397, "grad_norm": 0.9970014691352844, "learning_rate": 2.110999021582053e-05, "loss": 0.4684, "step": 6754 }, { "epoch": 1.1719292158223455, "grad_norm": 0.6839314103126526, "learning_rate": 2.110318739604227e-05, "loss": 0.462, "step": 6755 }, { "epoch": 1.1721027064538514, "grad_norm": 0.75113844871521, "learning_rate": 2.1096384448237824e-05, "loss": 0.6161, "step": 6756 }, { "epoch": 1.1722761970853575, "grad_norm": 1.0658999681472778, "learning_rate": 2.1089581373196686e-05, "loss": 0.448, "step": 6757 }, { "epoch": 1.1724496877168633, "grad_norm": 1.0717624425888062, "learning_rate": 2.1082778171708355e-05, "loss": 0.5045, "step": 6758 }, { "epoch": 1.1726231783483692, "grad_norm": 0.7669315338134766, "learning_rate": 2.1075974844562354e-05, "loss": 0.564, "step": 6759 }, { "epoch": 1.172796668979875, "grad_norm": 0.8839965462684631, "learning_rate": 2.1069171392548226e-05, "loss": 0.5275, "step": 6760 }, { "epoch": 1.172970159611381, "grad_norm": 0.8475131988525391, "learning_rate": 2.106236781645551e-05, "loss": 0.5673, "step": 6761 }, { "epoch": 1.173143650242887, "grad_norm": 0.7701882123947144, "learning_rate": 2.1055564117073767e-05, "loss": 0.4849, "step": 6762 }, { "epoch": 1.1733171408743928, "grad_norm": 1.208903193473816, "learning_rate": 2.104876029519258e-05, "loss": 0.4667, "step": 6763 }, { "epoch": 1.1734906315058986, "grad_norm": 1.1139265298843384, "learning_rate": 2.1041956351601543e-05, "loss": 0.5071, "step": 6764 }, { "epoch": 1.1736641221374047, "grad_norm": 0.6981279253959656, "learning_rate": 2.1035152287090254e-05, "loss": 0.499, "step": 6765 }, { "epoch": 1.1738376127689105, "grad_norm": 0.7927271127700806, "learning_rate": 2.1028348102448338e-05, "loss": 0.4532, "step": 6766 }, { "epoch": 1.1740111034004164, "grad_norm": 0.7971651554107666, "learning_rate": 2.1021543798465426e-05, "loss": 0.5505, "step": 6767 }, { "epoch": 1.1741845940319222, "grad_norm": 0.6657699346542358, "learning_rate": 2.1014739375931166e-05, "loss": 0.5161, "step": 6768 }, { "epoch": 1.174358084663428, "grad_norm": 1.4957996606826782, "learning_rate": 2.1007934835635213e-05, "loss": 0.5529, "step": 6769 }, { "epoch": 1.1745315752949341, "grad_norm": 0.5472845435142517, "learning_rate": 2.1001130178367256e-05, "loss": 0.6241, "step": 6770 }, { "epoch": 1.17470506592644, "grad_norm": 0.6698144674301147, "learning_rate": 2.0994325404916967e-05, "loss": 0.5625, "step": 6771 }, { "epoch": 1.1748785565579458, "grad_norm": 1.0643738508224487, "learning_rate": 2.098752051607406e-05, "loss": 0.3807, "step": 6772 }, { "epoch": 1.1750520471894519, "grad_norm": 1.1606651544570923, "learning_rate": 2.0980715512628255e-05, "loss": 0.5408, "step": 6773 }, { "epoch": 1.1752255378209577, "grad_norm": 0.7926031351089478, "learning_rate": 2.097391039536926e-05, "loss": 0.4792, "step": 6774 }, { "epoch": 1.1753990284524636, "grad_norm": 0.6972630620002747, "learning_rate": 2.0967105165086835e-05, "loss": 0.5724, "step": 6775 }, { "epoch": 1.1755725190839694, "grad_norm": 0.8949503302574158, "learning_rate": 2.0960299822570728e-05, "loss": 0.489, "step": 6776 }, { "epoch": 1.1757460097154753, "grad_norm": 0.7007628083229065, "learning_rate": 2.0953494368610702e-05, "loss": 0.5793, "step": 6777 }, { "epoch": 1.1759195003469813, "grad_norm": 0.7773805260658264, "learning_rate": 2.094668880399655e-05, "loss": 0.5586, "step": 6778 }, { "epoch": 1.1760929909784872, "grad_norm": 0.6971990466117859, "learning_rate": 2.0939883129518056e-05, "loss": 0.5836, "step": 6779 }, { "epoch": 1.176266481609993, "grad_norm": 0.7796101570129395, "learning_rate": 2.0933077345965032e-05, "loss": 0.6204, "step": 6780 }, { "epoch": 1.1764399722414989, "grad_norm": 1.1607211828231812, "learning_rate": 2.092627145412729e-05, "loss": 0.556, "step": 6781 }, { "epoch": 1.176613462873005, "grad_norm": 0.8690618276596069, "learning_rate": 2.0919465454794672e-05, "loss": 0.546, "step": 6782 }, { "epoch": 1.1767869535045108, "grad_norm": 0.6725994348526001, "learning_rate": 2.091265934875701e-05, "loss": 0.5078, "step": 6783 }, { "epoch": 1.1769604441360166, "grad_norm": 2.117467164993286, "learning_rate": 2.0905853136804173e-05, "loss": 0.4205, "step": 6784 }, { "epoch": 1.1771339347675225, "grad_norm": 1.204289436340332, "learning_rate": 2.0899046819726025e-05, "loss": 0.5707, "step": 6785 }, { "epoch": 1.1773074253990283, "grad_norm": 0.7071922421455383, "learning_rate": 2.089224039831244e-05, "loss": 0.5833, "step": 6786 }, { "epoch": 1.1774809160305344, "grad_norm": 0.6543411016464233, "learning_rate": 2.088543387335332e-05, "loss": 0.5482, "step": 6787 }, { "epoch": 1.1776544066620402, "grad_norm": 0.9622500538825989, "learning_rate": 2.087862724563857e-05, "loss": 0.613, "step": 6788 }, { "epoch": 1.177827897293546, "grad_norm": 1.0394973754882812, "learning_rate": 2.0871820515958102e-05, "loss": 0.5042, "step": 6789 }, { "epoch": 1.1780013879250522, "grad_norm": 0.8372440338134766, "learning_rate": 2.0865013685101844e-05, "loss": 0.5587, "step": 6790 }, { "epoch": 1.178174878556558, "grad_norm": 0.999835193157196, "learning_rate": 2.085820675385975e-05, "loss": 0.5917, "step": 6791 }, { "epoch": 1.1783483691880638, "grad_norm": 0.7350196242332458, "learning_rate": 2.085139972302175e-05, "loss": 0.5868, "step": 6792 }, { "epoch": 1.1785218598195697, "grad_norm": 1.2748345136642456, "learning_rate": 2.0844592593377827e-05, "loss": 0.4858, "step": 6793 }, { "epoch": 1.1786953504510755, "grad_norm": 0.6886605024337769, "learning_rate": 2.083778536571795e-05, "loss": 0.5068, "step": 6794 }, { "epoch": 1.1788688410825816, "grad_norm": 0.7593858242034912, "learning_rate": 2.0830978040832098e-05, "loss": 0.5242, "step": 6795 }, { "epoch": 1.1790423317140875, "grad_norm": 0.7221627831459045, "learning_rate": 2.0824170619510283e-05, "loss": 0.4913, "step": 6796 }, { "epoch": 1.1792158223455933, "grad_norm": 0.8000275492668152, "learning_rate": 2.081736310254251e-05, "loss": 0.5968, "step": 6797 }, { "epoch": 1.1793893129770991, "grad_norm": 0.7031962871551514, "learning_rate": 2.0810555490718787e-05, "loss": 0.538, "step": 6798 }, { "epoch": 1.1795628036086052, "grad_norm": 0.6669413447380066, "learning_rate": 2.0803747784829166e-05, "loss": 0.4932, "step": 6799 }, { "epoch": 1.179736294240111, "grad_norm": 0.6491285562515259, "learning_rate": 2.0796939985663666e-05, "loss": 0.454, "step": 6800 }, { "epoch": 1.179909784871617, "grad_norm": 0.90521240234375, "learning_rate": 2.079013209401236e-05, "loss": 0.4735, "step": 6801 }, { "epoch": 1.1800832755031228, "grad_norm": 0.7703285217285156, "learning_rate": 2.0783324110665306e-05, "loss": 0.4625, "step": 6802 }, { "epoch": 1.1802567661346288, "grad_norm": 0.7215616703033447, "learning_rate": 2.0776516036412565e-05, "loss": 0.6068, "step": 6803 }, { "epoch": 1.1804302567661347, "grad_norm": 0.9224920272827148, "learning_rate": 2.0769707872044242e-05, "loss": 0.5234, "step": 6804 }, { "epoch": 1.1806037473976405, "grad_norm": 0.664199948310852, "learning_rate": 2.076289961835042e-05, "loss": 0.5541, "step": 6805 }, { "epoch": 1.1807772380291464, "grad_norm": 0.6375593543052673, "learning_rate": 2.0756091276121212e-05, "loss": 0.4492, "step": 6806 }, { "epoch": 1.1809507286606524, "grad_norm": 0.7353429198265076, "learning_rate": 2.074928284614673e-05, "loss": 0.417, "step": 6807 }, { "epoch": 1.1811242192921583, "grad_norm": 0.7317572236061096, "learning_rate": 2.0742474329217094e-05, "loss": 0.4982, "step": 6808 }, { "epoch": 1.1812977099236641, "grad_norm": 0.7748720645904541, "learning_rate": 2.0735665726122453e-05, "loss": 0.4381, "step": 6809 }, { "epoch": 1.18147120055517, "grad_norm": 0.6508405804634094, "learning_rate": 2.0728857037652945e-05, "loss": 0.5945, "step": 6810 }, { "epoch": 1.1816446911866758, "grad_norm": 1.3365366458892822, "learning_rate": 2.0722048264598727e-05, "loss": 0.4197, "step": 6811 }, { "epoch": 1.1818181818181819, "grad_norm": 0.8428879380226135, "learning_rate": 2.0715239407749973e-05, "loss": 0.5508, "step": 6812 }, { "epoch": 1.1819916724496877, "grad_norm": 1.0093331336975098, "learning_rate": 2.0708430467896848e-05, "loss": 0.5153, "step": 6813 }, { "epoch": 1.1821651630811936, "grad_norm": 3.6245226860046387, "learning_rate": 2.070162144582954e-05, "loss": 0.5547, "step": 6814 }, { "epoch": 1.1823386537126996, "grad_norm": 0.6548336744308472, "learning_rate": 2.0694812342338252e-05, "loss": 0.5793, "step": 6815 }, { "epoch": 1.1825121443442055, "grad_norm": 0.7358191013336182, "learning_rate": 2.0688003158213172e-05, "loss": 0.5156, "step": 6816 }, { "epoch": 1.1826856349757113, "grad_norm": 0.8403127193450928, "learning_rate": 2.0681193894244536e-05, "loss": 0.4742, "step": 6817 }, { "epoch": 1.1828591256072172, "grad_norm": 0.7100670337677002, "learning_rate": 2.067438455122255e-05, "loss": 0.3762, "step": 6818 }, { "epoch": 1.183032616238723, "grad_norm": 0.7438713908195496, "learning_rate": 2.0667575129937446e-05, "loss": 0.5098, "step": 6819 }, { "epoch": 1.183206106870229, "grad_norm": 0.8709805607795715, "learning_rate": 2.0660765631179474e-05, "loss": 0.3984, "step": 6820 }, { "epoch": 1.183379597501735, "grad_norm": 1.246625304222107, "learning_rate": 2.0653956055738876e-05, "loss": 0.495, "step": 6821 }, { "epoch": 1.1835530881332408, "grad_norm": 0.9622408747673035, "learning_rate": 2.0647146404405923e-05, "loss": 0.5278, "step": 6822 }, { "epoch": 1.1837265787647466, "grad_norm": 0.6290227770805359, "learning_rate": 2.0640336677970875e-05, "loss": 0.5858, "step": 6823 }, { "epoch": 1.1839000693962527, "grad_norm": 0.7876569032669067, "learning_rate": 2.0633526877224006e-05, "loss": 0.5173, "step": 6824 }, { "epoch": 1.1840735600277585, "grad_norm": 0.7701499462127686, "learning_rate": 2.0626717002955606e-05, "loss": 0.5896, "step": 6825 }, { "epoch": 1.1842470506592644, "grad_norm": 0.605043888092041, "learning_rate": 2.061990705595597e-05, "loss": 0.6094, "step": 6826 }, { "epoch": 1.1844205412907702, "grad_norm": 1.107033610343933, "learning_rate": 2.061309703701539e-05, "loss": 0.496, "step": 6827 }, { "epoch": 1.184594031922276, "grad_norm": 0.8433276414871216, "learning_rate": 2.060628694692419e-05, "loss": 0.6025, "step": 6828 }, { "epoch": 1.1847675225537821, "grad_norm": 0.6223244071006775, "learning_rate": 2.0599476786472686e-05, "loss": 0.558, "step": 6829 }, { "epoch": 1.184941013185288, "grad_norm": 0.5799712538719177, "learning_rate": 2.0592666556451197e-05, "loss": 0.5922, "step": 6830 }, { "epoch": 1.1851145038167938, "grad_norm": 0.9107916951179504, "learning_rate": 2.0585856257650067e-05, "loss": 0.4255, "step": 6831 }, { "epoch": 1.1852879944483, "grad_norm": 3.0029711723327637, "learning_rate": 2.0579045890859635e-05, "loss": 0.4358, "step": 6832 }, { "epoch": 1.1854614850798058, "grad_norm": 0.6497785449028015, "learning_rate": 2.057223545687025e-05, "loss": 0.5289, "step": 6833 }, { "epoch": 1.1856349757113116, "grad_norm": 0.9125445485115051, "learning_rate": 2.0565424956472278e-05, "loss": 0.5168, "step": 6834 }, { "epoch": 1.1858084663428174, "grad_norm": 0.8241649270057678, "learning_rate": 2.0558614390456075e-05, "loss": 0.5028, "step": 6835 }, { "epoch": 1.1859819569743233, "grad_norm": 1.6441798210144043, "learning_rate": 2.055180375961203e-05, "loss": 0.5607, "step": 6836 }, { "epoch": 1.1861554476058294, "grad_norm": 2.1024038791656494, "learning_rate": 2.0544993064730507e-05, "loss": 0.5106, "step": 6837 }, { "epoch": 1.1863289382373352, "grad_norm": 0.6179422736167908, "learning_rate": 2.053818230660191e-05, "loss": 0.5565, "step": 6838 }, { "epoch": 1.186502428868841, "grad_norm": 0.6559538245201111, "learning_rate": 2.053137148601662e-05, "loss": 0.4733, "step": 6839 }, { "epoch": 1.186675919500347, "grad_norm": 0.7132574915885925, "learning_rate": 2.052456060376506e-05, "loss": 0.5116, "step": 6840 }, { "epoch": 1.186849410131853, "grad_norm": 0.9821913838386536, "learning_rate": 2.051774966063763e-05, "loss": 0.4854, "step": 6841 }, { "epoch": 1.1870229007633588, "grad_norm": 0.6002830862998962, "learning_rate": 2.051093865742474e-05, "loss": 0.6154, "step": 6842 }, { "epoch": 1.1871963913948647, "grad_norm": 0.6456484198570251, "learning_rate": 2.0504127594916833e-05, "loss": 0.4579, "step": 6843 }, { "epoch": 1.1873698820263705, "grad_norm": 0.7543825507164001, "learning_rate": 2.0497316473904324e-05, "loss": 0.516, "step": 6844 }, { "epoch": 1.1875433726578764, "grad_norm": 0.8759680986404419, "learning_rate": 2.049050529517766e-05, "loss": 0.5153, "step": 6845 }, { "epoch": 1.1877168632893824, "grad_norm": 0.7334983348846436, "learning_rate": 2.048369405952729e-05, "loss": 0.5588, "step": 6846 }, { "epoch": 1.1878903539208883, "grad_norm": 0.7166071534156799, "learning_rate": 2.047688276774366e-05, "loss": 0.5132, "step": 6847 }, { "epoch": 1.188063844552394, "grad_norm": 0.6016421318054199, "learning_rate": 2.0470071420617222e-05, "loss": 0.59, "step": 6848 }, { "epoch": 1.1882373351839002, "grad_norm": 0.6861176490783691, "learning_rate": 2.046326001893846e-05, "loss": 0.6113, "step": 6849 }, { "epoch": 1.188410825815406, "grad_norm": 0.6174821257591248, "learning_rate": 2.045644856349782e-05, "loss": 0.5931, "step": 6850 }, { "epoch": 1.1885843164469119, "grad_norm": 1.0377147197723389, "learning_rate": 2.0449637055085798e-05, "loss": 0.5638, "step": 6851 }, { "epoch": 1.1887578070784177, "grad_norm": 0.6978501081466675, "learning_rate": 2.0442825494492876e-05, "loss": 0.5193, "step": 6852 }, { "epoch": 1.1889312977099236, "grad_norm": 0.7452558875083923, "learning_rate": 2.043601388250953e-05, "loss": 0.4707, "step": 6853 }, { "epoch": 1.1891047883414296, "grad_norm": 1.0568429231643677, "learning_rate": 2.0429202219926273e-05, "loss": 0.4493, "step": 6854 }, { "epoch": 1.1892782789729355, "grad_norm": 0.8652346134185791, "learning_rate": 2.0422390507533593e-05, "loss": 0.5179, "step": 6855 }, { "epoch": 1.1894517696044413, "grad_norm": 0.9493082761764526, "learning_rate": 2.0415578746122007e-05, "loss": 0.4335, "step": 6856 }, { "epoch": 1.1896252602359472, "grad_norm": 0.6517853140830994, "learning_rate": 2.0408766936482016e-05, "loss": 0.5033, "step": 6857 }, { "epoch": 1.1897987508674532, "grad_norm": 0.8095531463623047, "learning_rate": 2.0401955079404154e-05, "loss": 0.5819, "step": 6858 }, { "epoch": 1.189972241498959, "grad_norm": 0.5268284678459167, "learning_rate": 2.039514317567893e-05, "loss": 0.5623, "step": 6859 }, { "epoch": 1.190145732130465, "grad_norm": 0.6493697762489319, "learning_rate": 2.0388331226096886e-05, "loss": 0.5021, "step": 6860 }, { "epoch": 1.1903192227619708, "grad_norm": 0.6705477833747864, "learning_rate": 2.0381519231448544e-05, "loss": 0.6041, "step": 6861 }, { "epoch": 1.1904927133934768, "grad_norm": 0.8729289770126343, "learning_rate": 2.0374707192524455e-05, "loss": 0.4406, "step": 6862 }, { "epoch": 1.1906662040249827, "grad_norm": 0.8540909886360168, "learning_rate": 2.0367895110115166e-05, "loss": 0.4896, "step": 6863 }, { "epoch": 1.1908396946564885, "grad_norm": 0.8730484843254089, "learning_rate": 2.036108298501121e-05, "loss": 0.4495, "step": 6864 }, { "epoch": 1.1910131852879944, "grad_norm": 1.2369223833084106, "learning_rate": 2.035427081800316e-05, "loss": 0.6508, "step": 6865 }, { "epoch": 1.1911866759195004, "grad_norm": 0.7968800067901611, "learning_rate": 2.034745860988156e-05, "loss": 0.514, "step": 6866 }, { "epoch": 1.1913601665510063, "grad_norm": 0.8077332973480225, "learning_rate": 2.0340646361436994e-05, "loss": 0.4382, "step": 6867 }, { "epoch": 1.1915336571825121, "grad_norm": 0.9408939480781555, "learning_rate": 2.0333834073460018e-05, "loss": 0.4429, "step": 6868 }, { "epoch": 1.191707147814018, "grad_norm": 0.7798374891281128, "learning_rate": 2.03270217467412e-05, "loss": 0.4854, "step": 6869 }, { "epoch": 1.1918806384455238, "grad_norm": 0.5698572397232056, "learning_rate": 2.032020938207114e-05, "loss": 0.573, "step": 6870 }, { "epoch": 1.19205412907703, "grad_norm": 0.6410461068153381, "learning_rate": 2.03133969802404e-05, "loss": 0.5502, "step": 6871 }, { "epoch": 1.1922276197085357, "grad_norm": 0.6756616234779358, "learning_rate": 2.030658454203958e-05, "loss": 0.4795, "step": 6872 }, { "epoch": 1.1924011103400416, "grad_norm": 0.9079252481460571, "learning_rate": 2.0299772068259263e-05, "loss": 0.4208, "step": 6873 }, { "epoch": 1.1925746009715477, "grad_norm": 0.7513494491577148, "learning_rate": 2.029295955969005e-05, "loss": 0.5061, "step": 6874 }, { "epoch": 1.1927480916030535, "grad_norm": 0.7170233726501465, "learning_rate": 2.0286147017122538e-05, "loss": 0.5616, "step": 6875 }, { "epoch": 1.1929215822345594, "grad_norm": 0.675975501537323, "learning_rate": 2.027933444134733e-05, "loss": 0.5427, "step": 6876 }, { "epoch": 1.1930950728660652, "grad_norm": 2.301645278930664, "learning_rate": 2.0272521833155038e-05, "loss": 0.5244, "step": 6877 }, { "epoch": 1.193268563497571, "grad_norm": 0.7549624443054199, "learning_rate": 2.0265709193336266e-05, "loss": 0.6567, "step": 6878 }, { "epoch": 1.1934420541290771, "grad_norm": 0.7844575047492981, "learning_rate": 2.0258896522681635e-05, "loss": 0.5343, "step": 6879 }, { "epoch": 1.193615544760583, "grad_norm": 0.9245356321334839, "learning_rate": 2.025208382198176e-05, "loss": 0.4023, "step": 6880 }, { "epoch": 1.1937890353920888, "grad_norm": 1.1952787637710571, "learning_rate": 2.024527109202726e-05, "loss": 0.4994, "step": 6881 }, { "epoch": 1.1939625260235947, "grad_norm": 0.7105661034584045, "learning_rate": 2.0238458333608766e-05, "loss": 0.5658, "step": 6882 }, { "epoch": 1.1941360166551007, "grad_norm": 0.8205832839012146, "learning_rate": 2.023164554751691e-05, "loss": 0.4775, "step": 6883 }, { "epoch": 1.1943095072866066, "grad_norm": 0.6414397358894348, "learning_rate": 2.0224832734542314e-05, "loss": 0.5118, "step": 6884 }, { "epoch": 1.1944829979181124, "grad_norm": 0.6226764917373657, "learning_rate": 2.0218019895475612e-05, "loss": 0.5337, "step": 6885 }, { "epoch": 1.1946564885496183, "grad_norm": 0.8643163442611694, "learning_rate": 2.0211207031107457e-05, "loss": 0.5604, "step": 6886 }, { "epoch": 1.194829979181124, "grad_norm": 0.5629163980484009, "learning_rate": 2.0204394142228473e-05, "loss": 0.5669, "step": 6887 }, { "epoch": 1.1950034698126302, "grad_norm": 2.306591033935547, "learning_rate": 2.0197581229629317e-05, "loss": 0.4446, "step": 6888 }, { "epoch": 1.195176960444136, "grad_norm": 0.6565482020378113, "learning_rate": 2.0190768294100626e-05, "loss": 0.6115, "step": 6889 }, { "epoch": 1.1953504510756419, "grad_norm": 0.9194337129592896, "learning_rate": 2.018395533643305e-05, "loss": 0.5658, "step": 6890 }, { "epoch": 1.195523941707148, "grad_norm": 0.6584299802780151, "learning_rate": 2.0177142357417243e-05, "loss": 0.6061, "step": 6891 }, { "epoch": 1.1956974323386538, "grad_norm": 3.2561211585998535, "learning_rate": 2.017032935784386e-05, "loss": 0.5623, "step": 6892 }, { "epoch": 1.1958709229701596, "grad_norm": 0.9865792393684387, "learning_rate": 2.0163516338503556e-05, "loss": 0.4666, "step": 6893 }, { "epoch": 1.1960444136016655, "grad_norm": 0.7090287804603577, "learning_rate": 2.0156703300186997e-05, "loss": 0.5883, "step": 6894 }, { "epoch": 1.1962179042331713, "grad_norm": 1.2316094636917114, "learning_rate": 2.0149890243684827e-05, "loss": 0.4607, "step": 6895 }, { "epoch": 1.1963913948646774, "grad_norm": 1.4981300830841064, "learning_rate": 2.0143077169787725e-05, "loss": 0.3534, "step": 6896 }, { "epoch": 1.1965648854961832, "grad_norm": 0.7626928687095642, "learning_rate": 2.0136264079286354e-05, "loss": 0.4762, "step": 6897 }, { "epoch": 1.196738376127689, "grad_norm": 0.8046074509620667, "learning_rate": 2.012945097297137e-05, "loss": 0.4624, "step": 6898 }, { "epoch": 1.196911866759195, "grad_norm": 1.0258681774139404, "learning_rate": 2.0122637851633455e-05, "loss": 0.4755, "step": 6899 }, { "epoch": 1.197085357390701, "grad_norm": 0.85794997215271, "learning_rate": 2.0115824716063273e-05, "loss": 0.4977, "step": 6900 }, { "epoch": 1.1972588480222068, "grad_norm": 0.6790227890014648, "learning_rate": 2.01090115670515e-05, "loss": 0.5133, "step": 6901 }, { "epoch": 1.1974323386537127, "grad_norm": 1.0489752292633057, "learning_rate": 2.0102198405388806e-05, "loss": 0.42, "step": 6902 }, { "epoch": 1.1976058292852185, "grad_norm": 0.8256232142448425, "learning_rate": 2.0095385231865864e-05, "loss": 0.4509, "step": 6903 }, { "epoch": 1.1977793199167244, "grad_norm": 0.8152967691421509, "learning_rate": 2.008857204727336e-05, "loss": 0.4639, "step": 6904 }, { "epoch": 1.1979528105482304, "grad_norm": 0.693350076675415, "learning_rate": 2.0081758852401964e-05, "loss": 0.5236, "step": 6905 }, { "epoch": 1.1981263011797363, "grad_norm": 1.2258223295211792, "learning_rate": 2.0074945648042353e-05, "loss": 0.424, "step": 6906 }, { "epoch": 1.1982997918112421, "grad_norm": 0.9876985549926758, "learning_rate": 2.006813243498522e-05, "loss": 0.537, "step": 6907 }, { "epoch": 1.1984732824427482, "grad_norm": 0.880332887172699, "learning_rate": 2.0061319214021237e-05, "loss": 0.5062, "step": 6908 }, { "epoch": 1.198646773074254, "grad_norm": 0.7460529804229736, "learning_rate": 2.005450598594109e-05, "loss": 0.5757, "step": 6909 }, { "epoch": 1.19882026370576, "grad_norm": 0.6492012143135071, "learning_rate": 2.0047692751535454e-05, "loss": 0.598, "step": 6910 }, { "epoch": 1.1989937543372657, "grad_norm": 0.9850520491600037, "learning_rate": 2.004087951159502e-05, "loss": 0.4226, "step": 6911 }, { "epoch": 1.1991672449687716, "grad_norm": 1.0020174980163574, "learning_rate": 2.0034066266910475e-05, "loss": 0.5101, "step": 6912 }, { "epoch": 1.1993407356002777, "grad_norm": 0.7792971730232239, "learning_rate": 2.0027253018272498e-05, "loss": 0.5876, "step": 6913 }, { "epoch": 1.1995142262317835, "grad_norm": 2.3527796268463135, "learning_rate": 2.0020439766471775e-05, "loss": 0.486, "step": 6914 }, { "epoch": 1.1996877168632893, "grad_norm": 0.6588824391365051, "learning_rate": 2.0013626512298996e-05, "loss": 0.6024, "step": 6915 }, { "epoch": 1.1998612074947952, "grad_norm": 0.7276008129119873, "learning_rate": 2.000681325654484e-05, "loss": 0.4839, "step": 6916 }, { "epoch": 1.2000346981263013, "grad_norm": 0.8354933261871338, "learning_rate": 2e-05, "loss": 0.5, "step": 6917 }, { "epoch": 1.200208188757807, "grad_norm": 0.727831244468689, "learning_rate": 1.999318674345516e-05, "loss": 0.5079, "step": 6918 }, { "epoch": 1.200381679389313, "grad_norm": 0.8418210744857788, "learning_rate": 1.9986373487701014e-05, "loss": 0.526, "step": 6919 }, { "epoch": 1.2005551700208188, "grad_norm": 1.2001993656158447, "learning_rate": 1.997956023352823e-05, "loss": 0.4738, "step": 6920 }, { "epoch": 1.2007286606523249, "grad_norm": 0.7162483930587769, "learning_rate": 1.9972746981727505e-05, "loss": 0.4237, "step": 6921 }, { "epoch": 1.2009021512838307, "grad_norm": 0.7253560423851013, "learning_rate": 1.9965933733089535e-05, "loss": 0.5894, "step": 6922 }, { "epoch": 1.2010756419153366, "grad_norm": 1.0020893812179565, "learning_rate": 1.9959120488404986e-05, "loss": 0.6227, "step": 6923 }, { "epoch": 1.2012491325468424, "grad_norm": 0.7527416348457336, "learning_rate": 1.995230724846455e-05, "loss": 0.4722, "step": 6924 }, { "epoch": 1.2014226231783485, "grad_norm": 0.6637478470802307, "learning_rate": 1.9945494014058915e-05, "loss": 0.5936, "step": 6925 }, { "epoch": 1.2015961138098543, "grad_norm": 0.8485209941864014, "learning_rate": 1.993868078597877e-05, "loss": 0.4106, "step": 6926 }, { "epoch": 1.2017696044413602, "grad_norm": 0.8287883400917053, "learning_rate": 1.9931867565014785e-05, "loss": 0.5531, "step": 6927 }, { "epoch": 1.201943095072866, "grad_norm": 0.9901884198188782, "learning_rate": 1.9925054351957647e-05, "loss": 0.5551, "step": 6928 }, { "epoch": 1.2021165857043719, "grad_norm": 0.8183797001838684, "learning_rate": 1.9918241147598043e-05, "loss": 0.5022, "step": 6929 }, { "epoch": 1.202290076335878, "grad_norm": 0.7539851069450378, "learning_rate": 1.9911427952726644e-05, "loss": 0.4634, "step": 6930 }, { "epoch": 1.2024635669673838, "grad_norm": 0.6804836988449097, "learning_rate": 1.990461476813414e-05, "loss": 0.5204, "step": 6931 }, { "epoch": 1.2026370575988896, "grad_norm": 0.7465506792068481, "learning_rate": 1.9897801594611204e-05, "loss": 0.4366, "step": 6932 }, { "epoch": 1.2028105482303957, "grad_norm": 1.096960186958313, "learning_rate": 1.9890988432948508e-05, "loss": 0.4982, "step": 6933 }, { "epoch": 1.2029840388619015, "grad_norm": 1.396376371383667, "learning_rate": 1.988417528393673e-05, "loss": 0.5154, "step": 6934 }, { "epoch": 1.2031575294934074, "grad_norm": 0.8130907416343689, "learning_rate": 1.9877362148366555e-05, "loss": 0.5557, "step": 6935 }, { "epoch": 1.2033310201249132, "grad_norm": 0.7420282959938049, "learning_rate": 1.9870549027028635e-05, "loss": 0.4702, "step": 6936 }, { "epoch": 1.203504510756419, "grad_norm": 0.7370325922966003, "learning_rate": 1.9863735920713653e-05, "loss": 0.4708, "step": 6937 }, { "epoch": 1.2036780013879251, "grad_norm": 0.9016007781028748, "learning_rate": 1.9856922830212286e-05, "loss": 0.5317, "step": 6938 }, { "epoch": 1.203851492019431, "grad_norm": 0.8386825919151306, "learning_rate": 1.9850109756315176e-05, "loss": 0.4097, "step": 6939 }, { "epoch": 1.2040249826509368, "grad_norm": 0.7259785532951355, "learning_rate": 1.984329669981301e-05, "loss": 0.4994, "step": 6940 }, { "epoch": 1.2041984732824427, "grad_norm": 0.8297507762908936, "learning_rate": 1.983648366149644e-05, "loss": 0.5159, "step": 6941 }, { "epoch": 1.2043719639139487, "grad_norm": 1.1169674396514893, "learning_rate": 1.9829670642156147e-05, "loss": 0.4608, "step": 6942 }, { "epoch": 1.2045454545454546, "grad_norm": 0.5992581844329834, "learning_rate": 1.982285764258276e-05, "loss": 0.5321, "step": 6943 }, { "epoch": 1.2047189451769604, "grad_norm": 0.663050651550293, "learning_rate": 1.981604466356695e-05, "loss": 0.5244, "step": 6944 }, { "epoch": 1.2048924358084663, "grad_norm": 0.8010140657424927, "learning_rate": 1.9809231705899384e-05, "loss": 0.4982, "step": 6945 }, { "epoch": 1.2050659264399721, "grad_norm": 1.018222689628601, "learning_rate": 1.980241877037069e-05, "loss": 0.5137, "step": 6946 }, { "epoch": 1.2052394170714782, "grad_norm": 0.7997981905937195, "learning_rate": 1.9795605857771527e-05, "loss": 0.5393, "step": 6947 }, { "epoch": 1.205412907702984, "grad_norm": 0.842365562915802, "learning_rate": 1.9788792968892553e-05, "loss": 0.5146, "step": 6948 }, { "epoch": 1.2055863983344899, "grad_norm": 1.6468077898025513, "learning_rate": 1.978198010452439e-05, "loss": 0.66, "step": 6949 }, { "epoch": 1.205759888965996, "grad_norm": 0.8329322338104248, "learning_rate": 1.977516726545769e-05, "loss": 0.5454, "step": 6950 }, { "epoch": 1.2059333795975018, "grad_norm": 0.5599431395530701, "learning_rate": 1.97683544524831e-05, "loss": 0.6841, "step": 6951 }, { "epoch": 1.2061068702290076, "grad_norm": 0.7028522491455078, "learning_rate": 1.976154166639124e-05, "loss": 0.5775, "step": 6952 }, { "epoch": 1.2062803608605135, "grad_norm": 0.8986274003982544, "learning_rate": 1.9754728907972745e-05, "loss": 0.4996, "step": 6953 }, { "epoch": 1.2064538514920193, "grad_norm": 1.0744249820709229, "learning_rate": 1.9747916178018246e-05, "loss": 0.5764, "step": 6954 }, { "epoch": 1.2066273421235254, "grad_norm": 1.011263132095337, "learning_rate": 1.974110347731837e-05, "loss": 0.5446, "step": 6955 }, { "epoch": 1.2068008327550312, "grad_norm": 0.8077258467674255, "learning_rate": 1.9734290806663738e-05, "loss": 0.5444, "step": 6956 }, { "epoch": 1.206974323386537, "grad_norm": 0.702093243598938, "learning_rate": 1.972747816684497e-05, "loss": 0.3857, "step": 6957 }, { "epoch": 1.207147814018043, "grad_norm": 0.7129010558128357, "learning_rate": 1.9720665558652676e-05, "loss": 0.543, "step": 6958 }, { "epoch": 1.207321304649549, "grad_norm": 0.7714444398880005, "learning_rate": 1.971385298287747e-05, "loss": 0.4484, "step": 6959 }, { "epoch": 1.2074947952810549, "grad_norm": 0.7922438979148865, "learning_rate": 1.970704044030995e-05, "loss": 0.4916, "step": 6960 }, { "epoch": 1.2076682859125607, "grad_norm": 1.1409392356872559, "learning_rate": 1.9700227931740747e-05, "loss": 0.5165, "step": 6961 }, { "epoch": 1.2078417765440665, "grad_norm": 0.777527391910553, "learning_rate": 1.9693415457960426e-05, "loss": 0.484, "step": 6962 }, { "epoch": 1.2080152671755726, "grad_norm": 0.7120514512062073, "learning_rate": 1.9686603019759602e-05, "loss": 0.5157, "step": 6963 }, { "epoch": 1.2081887578070785, "grad_norm": 0.5948834419250488, "learning_rate": 1.9679790617928872e-05, "loss": 0.7015, "step": 6964 }, { "epoch": 1.2083622484385843, "grad_norm": 1.2719899415969849, "learning_rate": 1.9672978253258802e-05, "loss": 0.5376, "step": 6965 }, { "epoch": 1.2085357390700902, "grad_norm": 0.8294084072113037, "learning_rate": 1.966616592653999e-05, "loss": 0.4955, "step": 6966 }, { "epoch": 1.2087092297015962, "grad_norm": 0.8743494153022766, "learning_rate": 1.9659353638563016e-05, "loss": 0.4963, "step": 6967 }, { "epoch": 1.208882720333102, "grad_norm": 1.3656015396118164, "learning_rate": 1.9652541390118443e-05, "loss": 0.4631, "step": 6968 }, { "epoch": 1.209056210964608, "grad_norm": 1.0569608211517334, "learning_rate": 1.9645729181996846e-05, "loss": 0.5775, "step": 6969 }, { "epoch": 1.2092297015961138, "grad_norm": 0.7379013299942017, "learning_rate": 1.963891701498879e-05, "loss": 0.5563, "step": 6970 }, { "epoch": 1.2094031922276196, "grad_norm": 0.7099375128746033, "learning_rate": 1.9632104889884844e-05, "loss": 0.6627, "step": 6971 }, { "epoch": 1.2095766828591257, "grad_norm": 0.7381976246833801, "learning_rate": 1.9625292807475548e-05, "loss": 0.6183, "step": 6972 }, { "epoch": 1.2097501734906315, "grad_norm": 0.9388188719749451, "learning_rate": 1.9618480768551456e-05, "loss": 0.4502, "step": 6973 }, { "epoch": 1.2099236641221374, "grad_norm": 0.6752588748931885, "learning_rate": 1.9611668773903124e-05, "loss": 0.5327, "step": 6974 }, { "epoch": 1.2100971547536432, "grad_norm": 1.0585280656814575, "learning_rate": 1.9604856824321076e-05, "loss": 0.4553, "step": 6975 }, { "epoch": 1.2102706453851493, "grad_norm": 0.8340199589729309, "learning_rate": 1.9598044920595853e-05, "loss": 0.5798, "step": 6976 }, { "epoch": 1.2104441360166551, "grad_norm": 0.7418455481529236, "learning_rate": 1.9591233063517988e-05, "loss": 0.4629, "step": 6977 }, { "epoch": 1.210617626648161, "grad_norm": 0.8385781049728394, "learning_rate": 1.9584421253878e-05, "loss": 0.4479, "step": 6978 }, { "epoch": 1.2107911172796668, "grad_norm": 0.6666460633277893, "learning_rate": 1.957760949246641e-05, "loss": 0.4623, "step": 6979 }, { "epoch": 1.2109646079111729, "grad_norm": 0.6215448975563049, "learning_rate": 1.9570797780073737e-05, "loss": 0.4785, "step": 6980 }, { "epoch": 1.2111380985426787, "grad_norm": 1.3658274412155151, "learning_rate": 1.9563986117490476e-05, "loss": 0.4884, "step": 6981 }, { "epoch": 1.2113115891741846, "grad_norm": 0.9017288088798523, "learning_rate": 1.955717450550713e-05, "loss": 0.4233, "step": 6982 }, { "epoch": 1.2114850798056904, "grad_norm": 0.8542715311050415, "learning_rate": 1.9550362944914202e-05, "loss": 0.4851, "step": 6983 }, { "epoch": 1.2116585704371965, "grad_norm": 0.8546389937400818, "learning_rate": 1.9543551436502186e-05, "loss": 0.4491, "step": 6984 }, { "epoch": 1.2118320610687023, "grad_norm": 0.6703542470932007, "learning_rate": 1.953673998106155e-05, "loss": 0.5707, "step": 6985 }, { "epoch": 1.2120055517002082, "grad_norm": 0.6389185786247253, "learning_rate": 1.9529928579382778e-05, "loss": 0.6223, "step": 6986 }, { "epoch": 1.212179042331714, "grad_norm": 0.7991594672203064, "learning_rate": 1.952311723225635e-05, "loss": 0.5618, "step": 6987 }, { "epoch": 1.2123525329632199, "grad_norm": 0.7087690234184265, "learning_rate": 1.9516305940472714e-05, "loss": 0.5809, "step": 6988 }, { "epoch": 1.212526023594726, "grad_norm": 1.2076226472854614, "learning_rate": 1.950949470482234e-05, "loss": 0.5221, "step": 6989 }, { "epoch": 1.2126995142262318, "grad_norm": 0.9708346724510193, "learning_rate": 1.9502683526095683e-05, "loss": 0.4517, "step": 6990 }, { "epoch": 1.2128730048577376, "grad_norm": 1.456707239151001, "learning_rate": 1.9495872405083178e-05, "loss": 0.5212, "step": 6991 }, { "epoch": 1.2130464954892437, "grad_norm": 1.0726432800292969, "learning_rate": 1.948906134257526e-05, "loss": 0.4384, "step": 6992 }, { "epoch": 1.2132199861207495, "grad_norm": 1.0962409973144531, "learning_rate": 1.9482250339362383e-05, "loss": 0.4515, "step": 6993 }, { "epoch": 1.2133934767522554, "grad_norm": 0.7538032531738281, "learning_rate": 1.947543939623495e-05, "loss": 0.4922, "step": 6994 }, { "epoch": 1.2135669673837612, "grad_norm": 0.6666850447654724, "learning_rate": 1.9468628513983382e-05, "loss": 0.5846, "step": 6995 }, { "epoch": 1.213740458015267, "grad_norm": 1.1026028394699097, "learning_rate": 1.9461817693398105e-05, "loss": 0.4419, "step": 6996 }, { "epoch": 1.2139139486467732, "grad_norm": 0.9549009799957275, "learning_rate": 1.94550069352695e-05, "loss": 0.3739, "step": 6997 }, { "epoch": 1.214087439278279, "grad_norm": 0.7214818596839905, "learning_rate": 1.944819624038798e-05, "loss": 0.4482, "step": 6998 }, { "epoch": 1.2142609299097848, "grad_norm": 0.8108447194099426, "learning_rate": 1.9441385609543925e-05, "loss": 0.4645, "step": 6999 }, { "epoch": 1.2144344205412907, "grad_norm": 1.1380990743637085, "learning_rate": 1.943457504352773e-05, "loss": 0.4808, "step": 7000 }, { "epoch": 1.2146079111727968, "grad_norm": 1.3156373500823975, "learning_rate": 1.9427764543129756e-05, "loss": 0.4548, "step": 7001 }, { "epoch": 1.2147814018043026, "grad_norm": 0.7099040746688843, "learning_rate": 1.942095410914037e-05, "loss": 0.6019, "step": 7002 }, { "epoch": 1.2149548924358085, "grad_norm": 1.1159850358963013, "learning_rate": 1.941414374234994e-05, "loss": 0.6477, "step": 7003 }, { "epoch": 1.2151283830673143, "grad_norm": 1.122902512550354, "learning_rate": 1.9407333443548806e-05, "loss": 0.481, "step": 7004 }, { "epoch": 1.2153018736988201, "grad_norm": 0.8170135021209717, "learning_rate": 1.9400523213527324e-05, "loss": 0.6008, "step": 7005 }, { "epoch": 1.2154753643303262, "grad_norm": 0.811800479888916, "learning_rate": 1.9393713053075816e-05, "loss": 0.6046, "step": 7006 }, { "epoch": 1.215648854961832, "grad_norm": 0.790324866771698, "learning_rate": 1.9386902962984613e-05, "loss": 0.4691, "step": 7007 }, { "epoch": 1.215822345593338, "grad_norm": 0.954623281955719, "learning_rate": 1.9380092944044036e-05, "loss": 0.5197, "step": 7008 }, { "epoch": 1.215995836224844, "grad_norm": 0.7938142418861389, "learning_rate": 1.9373282997044404e-05, "loss": 0.4347, "step": 7009 }, { "epoch": 1.2161693268563498, "grad_norm": 0.6294236183166504, "learning_rate": 1.9366473122776e-05, "loss": 0.5979, "step": 7010 }, { "epoch": 1.2163428174878557, "grad_norm": 0.7136385440826416, "learning_rate": 1.935966332202913e-05, "loss": 0.5465, "step": 7011 }, { "epoch": 1.2165163081193615, "grad_norm": 2.5835301876068115, "learning_rate": 1.9352853595594077e-05, "loss": 0.4447, "step": 7012 }, { "epoch": 1.2166897987508674, "grad_norm": 0.7536041140556335, "learning_rate": 1.9346043944261127e-05, "loss": 0.6012, "step": 7013 }, { "epoch": 1.2168632893823734, "grad_norm": 0.5537289381027222, "learning_rate": 1.9339234368820533e-05, "loss": 0.6119, "step": 7014 }, { "epoch": 1.2170367800138793, "grad_norm": 0.7403371334075928, "learning_rate": 1.9332424870062557e-05, "loss": 0.5985, "step": 7015 }, { "epoch": 1.2172102706453851, "grad_norm": 1.8811901807785034, "learning_rate": 1.932561544877746e-05, "loss": 0.4784, "step": 7016 }, { "epoch": 1.217383761276891, "grad_norm": 0.7084232568740845, "learning_rate": 1.9318806105755474e-05, "loss": 0.4897, "step": 7017 }, { "epoch": 1.217557251908397, "grad_norm": 0.8779373168945312, "learning_rate": 1.9311996841786825e-05, "loss": 0.4393, "step": 7018 }, { "epoch": 1.2177307425399029, "grad_norm": 0.6649523973464966, "learning_rate": 1.9305187657661758e-05, "loss": 0.4427, "step": 7019 }, { "epoch": 1.2179042331714087, "grad_norm": 0.7244396805763245, "learning_rate": 1.9298378554170463e-05, "loss": 0.5028, "step": 7020 }, { "epoch": 1.2180777238029146, "grad_norm": 1.0198639631271362, "learning_rate": 1.9291569532103155e-05, "loss": 0.4269, "step": 7021 }, { "epoch": 1.2182512144344206, "grad_norm": 0.6823749542236328, "learning_rate": 1.9284760592250037e-05, "loss": 0.5574, "step": 7022 }, { "epoch": 1.2184247050659265, "grad_norm": 1.2214984893798828, "learning_rate": 1.9277951735401276e-05, "loss": 0.5712, "step": 7023 }, { "epoch": 1.2185981956974323, "grad_norm": 0.7772981524467468, "learning_rate": 1.9271142962347058e-05, "loss": 0.5751, "step": 7024 }, { "epoch": 1.2187716863289382, "grad_norm": 1.054054617881775, "learning_rate": 1.926433427387755e-05, "loss": 0.5153, "step": 7025 }, { "epoch": 1.2189451769604442, "grad_norm": 0.9166788458824158, "learning_rate": 1.925752567078291e-05, "loss": 0.5312, "step": 7026 }, { "epoch": 1.21911866759195, "grad_norm": 0.7016672492027283, "learning_rate": 1.925071715385328e-05, "loss": 0.5461, "step": 7027 }, { "epoch": 1.219292158223456, "grad_norm": 0.7995874285697937, "learning_rate": 1.924390872387879e-05, "loss": 0.614, "step": 7028 }, { "epoch": 1.2194656488549618, "grad_norm": 0.8818855285644531, "learning_rate": 1.9237100381649586e-05, "loss": 0.6071, "step": 7029 }, { "epoch": 1.2196391394864676, "grad_norm": 0.7914130687713623, "learning_rate": 1.923029212795576e-05, "loss": 0.4543, "step": 7030 }, { "epoch": 1.2198126301179737, "grad_norm": 0.7113505005836487, "learning_rate": 1.9223483963587435e-05, "loss": 0.4501, "step": 7031 }, { "epoch": 1.2199861207494795, "grad_norm": 0.7274286150932312, "learning_rate": 1.9216675889334704e-05, "loss": 0.5541, "step": 7032 }, { "epoch": 1.2201596113809854, "grad_norm": 0.7852982878684998, "learning_rate": 1.9209867905987643e-05, "loss": 0.5742, "step": 7033 }, { "epoch": 1.2203331020124912, "grad_norm": 1.0195103883743286, "learning_rate": 1.9203060014336334e-05, "loss": 0.5359, "step": 7034 }, { "epoch": 1.2205065926439973, "grad_norm": 0.9056409597396851, "learning_rate": 1.9196252215170848e-05, "loss": 0.5608, "step": 7035 }, { "epoch": 1.2206800832755031, "grad_norm": 0.7680935263633728, "learning_rate": 1.9189444509281216e-05, "loss": 0.4695, "step": 7036 }, { "epoch": 1.220853573907009, "grad_norm": 0.6084681153297424, "learning_rate": 1.9182636897457496e-05, "loss": 0.5288, "step": 7037 }, { "epoch": 1.2210270645385148, "grad_norm": 0.9058395028114319, "learning_rate": 1.9175829380489727e-05, "loss": 0.4277, "step": 7038 }, { "epoch": 1.221200555170021, "grad_norm": 0.7618827819824219, "learning_rate": 1.9169021959167905e-05, "loss": 0.4824, "step": 7039 }, { "epoch": 1.2213740458015268, "grad_norm": 1.0226870775222778, "learning_rate": 1.9162214634282055e-05, "loss": 0.4717, "step": 7040 }, { "epoch": 1.2215475364330326, "grad_norm": 0.8193103671073914, "learning_rate": 1.9155407406622177e-05, "loss": 0.5593, "step": 7041 }, { "epoch": 1.2217210270645384, "grad_norm": 0.6056613326072693, "learning_rate": 1.9148600276978254e-05, "loss": 0.6573, "step": 7042 }, { "epoch": 1.2218945176960445, "grad_norm": 0.8092714548110962, "learning_rate": 1.9141793246140257e-05, "loss": 0.5009, "step": 7043 }, { "epoch": 1.2220680083275504, "grad_norm": 0.8361021876335144, "learning_rate": 1.9134986314898156e-05, "loss": 0.4344, "step": 7044 }, { "epoch": 1.2222414989590562, "grad_norm": 0.816299557685852, "learning_rate": 1.9128179484041908e-05, "loss": 0.5144, "step": 7045 }, { "epoch": 1.222414989590562, "grad_norm": 0.8609830141067505, "learning_rate": 1.9121372754361437e-05, "loss": 0.5228, "step": 7046 }, { "epoch": 1.222588480222068, "grad_norm": 0.8750868439674377, "learning_rate": 1.911456612664668e-05, "loss": 0.3973, "step": 7047 }, { "epoch": 1.222761970853574, "grad_norm": 0.809299111366272, "learning_rate": 1.9107759601687562e-05, "loss": 0.5521, "step": 7048 }, { "epoch": 1.2229354614850798, "grad_norm": 0.811198890209198, "learning_rate": 1.9100953180273985e-05, "loss": 0.4379, "step": 7049 }, { "epoch": 1.2231089521165857, "grad_norm": 0.823857843875885, "learning_rate": 1.909414686319583e-05, "loss": 0.4657, "step": 7050 }, { "epoch": 1.2232824427480917, "grad_norm": 0.8796674609184265, "learning_rate": 1.9087340651242995e-05, "loss": 0.4768, "step": 7051 }, { "epoch": 1.2234559333795976, "grad_norm": 1.166199803352356, "learning_rate": 1.9080534545205334e-05, "loss": 0.4156, "step": 7052 }, { "epoch": 1.2236294240111034, "grad_norm": 1.3353817462921143, "learning_rate": 1.9073728545872717e-05, "loss": 0.5551, "step": 7053 }, { "epoch": 1.2238029146426093, "grad_norm": 0.8513011932373047, "learning_rate": 1.9066922654034975e-05, "loss": 0.5559, "step": 7054 }, { "epoch": 1.223976405274115, "grad_norm": 1.4038827419281006, "learning_rate": 1.906011687048195e-05, "loss": 0.4769, "step": 7055 }, { "epoch": 1.2241498959056212, "grad_norm": 0.9031059741973877, "learning_rate": 1.9053311196003457e-05, "loss": 0.4774, "step": 7056 }, { "epoch": 1.224323386537127, "grad_norm": 1.3607302904129028, "learning_rate": 1.9046505631389297e-05, "loss": 0.5842, "step": 7057 }, { "epoch": 1.2244968771686329, "grad_norm": 0.5526803731918335, "learning_rate": 1.9039700177429282e-05, "loss": 0.6354, "step": 7058 }, { "epoch": 1.2246703678001387, "grad_norm": 0.6610570549964905, "learning_rate": 1.9032894834913172e-05, "loss": 0.4768, "step": 7059 }, { "epoch": 1.2248438584316448, "grad_norm": 1.3063814640045166, "learning_rate": 1.9026089604630743e-05, "loss": 0.5011, "step": 7060 }, { "epoch": 1.2250173490631506, "grad_norm": 1.0547101497650146, "learning_rate": 1.901928448737176e-05, "loss": 0.4799, "step": 7061 }, { "epoch": 1.2251908396946565, "grad_norm": 0.7201014757156372, "learning_rate": 1.9012479483925942e-05, "loss": 0.5045, "step": 7062 }, { "epoch": 1.2253643303261623, "grad_norm": 0.7078681588172913, "learning_rate": 1.9005674595083033e-05, "loss": 0.4971, "step": 7063 }, { "epoch": 1.2255378209576682, "grad_norm": 0.8380757570266724, "learning_rate": 1.8998869821632757e-05, "loss": 0.4874, "step": 7064 }, { "epoch": 1.2257113115891742, "grad_norm": 0.7439054846763611, "learning_rate": 1.8992065164364793e-05, "loss": 0.5192, "step": 7065 }, { "epoch": 1.22588480222068, "grad_norm": 0.697297990322113, "learning_rate": 1.898526062406884e-05, "loss": 0.4739, "step": 7066 }, { "epoch": 1.226058292852186, "grad_norm": 0.8759002089500427, "learning_rate": 1.8978456201534587e-05, "loss": 0.5293, "step": 7067 }, { "epoch": 1.226231783483692, "grad_norm": 0.665690541267395, "learning_rate": 1.8971651897551672e-05, "loss": 0.6355, "step": 7068 }, { "epoch": 1.2264052741151978, "grad_norm": 0.8324854969978333, "learning_rate": 1.896484771290975e-05, "loss": 0.53, "step": 7069 }, { "epoch": 1.2265787647467037, "grad_norm": 0.949212908744812, "learning_rate": 1.8958043648398457e-05, "loss": 0.5593, "step": 7070 }, { "epoch": 1.2267522553782095, "grad_norm": 1.048187017440796, "learning_rate": 1.8951239704807424e-05, "loss": 0.4734, "step": 7071 }, { "epoch": 1.2269257460097154, "grad_norm": 0.7703348994255066, "learning_rate": 1.8944435882926236e-05, "loss": 0.5366, "step": 7072 }, { "epoch": 1.2270992366412214, "grad_norm": 0.6312597990036011, "learning_rate": 1.8937632183544495e-05, "loss": 0.4113, "step": 7073 }, { "epoch": 1.2272727272727273, "grad_norm": 1.056952953338623, "learning_rate": 1.893082860745178e-05, "loss": 0.5793, "step": 7074 }, { "epoch": 1.2274462179042331, "grad_norm": 0.8766586780548096, "learning_rate": 1.892402515543765e-05, "loss": 0.5913, "step": 7075 }, { "epoch": 1.227619708535739, "grad_norm": 0.6282052397727966, "learning_rate": 1.8917221828291652e-05, "loss": 0.6182, "step": 7076 }, { "epoch": 1.227793199167245, "grad_norm": 0.7187272310256958, "learning_rate": 1.8910418626803328e-05, "loss": 0.5498, "step": 7077 }, { "epoch": 1.227966689798751, "grad_norm": 0.7514996528625488, "learning_rate": 1.8903615551762182e-05, "loss": 0.5405, "step": 7078 }, { "epoch": 1.2281401804302567, "grad_norm": 0.8368105888366699, "learning_rate": 1.8896812603957732e-05, "loss": 0.4512, "step": 7079 }, { "epoch": 1.2283136710617626, "grad_norm": 1.9764368534088135, "learning_rate": 1.8890009784179476e-05, "loss": 0.4839, "step": 7080 }, { "epoch": 1.2284871616932687, "grad_norm": 0.899013340473175, "learning_rate": 1.8883207093216865e-05, "loss": 0.4847, "step": 7081 }, { "epoch": 1.2286606523247745, "grad_norm": 1.7209386825561523, "learning_rate": 1.8876404531859376e-05, "loss": 0.4948, "step": 7082 }, { "epoch": 1.2288341429562804, "grad_norm": 0.5783351063728333, "learning_rate": 1.8869602100896446e-05, "loss": 0.5078, "step": 7083 }, { "epoch": 1.2290076335877862, "grad_norm": 0.771289050579071, "learning_rate": 1.8862799801117523e-05, "loss": 0.5085, "step": 7084 }, { "epoch": 1.2291811242192923, "grad_norm": 0.8002333641052246, "learning_rate": 1.8855997633311995e-05, "loss": 0.4974, "step": 7085 }, { "epoch": 1.229354614850798, "grad_norm": 0.791644275188446, "learning_rate": 1.884919559826928e-05, "loss": 0.464, "step": 7086 }, { "epoch": 1.229528105482304, "grad_norm": 0.8913290500640869, "learning_rate": 1.884239369677876e-05, "loss": 0.4539, "step": 7087 }, { "epoch": 1.2297015961138098, "grad_norm": 0.8145409226417542, "learning_rate": 1.8835591929629795e-05, "loss": 0.4933, "step": 7088 }, { "epoch": 1.2298750867453156, "grad_norm": 0.63204425573349, "learning_rate": 1.8828790297611742e-05, "loss": 0.5541, "step": 7089 }, { "epoch": 1.2300485773768217, "grad_norm": 0.7683725953102112, "learning_rate": 1.882198880151395e-05, "loss": 0.5432, "step": 7090 }, { "epoch": 1.2302220680083276, "grad_norm": 0.8539783954620361, "learning_rate": 1.8815187442125716e-05, "loss": 0.4684, "step": 7091 }, { "epoch": 1.2303955586398334, "grad_norm": 0.7257925868034363, "learning_rate": 1.8808386220236365e-05, "loss": 0.4198, "step": 7092 }, { "epoch": 1.2305690492713393, "grad_norm": 0.622333288192749, "learning_rate": 1.8801585136635188e-05, "loss": 0.5259, "step": 7093 }, { "epoch": 1.2307425399028453, "grad_norm": 0.7729082107543945, "learning_rate": 1.8794784192111448e-05, "loss": 0.5856, "step": 7094 }, { "epoch": 1.2309160305343512, "grad_norm": 0.7876334190368652, "learning_rate": 1.8787983387454402e-05, "loss": 0.5573, "step": 7095 }, { "epoch": 1.231089521165857, "grad_norm": 0.693033754825592, "learning_rate": 1.8781182723453303e-05, "loss": 0.6444, "step": 7096 }, { "epoch": 1.2312630117973629, "grad_norm": 0.5837717652320862, "learning_rate": 1.877438220089737e-05, "loss": 0.5847, "step": 7097 }, { "epoch": 1.231436502428869, "grad_norm": 0.7758795022964478, "learning_rate": 1.8767581820575803e-05, "loss": 0.6039, "step": 7098 }, { "epoch": 1.2316099930603748, "grad_norm": 1.2334827184677124, "learning_rate": 1.8760781583277804e-05, "loss": 0.4965, "step": 7099 }, { "epoch": 1.2317834836918806, "grad_norm": 0.7810057401657104, "learning_rate": 1.875398148979255e-05, "loss": 0.5089, "step": 7100 }, { "epoch": 1.2319569743233865, "grad_norm": 0.6216914653778076, "learning_rate": 1.8747181540909193e-05, "loss": 0.4302, "step": 7101 }, { "epoch": 1.2321304649548925, "grad_norm": 1.337182641029358, "learning_rate": 1.874038173741688e-05, "loss": 0.512, "step": 7102 }, { "epoch": 1.2323039555863984, "grad_norm": 0.7715322971343994, "learning_rate": 1.8733582080104736e-05, "loss": 0.5513, "step": 7103 }, { "epoch": 1.2324774462179042, "grad_norm": 1.0621769428253174, "learning_rate": 1.8726782569761864e-05, "loss": 0.5614, "step": 7104 }, { "epoch": 1.23265093684941, "grad_norm": 1.0144551992416382, "learning_rate": 1.8719983207177358e-05, "loss": 0.5898, "step": 7105 }, { "epoch": 1.232824427480916, "grad_norm": 0.8312714695930481, "learning_rate": 1.87131839931403e-05, "loss": 0.4198, "step": 7106 }, { "epoch": 1.232997918112422, "grad_norm": 0.7275205850601196, "learning_rate": 1.8706384928439726e-05, "loss": 0.3987, "step": 7107 }, { "epoch": 1.2331714087439278, "grad_norm": 1.1540769338607788, "learning_rate": 1.8699586013864694e-05, "loss": 0.5364, "step": 7108 }, { "epoch": 1.2333448993754337, "grad_norm": 0.7301573753356934, "learning_rate": 1.8692787250204228e-05, "loss": 0.4919, "step": 7109 }, { "epoch": 1.2335183900069397, "grad_norm": 0.9503788948059082, "learning_rate": 1.8685988638247316e-05, "loss": 0.4124, "step": 7110 }, { "epoch": 1.2336918806384456, "grad_norm": 1.893370270729065, "learning_rate": 1.8679190178782954e-05, "loss": 0.6019, "step": 7111 }, { "epoch": 1.2338653712699514, "grad_norm": 0.7545531392097473, "learning_rate": 1.8672391872600108e-05, "loss": 0.4888, "step": 7112 }, { "epoch": 1.2340388619014573, "grad_norm": 0.8170567750930786, "learning_rate": 1.8665593720487743e-05, "loss": 0.4727, "step": 7113 }, { "epoch": 1.2342123525329631, "grad_norm": 0.7147437930107117, "learning_rate": 1.8658795723234774e-05, "loss": 0.5671, "step": 7114 }, { "epoch": 1.2343858431644692, "grad_norm": 1.3411918878555298, "learning_rate": 1.8651997881630125e-05, "loss": 0.4481, "step": 7115 }, { "epoch": 1.234559333795975, "grad_norm": 0.723257303237915, "learning_rate": 1.8645200196462698e-05, "loss": 0.4497, "step": 7116 }, { "epoch": 1.234732824427481, "grad_norm": 1.2285536527633667, "learning_rate": 1.8638402668521356e-05, "loss": 0.4507, "step": 7117 }, { "epoch": 1.2349063150589867, "grad_norm": 0.7114037275314331, "learning_rate": 1.8631605298594977e-05, "loss": 0.5894, "step": 7118 }, { "epoch": 1.2350798056904928, "grad_norm": 0.6353415846824646, "learning_rate": 1.8624808087472405e-05, "loss": 0.5189, "step": 7119 }, { "epoch": 1.2352532963219987, "grad_norm": 0.8124098777770996, "learning_rate": 1.8618011035942444e-05, "loss": 0.4941, "step": 7120 }, { "epoch": 1.2354267869535045, "grad_norm": 0.5769098401069641, "learning_rate": 1.8611214144793917e-05, "loss": 0.504, "step": 7121 }, { "epoch": 1.2356002775850103, "grad_norm": 1.2070446014404297, "learning_rate": 1.860441741481561e-05, "loss": 0.5132, "step": 7122 }, { "epoch": 1.2357737682165162, "grad_norm": 1.2472407817840576, "learning_rate": 1.8597620846796287e-05, "loss": 0.4606, "step": 7123 }, { "epoch": 1.2359472588480223, "grad_norm": 0.790421724319458, "learning_rate": 1.8590824441524696e-05, "loss": 0.3658, "step": 7124 }, { "epoch": 1.236120749479528, "grad_norm": 0.7804902791976929, "learning_rate": 1.8584028199789573e-05, "loss": 0.4025, "step": 7125 }, { "epoch": 1.236294240111034, "grad_norm": 1.3904036283493042, "learning_rate": 1.8577232122379625e-05, "loss": 0.4933, "step": 7126 }, { "epoch": 1.23646773074254, "grad_norm": 0.6702448725700378, "learning_rate": 1.8570436210083547e-05, "loss": 0.6199, "step": 7127 }, { "epoch": 1.2366412213740459, "grad_norm": 1.8516900539398193, "learning_rate": 1.8563640463690015e-05, "loss": 0.5674, "step": 7128 }, { "epoch": 1.2368147120055517, "grad_norm": 0.7270691394805908, "learning_rate": 1.8556844883987682e-05, "loss": 0.5043, "step": 7129 }, { "epoch": 1.2369882026370576, "grad_norm": 0.6551758050918579, "learning_rate": 1.8550049471765176e-05, "loss": 0.6722, "step": 7130 }, { "epoch": 1.2371616932685634, "grad_norm": 0.953046977519989, "learning_rate": 1.854325422781112e-05, "loss": 0.63, "step": 7131 }, { "epoch": 1.2373351839000695, "grad_norm": 0.7443362474441528, "learning_rate": 1.853645915291412e-05, "loss": 0.4866, "step": 7132 }, { "epoch": 1.2375086745315753, "grad_norm": 0.6354418992996216, "learning_rate": 1.8529664247862726e-05, "loss": 0.5452, "step": 7133 }, { "epoch": 1.2376821651630812, "grad_norm": 0.6309183239936829, "learning_rate": 1.8522869513445515e-05, "loss": 0.5303, "step": 7134 }, { "epoch": 1.237855655794587, "grad_norm": 1.0496909618377686, "learning_rate": 1.851607495045102e-05, "loss": 0.5178, "step": 7135 }, { "epoch": 1.238029146426093, "grad_norm": 1.0238380432128906, "learning_rate": 1.850928055966775e-05, "loss": 0.5168, "step": 7136 }, { "epoch": 1.238202637057599, "grad_norm": 1.1535464525222778, "learning_rate": 1.8502486341884215e-05, "loss": 0.4329, "step": 7137 }, { "epoch": 1.2383761276891048, "grad_norm": 1.152807593345642, "learning_rate": 1.8495692297888885e-05, "loss": 0.4663, "step": 7138 }, { "epoch": 1.2385496183206106, "grad_norm": 0.844763457775116, "learning_rate": 1.8488898428470213e-05, "loss": 0.4979, "step": 7139 }, { "epoch": 1.2387231089521167, "grad_norm": 0.8161091208457947, "learning_rate": 1.848210473441664e-05, "loss": 0.5482, "step": 7140 }, { "epoch": 1.2388965995836225, "grad_norm": 0.9885815382003784, "learning_rate": 1.8475311216516576e-05, "loss": 0.5081, "step": 7141 }, { "epoch": 1.2390700902151284, "grad_norm": 0.8437579274177551, "learning_rate": 1.8468517875558433e-05, "loss": 0.5328, "step": 7142 }, { "epoch": 1.2392435808466342, "grad_norm": 1.621742606163025, "learning_rate": 1.8461724712330567e-05, "loss": 0.3857, "step": 7143 }, { "epoch": 1.2394170714781403, "grad_norm": 0.7981905341148376, "learning_rate": 1.845493172762134e-05, "loss": 0.5271, "step": 7144 }, { "epoch": 1.2395905621096461, "grad_norm": 0.6473326086997986, "learning_rate": 1.8448138922219093e-05, "loss": 0.5758, "step": 7145 }, { "epoch": 1.239764052741152, "grad_norm": 0.7194153070449829, "learning_rate": 1.8441346296912128e-05, "loss": 0.5077, "step": 7146 }, { "epoch": 1.2399375433726578, "grad_norm": 0.6089683175086975, "learning_rate": 1.843455385248874e-05, "loss": 0.5817, "step": 7147 }, { "epoch": 1.2401110340041637, "grad_norm": 0.868064284324646, "learning_rate": 1.8427761589737203e-05, "loss": 0.6823, "step": 7148 }, { "epoch": 1.2402845246356697, "grad_norm": 0.8100531697273254, "learning_rate": 1.8420969509445764e-05, "loss": 0.4725, "step": 7149 }, { "epoch": 1.2404580152671756, "grad_norm": 1.2863043546676636, "learning_rate": 1.8414177612402657e-05, "loss": 0.4534, "step": 7150 }, { "epoch": 1.2406315058986814, "grad_norm": 0.734415590763092, "learning_rate": 1.8407385899396085e-05, "loss": 0.4753, "step": 7151 }, { "epoch": 1.2408049965301873, "grad_norm": 0.9520745277404785, "learning_rate": 1.8400594371214234e-05, "loss": 0.4424, "step": 7152 }, { "epoch": 1.2409784871616933, "grad_norm": 1.0317492485046387, "learning_rate": 1.8393803028645267e-05, "loss": 0.5057, "step": 7153 }, { "epoch": 1.2411519777931992, "grad_norm": 0.8807188272476196, "learning_rate": 1.8387011872477338e-05, "loss": 0.4983, "step": 7154 }, { "epoch": 1.241325468424705, "grad_norm": 1.0905085802078247, "learning_rate": 1.8380220903498554e-05, "loss": 0.432, "step": 7155 }, { "epoch": 1.2414989590562109, "grad_norm": 0.6536372900009155, "learning_rate": 1.8373430122497022e-05, "loss": 0.5674, "step": 7156 }, { "epoch": 1.241672449687717, "grad_norm": 0.8610592484474182, "learning_rate": 1.836663953026082e-05, "loss": 0.5138, "step": 7157 }, { "epoch": 1.2418459403192228, "grad_norm": 0.6895486116409302, "learning_rate": 1.835984912757801e-05, "loss": 0.5752, "step": 7158 }, { "epoch": 1.2420194309507286, "grad_norm": 0.7571020722389221, "learning_rate": 1.8353058915236613e-05, "loss": 0.4442, "step": 7159 }, { "epoch": 1.2421929215822345, "grad_norm": 0.7846590280532837, "learning_rate": 1.8346268894024644e-05, "loss": 0.505, "step": 7160 }, { "epoch": 1.2423664122137406, "grad_norm": 0.7504282593727112, "learning_rate": 1.833947906473011e-05, "loss": 0.4613, "step": 7161 }, { "epoch": 1.2425399028452464, "grad_norm": 0.6968479156494141, "learning_rate": 1.8332689428140956e-05, "loss": 0.5884, "step": 7162 }, { "epoch": 1.2427133934767522, "grad_norm": 0.6622795462608337, "learning_rate": 1.8325899985045135e-05, "loss": 0.4684, "step": 7163 }, { "epoch": 1.242886884108258, "grad_norm": 0.7619268298149109, "learning_rate": 1.831911073623058e-05, "loss": 0.5044, "step": 7164 }, { "epoch": 1.243060374739764, "grad_norm": 0.7499359846115112, "learning_rate": 1.8312321682485178e-05, "loss": 0.6053, "step": 7165 }, { "epoch": 1.24323386537127, "grad_norm": 1.0497268438339233, "learning_rate": 1.830553282459681e-05, "loss": 0.5619, "step": 7166 }, { "epoch": 1.2434073560027759, "grad_norm": 0.6962741017341614, "learning_rate": 1.8298744163353338e-05, "loss": 0.4986, "step": 7167 }, { "epoch": 1.2435808466342817, "grad_norm": 0.6543954610824585, "learning_rate": 1.8291955699542584e-05, "loss": 0.557, "step": 7168 }, { "epoch": 1.2437543372657878, "grad_norm": 0.6712220311164856, "learning_rate": 1.828516743395236e-05, "loss": 0.5276, "step": 7169 }, { "epoch": 1.2439278278972936, "grad_norm": 0.9903103113174438, "learning_rate": 1.8278379367370448e-05, "loss": 0.4713, "step": 7170 }, { "epoch": 1.2441013185287995, "grad_norm": 1.084958791732788, "learning_rate": 1.8271591500584625e-05, "loss": 0.4508, "step": 7171 }, { "epoch": 1.2442748091603053, "grad_norm": 1.1252861022949219, "learning_rate": 1.8264803834382622e-05, "loss": 0.5098, "step": 7172 }, { "epoch": 1.2444482997918112, "grad_norm": 1.4772077798843384, "learning_rate": 1.825801636955215e-05, "loss": 0.4685, "step": 7173 }, { "epoch": 1.2446217904233172, "grad_norm": 0.9101685881614685, "learning_rate": 1.8251229106880916e-05, "loss": 0.5321, "step": 7174 }, { "epoch": 1.244795281054823, "grad_norm": 0.6553255319595337, "learning_rate": 1.8244442047156577e-05, "loss": 0.5962, "step": 7175 }, { "epoch": 1.244968771686329, "grad_norm": 1.0963441133499146, "learning_rate": 1.8237655191166785e-05, "loss": 0.3542, "step": 7176 }, { "epoch": 1.2451422623178348, "grad_norm": 0.8238844275474548, "learning_rate": 1.8230868539699166e-05, "loss": 0.4502, "step": 7177 }, { "epoch": 1.2453157529493408, "grad_norm": 0.8224257826805115, "learning_rate": 1.8224082093541306e-05, "loss": 0.5392, "step": 7178 }, { "epoch": 1.2454892435808467, "grad_norm": 0.69416344165802, "learning_rate": 1.821729585348079e-05, "loss": 0.6357, "step": 7179 }, { "epoch": 1.2456627342123525, "grad_norm": 0.916454553604126, "learning_rate": 1.8210509820305174e-05, "loss": 0.5184, "step": 7180 }, { "epoch": 1.2458362248438584, "grad_norm": 0.7400712370872498, "learning_rate": 1.8203723994801974e-05, "loss": 0.5072, "step": 7181 }, { "epoch": 1.2460097154753642, "grad_norm": 0.8262057304382324, "learning_rate": 1.8196938377758696e-05, "loss": 0.6155, "step": 7182 }, { "epoch": 1.2461832061068703, "grad_norm": 0.7413461208343506, "learning_rate": 1.819015296996282e-05, "loss": 0.5012, "step": 7183 }, { "epoch": 1.2463566967383761, "grad_norm": 0.7607156038284302, "learning_rate": 1.818336777220181e-05, "loss": 0.4351, "step": 7184 }, { "epoch": 1.246530187369882, "grad_norm": 1.2906935214996338, "learning_rate": 1.817658278526308e-05, "loss": 0.4164, "step": 7185 }, { "epoch": 1.246703678001388, "grad_norm": 0.6653996109962463, "learning_rate": 1.8169798009934038e-05, "loss": 0.5944, "step": 7186 }, { "epoch": 1.2468771686328939, "grad_norm": 0.8081101179122925, "learning_rate": 1.816301344700208e-05, "loss": 0.5083, "step": 7187 }, { "epoch": 1.2470506592643997, "grad_norm": 0.8156636953353882, "learning_rate": 1.815622909725454e-05, "loss": 0.5066, "step": 7188 }, { "epoch": 1.2472241498959056, "grad_norm": 0.6418240666389465, "learning_rate": 1.8149444961478767e-05, "loss": 0.5635, "step": 7189 }, { "epoch": 1.2473976405274114, "grad_norm": 0.6648577451705933, "learning_rate": 1.8142661040462068e-05, "loss": 0.4795, "step": 7190 }, { "epoch": 1.2475711311589175, "grad_norm": 1.0434131622314453, "learning_rate": 1.8135877334991713e-05, "loss": 0.5547, "step": 7191 }, { "epoch": 1.2477446217904233, "grad_norm": 0.9893107414245605, "learning_rate": 1.8129093845854965e-05, "loss": 0.3964, "step": 7192 }, { "epoch": 1.2479181124219292, "grad_norm": 1.225264310836792, "learning_rate": 1.8122310573839063e-05, "loss": 0.3656, "step": 7193 }, { "epoch": 1.248091603053435, "grad_norm": 1.0399143695831299, "learning_rate": 1.81155275197312e-05, "loss": 0.5874, "step": 7194 }, { "epoch": 1.248265093684941, "grad_norm": 0.6252385377883911, "learning_rate": 1.8108744684318564e-05, "loss": 0.6443, "step": 7195 }, { "epoch": 1.248438584316447, "grad_norm": 0.6317516565322876, "learning_rate": 1.8101962068388315e-05, "loss": 0.6039, "step": 7196 }, { "epoch": 1.2486120749479528, "grad_norm": 0.7467001676559448, "learning_rate": 1.8095179672727575e-05, "loss": 0.524, "step": 7197 }, { "epoch": 1.2487855655794586, "grad_norm": 0.682138979434967, "learning_rate": 1.808839749812346e-05, "loss": 0.4747, "step": 7198 }, { "epoch": 1.2489590562109647, "grad_norm": 1.088470458984375, "learning_rate": 1.8081615545363035e-05, "loss": 0.4355, "step": 7199 }, { "epoch": 1.2491325468424705, "grad_norm": 0.7856414318084717, "learning_rate": 1.807483381523337e-05, "loss": 0.441, "step": 7200 }, { "epoch": 1.2493060374739764, "grad_norm": 0.794278621673584, "learning_rate": 1.8068052308521474e-05, "loss": 0.5021, "step": 7201 }, { "epoch": 1.2494795281054822, "grad_norm": 0.9811384081840515, "learning_rate": 1.8061271026014364e-05, "loss": 0.3582, "step": 7202 }, { "epoch": 1.2496530187369883, "grad_norm": 0.8610075116157532, "learning_rate": 1.8054489968499018e-05, "loss": 0.4745, "step": 7203 }, { "epoch": 1.2498265093684942, "grad_norm": 0.6773804426193237, "learning_rate": 1.8047709136762368e-05, "loss": 0.5815, "step": 7204 }, { "epoch": 1.25, "grad_norm": 0.9444378018379211, "learning_rate": 1.8040928531591347e-05, "loss": 0.4127, "step": 7205 }, { "epoch": 1.2501734906315058, "grad_norm": 0.7318785786628723, "learning_rate": 1.8034148153772864e-05, "loss": 0.5726, "step": 7206 }, { "epoch": 1.2503469812630117, "grad_norm": 0.863391637802124, "learning_rate": 1.8027368004093766e-05, "loss": 0.4684, "step": 7207 }, { "epoch": 1.2505204718945178, "grad_norm": 0.8419562578201294, "learning_rate": 1.8020588083340912e-05, "loss": 0.4878, "step": 7208 }, { "epoch": 1.2506939625260236, "grad_norm": 0.8791180849075317, "learning_rate": 1.801380839230113e-05, "loss": 0.532, "step": 7209 }, { "epoch": 1.2508674531575295, "grad_norm": 0.8953050971031189, "learning_rate": 1.8007028931761184e-05, "loss": 0.4703, "step": 7210 }, { "epoch": 1.2510409437890355, "grad_norm": 1.189442753791809, "learning_rate": 1.8000249702507854e-05, "loss": 0.4456, "step": 7211 }, { "epoch": 1.2512144344205414, "grad_norm": 1.0011205673217773, "learning_rate": 1.7993470705327877e-05, "loss": 0.495, "step": 7212 }, { "epoch": 1.2513879250520472, "grad_norm": 0.903160035610199, "learning_rate": 1.798669194100797e-05, "loss": 0.4813, "step": 7213 }, { "epoch": 1.251561415683553, "grad_norm": 0.7249449491500854, "learning_rate": 1.79799134103348e-05, "loss": 0.4936, "step": 7214 }, { "epoch": 1.251734906315059, "grad_norm": 2.5167765617370605, "learning_rate": 1.7973135114095038e-05, "loss": 0.5417, "step": 7215 }, { "epoch": 1.2519083969465647, "grad_norm": 0.715190589427948, "learning_rate": 1.7966357053075312e-05, "loss": 0.6365, "step": 7216 }, { "epoch": 1.2520818875780708, "grad_norm": 0.7164963483810425, "learning_rate": 1.795957922806221e-05, "loss": 0.6732, "step": 7217 }, { "epoch": 1.2522553782095767, "grad_norm": 1.0228049755096436, "learning_rate": 1.795280163984232e-05, "loss": 0.5239, "step": 7218 }, { "epoch": 1.2524288688410825, "grad_norm": 0.6803464889526367, "learning_rate": 1.7946024289202188e-05, "loss": 0.4541, "step": 7219 }, { "epoch": 1.2526023594725886, "grad_norm": 0.797497034072876, "learning_rate": 1.7939247176928328e-05, "loss": 0.4326, "step": 7220 }, { "epoch": 1.2527758501040944, "grad_norm": 0.9367165565490723, "learning_rate": 1.793247030380723e-05, "loss": 0.6234, "step": 7221 }, { "epoch": 1.2529493407356003, "grad_norm": 0.7852381467819214, "learning_rate": 1.792569367062537e-05, "loss": 0.5708, "step": 7222 }, { "epoch": 1.2531228313671061, "grad_norm": 0.7588016390800476, "learning_rate": 1.7918917278169173e-05, "loss": 0.4335, "step": 7223 }, { "epoch": 1.253296321998612, "grad_norm": 0.87742680311203, "learning_rate": 1.791214112722505e-05, "loss": 0.5884, "step": 7224 }, { "epoch": 1.253469812630118, "grad_norm": 1.2502604722976685, "learning_rate": 1.7905365218579387e-05, "loss": 0.5402, "step": 7225 }, { "epoch": 1.2536433032616239, "grad_norm": 1.3978842496871948, "learning_rate": 1.7898589553018523e-05, "loss": 0.5352, "step": 7226 }, { "epoch": 1.2538167938931297, "grad_norm": 0.7687990069389343, "learning_rate": 1.7891814131328795e-05, "loss": 0.3994, "step": 7227 }, { "epoch": 1.2539902845246358, "grad_norm": 1.105607032775879, "learning_rate": 1.788503895429649e-05, "loss": 0.4652, "step": 7228 }, { "epoch": 1.2541637751561416, "grad_norm": 0.8369463682174683, "learning_rate": 1.787826402270789e-05, "loss": 0.4209, "step": 7229 }, { "epoch": 1.2543372657876475, "grad_norm": 0.6711232662200928, "learning_rate": 1.7871489337349208e-05, "loss": 0.4785, "step": 7230 }, { "epoch": 1.2545107564191533, "grad_norm": 10.019854545593262, "learning_rate": 1.7864714899006672e-05, "loss": 0.5475, "step": 7231 }, { "epoch": 1.2546842470506592, "grad_norm": 0.9782271385192871, "learning_rate": 1.785794070846647e-05, "loss": 0.4442, "step": 7232 }, { "epoch": 1.2548577376821652, "grad_norm": 0.7232918739318848, "learning_rate": 1.7851166766514737e-05, "loss": 0.5372, "step": 7233 }, { "epoch": 1.255031228313671, "grad_norm": 0.8433080315589905, "learning_rate": 1.78443930739376e-05, "loss": 0.386, "step": 7234 }, { "epoch": 1.255204718945177, "grad_norm": 0.7202969193458557, "learning_rate": 1.783761963152117e-05, "loss": 0.4719, "step": 7235 }, { "epoch": 1.2553782095766828, "grad_norm": 1.5871689319610596, "learning_rate": 1.7830846440051493e-05, "loss": 0.5349, "step": 7236 }, { "epoch": 1.2555517002081888, "grad_norm": 0.8011099100112915, "learning_rate": 1.7824073500314614e-05, "loss": 0.5406, "step": 7237 }, { "epoch": 1.2557251908396947, "grad_norm": 0.8340924978256226, "learning_rate": 1.7817300813096548e-05, "loss": 0.4517, "step": 7238 }, { "epoch": 1.2558986814712005, "grad_norm": 0.8279672861099243, "learning_rate": 1.7810528379183262e-05, "loss": 0.4304, "step": 7239 }, { "epoch": 1.2560721721027064, "grad_norm": 0.7791277170181274, "learning_rate": 1.7803756199360704e-05, "loss": 0.4708, "step": 7240 }, { "epoch": 1.2562456627342122, "grad_norm": 1.3934314250946045, "learning_rate": 1.7796984274414797e-05, "loss": 0.5194, "step": 7241 }, { "epoch": 1.2564191533657183, "grad_norm": 0.8346148729324341, "learning_rate": 1.7790212605131448e-05, "loss": 0.4475, "step": 7242 }, { "epoch": 1.2565926439972241, "grad_norm": 0.8930989503860474, "learning_rate": 1.7783441192296488e-05, "loss": 0.4553, "step": 7243 }, { "epoch": 1.25676613462873, "grad_norm": 0.7225217819213867, "learning_rate": 1.7776670036695758e-05, "loss": 0.4896, "step": 7244 }, { "epoch": 1.256939625260236, "grad_norm": 0.9962308406829834, "learning_rate": 1.7769899139115066e-05, "loss": 0.52, "step": 7245 }, { "epoch": 1.257113115891742, "grad_norm": 1.6759709119796753, "learning_rate": 1.776312850034018e-05, "loss": 0.5365, "step": 7246 }, { "epoch": 1.2572866065232478, "grad_norm": 1.0960193872451782, "learning_rate": 1.7756358121156835e-05, "loss": 0.48, "step": 7247 }, { "epoch": 1.2574600971547536, "grad_norm": 0.6902223229408264, "learning_rate": 1.7749588002350748e-05, "loss": 0.5099, "step": 7248 }, { "epoch": 1.2576335877862594, "grad_norm": 1.0590311288833618, "learning_rate": 1.7742818144707588e-05, "loss": 0.3909, "step": 7249 }, { "epoch": 1.2578070784177655, "grad_norm": 0.7316137552261353, "learning_rate": 1.7736048549013013e-05, "loss": 0.463, "step": 7250 }, { "epoch": 1.2579805690492714, "grad_norm": 0.8568545579910278, "learning_rate": 1.7729279216052652e-05, "loss": 0.5719, "step": 7251 }, { "epoch": 1.2581540596807772, "grad_norm": 0.799411416053772, "learning_rate": 1.7722510146612075e-05, "loss": 0.4921, "step": 7252 }, { "epoch": 1.2583275503122833, "grad_norm": 0.6816998720169067, "learning_rate": 1.771574134147685e-05, "loss": 0.5752, "step": 7253 }, { "epoch": 1.2585010409437891, "grad_norm": 0.9275938272476196, "learning_rate": 1.770897280143251e-05, "loss": 0.4592, "step": 7254 }, { "epoch": 1.258674531575295, "grad_norm": 0.764086902141571, "learning_rate": 1.770220452726454e-05, "loss": 0.4797, "step": 7255 }, { "epoch": 1.2588480222068008, "grad_norm": 0.8848465085029602, "learning_rate": 1.7695436519758412e-05, "loss": 0.5416, "step": 7256 }, { "epoch": 1.2590215128383067, "grad_norm": 0.7068691849708557, "learning_rate": 1.7688668779699562e-05, "loss": 0.4258, "step": 7257 }, { "epoch": 1.2591950034698125, "grad_norm": 2.146357297897339, "learning_rate": 1.76819013078734e-05, "loss": 0.3964, "step": 7258 }, { "epoch": 1.2593684941013186, "grad_norm": 0.7820359468460083, "learning_rate": 1.767513410506528e-05, "loss": 0.4858, "step": 7259 }, { "epoch": 1.2595419847328244, "grad_norm": 1.0822325944900513, "learning_rate": 1.7668367172060562e-05, "loss": 0.4261, "step": 7260 }, { "epoch": 1.2597154753643303, "grad_norm": 0.7217856049537659, "learning_rate": 1.766160050964456e-05, "loss": 0.5461, "step": 7261 }, { "epoch": 1.2598889659958363, "grad_norm": 2.3959295749664307, "learning_rate": 1.765483411860253e-05, "loss": 0.574, "step": 7262 }, { "epoch": 1.2600624566273422, "grad_norm": 0.7436891794204712, "learning_rate": 1.7648067999719734e-05, "loss": 0.4056, "step": 7263 }, { "epoch": 1.260235947258848, "grad_norm": 0.707612931728363, "learning_rate": 1.7641302153781402e-05, "loss": 0.374, "step": 7264 }, { "epoch": 1.2604094378903539, "grad_norm": 0.7460191249847412, "learning_rate": 1.7634536581572687e-05, "loss": 0.4739, "step": 7265 }, { "epoch": 1.2605829285218597, "grad_norm": 3.197263479232788, "learning_rate": 1.7627771283878764e-05, "loss": 0.4283, "step": 7266 }, { "epoch": 1.2607564191533658, "grad_norm": 0.7251825928688049, "learning_rate": 1.762100626148475e-05, "loss": 0.5815, "step": 7267 }, { "epoch": 1.2609299097848716, "grad_norm": 0.7742326855659485, "learning_rate": 1.761424151517573e-05, "loss": 0.4871, "step": 7268 }, { "epoch": 1.2611034004163775, "grad_norm": 0.7075135707855225, "learning_rate": 1.7607477045736758e-05, "loss": 0.4977, "step": 7269 }, { "epoch": 1.2612768910478835, "grad_norm": 1.1125656366348267, "learning_rate": 1.7600712853952863e-05, "loss": 0.4691, "step": 7270 }, { "epoch": 1.2614503816793894, "grad_norm": 0.8418900966644287, "learning_rate": 1.7593948940609043e-05, "loss": 0.5305, "step": 7271 }, { "epoch": 1.2616238723108952, "grad_norm": 0.6288782954216003, "learning_rate": 1.7587185306490245e-05, "loss": 0.4772, "step": 7272 }, { "epoch": 1.261797362942401, "grad_norm": 0.7079287171363831, "learning_rate": 1.758042195238141e-05, "loss": 0.5668, "step": 7273 }, { "epoch": 1.261970853573907, "grad_norm": 0.6741266846656799, "learning_rate": 1.7573658879067424e-05, "loss": 0.4419, "step": 7274 }, { "epoch": 1.2621443442054128, "grad_norm": 0.6497853398323059, "learning_rate": 1.756689608733315e-05, "loss": 0.6057, "step": 7275 }, { "epoch": 1.2623178348369188, "grad_norm": 0.6376200318336487, "learning_rate": 1.7560133577963423e-05, "loss": 0.5757, "step": 7276 }, { "epoch": 1.2624913254684247, "grad_norm": 1.0629897117614746, "learning_rate": 1.7553371351743043e-05, "loss": 0.4725, "step": 7277 }, { "epoch": 1.2626648160999305, "grad_norm": 0.839389979839325, "learning_rate": 1.754660940945676e-05, "loss": 0.4766, "step": 7278 }, { "epoch": 1.2628383067314366, "grad_norm": 0.9092323780059814, "learning_rate": 1.7539847751889314e-05, "loss": 0.5048, "step": 7279 }, { "epoch": 1.2630117973629424, "grad_norm": 0.790918231010437, "learning_rate": 1.753308637982541e-05, "loss": 0.3907, "step": 7280 }, { "epoch": 1.2631852879944483, "grad_norm": 1.2072266340255737, "learning_rate": 1.75263252940497e-05, "loss": 0.3888, "step": 7281 }, { "epoch": 1.2633587786259541, "grad_norm": 0.7060717344284058, "learning_rate": 1.751956449534682e-05, "loss": 0.4888, "step": 7282 }, { "epoch": 1.26353226925746, "grad_norm": 0.9373074173927307, "learning_rate": 1.7512803984501385e-05, "loss": 0.4214, "step": 7283 }, { "epoch": 1.263705759888966, "grad_norm": 0.7018997669219971, "learning_rate": 1.7506043762297932e-05, "loss": 0.4296, "step": 7284 }, { "epoch": 1.263879250520472, "grad_norm": 1.4823564291000366, "learning_rate": 1.7499283829521006e-05, "loss": 0.4584, "step": 7285 }, { "epoch": 1.2640527411519777, "grad_norm": 0.8001006245613098, "learning_rate": 1.7492524186955108e-05, "loss": 0.5175, "step": 7286 }, { "epoch": 1.2642262317834838, "grad_norm": 0.9118646383285522, "learning_rate": 1.7485764835384705e-05, "loss": 0.4099, "step": 7287 }, { "epoch": 1.2643997224149897, "grad_norm": 1.0971133708953857, "learning_rate": 1.7479005775594216e-05, "loss": 0.4161, "step": 7288 }, { "epoch": 1.2645732130464955, "grad_norm": 0.7593318819999695, "learning_rate": 1.7472247008368046e-05, "loss": 0.4926, "step": 7289 }, { "epoch": 1.2647467036780013, "grad_norm": 0.7047328352928162, "learning_rate": 1.746548853449056e-05, "loss": 0.506, "step": 7290 }, { "epoch": 1.2649201943095072, "grad_norm": 0.8287725448608398, "learning_rate": 1.7458730354746077e-05, "loss": 0.4471, "step": 7291 }, { "epoch": 1.2650936849410133, "grad_norm": 0.8523426055908203, "learning_rate": 1.74519724699189e-05, "loss": 0.4932, "step": 7292 }, { "epoch": 1.265267175572519, "grad_norm": 1.0193105936050415, "learning_rate": 1.7445214880793287e-05, "loss": 0.624, "step": 7293 }, { "epoch": 1.265440666204025, "grad_norm": 0.7864932417869568, "learning_rate": 1.7438457588153466e-05, "loss": 0.4742, "step": 7294 }, { "epoch": 1.2656141568355308, "grad_norm": 0.7212458252906799, "learning_rate": 1.7431700592783622e-05, "loss": 0.6436, "step": 7295 }, { "epoch": 1.2657876474670369, "grad_norm": 0.7995871901512146, "learning_rate": 1.742494389546792e-05, "loss": 0.4543, "step": 7296 }, { "epoch": 1.2659611380985427, "grad_norm": 0.747856080532074, "learning_rate": 1.7418187496990476e-05, "loss": 0.4777, "step": 7297 }, { "epoch": 1.2661346287300486, "grad_norm": 0.7125510573387146, "learning_rate": 1.7411431398135384e-05, "loss": 0.5, "step": 7298 }, { "epoch": 1.2663081193615544, "grad_norm": 0.7325502634048462, "learning_rate": 1.7404675599686697e-05, "loss": 0.5443, "step": 7299 }, { "epoch": 1.2664816099930603, "grad_norm": 0.9522684812545776, "learning_rate": 1.739792010242843e-05, "loss": 0.4669, "step": 7300 }, { "epoch": 1.2666551006245663, "grad_norm": 0.9179607033729553, "learning_rate": 1.7391164907144563e-05, "loss": 0.5337, "step": 7301 }, { "epoch": 1.2668285912560722, "grad_norm": 1.179361343383789, "learning_rate": 1.738441001461905e-05, "loss": 0.4153, "step": 7302 }, { "epoch": 1.267002081887578, "grad_norm": 0.8450411558151245, "learning_rate": 1.7377655425635813e-05, "loss": 0.4528, "step": 7303 }, { "epoch": 1.267175572519084, "grad_norm": 0.9140102863311768, "learning_rate": 1.7370901140978706e-05, "loss": 0.399, "step": 7304 }, { "epoch": 1.26734906315059, "grad_norm": 0.9227039813995361, "learning_rate": 1.7364147161431585e-05, "loss": 0.3571, "step": 7305 }, { "epoch": 1.2675225537820958, "grad_norm": 0.9356099963188171, "learning_rate": 1.735739348777827e-05, "loss": 0.3888, "step": 7306 }, { "epoch": 1.2676960444136016, "grad_norm": 0.7274833917617798, "learning_rate": 1.735064012080251e-05, "loss": 0.4417, "step": 7307 }, { "epoch": 1.2678695350451075, "grad_norm": 0.8161334991455078, "learning_rate": 1.734388706128805e-05, "loss": 0.5983, "step": 7308 }, { "epoch": 1.2680430256766135, "grad_norm": 0.866718053817749, "learning_rate": 1.73371343100186e-05, "loss": 0.5098, "step": 7309 }, { "epoch": 1.2682165163081194, "grad_norm": 0.5527801513671875, "learning_rate": 1.7330381867777808e-05, "loss": 0.4913, "step": 7310 }, { "epoch": 1.2683900069396252, "grad_norm": 3.248582124710083, "learning_rate": 1.7323629735349313e-05, "loss": 0.5182, "step": 7311 }, { "epoch": 1.2685634975711313, "grad_norm": 0.7193784117698669, "learning_rate": 1.731687791351671e-05, "loss": 0.4734, "step": 7312 }, { "epoch": 1.2687369882026371, "grad_norm": 0.6383418440818787, "learning_rate": 1.7310126403063545e-05, "loss": 0.6099, "step": 7313 }, { "epoch": 1.268910478834143, "grad_norm": 0.6860360503196716, "learning_rate": 1.730337520477335e-05, "loss": 0.6025, "step": 7314 }, { "epoch": 1.2690839694656488, "grad_norm": 0.7680951356887817, "learning_rate": 1.7296624319429598e-05, "loss": 0.498, "step": 7315 }, { "epoch": 1.2692574600971547, "grad_norm": 0.7124847769737244, "learning_rate": 1.7289873747815755e-05, "loss": 0.6033, "step": 7316 }, { "epoch": 1.2694309507286605, "grad_norm": 0.7696164846420288, "learning_rate": 1.7283123490715213e-05, "loss": 0.535, "step": 7317 }, { "epoch": 1.2696044413601666, "grad_norm": 0.8060824871063232, "learning_rate": 1.7276373548911355e-05, "loss": 0.427, "step": 7318 }, { "epoch": 1.2697779319916724, "grad_norm": 0.8457629084587097, "learning_rate": 1.7269623923187522e-05, "loss": 0.5208, "step": 7319 }, { "epoch": 1.2699514226231783, "grad_norm": 0.7939236164093018, "learning_rate": 1.7262874614327016e-05, "loss": 0.426, "step": 7320 }, { "epoch": 1.2701249132546844, "grad_norm": 1.6990852355957031, "learning_rate": 1.72561256231131e-05, "loss": 0.4905, "step": 7321 }, { "epoch": 1.2702984038861902, "grad_norm": 0.6765450835227966, "learning_rate": 1.7249376950329004e-05, "loss": 0.5074, "step": 7322 }, { "epoch": 1.270471894517696, "grad_norm": 0.8751128315925598, "learning_rate": 1.7242628596757916e-05, "loss": 0.4418, "step": 7323 }, { "epoch": 1.270645385149202, "grad_norm": 0.7906344532966614, "learning_rate": 1.7235880563182988e-05, "loss": 0.5089, "step": 7324 }, { "epoch": 1.2708188757807077, "grad_norm": 0.604007363319397, "learning_rate": 1.7229132850387352e-05, "loss": 0.5376, "step": 7325 }, { "epoch": 1.2709923664122138, "grad_norm": 1.5954300165176392, "learning_rate": 1.7222385459154072e-05, "loss": 0.4464, "step": 7326 }, { "epoch": 1.2711658570437196, "grad_norm": 0.7143296003341675, "learning_rate": 1.721563839026619e-05, "loss": 0.5603, "step": 7327 }, { "epoch": 1.2713393476752255, "grad_norm": 0.9084748029708862, "learning_rate": 1.720889164450672e-05, "loss": 0.4924, "step": 7328 }, { "epoch": 1.2715128383067316, "grad_norm": 0.9519553184509277, "learning_rate": 1.720214522265864e-05, "loss": 0.4237, "step": 7329 }, { "epoch": 1.2716863289382374, "grad_norm": 0.8374545574188232, "learning_rate": 1.7195399125504853e-05, "loss": 0.4375, "step": 7330 }, { "epoch": 1.2718598195697433, "grad_norm": 1.1499824523925781, "learning_rate": 1.7188653353828266e-05, "loss": 0.4177, "step": 7331 }, { "epoch": 1.272033310201249, "grad_norm": 0.8614972829818726, "learning_rate": 1.7181907908411744e-05, "loss": 0.4067, "step": 7332 }, { "epoch": 1.272206800832755, "grad_norm": 0.6160062551498413, "learning_rate": 1.7175162790038086e-05, "loss": 0.4989, "step": 7333 }, { "epoch": 1.2723802914642608, "grad_norm": 0.630744993686676, "learning_rate": 1.716841799949007e-05, "loss": 0.5487, "step": 7334 }, { "epoch": 1.2725537820957669, "grad_norm": 2.016706943511963, "learning_rate": 1.716167353755046e-05, "loss": 0.4154, "step": 7335 }, { "epoch": 1.2727272727272727, "grad_norm": 0.9351750612258911, "learning_rate": 1.7154929405001936e-05, "loss": 0.5192, "step": 7336 }, { "epoch": 1.2729007633587786, "grad_norm": 0.7126862406730652, "learning_rate": 1.7148185602627166e-05, "loss": 0.5167, "step": 7337 }, { "epoch": 1.2730742539902846, "grad_norm": 1.0575381517410278, "learning_rate": 1.7141442131208788e-05, "loss": 0.4878, "step": 7338 }, { "epoch": 1.2732477446217905, "grad_norm": 0.7802735567092896, "learning_rate": 1.7134698991529373e-05, "loss": 0.5439, "step": 7339 }, { "epoch": 1.2734212352532963, "grad_norm": 0.680596649646759, "learning_rate": 1.712795618437148e-05, "loss": 0.479, "step": 7340 }, { "epoch": 1.2735947258848022, "grad_norm": 0.6597265601158142, "learning_rate": 1.7121213710517616e-05, "loss": 0.4742, "step": 7341 }, { "epoch": 1.273768216516308, "grad_norm": 0.8055812120437622, "learning_rate": 1.7114471570750266e-05, "loss": 0.5016, "step": 7342 }, { "epoch": 1.273941707147814, "grad_norm": 0.9792829751968384, "learning_rate": 1.7107729765851847e-05, "loss": 0.609, "step": 7343 }, { "epoch": 1.27411519777932, "grad_norm": 0.7375954985618591, "learning_rate": 1.7100988296604756e-05, "loss": 0.4707, "step": 7344 }, { "epoch": 1.2742886884108258, "grad_norm": 0.9349376559257507, "learning_rate": 1.7094247163791352e-05, "loss": 0.6333, "step": 7345 }, { "epoch": 1.2744621790423318, "grad_norm": 0.82657390832901, "learning_rate": 1.708750636819395e-05, "loss": 0.5251, "step": 7346 }, { "epoch": 1.2746356696738377, "grad_norm": 0.9187333583831787, "learning_rate": 1.7080765910594833e-05, "loss": 0.4808, "step": 7347 }, { "epoch": 1.2748091603053435, "grad_norm": 1.084909200668335, "learning_rate": 1.7074025791776232e-05, "loss": 0.4784, "step": 7348 }, { "epoch": 1.2749826509368494, "grad_norm": 0.8411982655525208, "learning_rate": 1.7067286012520343e-05, "loss": 0.4703, "step": 7349 }, { "epoch": 1.2751561415683552, "grad_norm": 0.5996063351631165, "learning_rate": 1.706054657360933e-05, "loss": 0.5082, "step": 7350 }, { "epoch": 1.2753296321998613, "grad_norm": 0.7579880952835083, "learning_rate": 1.705380747582532e-05, "loss": 0.4292, "step": 7351 }, { "epoch": 1.2755031228313671, "grad_norm": 1.000863790512085, "learning_rate": 1.704706871995038e-05, "loss": 0.4073, "step": 7352 }, { "epoch": 1.275676613462873, "grad_norm": 1.7665989398956299, "learning_rate": 1.704033030676655e-05, "loss": 0.4327, "step": 7353 }, { "epoch": 1.2758501040943788, "grad_norm": 0.6814614534378052, "learning_rate": 1.703359223705585e-05, "loss": 0.5709, "step": 7354 }, { "epoch": 1.276023594725885, "grad_norm": 0.7269407510757446, "learning_rate": 1.7026854511600218e-05, "loss": 0.5576, "step": 7355 }, { "epoch": 1.2761970853573907, "grad_norm": 0.7705087661743164, "learning_rate": 1.7020117131181585e-05, "loss": 0.3433, "step": 7356 }, { "epoch": 1.2763705759888966, "grad_norm": 0.8691286444664001, "learning_rate": 1.7013380096581828e-05, "loss": 0.4166, "step": 7357 }, { "epoch": 1.2765440666204024, "grad_norm": 0.867172360420227, "learning_rate": 1.70066434085828e-05, "loss": 0.463, "step": 7358 }, { "epoch": 1.2767175572519083, "grad_norm": 0.7234309315681458, "learning_rate": 1.6999907067966285e-05, "loss": 0.5438, "step": 7359 }, { "epoch": 1.2768910478834143, "grad_norm": 0.6743608117103577, "learning_rate": 1.6993171075514054e-05, "loss": 0.5529, "step": 7360 }, { "epoch": 1.2770645385149202, "grad_norm": 0.7440758347511292, "learning_rate": 1.6986435432007826e-05, "loss": 0.4937, "step": 7361 }, { "epoch": 1.277238029146426, "grad_norm": 1.2034293413162231, "learning_rate": 1.697970013822927e-05, "loss": 0.6184, "step": 7362 }, { "epoch": 1.277411519777932, "grad_norm": 0.7799433469772339, "learning_rate": 1.6972965194960034e-05, "loss": 0.4814, "step": 7363 }, { "epoch": 1.277585010409438, "grad_norm": 0.8648630976676941, "learning_rate": 1.6966230602981727e-05, "loss": 0.5325, "step": 7364 }, { "epoch": 1.2777585010409438, "grad_norm": 0.7705937027931213, "learning_rate": 1.695949636307588e-05, "loss": 0.4996, "step": 7365 }, { "epoch": 1.2779319916724496, "grad_norm": 0.7275580167770386, "learning_rate": 1.6952762476024023e-05, "loss": 0.4856, "step": 7366 }, { "epoch": 1.2781054823039555, "grad_norm": 0.7204875349998474, "learning_rate": 1.694602894260764e-05, "loss": 0.5568, "step": 7367 }, { "epoch": 1.2782789729354616, "grad_norm": 0.8418717980384827, "learning_rate": 1.6939295763608146e-05, "loss": 0.4301, "step": 7368 }, { "epoch": 1.2784524635669674, "grad_norm": 0.7505890727043152, "learning_rate": 1.6932562939806952e-05, "loss": 0.4567, "step": 7369 }, { "epoch": 1.2786259541984732, "grad_norm": 0.7095392346382141, "learning_rate": 1.6925830471985398e-05, "loss": 0.4727, "step": 7370 }, { "epoch": 1.2787994448299793, "grad_norm": 0.9720133543014526, "learning_rate": 1.6919098360924804e-05, "loss": 0.4852, "step": 7371 }, { "epoch": 1.2789729354614852, "grad_norm": 2.4071052074432373, "learning_rate": 1.6912366607406433e-05, "loss": 0.5085, "step": 7372 }, { "epoch": 1.279146426092991, "grad_norm": 0.7946075797080994, "learning_rate": 1.6905635212211517e-05, "loss": 0.5366, "step": 7373 }, { "epoch": 1.2793199167244969, "grad_norm": 0.9184132814407349, "learning_rate": 1.6898904176121246e-05, "loss": 0.4015, "step": 7374 }, { "epoch": 1.2794934073560027, "grad_norm": 0.76685631275177, "learning_rate": 1.6892173499916752e-05, "loss": 0.4385, "step": 7375 }, { "epoch": 1.2796668979875085, "grad_norm": 0.8007482290267944, "learning_rate": 1.688544318437914e-05, "loss": 0.3929, "step": 7376 }, { "epoch": 1.2798403886190146, "grad_norm": 0.9679758548736572, "learning_rate": 1.687871323028949e-05, "loss": 0.4316, "step": 7377 }, { "epoch": 1.2800138792505205, "grad_norm": 0.6715041399002075, "learning_rate": 1.6871983638428794e-05, "loss": 0.4387, "step": 7378 }, { "epoch": 1.2801873698820263, "grad_norm": 0.9745506048202515, "learning_rate": 1.6865254409578042e-05, "loss": 0.489, "step": 7379 }, { "epoch": 1.2803608605135324, "grad_norm": 0.8233616948127747, "learning_rate": 1.685852554451818e-05, "loss": 0.4985, "step": 7380 }, { "epoch": 1.2805343511450382, "grad_norm": 0.6738633513450623, "learning_rate": 1.6851797044030076e-05, "loss": 0.4419, "step": 7381 }, { "epoch": 1.280707841776544, "grad_norm": 0.6327497959136963, "learning_rate": 1.6845068908894597e-05, "loss": 0.4037, "step": 7382 }, { "epoch": 1.28088133240805, "grad_norm": 0.6840953826904297, "learning_rate": 1.6838341139892556e-05, "loss": 0.4702, "step": 7383 }, { "epoch": 1.2810548230395558, "grad_norm": 0.9774636626243591, "learning_rate": 1.68316137378047e-05, "loss": 0.4967, "step": 7384 }, { "epoch": 1.2812283136710618, "grad_norm": 0.9470757246017456, "learning_rate": 1.682488670341176e-05, "loss": 0.5508, "step": 7385 }, { "epoch": 1.2814018043025677, "grad_norm": 1.1846847534179688, "learning_rate": 1.681816003749442e-05, "loss": 0.4384, "step": 7386 }, { "epoch": 1.2815752949340735, "grad_norm": 0.8350231647491455, "learning_rate": 1.681143374083332e-05, "loss": 0.4119, "step": 7387 }, { "epoch": 1.2817487855655796, "grad_norm": 0.8339827060699463, "learning_rate": 1.6804707814209046e-05, "loss": 0.3962, "step": 7388 }, { "epoch": 1.2819222761970854, "grad_norm": 1.056058645248413, "learning_rate": 1.6797982258402154e-05, "loss": 0.5504, "step": 7389 }, { "epoch": 1.2820957668285913, "grad_norm": 0.9062198996543884, "learning_rate": 1.6791257074193156e-05, "loss": 0.5337, "step": 7390 }, { "epoch": 1.2822692574600971, "grad_norm": 1.0762733221054077, "learning_rate": 1.678453226236251e-05, "loss": 0.4216, "step": 7391 }, { "epoch": 1.282442748091603, "grad_norm": 1.0174580812454224, "learning_rate": 1.677780782369064e-05, "loss": 0.3956, "step": 7392 }, { "epoch": 1.2826162387231088, "grad_norm": 0.8656790256500244, "learning_rate": 1.677108375895793e-05, "loss": 0.5273, "step": 7393 }, { "epoch": 1.2827897293546149, "grad_norm": 1.3800997734069824, "learning_rate": 1.6764360068944706e-05, "loss": 0.3701, "step": 7394 }, { "epoch": 1.2829632199861207, "grad_norm": 0.702471911907196, "learning_rate": 1.6757636754431272e-05, "loss": 0.5071, "step": 7395 }, { "epoch": 1.2831367106176266, "grad_norm": 1.2209949493408203, "learning_rate": 1.6750913816197873e-05, "loss": 0.3948, "step": 7396 }, { "epoch": 1.2833102012491326, "grad_norm": 0.8057335019111633, "learning_rate": 1.6744191255024707e-05, "loss": 0.4244, "step": 7397 }, { "epoch": 1.2834836918806385, "grad_norm": 0.7313793897628784, "learning_rate": 1.6737469071691936e-05, "loss": 0.4855, "step": 7398 }, { "epoch": 1.2836571825121443, "grad_norm": 0.7577414512634277, "learning_rate": 1.6730747266979683e-05, "loss": 0.4327, "step": 7399 }, { "epoch": 1.2838306731436502, "grad_norm": 1.755483865737915, "learning_rate": 1.6724025841668026e-05, "loss": 0.4133, "step": 7400 }, { "epoch": 1.284004163775156, "grad_norm": 0.8913657665252686, "learning_rate": 1.6717304796536984e-05, "loss": 0.4237, "step": 7401 }, { "epoch": 1.284177654406662, "grad_norm": 0.9089097380638123, "learning_rate": 1.6710584132366542e-05, "loss": 0.4487, "step": 7402 }, { "epoch": 1.284351145038168, "grad_norm": 0.8932932615280151, "learning_rate": 1.6703863849936654e-05, "loss": 0.5096, "step": 7403 }, { "epoch": 1.2845246356696738, "grad_norm": 0.6808133721351624, "learning_rate": 1.6697143950027194e-05, "loss": 0.4312, "step": 7404 }, { "epoch": 1.2846981263011799, "grad_norm": 0.7399442791938782, "learning_rate": 1.6690424433418032e-05, "loss": 0.5237, "step": 7405 }, { "epoch": 1.2848716169326857, "grad_norm": 1.6645556688308716, "learning_rate": 1.6683705300888977e-05, "loss": 0.5393, "step": 7406 }, { "epoch": 1.2850451075641915, "grad_norm": 1.2402348518371582, "learning_rate": 1.6676986553219778e-05, "loss": 0.6115, "step": 7407 }, { "epoch": 1.2852185981956974, "grad_norm": 0.7507150173187256, "learning_rate": 1.667026819119016e-05, "loss": 0.5576, "step": 7408 }, { "epoch": 1.2853920888272032, "grad_norm": 0.9310218691825867, "learning_rate": 1.666355021557981e-05, "loss": 0.4143, "step": 7409 }, { "epoch": 1.2855655794587093, "grad_norm": 0.7509005665779114, "learning_rate": 1.6656832627168338e-05, "loss": 0.4741, "step": 7410 }, { "epoch": 1.2857390700902152, "grad_norm": 0.6133527755737305, "learning_rate": 1.665011542673533e-05, "loss": 0.509, "step": 7411 }, { "epoch": 1.285912560721721, "grad_norm": 0.9684708714485168, "learning_rate": 1.6643398615060346e-05, "loss": 0.5171, "step": 7412 }, { "epoch": 1.2860860513532268, "grad_norm": 0.9050800204277039, "learning_rate": 1.6636682192922847e-05, "loss": 0.6178, "step": 7413 }, { "epoch": 1.286259541984733, "grad_norm": 0.8545094728469849, "learning_rate": 1.6629966161102304e-05, "loss": 0.5242, "step": 7414 }, { "epoch": 1.2864330326162388, "grad_norm": 0.8504958152770996, "learning_rate": 1.6623250520378114e-05, "loss": 0.5021, "step": 7415 }, { "epoch": 1.2866065232477446, "grad_norm": 1.0944947004318237, "learning_rate": 1.661653527152964e-05, "loss": 0.5497, "step": 7416 }, { "epoch": 1.2867800138792505, "grad_norm": 0.7630634307861328, "learning_rate": 1.6609820415336188e-05, "loss": 0.5302, "step": 7417 }, { "epoch": 1.2869535045107563, "grad_norm": 1.0300921201705933, "learning_rate": 1.6603105952577024e-05, "loss": 0.3488, "step": 7418 }, { "epoch": 1.2871269951422624, "grad_norm": 0.7179005742073059, "learning_rate": 1.6596391884031378e-05, "loss": 0.4191, "step": 7419 }, { "epoch": 1.2873004857737682, "grad_norm": 0.7966880798339844, "learning_rate": 1.6589678210478415e-05, "loss": 0.5724, "step": 7420 }, { "epoch": 1.287473976405274, "grad_norm": 0.8220646381378174, "learning_rate": 1.658296493269727e-05, "loss": 0.6222, "step": 7421 }, { "epoch": 1.2876474670367801, "grad_norm": 1.581972599029541, "learning_rate": 1.657625205146703e-05, "loss": 0.4386, "step": 7422 }, { "epoch": 1.287820957668286, "grad_norm": 0.6322770118713379, "learning_rate": 1.6569539567566726e-05, "loss": 0.3914, "step": 7423 }, { "epoch": 1.2879944482997918, "grad_norm": 0.7599250078201294, "learning_rate": 1.6562827481775353e-05, "loss": 0.4332, "step": 7424 }, { "epoch": 1.2881679389312977, "grad_norm": 1.8352106809616089, "learning_rate": 1.6556115794871862e-05, "loss": 0.3678, "step": 7425 }, { "epoch": 1.2883414295628035, "grad_norm": 0.7707764506340027, "learning_rate": 1.6549404507635135e-05, "loss": 0.5446, "step": 7426 }, { "epoch": 1.2885149201943096, "grad_norm": 0.6282917857170105, "learning_rate": 1.654269362084404e-05, "loss": 0.485, "step": 7427 }, { "epoch": 1.2886884108258154, "grad_norm": 0.9740319848060608, "learning_rate": 1.6535983135277378e-05, "loss": 0.4186, "step": 7428 }, { "epoch": 1.2888619014573213, "grad_norm": 0.7551928758621216, "learning_rate": 1.6529273051713917e-05, "loss": 0.4285, "step": 7429 }, { "epoch": 1.2890353920888273, "grad_norm": 0.8909952044487, "learning_rate": 1.6522563370932355e-05, "loss": 0.4718, "step": 7430 }, { "epoch": 1.2892088827203332, "grad_norm": 0.8301438689231873, "learning_rate": 1.6515854093711364e-05, "loss": 0.3869, "step": 7431 }, { "epoch": 1.289382373351839, "grad_norm": 1.0200198888778687, "learning_rate": 1.6509145220829574e-05, "loss": 0.4265, "step": 7432 }, { "epoch": 1.2895558639833449, "grad_norm": 0.7204471230506897, "learning_rate": 1.650243675306554e-05, "loss": 0.4183, "step": 7433 }, { "epoch": 1.2897293546148507, "grad_norm": 0.8902462124824524, "learning_rate": 1.64957286911978e-05, "loss": 0.4216, "step": 7434 }, { "epoch": 1.2899028452463566, "grad_norm": 0.7522461414337158, "learning_rate": 1.6489021036004835e-05, "loss": 0.5361, "step": 7435 }, { "epoch": 1.2900763358778626, "grad_norm": 0.9403187036514282, "learning_rate": 1.6482313788265058e-05, "loss": 0.473, "step": 7436 }, { "epoch": 1.2902498265093685, "grad_norm": 0.6944003701210022, "learning_rate": 1.647560694875687e-05, "loss": 0.4741, "step": 7437 }, { "epoch": 1.2904233171408743, "grad_norm": 0.7476251721382141, "learning_rate": 1.64689005182586e-05, "loss": 0.5466, "step": 7438 }, { "epoch": 1.2905968077723804, "grad_norm": 0.6289733648300171, "learning_rate": 1.6462194497548546e-05, "loss": 0.5042, "step": 7439 }, { "epoch": 1.2907702984038862, "grad_norm": 0.8400498628616333, "learning_rate": 1.6455488887404935e-05, "loss": 0.4337, "step": 7440 }, { "epoch": 1.290943789035392, "grad_norm": 0.7845512628555298, "learning_rate": 1.6448783688605976e-05, "loss": 0.4918, "step": 7441 }, { "epoch": 1.291117279666898, "grad_norm": 0.6369579434394836, "learning_rate": 1.6442078901929803e-05, "loss": 0.5485, "step": 7442 }, { "epoch": 1.2912907702984038, "grad_norm": 0.7335418462753296, "learning_rate": 1.6435374528154517e-05, "loss": 0.5928, "step": 7443 }, { "epoch": 1.2914642609299098, "grad_norm": 1.2369920015335083, "learning_rate": 1.6428670568058176e-05, "loss": 0.5278, "step": 7444 }, { "epoch": 1.2916377515614157, "grad_norm": 0.6675391793251038, "learning_rate": 1.6421967022418776e-05, "loss": 0.5138, "step": 7445 }, { "epoch": 1.2918112421929215, "grad_norm": 0.626063346862793, "learning_rate": 1.641526389201427e-05, "loss": 0.6233, "step": 7446 }, { "epoch": 1.2919847328244276, "grad_norm": 0.8406535387039185, "learning_rate": 1.6408561177622566e-05, "loss": 0.5139, "step": 7447 }, { "epoch": 1.2921582234559335, "grad_norm": 0.9377653002738953, "learning_rate": 1.640185888002153e-05, "loss": 0.5919, "step": 7448 }, { "epoch": 1.2923317140874393, "grad_norm": 0.6694398522377014, "learning_rate": 1.6395156999988956e-05, "loss": 0.4327, "step": 7449 }, { "epoch": 1.2925052047189451, "grad_norm": 0.6803465485572815, "learning_rate": 1.6388455538302612e-05, "loss": 0.4812, "step": 7450 }, { "epoch": 1.292678695350451, "grad_norm": 0.8606630563735962, "learning_rate": 1.638175449574022e-05, "loss": 0.4614, "step": 7451 }, { "epoch": 1.2928521859819568, "grad_norm": 0.9769142866134644, "learning_rate": 1.6375053873079424e-05, "loss": 0.4155, "step": 7452 }, { "epoch": 1.293025676613463, "grad_norm": 0.761888325214386, "learning_rate": 1.6368353671097854e-05, "loss": 0.5502, "step": 7453 }, { "epoch": 1.2931991672449688, "grad_norm": 0.7804129719734192, "learning_rate": 1.6361653890573078e-05, "loss": 0.5378, "step": 7454 }, { "epoch": 1.2933726578764746, "grad_norm": 0.7592903971672058, "learning_rate": 1.6354954532282598e-05, "loss": 0.4705, "step": 7455 }, { "epoch": 1.2935461485079807, "grad_norm": 0.7587009072303772, "learning_rate": 1.6348255597003896e-05, "loss": 0.4542, "step": 7456 }, { "epoch": 1.2937196391394865, "grad_norm": 0.7308903932571411, "learning_rate": 1.6341557085514385e-05, "loss": 0.6405, "step": 7457 }, { "epoch": 1.2938931297709924, "grad_norm": 0.7195034623146057, "learning_rate": 1.633485899859144e-05, "loss": 0.4769, "step": 7458 }, { "epoch": 1.2940666204024982, "grad_norm": 0.7280550599098206, "learning_rate": 1.6328161337012377e-05, "loss": 0.4736, "step": 7459 }, { "epoch": 1.294240111034004, "grad_norm": 0.9579290747642517, "learning_rate": 1.632146410155447e-05, "loss": 0.4453, "step": 7460 }, { "epoch": 1.2944136016655101, "grad_norm": 0.8503251671791077, "learning_rate": 1.6314767292994946e-05, "loss": 0.3949, "step": 7461 }, { "epoch": 1.294587092297016, "grad_norm": 1.023876428604126, "learning_rate": 1.6308070912110965e-05, "loss": 0.3865, "step": 7462 }, { "epoch": 1.2947605829285218, "grad_norm": 0.7069829106330872, "learning_rate": 1.6301374959679654e-05, "loss": 0.3804, "step": 7463 }, { "epoch": 1.2949340735600279, "grad_norm": 0.9230320453643799, "learning_rate": 1.6294679436478095e-05, "loss": 0.4145, "step": 7464 }, { "epoch": 1.2951075641915337, "grad_norm": 0.9587447047233582, "learning_rate": 1.6287984343283304e-05, "loss": 0.5261, "step": 7465 }, { "epoch": 1.2952810548230396, "grad_norm": 0.9868846535682678, "learning_rate": 1.6281289680872252e-05, "loss": 0.4058, "step": 7466 }, { "epoch": 1.2954545454545454, "grad_norm": 0.6967030167579651, "learning_rate": 1.627459545002187e-05, "loss": 0.4697, "step": 7467 }, { "epoch": 1.2956280360860513, "grad_norm": 1.0543104410171509, "learning_rate": 1.6267901651509022e-05, "loss": 0.4632, "step": 7468 }, { "epoch": 1.2958015267175573, "grad_norm": 0.6506008505821228, "learning_rate": 1.6261208286110536e-05, "loss": 0.5687, "step": 7469 }, { "epoch": 1.2959750173490632, "grad_norm": 0.7393031120300293, "learning_rate": 1.6254515354603194e-05, "loss": 0.4377, "step": 7470 }, { "epoch": 1.296148507980569, "grad_norm": 0.7607579827308655, "learning_rate": 1.6247822857763703e-05, "loss": 0.5089, "step": 7471 }, { "epoch": 1.2963219986120749, "grad_norm": 1.530008316040039, "learning_rate": 1.6241130796368737e-05, "loss": 0.5399, "step": 7472 }, { "epoch": 1.296495489243581, "grad_norm": 1.0021963119506836, "learning_rate": 1.6234439171194925e-05, "loss": 0.4792, "step": 7473 }, { "epoch": 1.2966689798750868, "grad_norm": 0.7233889698982239, "learning_rate": 1.6227747983018845e-05, "loss": 0.5323, "step": 7474 }, { "epoch": 1.2968424705065926, "grad_norm": 1.0551574230194092, "learning_rate": 1.6221057232616994e-05, "loss": 0.5114, "step": 7475 }, { "epoch": 1.2970159611380985, "grad_norm": 0.6602882742881775, "learning_rate": 1.6214366920765856e-05, "loss": 0.4359, "step": 7476 }, { "epoch": 1.2971894517696043, "grad_norm": 0.6236412525177002, "learning_rate": 1.6207677048241858e-05, "loss": 0.6041, "step": 7477 }, { "epoch": 1.2973629424011104, "grad_norm": 0.8288519382476807, "learning_rate": 1.620098761582135e-05, "loss": 0.4841, "step": 7478 }, { "epoch": 1.2975364330326162, "grad_norm": 1.2267411947250366, "learning_rate": 1.6194298624280653e-05, "loss": 0.6281, "step": 7479 }, { "epoch": 1.297709923664122, "grad_norm": 0.6956679224967957, "learning_rate": 1.6187610074396044e-05, "loss": 0.4227, "step": 7480 }, { "epoch": 1.2978834142956281, "grad_norm": 0.9546607136726379, "learning_rate": 1.6180921966943722e-05, "loss": 0.4219, "step": 7481 }, { "epoch": 1.298056904927134, "grad_norm": 0.9601172208786011, "learning_rate": 1.6174234302699856e-05, "loss": 0.5724, "step": 7482 }, { "epoch": 1.2982303955586398, "grad_norm": 0.7844841480255127, "learning_rate": 1.616754708244056e-05, "loss": 0.4523, "step": 7483 }, { "epoch": 1.2984038861901457, "grad_norm": 0.7818117141723633, "learning_rate": 1.616086030694189e-05, "loss": 0.5029, "step": 7484 }, { "epoch": 1.2985773768216515, "grad_norm": 0.8169832229614258, "learning_rate": 1.615417397697985e-05, "loss": 0.4934, "step": 7485 }, { "epoch": 1.2987508674531576, "grad_norm": 0.7963302731513977, "learning_rate": 1.6147488093330405e-05, "loss": 0.3839, "step": 7486 }, { "epoch": 1.2989243580846634, "grad_norm": 1.164228916168213, "learning_rate": 1.6140802656769457e-05, "loss": 0.3376, "step": 7487 }, { "epoch": 1.2990978487161693, "grad_norm": 0.7500046491622925, "learning_rate": 1.6134117668072858e-05, "loss": 0.4537, "step": 7488 }, { "epoch": 1.2992713393476754, "grad_norm": 1.1466459035873413, "learning_rate": 1.6127433128016403e-05, "loss": 0.4384, "step": 7489 }, { "epoch": 1.2994448299791812, "grad_norm": 0.790235698223114, "learning_rate": 1.612074903737585e-05, "loss": 0.3829, "step": 7490 }, { "epoch": 1.299618320610687, "grad_norm": 1.0291216373443604, "learning_rate": 1.611406539692689e-05, "loss": 0.4874, "step": 7491 }, { "epoch": 1.299791811242193, "grad_norm": 0.6450439691543579, "learning_rate": 1.610738220744517e-05, "loss": 0.6089, "step": 7492 }, { "epoch": 1.2999653018736987, "grad_norm": 0.6980123519897461, "learning_rate": 1.6100699469706285e-05, "loss": 0.4474, "step": 7493 }, { "epoch": 1.3001387925052046, "grad_norm": 0.7495476603507996, "learning_rate": 1.6094017184485763e-05, "loss": 0.5873, "step": 7494 }, { "epoch": 1.3003122831367107, "grad_norm": 0.6476255059242249, "learning_rate": 1.6087335352559097e-05, "loss": 0.5492, "step": 7495 }, { "epoch": 1.3004857737682165, "grad_norm": 0.7907722592353821, "learning_rate": 1.6080653974701732e-05, "loss": 0.4285, "step": 7496 }, { "epoch": 1.3006592643997223, "grad_norm": 0.8223806619644165, "learning_rate": 1.6073973051689032e-05, "loss": 0.4886, "step": 7497 }, { "epoch": 1.3008327550312284, "grad_norm": 0.6121790409088135, "learning_rate": 1.6067292584296333e-05, "loss": 0.4142, "step": 7498 }, { "epoch": 1.3010062456627343, "grad_norm": 0.7437500953674316, "learning_rate": 1.6060612573298912e-05, "loss": 0.4806, "step": 7499 }, { "epoch": 1.30117973629424, "grad_norm": 0.9398663640022278, "learning_rate": 1.6053933019472003e-05, "loss": 0.3938, "step": 7500 }, { "epoch": 1.301353226925746, "grad_norm": 0.9563746452331543, "learning_rate": 1.6047253923590756e-05, "loss": 0.4484, "step": 7501 }, { "epoch": 1.3015267175572518, "grad_norm": 0.5969168543815613, "learning_rate": 1.6040575286430295e-05, "loss": 0.5699, "step": 7502 }, { "epoch": 1.3017002081887579, "grad_norm": 0.7994356751441956, "learning_rate": 1.6033897108765696e-05, "loss": 0.5862, "step": 7503 }, { "epoch": 1.3018736988202637, "grad_norm": 0.7313307523727417, "learning_rate": 1.602721939137195e-05, "loss": 0.5156, "step": 7504 }, { "epoch": 1.3020471894517696, "grad_norm": 1.1211521625518799, "learning_rate": 1.6020542135024023e-05, "loss": 0.3884, "step": 7505 }, { "epoch": 1.3022206800832756, "grad_norm": 0.6634950637817383, "learning_rate": 1.6013865340496826e-05, "loss": 0.495, "step": 7506 }, { "epoch": 1.3023941707147815, "grad_norm": 0.8779208064079285, "learning_rate": 1.6007189008565195e-05, "loss": 0.5415, "step": 7507 }, { "epoch": 1.3025676613462873, "grad_norm": 0.7145334482192993, "learning_rate": 1.6000513140003927e-05, "loss": 0.4457, "step": 7508 }, { "epoch": 1.3027411519777932, "grad_norm": 0.724416971206665, "learning_rate": 1.5993837735587783e-05, "loss": 0.5167, "step": 7509 }, { "epoch": 1.302914642609299, "grad_norm": 0.7721180319786072, "learning_rate": 1.5987162796091428e-05, "loss": 0.5464, "step": 7510 }, { "epoch": 1.3030881332408049, "grad_norm": 0.8560741543769836, "learning_rate": 1.5980488322289505e-05, "loss": 0.4615, "step": 7511 }, { "epoch": 1.303261623872311, "grad_norm": 0.9781146049499512, "learning_rate": 1.5973814314956602e-05, "loss": 0.5835, "step": 7512 }, { "epoch": 1.3034351145038168, "grad_norm": 1.5704987049102783, "learning_rate": 1.5967140774867235e-05, "loss": 0.5801, "step": 7513 }, { "epoch": 1.3036086051353226, "grad_norm": 0.7586106657981873, "learning_rate": 1.596046770279588e-05, "loss": 0.4703, "step": 7514 }, { "epoch": 1.3037820957668287, "grad_norm": 0.5656479001045227, "learning_rate": 1.5953795099516955e-05, "loss": 0.5348, "step": 7515 }, { "epoch": 1.3039555863983345, "grad_norm": 1.0700138807296753, "learning_rate": 1.5947122965804827e-05, "loss": 0.4349, "step": 7516 }, { "epoch": 1.3041290770298404, "grad_norm": 0.7711705565452576, "learning_rate": 1.59404513024338e-05, "loss": 0.4879, "step": 7517 }, { "epoch": 1.3043025676613462, "grad_norm": 0.7690720558166504, "learning_rate": 1.5933780110178128e-05, "loss": 0.4318, "step": 7518 }, { "epoch": 1.304476058292852, "grad_norm": 0.8652418255805969, "learning_rate": 1.5927109389812013e-05, "loss": 0.413, "step": 7519 }, { "epoch": 1.3046495489243581, "grad_norm": 0.80824214220047, "learning_rate": 1.59204391421096e-05, "loss": 0.4968, "step": 7520 }, { "epoch": 1.304823039555864, "grad_norm": 0.8984287977218628, "learning_rate": 1.5913769367844974e-05, "loss": 0.534, "step": 7521 }, { "epoch": 1.3049965301873698, "grad_norm": 0.6900107264518738, "learning_rate": 1.5907100067792186e-05, "loss": 0.5259, "step": 7522 }, { "epoch": 1.305170020818876, "grad_norm": 0.6418942213058472, "learning_rate": 1.590043124272519e-05, "loss": 0.5896, "step": 7523 }, { "epoch": 1.3053435114503817, "grad_norm": 0.8585731387138367, "learning_rate": 1.589376289341793e-05, "loss": 0.4115, "step": 7524 }, { "epoch": 1.3055170020818876, "grad_norm": 0.7281717658042908, "learning_rate": 1.5887095020644282e-05, "loss": 0.5431, "step": 7525 }, { "epoch": 1.3056904927133934, "grad_norm": 0.6613842844963074, "learning_rate": 1.5880427625178035e-05, "loss": 0.5422, "step": 7526 }, { "epoch": 1.3058639833448993, "grad_norm": 0.7034533619880676, "learning_rate": 1.5873760707792966e-05, "loss": 0.5529, "step": 7527 }, { "epoch": 1.3060374739764053, "grad_norm": 0.905914843082428, "learning_rate": 1.586709426926277e-05, "loss": 0.5544, "step": 7528 }, { "epoch": 1.3062109646079112, "grad_norm": 0.7205399870872498, "learning_rate": 1.5860428310361117e-05, "loss": 0.3742, "step": 7529 }, { "epoch": 1.306384455239417, "grad_norm": 0.7713692784309387, "learning_rate": 1.5853762831861567e-05, "loss": 0.5132, "step": 7530 }, { "epoch": 1.3065579458709229, "grad_norm": 0.6062445044517517, "learning_rate": 1.5847097834537674e-05, "loss": 0.5809, "step": 7531 }, { "epoch": 1.306731436502429, "grad_norm": 0.7817365527153015, "learning_rate": 1.5840433319162925e-05, "loss": 0.5175, "step": 7532 }, { "epoch": 1.3069049271339348, "grad_norm": 0.6964122653007507, "learning_rate": 1.5833769286510727e-05, "loss": 0.5847, "step": 7533 }, { "epoch": 1.3070784177654406, "grad_norm": 0.9588583111763, "learning_rate": 1.5827105737354456e-05, "loss": 0.4564, "step": 7534 }, { "epoch": 1.3072519083969465, "grad_norm": 0.8326556086540222, "learning_rate": 1.5820442672467436e-05, "loss": 0.402, "step": 7535 }, { "epoch": 1.3074253990284523, "grad_norm": 0.7271561026573181, "learning_rate": 1.5813780092622907e-05, "loss": 0.476, "step": 7536 }, { "epoch": 1.3075988896599584, "grad_norm": 0.7360162734985352, "learning_rate": 1.5807117998594077e-05, "loss": 0.4076, "step": 7537 }, { "epoch": 1.3077723802914643, "grad_norm": 0.7691264152526855, "learning_rate": 1.580045639115409e-05, "loss": 0.4091, "step": 7538 }, { "epoch": 1.30794587092297, "grad_norm": 0.6766218543052673, "learning_rate": 1.5793795271076033e-05, "loss": 0.5032, "step": 7539 }, { "epoch": 1.3081193615544762, "grad_norm": 0.6278243064880371, "learning_rate": 1.5787134639132935e-05, "loss": 0.5909, "step": 7540 }, { "epoch": 1.308292852185982, "grad_norm": 1.2189621925354004, "learning_rate": 1.5780474496097773e-05, "loss": 0.58, "step": 7541 }, { "epoch": 1.3084663428174879, "grad_norm": 0.625394344329834, "learning_rate": 1.577381484274346e-05, "loss": 0.5812, "step": 7542 }, { "epoch": 1.3086398334489937, "grad_norm": 0.769202709197998, "learning_rate": 1.5767155679842857e-05, "loss": 0.5537, "step": 7543 }, { "epoch": 1.3088133240804996, "grad_norm": 0.7040926814079285, "learning_rate": 1.576049700816877e-05, "loss": 0.5208, "step": 7544 }, { "epoch": 1.3089868147120056, "grad_norm": 0.9660540819168091, "learning_rate": 1.5753838828493953e-05, "loss": 0.4706, "step": 7545 }, { "epoch": 1.3091603053435115, "grad_norm": 0.8025970458984375, "learning_rate": 1.574718114159108e-05, "loss": 0.5432, "step": 7546 }, { "epoch": 1.3093337959750173, "grad_norm": 1.054005742073059, "learning_rate": 1.574052394823279e-05, "loss": 0.4279, "step": 7547 }, { "epoch": 1.3095072866065234, "grad_norm": 1.0567641258239746, "learning_rate": 1.5733867249191667e-05, "loss": 0.4843, "step": 7548 }, { "epoch": 1.3096807772380292, "grad_norm": 2.3491523265838623, "learning_rate": 1.5727211045240217e-05, "loss": 0.4946, "step": 7549 }, { "epoch": 1.309854267869535, "grad_norm": 0.8406051397323608, "learning_rate": 1.57205553371509e-05, "loss": 0.3904, "step": 7550 }, { "epoch": 1.310027758501041, "grad_norm": 0.6834543943405151, "learning_rate": 1.571390012569613e-05, "loss": 0.4987, "step": 7551 }, { "epoch": 1.3102012491325468, "grad_norm": 0.5546103119850159, "learning_rate": 1.570724541164824e-05, "loss": 0.5178, "step": 7552 }, { "epoch": 1.3103747397640526, "grad_norm": 1.1311421394348145, "learning_rate": 1.570059119577952e-05, "loss": 0.5161, "step": 7553 }, { "epoch": 1.3105482303955587, "grad_norm": 0.652874231338501, "learning_rate": 1.569393747886221e-05, "loss": 0.5701, "step": 7554 }, { "epoch": 1.3107217210270645, "grad_norm": 0.7094644904136658, "learning_rate": 1.5687284261668465e-05, "loss": 0.5991, "step": 7555 }, { "epoch": 1.3108952116585704, "grad_norm": 0.7584097385406494, "learning_rate": 1.5680631544970405e-05, "loss": 0.5813, "step": 7556 }, { "epoch": 1.3110687022900764, "grad_norm": 0.9776495695114136, "learning_rate": 1.567397932954009e-05, "loss": 0.4181, "step": 7557 }, { "epoch": 1.3112421929215823, "grad_norm": 1.0048600435256958, "learning_rate": 1.5667327616149522e-05, "loss": 0.4086, "step": 7558 }, { "epoch": 1.3114156835530881, "grad_norm": 0.7916238903999329, "learning_rate": 1.5660676405570625e-05, "loss": 0.4926, "step": 7559 }, { "epoch": 1.311589174184594, "grad_norm": 0.6625590920448303, "learning_rate": 1.5654025698575286e-05, "loss": 0.5209, "step": 7560 }, { "epoch": 1.3117626648160998, "grad_norm": 0.8551685214042664, "learning_rate": 1.5647375495935334e-05, "loss": 0.5686, "step": 7561 }, { "epoch": 1.311936155447606, "grad_norm": 0.9185087084770203, "learning_rate": 1.5640725798422525e-05, "loss": 0.4187, "step": 7562 }, { "epoch": 1.3121096460791117, "grad_norm": 1.0313657522201538, "learning_rate": 1.5634076606808567e-05, "loss": 0.4257, "step": 7563 }, { "epoch": 1.3122831367106176, "grad_norm": 0.7563906311988831, "learning_rate": 1.5627427921865106e-05, "loss": 0.453, "step": 7564 }, { "epoch": 1.3124566273421236, "grad_norm": 0.6281924247741699, "learning_rate": 1.562077974436373e-05, "loss": 0.4663, "step": 7565 }, { "epoch": 1.3126301179736295, "grad_norm": 0.7944850325584412, "learning_rate": 1.5614132075075967e-05, "loss": 0.4352, "step": 7566 }, { "epoch": 1.3128036086051353, "grad_norm": 0.8636308908462524, "learning_rate": 1.560748491477329e-05, "loss": 0.3758, "step": 7567 }, { "epoch": 1.3129770992366412, "grad_norm": 1.3377954959869385, "learning_rate": 1.5600838264227102e-05, "loss": 0.4989, "step": 7568 }, { "epoch": 1.313150589868147, "grad_norm": 0.7625263333320618, "learning_rate": 1.5594192124208758e-05, "loss": 0.4374, "step": 7569 }, { "epoch": 1.313324080499653, "grad_norm": 0.6469513177871704, "learning_rate": 1.5587546495489563e-05, "loss": 0.4775, "step": 7570 }, { "epoch": 1.313497571131159, "grad_norm": 0.7678002119064331, "learning_rate": 1.558090137884073e-05, "loss": 0.4462, "step": 7571 }, { "epoch": 1.3136710617626648, "grad_norm": 0.7460236549377441, "learning_rate": 1.557425677503344e-05, "loss": 0.4767, "step": 7572 }, { "epoch": 1.3138445523941706, "grad_norm": 0.751082181930542, "learning_rate": 1.5567612684838805e-05, "loss": 0.4169, "step": 7573 }, { "epoch": 1.3140180430256767, "grad_norm": 1.1679073572158813, "learning_rate": 1.5560969109027896e-05, "loss": 0.4708, "step": 7574 }, { "epoch": 1.3141915336571826, "grad_norm": 0.7063091397285461, "learning_rate": 1.5554326048371686e-05, "loss": 0.5521, "step": 7575 }, { "epoch": 1.3143650242886884, "grad_norm": 0.5069116950035095, "learning_rate": 1.5547683503641115e-05, "loss": 0.5902, "step": 7576 }, { "epoch": 1.3145385149201942, "grad_norm": 0.7836443781852722, "learning_rate": 1.5541041475607073e-05, "loss": 0.4136, "step": 7577 }, { "epoch": 1.3147120055517, "grad_norm": 1.1055240631103516, "learning_rate": 1.5534399965040353e-05, "loss": 0.4011, "step": 7578 }, { "epoch": 1.3148854961832062, "grad_norm": 0.8819207549095154, "learning_rate": 1.552775897271172e-05, "loss": 0.4116, "step": 7579 }, { "epoch": 1.315058986814712, "grad_norm": 0.7257546186447144, "learning_rate": 1.552111849939188e-05, "loss": 0.5327, "step": 7580 }, { "epoch": 1.3152324774462179, "grad_norm": 1.2918899059295654, "learning_rate": 1.5514478545851452e-05, "loss": 0.4115, "step": 7581 }, { "epoch": 1.315405968077724, "grad_norm": 0.5972050428390503, "learning_rate": 1.550783911286101e-05, "loss": 0.5763, "step": 7582 }, { "epoch": 1.3155794587092298, "grad_norm": 0.8291058540344238, "learning_rate": 1.550120020119108e-05, "loss": 0.4026, "step": 7583 }, { "epoch": 1.3157529493407356, "grad_norm": 0.6583446264266968, "learning_rate": 1.5494561811612102e-05, "loss": 0.5385, "step": 7584 }, { "epoch": 1.3159264399722415, "grad_norm": 0.7057300806045532, "learning_rate": 1.548792394489448e-05, "loss": 0.4528, "step": 7585 }, { "epoch": 1.3160999306037473, "grad_norm": 0.6967214941978455, "learning_rate": 1.548128660180854e-05, "loss": 0.4697, "step": 7586 }, { "epoch": 1.3162734212352534, "grad_norm": 0.7139752507209778, "learning_rate": 1.5474649783124555e-05, "loss": 0.4589, "step": 7587 }, { "epoch": 1.3164469118667592, "grad_norm": 0.8640236854553223, "learning_rate": 1.5468013489612742e-05, "loss": 0.5447, "step": 7588 }, { "epoch": 1.316620402498265, "grad_norm": 0.8197435736656189, "learning_rate": 1.5461377722043235e-05, "loss": 0.5264, "step": 7589 }, { "epoch": 1.316793893129771, "grad_norm": 0.5978457927703857, "learning_rate": 1.5454742481186137e-05, "loss": 0.5288, "step": 7590 }, { "epoch": 1.316967383761277, "grad_norm": 0.6455949544906616, "learning_rate": 1.5448107767811468e-05, "loss": 0.5421, "step": 7591 }, { "epoch": 1.3171408743927828, "grad_norm": 1.9688469171524048, "learning_rate": 1.5441473582689198e-05, "loss": 0.641, "step": 7592 }, { "epoch": 1.3173143650242887, "grad_norm": 0.782586395740509, "learning_rate": 1.5434839926589236e-05, "loss": 0.6558, "step": 7593 }, { "epoch": 1.3174878556557945, "grad_norm": 0.7474291920661926, "learning_rate": 1.5428206800281413e-05, "loss": 0.5875, "step": 7594 }, { "epoch": 1.3176613462873004, "grad_norm": 0.7581984400749207, "learning_rate": 1.5421574204535516e-05, "loss": 0.5179, "step": 7595 }, { "epoch": 1.3178348369188064, "grad_norm": 0.6876397132873535, "learning_rate": 1.5414942140121278e-05, "loss": 0.4359, "step": 7596 }, { "epoch": 1.3180083275503123, "grad_norm": 0.6083647608757019, "learning_rate": 1.5408310607808336e-05, "loss": 0.5122, "step": 7597 }, { "epoch": 1.3181818181818181, "grad_norm": 1.8246253728866577, "learning_rate": 1.54016796083663e-05, "loss": 0.4479, "step": 7598 }, { "epoch": 1.3183553088133242, "grad_norm": 0.7424356341362, "learning_rate": 1.5395049142564717e-05, "loss": 0.449, "step": 7599 }, { "epoch": 1.31852879944483, "grad_norm": 0.7722662687301636, "learning_rate": 1.538841921117303e-05, "loss": 0.4231, "step": 7600 }, { "epoch": 1.3187022900763359, "grad_norm": 0.6274324655532837, "learning_rate": 1.5381789814960674e-05, "loss": 0.3984, "step": 7601 }, { "epoch": 1.3188757807078417, "grad_norm": 1.1475446224212646, "learning_rate": 1.5375160954696986e-05, "loss": 0.4741, "step": 7602 }, { "epoch": 1.3190492713393476, "grad_norm": 0.7600390315055847, "learning_rate": 1.536853263115127e-05, "loss": 0.373, "step": 7603 }, { "epoch": 1.3192227619708536, "grad_norm": 1.167948842048645, "learning_rate": 1.536190484509273e-05, "loss": 0.3993, "step": 7604 }, { "epoch": 1.3193962526023595, "grad_norm": 1.0073474645614624, "learning_rate": 1.5355277597290537e-05, "loss": 0.4735, "step": 7605 }, { "epoch": 1.3195697432338653, "grad_norm": 1.5348100662231445, "learning_rate": 1.5348650888513798e-05, "loss": 0.5296, "step": 7606 }, { "epoch": 1.3197432338653714, "grad_norm": 0.7548238039016724, "learning_rate": 1.5342024719531536e-05, "loss": 0.557, "step": 7607 }, { "epoch": 1.3199167244968772, "grad_norm": 0.7464498281478882, "learning_rate": 1.533539909111273e-05, "loss": 0.557, "step": 7608 }, { "epoch": 1.320090215128383, "grad_norm": 0.7466408610343933, "learning_rate": 1.5328774004026304e-05, "loss": 0.5782, "step": 7609 }, { "epoch": 1.320263705759889, "grad_norm": 0.8689438700675964, "learning_rate": 1.5322149459041097e-05, "loss": 0.4982, "step": 7610 }, { "epoch": 1.3204371963913948, "grad_norm": 0.6298958659172058, "learning_rate": 1.531552545692589e-05, "loss": 0.5846, "step": 7611 }, { "epoch": 1.3206106870229006, "grad_norm": 0.8127974271774292, "learning_rate": 1.5308901998449415e-05, "loss": 0.4543, "step": 7612 }, { "epoch": 1.3207841776544067, "grad_norm": 0.6954392194747925, "learning_rate": 1.5302279084380328e-05, "loss": 0.6154, "step": 7613 }, { "epoch": 1.3209576682859125, "grad_norm": 0.8068144917488098, "learning_rate": 1.5295656715487226e-05, "loss": 0.4207, "step": 7614 }, { "epoch": 1.3211311589174184, "grad_norm": 0.8445618748664856, "learning_rate": 1.528903489253865e-05, "loss": 0.4735, "step": 7615 }, { "epoch": 1.3213046495489245, "grad_norm": 0.7115435600280762, "learning_rate": 1.5282413616303063e-05, "loss": 0.4727, "step": 7616 }, { "epoch": 1.3214781401804303, "grad_norm": 0.7735037207603455, "learning_rate": 1.5275792887548866e-05, "loss": 0.4672, "step": 7617 }, { "epoch": 1.3216516308119362, "grad_norm": 2.0262577533721924, "learning_rate": 1.526917270704441e-05, "loss": 0.4323, "step": 7618 }, { "epoch": 1.321825121443442, "grad_norm": 0.8620812892913818, "learning_rate": 1.5262553075557985e-05, "loss": 0.5571, "step": 7619 }, { "epoch": 1.3219986120749478, "grad_norm": 0.9684853553771973, "learning_rate": 1.5255933993857785e-05, "loss": 0.5067, "step": 7620 }, { "epoch": 1.322172102706454, "grad_norm": 0.9358473420143127, "learning_rate": 1.5249315462711974e-05, "loss": 0.411, "step": 7621 }, { "epoch": 1.3223455933379598, "grad_norm": 0.888228178024292, "learning_rate": 1.5242697482888649e-05, "loss": 0.4415, "step": 7622 }, { "epoch": 1.3225190839694656, "grad_norm": 1.0634689331054688, "learning_rate": 1.5236080055155812e-05, "loss": 0.5203, "step": 7623 }, { "epoch": 1.3226925746009717, "grad_norm": 3.2980542182922363, "learning_rate": 1.5229463180281441e-05, "loss": 0.4736, "step": 7624 }, { "epoch": 1.3228660652324775, "grad_norm": 0.9480955600738525, "learning_rate": 1.522284685903343e-05, "loss": 0.5311, "step": 7625 }, { "epoch": 1.3230395558639834, "grad_norm": 0.8151806592941284, "learning_rate": 1.5216231092179604e-05, "loss": 0.4984, "step": 7626 }, { "epoch": 1.3232130464954892, "grad_norm": 0.7603073120117188, "learning_rate": 1.5209615880487728e-05, "loss": 0.4894, "step": 7627 }, { "epoch": 1.323386537126995, "grad_norm": 0.7880406379699707, "learning_rate": 1.5203001224725525e-05, "loss": 0.4729, "step": 7628 }, { "epoch": 1.3235600277585011, "grad_norm": 0.7434356212615967, "learning_rate": 1.5196387125660607e-05, "loss": 0.444, "step": 7629 }, { "epoch": 1.323733518390007, "grad_norm": 1.1501247882843018, "learning_rate": 1.5189773584060563e-05, "loss": 0.4009, "step": 7630 }, { "epoch": 1.3239070090215128, "grad_norm": 0.5699131488800049, "learning_rate": 1.51831606006929e-05, "loss": 0.5969, "step": 7631 }, { "epoch": 1.3240804996530187, "grad_norm": 0.7818866968154907, "learning_rate": 1.517654817632507e-05, "loss": 0.5359, "step": 7632 }, { "epoch": 1.3242539902845247, "grad_norm": 0.7145308256149292, "learning_rate": 1.5169936311724434e-05, "loss": 0.4215, "step": 7633 }, { "epoch": 1.3244274809160306, "grad_norm": 0.8266650438308716, "learning_rate": 1.5163325007658319e-05, "loss": 0.616, "step": 7634 }, { "epoch": 1.3246009715475364, "grad_norm": 0.9614689946174622, "learning_rate": 1.5156714264893974e-05, "loss": 0.3827, "step": 7635 }, { "epoch": 1.3247744621790423, "grad_norm": 0.6380361914634705, "learning_rate": 1.5150104084198587e-05, "loss": 0.5, "step": 7636 }, { "epoch": 1.3249479528105481, "grad_norm": 0.7799776196479797, "learning_rate": 1.5143494466339266e-05, "loss": 0.5543, "step": 7637 }, { "epoch": 1.3251214434420542, "grad_norm": 0.8001444935798645, "learning_rate": 1.5136885412083073e-05, "loss": 0.3903, "step": 7638 }, { "epoch": 1.32529493407356, "grad_norm": 0.6268643140792847, "learning_rate": 1.5130276922196993e-05, "loss": 0.5321, "step": 7639 }, { "epoch": 1.3254684247050659, "grad_norm": 0.5907706618309021, "learning_rate": 1.5123668997447948e-05, "loss": 0.4918, "step": 7640 }, { "epoch": 1.325641915336572, "grad_norm": 1.132335901260376, "learning_rate": 1.5117061638602811e-05, "loss": 0.4606, "step": 7641 }, { "epoch": 1.3258154059680778, "grad_norm": 0.7444654107093811, "learning_rate": 1.5110454846428348e-05, "loss": 0.5414, "step": 7642 }, { "epoch": 1.3259888965995836, "grad_norm": 0.735350489616394, "learning_rate": 1.51038486216913e-05, "loss": 0.3879, "step": 7643 }, { "epoch": 1.3261623872310895, "grad_norm": 0.6652734279632568, "learning_rate": 1.5097242965158322e-05, "loss": 0.486, "step": 7644 }, { "epoch": 1.3263358778625953, "grad_norm": 0.5584680438041687, "learning_rate": 1.5090637877596022e-05, "loss": 0.5537, "step": 7645 }, { "epoch": 1.3265093684941014, "grad_norm": 0.6846016645431519, "learning_rate": 1.5084033359770907e-05, "loss": 0.4653, "step": 7646 }, { "epoch": 1.3266828591256072, "grad_norm": 0.9131779670715332, "learning_rate": 1.507742941244945e-05, "loss": 0.5026, "step": 7647 }, { "epoch": 1.326856349757113, "grad_norm": 0.5463833212852478, "learning_rate": 1.5070826036398052e-05, "loss": 0.6051, "step": 7648 }, { "epoch": 1.3270298403886192, "grad_norm": 1.3209789991378784, "learning_rate": 1.5064223232383028e-05, "loss": 0.4663, "step": 7649 }, { "epoch": 1.327203331020125, "grad_norm": 0.8686030507087708, "learning_rate": 1.505762100117065e-05, "loss": 0.4188, "step": 7650 }, { "epoch": 1.3273768216516308, "grad_norm": 0.7723866105079651, "learning_rate": 1.5051019343527123e-05, "loss": 0.4111, "step": 7651 }, { "epoch": 1.3275503122831367, "grad_norm": 0.7040600180625916, "learning_rate": 1.5044418260218559e-05, "loss": 0.4075, "step": 7652 }, { "epoch": 1.3277238029146425, "grad_norm": 0.5787320137023926, "learning_rate": 1.503781775201103e-05, "loss": 0.5212, "step": 7653 }, { "epoch": 1.3278972935461484, "grad_norm": 0.7481715679168701, "learning_rate": 1.503121781967054e-05, "loss": 0.4248, "step": 7654 }, { "epoch": 1.3280707841776545, "grad_norm": 0.8455163240432739, "learning_rate": 1.5024618463963006e-05, "loss": 0.4517, "step": 7655 }, { "epoch": 1.3282442748091603, "grad_norm": 0.8468508124351501, "learning_rate": 1.5018019685654295e-05, "loss": 0.5693, "step": 7656 }, { "epoch": 1.3284177654406661, "grad_norm": 0.8724422454833984, "learning_rate": 1.5011421485510204e-05, "loss": 0.4233, "step": 7657 }, { "epoch": 1.3285912560721722, "grad_norm": 0.8734956979751587, "learning_rate": 1.5004823864296472e-05, "loss": 0.4835, "step": 7658 }, { "epoch": 1.328764746703678, "grad_norm": 1.4164583683013916, "learning_rate": 1.4998226822778743e-05, "loss": 0.419, "step": 7659 }, { "epoch": 1.328938237335184, "grad_norm": 0.7992679476737976, "learning_rate": 1.4991630361722619e-05, "loss": 0.5253, "step": 7660 }, { "epoch": 1.3291117279666897, "grad_norm": 0.8235474824905396, "learning_rate": 1.498503448189363e-05, "loss": 0.4349, "step": 7661 }, { "epoch": 1.3292852185981956, "grad_norm": 0.9846152663230896, "learning_rate": 1.4978439184057233e-05, "loss": 0.4272, "step": 7662 }, { "epoch": 1.3294587092297017, "grad_norm": 0.7669355273246765, "learning_rate": 1.497184446897882e-05, "loss": 0.3984, "step": 7663 }, { "epoch": 1.3296321998612075, "grad_norm": 0.9644906520843506, "learning_rate": 1.4965250337423718e-05, "loss": 0.342, "step": 7664 }, { "epoch": 1.3298056904927134, "grad_norm": 0.8945387601852417, "learning_rate": 1.4958656790157176e-05, "loss": 0.4828, "step": 7665 }, { "epoch": 1.3299791811242194, "grad_norm": 0.7948451042175293, "learning_rate": 1.4952063827944385e-05, "loss": 0.4005, "step": 7666 }, { "epoch": 1.3301526717557253, "grad_norm": 0.598304271697998, "learning_rate": 1.4945471451550481e-05, "loss": 0.5308, "step": 7667 }, { "epoch": 1.3303261623872311, "grad_norm": 0.7321178317070007, "learning_rate": 1.4938879661740495e-05, "loss": 0.4996, "step": 7668 }, { "epoch": 1.330499653018737, "grad_norm": 0.7548737525939941, "learning_rate": 1.4932288459279423e-05, "loss": 0.3862, "step": 7669 }, { "epoch": 1.3306731436502428, "grad_norm": 0.8476508855819702, "learning_rate": 1.4925697844932185e-05, "loss": 0.5082, "step": 7670 }, { "epoch": 1.3308466342817487, "grad_norm": 0.5477104783058167, "learning_rate": 1.491910781946362e-05, "loss": 0.693, "step": 7671 }, { "epoch": 1.3310201249132547, "grad_norm": 0.7386735677719116, "learning_rate": 1.4912518383638512e-05, "loss": 0.5115, "step": 7672 }, { "epoch": 1.3311936155447606, "grad_norm": 0.7570234537124634, "learning_rate": 1.4905929538221574e-05, "loss": 0.4691, "step": 7673 }, { "epoch": 1.3313671061762664, "grad_norm": 0.8122114539146423, "learning_rate": 1.4899341283977457e-05, "loss": 0.3833, "step": 7674 }, { "epoch": 1.3315405968077725, "grad_norm": 2.640561103820801, "learning_rate": 1.489275362167072e-05, "loss": 0.3793, "step": 7675 }, { "epoch": 1.3317140874392783, "grad_norm": 1.0989961624145508, "learning_rate": 1.4886166552065873e-05, "loss": 0.434, "step": 7676 }, { "epoch": 1.3318875780707842, "grad_norm": 0.6833113431930542, "learning_rate": 1.4879580075927367e-05, "loss": 0.5037, "step": 7677 }, { "epoch": 1.33206106870229, "grad_norm": 0.8466157913208008, "learning_rate": 1.4872994194019553e-05, "loss": 0.519, "step": 7678 }, { "epoch": 1.3322345593337959, "grad_norm": 0.6697059273719788, "learning_rate": 1.4866408907106734e-05, "loss": 0.5159, "step": 7679 }, { "epoch": 1.332408049965302, "grad_norm": 0.6055539846420288, "learning_rate": 1.4859824215953154e-05, "loss": 0.5037, "step": 7680 }, { "epoch": 1.3325815405968078, "grad_norm": 0.713620126247406, "learning_rate": 1.4853240121322951e-05, "loss": 0.4952, "step": 7681 }, { "epoch": 1.3327550312283136, "grad_norm": 0.7720019817352295, "learning_rate": 1.4846656623980234e-05, "loss": 0.6401, "step": 7682 }, { "epoch": 1.3329285218598197, "grad_norm": 0.607806384563446, "learning_rate": 1.4840073724689021e-05, "loss": 0.6267, "step": 7683 }, { "epoch": 1.3331020124913255, "grad_norm": 1.852420687675476, "learning_rate": 1.4833491424213268e-05, "loss": 0.4603, "step": 7684 }, { "epoch": 1.3332755031228314, "grad_norm": 0.6995673179626465, "learning_rate": 1.482690972331685e-05, "loss": 0.4906, "step": 7685 }, { "epoch": 1.3334489937543372, "grad_norm": 0.805355966091156, "learning_rate": 1.4820328622763584e-05, "loss": 0.4603, "step": 7686 }, { "epoch": 1.333622484385843, "grad_norm": 0.6964371204376221, "learning_rate": 1.4813748123317223e-05, "loss": 0.5219, "step": 7687 }, { "epoch": 1.3337959750173491, "grad_norm": 0.960204005241394, "learning_rate": 1.4807168225741433e-05, "loss": 0.3848, "step": 7688 }, { "epoch": 1.333969465648855, "grad_norm": 0.9914819598197937, "learning_rate": 1.4800588930799822e-05, "loss": 0.4637, "step": 7689 }, { "epoch": 1.3341429562803608, "grad_norm": 0.7618035078048706, "learning_rate": 1.4794010239255925e-05, "loss": 0.5688, "step": 7690 }, { "epoch": 1.3343164469118667, "grad_norm": 0.8735632300376892, "learning_rate": 1.4787432151873202e-05, "loss": 0.4575, "step": 7691 }, { "epoch": 1.3344899375433728, "grad_norm": 1.0803961753845215, "learning_rate": 1.4780854669415053e-05, "loss": 0.4144, "step": 7692 }, { "epoch": 1.3346634281748786, "grad_norm": 0.7738522291183472, "learning_rate": 1.4774277792644812e-05, "loss": 0.5168, "step": 7693 }, { "epoch": 1.3348369188063844, "grad_norm": 1.2571601867675781, "learning_rate": 1.4767701522325708e-05, "loss": 0.5652, "step": 7694 }, { "epoch": 1.3350104094378903, "grad_norm": 0.5889428853988647, "learning_rate": 1.4761125859220942e-05, "loss": 0.5564, "step": 7695 }, { "epoch": 1.3351839000693961, "grad_norm": 0.9888232946395874, "learning_rate": 1.4754550804093633e-05, "loss": 0.4918, "step": 7696 }, { "epoch": 1.3353573907009022, "grad_norm": 0.6778308749198914, "learning_rate": 1.4747976357706806e-05, "loss": 0.4269, "step": 7697 }, { "epoch": 1.335530881332408, "grad_norm": 0.6476753950119019, "learning_rate": 1.4741402520823442e-05, "loss": 0.5807, "step": 7698 }, { "epoch": 1.335704371963914, "grad_norm": 0.7573915719985962, "learning_rate": 1.4734829294206455e-05, "loss": 0.4604, "step": 7699 }, { "epoch": 1.33587786259542, "grad_norm": 0.7617419362068176, "learning_rate": 1.4728256678618652e-05, "loss": 0.603, "step": 7700 }, { "epoch": 1.3360513532269258, "grad_norm": 0.8916187286376953, "learning_rate": 1.4721684674822805e-05, "loss": 0.3832, "step": 7701 }, { "epoch": 1.3362248438584317, "grad_norm": 0.652928352355957, "learning_rate": 1.47151132835816e-05, "loss": 0.5734, "step": 7702 }, { "epoch": 1.3363983344899375, "grad_norm": 0.5686636567115784, "learning_rate": 1.4708542505657668e-05, "loss": 0.6082, "step": 7703 }, { "epoch": 1.3365718251214433, "grad_norm": 0.6646944284439087, "learning_rate": 1.4701972341813533e-05, "loss": 0.5999, "step": 7704 }, { "epoch": 1.3367453157529494, "grad_norm": 0.8325403332710266, "learning_rate": 1.4695402792811684e-05, "loss": 0.4495, "step": 7705 }, { "epoch": 1.3369188063844553, "grad_norm": 0.7930448055267334, "learning_rate": 1.4688833859414529e-05, "loss": 0.4022, "step": 7706 }, { "epoch": 1.337092297015961, "grad_norm": 0.7062762379646301, "learning_rate": 1.4682265542384384e-05, "loss": 0.4838, "step": 7707 }, { "epoch": 1.3372657876474672, "grad_norm": 0.6182875037193298, "learning_rate": 1.467569784248352e-05, "loss": 0.548, "step": 7708 }, { "epoch": 1.337439278278973, "grad_norm": 1.4004443883895874, "learning_rate": 1.466913076047413e-05, "loss": 0.4725, "step": 7709 }, { "epoch": 1.3376127689104789, "grad_norm": 0.8085479140281677, "learning_rate": 1.4662564297118325e-05, "loss": 0.4496, "step": 7710 }, { "epoch": 1.3377862595419847, "grad_norm": 0.6966636180877686, "learning_rate": 1.465599845317815e-05, "loss": 0.6305, "step": 7711 }, { "epoch": 1.3379597501734906, "grad_norm": 0.5736681222915649, "learning_rate": 1.4649433229415588e-05, "loss": 0.5409, "step": 7712 }, { "epoch": 1.3381332408049964, "grad_norm": 0.6678444147109985, "learning_rate": 1.4642868626592529e-05, "loss": 0.5149, "step": 7713 }, { "epoch": 1.3383067314365025, "grad_norm": 0.7446213960647583, "learning_rate": 1.4636304645470807e-05, "loss": 0.584, "step": 7714 }, { "epoch": 1.3384802220680083, "grad_norm": 0.7427680492401123, "learning_rate": 1.462974128681218e-05, "loss": 0.4988, "step": 7715 }, { "epoch": 1.3386537126995142, "grad_norm": 0.7200520634651184, "learning_rate": 1.4623178551378346e-05, "loss": 0.5967, "step": 7716 }, { "epoch": 1.3388272033310202, "grad_norm": 0.9318338632583618, "learning_rate": 1.4616616439930895e-05, "loss": 0.4989, "step": 7717 }, { "epoch": 1.339000693962526, "grad_norm": 0.6261063814163208, "learning_rate": 1.4610054953231379e-05, "loss": 0.507, "step": 7718 }, { "epoch": 1.339174184594032, "grad_norm": 0.6261553168296814, "learning_rate": 1.4603494092041275e-05, "loss": 0.6486, "step": 7719 }, { "epoch": 1.3393476752255378, "grad_norm": 0.7913298010826111, "learning_rate": 1.4596933857121963e-05, "loss": 0.52, "step": 7720 }, { "epoch": 1.3395211658570436, "grad_norm": 0.7656115293502808, "learning_rate": 1.4590374249234768e-05, "loss": 0.5051, "step": 7721 }, { "epoch": 1.3396946564885497, "grad_norm": 0.7011547684669495, "learning_rate": 1.4583815269140957e-05, "loss": 0.5421, "step": 7722 }, { "epoch": 1.3398681471200555, "grad_norm": 0.7271060943603516, "learning_rate": 1.4577256917601688e-05, "loss": 0.5173, "step": 7723 }, { "epoch": 1.3400416377515614, "grad_norm": 0.8287790417671204, "learning_rate": 1.4570699195378071e-05, "loss": 0.4644, "step": 7724 }, { "epoch": 1.3402151283830674, "grad_norm": 0.8536555767059326, "learning_rate": 1.4564142103231148e-05, "loss": 0.5801, "step": 7725 }, { "epoch": 1.3403886190145733, "grad_norm": 0.6832239627838135, "learning_rate": 1.4557585641921859e-05, "loss": 0.5763, "step": 7726 }, { "epoch": 1.3405621096460791, "grad_norm": 0.7031279802322388, "learning_rate": 1.4551029812211095e-05, "loss": 0.4196, "step": 7727 }, { "epoch": 1.340735600277585, "grad_norm": 0.6169347763061523, "learning_rate": 1.4544474614859683e-05, "loss": 0.4151, "step": 7728 }, { "epoch": 1.3409090909090908, "grad_norm": 0.7564440369606018, "learning_rate": 1.4537920050628338e-05, "loss": 0.3553, "step": 7729 }, { "epoch": 1.3410825815405967, "grad_norm": 1.0365413427352905, "learning_rate": 1.4531366120277736e-05, "loss": 0.4156, "step": 7730 }, { "epoch": 1.3412560721721027, "grad_norm": 0.6548910140991211, "learning_rate": 1.4524812824568471e-05, "loss": 0.631, "step": 7731 }, { "epoch": 1.3414295628036086, "grad_norm": 0.7457895874977112, "learning_rate": 1.4518260164261058e-05, "loss": 0.418, "step": 7732 }, { "epoch": 1.3416030534351144, "grad_norm": 0.6473345756530762, "learning_rate": 1.4511708140115942e-05, "loss": 0.5637, "step": 7733 }, { "epoch": 1.3417765440666205, "grad_norm": 1.1007072925567627, "learning_rate": 1.4505156752893488e-05, "loss": 0.5497, "step": 7734 }, { "epoch": 1.3419500346981263, "grad_norm": 1.1076034307479858, "learning_rate": 1.4498606003353998e-05, "loss": 0.4563, "step": 7735 }, { "epoch": 1.3421235253296322, "grad_norm": 0.6503068208694458, "learning_rate": 1.4492055892257688e-05, "loss": 0.5977, "step": 7736 }, { "epoch": 1.342297015961138, "grad_norm": 0.9386075735092163, "learning_rate": 1.4485506420364715e-05, "loss": 0.468, "step": 7737 }, { "epoch": 1.3424705065926439, "grad_norm": 0.7115505933761597, "learning_rate": 1.4478957588435148e-05, "loss": 0.5226, "step": 7738 }, { "epoch": 1.34264399722415, "grad_norm": 0.7394730448722839, "learning_rate": 1.4472409397228979e-05, "loss": 0.4031, "step": 7739 }, { "epoch": 1.3428174878556558, "grad_norm": 0.7652788758277893, "learning_rate": 1.4465861847506142e-05, "loss": 0.4783, "step": 7740 }, { "epoch": 1.3429909784871616, "grad_norm": 0.8783412575721741, "learning_rate": 1.4459314940026495e-05, "loss": 0.5023, "step": 7741 }, { "epoch": 1.3431644691186677, "grad_norm": 0.7272326350212097, "learning_rate": 1.4452768675549798e-05, "loss": 0.3974, "step": 7742 }, { "epoch": 1.3433379597501736, "grad_norm": 1.0041476488113403, "learning_rate": 1.4446223054835758e-05, "loss": 0.3462, "step": 7743 }, { "epoch": 1.3435114503816794, "grad_norm": 0.655902624130249, "learning_rate": 1.4439678078644004e-05, "loss": 0.5696, "step": 7744 }, { "epoch": 1.3436849410131853, "grad_norm": 0.8266571760177612, "learning_rate": 1.4433133747734097e-05, "loss": 0.5107, "step": 7745 }, { "epoch": 1.343858431644691, "grad_norm": 0.7135486602783203, "learning_rate": 1.4426590062865497e-05, "loss": 0.4677, "step": 7746 }, { "epoch": 1.3440319222761972, "grad_norm": 0.6802731156349182, "learning_rate": 1.4420047024797614e-05, "loss": 0.6051, "step": 7747 }, { "epoch": 1.344205412907703, "grad_norm": 1.187752366065979, "learning_rate": 1.4413504634289785e-05, "loss": 0.4467, "step": 7748 }, { "epoch": 1.3443789035392089, "grad_norm": 0.5917049050331116, "learning_rate": 1.4406962892101243e-05, "loss": 0.4995, "step": 7749 }, { "epoch": 1.3445523941707147, "grad_norm": 0.6279724836349487, "learning_rate": 1.4400421798991178e-05, "loss": 0.4962, "step": 7750 }, { "epoch": 1.3447258848022208, "grad_norm": 0.6500610113143921, "learning_rate": 1.4393881355718694e-05, "loss": 0.4487, "step": 7751 }, { "epoch": 1.3448993754337266, "grad_norm": 0.7289589047431946, "learning_rate": 1.4387341563042801e-05, "loss": 0.5084, "step": 7752 }, { "epoch": 1.3450728660652325, "grad_norm": 0.6606741547584534, "learning_rate": 1.4380802421722461e-05, "loss": 0.4774, "step": 7753 }, { "epoch": 1.3452463566967383, "grad_norm": 0.6247408986091614, "learning_rate": 1.4374263932516557e-05, "loss": 0.501, "step": 7754 }, { "epoch": 1.3454198473282442, "grad_norm": 0.8095901012420654, "learning_rate": 1.436772609618387e-05, "loss": 0.5699, "step": 7755 }, { "epoch": 1.3455933379597502, "grad_norm": 0.7537714242935181, "learning_rate": 1.4361188913483132e-05, "loss": 0.525, "step": 7756 }, { "epoch": 1.345766828591256, "grad_norm": 0.7632372975349426, "learning_rate": 1.4354652385172995e-05, "loss": 0.5256, "step": 7757 }, { "epoch": 1.345940319222762, "grad_norm": 0.6155702471733093, "learning_rate": 1.4348116512012024e-05, "loss": 0.5352, "step": 7758 }, { "epoch": 1.346113809854268, "grad_norm": 0.7520788311958313, "learning_rate": 1.4341581294758722e-05, "loss": 0.5274, "step": 7759 }, { "epoch": 1.3462873004857738, "grad_norm": 1.4066622257232666, "learning_rate": 1.4335046734171499e-05, "loss": 0.48, "step": 7760 }, { "epoch": 1.3464607911172797, "grad_norm": 0.667074978351593, "learning_rate": 1.4328512831008708e-05, "loss": 0.4421, "step": 7761 }, { "epoch": 1.3466342817487855, "grad_norm": 0.6374412178993225, "learning_rate": 1.4321979586028607e-05, "loss": 0.4266, "step": 7762 }, { "epoch": 1.3468077723802914, "grad_norm": 0.7195127606391907, "learning_rate": 1.431544699998939e-05, "loss": 0.5153, "step": 7763 }, { "epoch": 1.3469812630117974, "grad_norm": 0.8725728392601013, "learning_rate": 1.4308915073649182e-05, "loss": 0.3697, "step": 7764 }, { "epoch": 1.3471547536433033, "grad_norm": 0.8734724521636963, "learning_rate": 1.4302383807766003e-05, "loss": 0.3995, "step": 7765 }, { "epoch": 1.3473282442748091, "grad_norm": 0.7318059206008911, "learning_rate": 1.4295853203097823e-05, "loss": 0.4504, "step": 7766 }, { "epoch": 1.3475017349063152, "grad_norm": 0.759671151638031, "learning_rate": 1.4289323260402533e-05, "loss": 0.4802, "step": 7767 }, { "epoch": 1.347675225537821, "grad_norm": 0.8502726554870605, "learning_rate": 1.4282793980437923e-05, "loss": 0.5551, "step": 7768 }, { "epoch": 1.3478487161693269, "grad_norm": 0.7244731783866882, "learning_rate": 1.4276265363961735e-05, "loss": 0.4489, "step": 7769 }, { "epoch": 1.3480222068008327, "grad_norm": 0.6123337149620056, "learning_rate": 1.4269737411731627e-05, "loss": 0.5626, "step": 7770 }, { "epoch": 1.3481956974323386, "grad_norm": 0.876191258430481, "learning_rate": 1.4263210124505164e-05, "loss": 0.4436, "step": 7771 }, { "epoch": 1.3483691880638444, "grad_norm": 0.8973270058631897, "learning_rate": 1.425668350303985e-05, "loss": 0.4598, "step": 7772 }, { "epoch": 1.3485426786953505, "grad_norm": 0.6726403832435608, "learning_rate": 1.425015754809311e-05, "loss": 0.6379, "step": 7773 }, { "epoch": 1.3487161693268563, "grad_norm": 0.7155255675315857, "learning_rate": 1.4243632260422292e-05, "loss": 0.5647, "step": 7774 }, { "epoch": 1.3488896599583622, "grad_norm": 0.7165016531944275, "learning_rate": 1.4237107640784648e-05, "loss": 0.4246, "step": 7775 }, { "epoch": 1.3490631505898683, "grad_norm": 0.770491898059845, "learning_rate": 1.4230583689937381e-05, "loss": 0.5459, "step": 7776 }, { "epoch": 1.349236641221374, "grad_norm": 0.8517716526985168, "learning_rate": 1.4224060408637605e-05, "loss": 0.4296, "step": 7777 }, { "epoch": 1.34941013185288, "grad_norm": 0.6960669159889221, "learning_rate": 1.4217537797642343e-05, "loss": 0.4739, "step": 7778 }, { "epoch": 1.3495836224843858, "grad_norm": 0.9173792004585266, "learning_rate": 1.4211015857708555e-05, "loss": 0.4238, "step": 7779 }, { "epoch": 1.3497571131158916, "grad_norm": 0.6874178051948547, "learning_rate": 1.4204494589593127e-05, "loss": 0.4902, "step": 7780 }, { "epoch": 1.3499306037473977, "grad_norm": 0.5907986164093018, "learning_rate": 1.4197973994052855e-05, "loss": 0.574, "step": 7781 }, { "epoch": 1.3501040943789036, "grad_norm": 0.6873245239257812, "learning_rate": 1.4191454071844457e-05, "loss": 0.6541, "step": 7782 }, { "epoch": 1.3502775850104094, "grad_norm": 0.6468162536621094, "learning_rate": 1.4184934823724588e-05, "loss": 0.5618, "step": 7783 }, { "epoch": 1.3504510756419155, "grad_norm": 0.8056586384773254, "learning_rate": 1.4178416250449804e-05, "loss": 0.449, "step": 7784 }, { "epoch": 1.3506245662734213, "grad_norm": 0.9317519664764404, "learning_rate": 1.41718983527766e-05, "loss": 0.4087, "step": 7785 }, { "epoch": 1.3507980569049272, "grad_norm": 0.8486621975898743, "learning_rate": 1.4165381131461388e-05, "loss": 0.4053, "step": 7786 }, { "epoch": 1.350971547536433, "grad_norm": 0.7225809693336487, "learning_rate": 1.4158864587260488e-05, "loss": 0.4386, "step": 7787 }, { "epoch": 1.3511450381679388, "grad_norm": 1.2607461214065552, "learning_rate": 1.4152348720930156e-05, "loss": 0.5332, "step": 7788 }, { "epoch": 1.3513185287994447, "grad_norm": 0.5642040967941284, "learning_rate": 1.414583353322657e-05, "loss": 0.5754, "step": 7789 }, { "epoch": 1.3514920194309508, "grad_norm": 1.3561378717422485, "learning_rate": 1.4139319024905836e-05, "loss": 0.6034, "step": 7790 }, { "epoch": 1.3516655100624566, "grad_norm": 0.5607908368110657, "learning_rate": 1.4132805196723944e-05, "loss": 0.6348, "step": 7791 }, { "epoch": 1.3518390006939625, "grad_norm": 1.1165635585784912, "learning_rate": 1.412629204943685e-05, "loss": 0.4183, "step": 7792 }, { "epoch": 1.3520124913254685, "grad_norm": 0.7449413537979126, "learning_rate": 1.4119779583800414e-05, "loss": 0.4342, "step": 7793 }, { "epoch": 1.3521859819569744, "grad_norm": 0.7623336911201477, "learning_rate": 1.4113267800570402e-05, "loss": 0.489, "step": 7794 }, { "epoch": 1.3523594725884802, "grad_norm": 0.8422659635543823, "learning_rate": 1.4106756700502522e-05, "loss": 0.4011, "step": 7795 }, { "epoch": 1.352532963219986, "grad_norm": 0.778367817401886, "learning_rate": 1.41002462843524e-05, "loss": 0.4458, "step": 7796 }, { "epoch": 1.352706453851492, "grad_norm": 0.5561218857765198, "learning_rate": 1.4093736552875563e-05, "loss": 0.5618, "step": 7797 }, { "epoch": 1.352879944482998, "grad_norm": 0.8073115944862366, "learning_rate": 1.4087227506827482e-05, "loss": 0.4069, "step": 7798 }, { "epoch": 1.3530534351145038, "grad_norm": 0.7060088515281677, "learning_rate": 1.4080719146963548e-05, "loss": 0.478, "step": 7799 }, { "epoch": 1.3532269257460097, "grad_norm": 0.6544910669326782, "learning_rate": 1.4074211474039046e-05, "loss": 0.5046, "step": 7800 }, { "epoch": 1.3534004163775157, "grad_norm": 0.7442007660865784, "learning_rate": 1.4067704488809204e-05, "loss": 0.5033, "step": 7801 }, { "epoch": 1.3535739070090216, "grad_norm": 0.929850161075592, "learning_rate": 1.406119819202917e-05, "loss": 0.4822, "step": 7802 }, { "epoch": 1.3537473976405274, "grad_norm": 0.6624704003334045, "learning_rate": 1.4054692584454017e-05, "loss": 0.5205, "step": 7803 }, { "epoch": 1.3539208882720333, "grad_norm": 1.0278087854385376, "learning_rate": 1.4048187666838707e-05, "loss": 0.4707, "step": 7804 }, { "epoch": 1.3540943789035391, "grad_norm": 0.5705901980400085, "learning_rate": 1.4041683439938152e-05, "loss": 0.4578, "step": 7805 }, { "epoch": 1.3542678695350452, "grad_norm": 0.7489475011825562, "learning_rate": 1.4035179904507184e-05, "loss": 0.4791, "step": 7806 }, { "epoch": 1.354441360166551, "grad_norm": 0.8546077609062195, "learning_rate": 1.4028677061300535e-05, "loss": 0.5017, "step": 7807 }, { "epoch": 1.3546148507980569, "grad_norm": 0.6993005871772766, "learning_rate": 1.4022174911072868e-05, "loss": 0.5601, "step": 7808 }, { "epoch": 1.3547883414295627, "grad_norm": 1.622098445892334, "learning_rate": 1.4015673454578776e-05, "loss": 0.5212, "step": 7809 }, { "epoch": 1.3549618320610688, "grad_norm": 0.8998701572418213, "learning_rate": 1.4009172692572743e-05, "loss": 0.5009, "step": 7810 }, { "epoch": 1.3551353226925746, "grad_norm": 0.8079712390899658, "learning_rate": 1.4002672625809201e-05, "loss": 0.4517, "step": 7811 }, { "epoch": 1.3553088133240805, "grad_norm": 0.8419730067253113, "learning_rate": 1.39961732550425e-05, "loss": 0.5302, "step": 7812 }, { "epoch": 1.3554823039555863, "grad_norm": 0.980035126209259, "learning_rate": 1.3989674581026878e-05, "loss": 0.4274, "step": 7813 }, { "epoch": 1.3556557945870922, "grad_norm": 0.8754704594612122, "learning_rate": 1.3983176604516526e-05, "loss": 0.5271, "step": 7814 }, { "epoch": 1.3558292852185982, "grad_norm": 0.7613076567649841, "learning_rate": 1.3976679326265542e-05, "loss": 0.5587, "step": 7815 }, { "epoch": 1.356002775850104, "grad_norm": 0.7278582453727722, "learning_rate": 1.3970182747027944e-05, "loss": 0.4357, "step": 7816 }, { "epoch": 1.35617626648161, "grad_norm": 0.7281306385993958, "learning_rate": 1.3963686867557658e-05, "loss": 0.4153, "step": 7817 }, { "epoch": 1.356349757113116, "grad_norm": 0.8076234459877014, "learning_rate": 1.3957191688608544e-05, "loss": 0.4236, "step": 7818 }, { "epoch": 1.3565232477446219, "grad_norm": 0.7760826349258423, "learning_rate": 1.3950697210934387e-05, "loss": 0.4036, "step": 7819 }, { "epoch": 1.3566967383761277, "grad_norm": 1.1662112474441528, "learning_rate": 1.3944203435288857e-05, "loss": 0.4236, "step": 7820 }, { "epoch": 1.3568702290076335, "grad_norm": 0.9857275485992432, "learning_rate": 1.3937710362425574e-05, "loss": 0.4192, "step": 7821 }, { "epoch": 1.3570437196391394, "grad_norm": 0.9380022287368774, "learning_rate": 1.3931217993098076e-05, "loss": 0.5424, "step": 7822 }, { "epoch": 1.3572172102706455, "grad_norm": 0.9351032972335815, "learning_rate": 1.3924726328059794e-05, "loss": 0.3819, "step": 7823 }, { "epoch": 1.3573907009021513, "grad_norm": 0.7847961187362671, "learning_rate": 1.3918235368064102e-05, "loss": 0.4368, "step": 7824 }, { "epoch": 1.3575641915336571, "grad_norm": 0.6209611296653748, "learning_rate": 1.3911745113864288e-05, "loss": 0.6069, "step": 7825 }, { "epoch": 1.3577376821651632, "grad_norm": 1.0030800104141235, "learning_rate": 1.3905255566213542e-05, "loss": 0.417, "step": 7826 }, { "epoch": 1.357911172796669, "grad_norm": 0.8271725177764893, "learning_rate": 1.3898766725864988e-05, "loss": 0.3507, "step": 7827 }, { "epoch": 1.358084663428175, "grad_norm": 0.965467631816864, "learning_rate": 1.3892278593571669e-05, "loss": 0.4448, "step": 7828 }, { "epoch": 1.3582581540596808, "grad_norm": 0.6716170907020569, "learning_rate": 1.3885791170086535e-05, "loss": 0.5402, "step": 7829 }, { "epoch": 1.3584316446911866, "grad_norm": 0.8228726983070374, "learning_rate": 1.3879304456162457e-05, "loss": 0.5143, "step": 7830 }, { "epoch": 1.3586051353226924, "grad_norm": 0.9074976444244385, "learning_rate": 1.3872818452552227e-05, "loss": 0.4987, "step": 7831 }, { "epoch": 1.3587786259541985, "grad_norm": 0.6851913332939148, "learning_rate": 1.3866333160008562e-05, "loss": 0.436, "step": 7832 }, { "epoch": 1.3589521165857044, "grad_norm": 0.8769930601119995, "learning_rate": 1.3859848579284076e-05, "loss": 0.3749, "step": 7833 }, { "epoch": 1.3591256072172102, "grad_norm": 1.0876624584197998, "learning_rate": 1.3853364711131324e-05, "loss": 0.4473, "step": 7834 }, { "epoch": 1.3592990978487163, "grad_norm": 0.9927643537521362, "learning_rate": 1.3846881556302757e-05, "loss": 0.465, "step": 7835 }, { "epoch": 1.3594725884802221, "grad_norm": 0.7676509618759155, "learning_rate": 1.3840399115550748e-05, "loss": 0.491, "step": 7836 }, { "epoch": 1.359646079111728, "grad_norm": 0.8637543320655823, "learning_rate": 1.3833917389627603e-05, "loss": 0.4051, "step": 7837 }, { "epoch": 1.3598195697432338, "grad_norm": 0.7441905736923218, "learning_rate": 1.3827436379285537e-05, "loss": 0.3986, "step": 7838 }, { "epoch": 1.3599930603747397, "grad_norm": 0.8019585013389587, "learning_rate": 1.3820956085276661e-05, "loss": 0.3824, "step": 7839 }, { "epoch": 1.3601665510062457, "grad_norm": 1.0437533855438232, "learning_rate": 1.3814476508353036e-05, "loss": 0.4912, "step": 7840 }, { "epoch": 1.3603400416377516, "grad_norm": 0.6700484156608582, "learning_rate": 1.3807997649266625e-05, "loss": 0.5547, "step": 7841 }, { "epoch": 1.3605135322692574, "grad_norm": 0.743519127368927, "learning_rate": 1.3801519508769295e-05, "loss": 0.4938, "step": 7842 }, { "epoch": 1.3606870229007635, "grad_norm": 0.8325339555740356, "learning_rate": 1.3795042087612847e-05, "loss": 0.4314, "step": 7843 }, { "epoch": 1.3608605135322693, "grad_norm": 0.788335382938385, "learning_rate": 1.3788565386548996e-05, "loss": 0.4155, "step": 7844 }, { "epoch": 1.3610340041637752, "grad_norm": 3.8572866916656494, "learning_rate": 1.3782089406329377e-05, "loss": 0.4501, "step": 7845 }, { "epoch": 1.361207494795281, "grad_norm": 0.6721601486206055, "learning_rate": 1.3775614147705521e-05, "loss": 0.527, "step": 7846 }, { "epoch": 1.3613809854267869, "grad_norm": 0.9929586052894592, "learning_rate": 1.3769139611428895e-05, "loss": 0.3642, "step": 7847 }, { "epoch": 1.3615544760582927, "grad_norm": 0.8101338148117065, "learning_rate": 1.3762665798250887e-05, "loss": 0.3476, "step": 7848 }, { "epoch": 1.3617279666897988, "grad_norm": 0.8143279552459717, "learning_rate": 1.375619270892277e-05, "loss": 0.4586, "step": 7849 }, { "epoch": 1.3619014573213046, "grad_norm": 0.7993375658988953, "learning_rate": 1.3749720344195768e-05, "loss": 0.4391, "step": 7850 }, { "epoch": 1.3620749479528105, "grad_norm": 0.7568285465240479, "learning_rate": 1.3743248704821008e-05, "loss": 0.4366, "step": 7851 }, { "epoch": 1.3622484385843165, "grad_norm": 0.934909999370575, "learning_rate": 1.373677779154952e-05, "loss": 0.4734, "step": 7852 }, { "epoch": 1.3624219292158224, "grad_norm": 0.7335373759269714, "learning_rate": 1.3730307605132268e-05, "loss": 0.4347, "step": 7853 }, { "epoch": 1.3625954198473282, "grad_norm": 0.8993807435035706, "learning_rate": 1.3723838146320128e-05, "loss": 0.4152, "step": 7854 }, { "epoch": 1.362768910478834, "grad_norm": 0.7619749903678894, "learning_rate": 1.3717369415863884e-05, "loss": 0.3834, "step": 7855 }, { "epoch": 1.36294240111034, "grad_norm": 0.71795654296875, "learning_rate": 1.3710901414514235e-05, "loss": 0.4917, "step": 7856 }, { "epoch": 1.363115891741846, "grad_norm": 0.8929448127746582, "learning_rate": 1.370443414302181e-05, "loss": 0.5934, "step": 7857 }, { "epoch": 1.3632893823733518, "grad_norm": 0.7523466348648071, "learning_rate": 1.3697967602137135e-05, "loss": 0.6012, "step": 7858 }, { "epoch": 1.3634628730048577, "grad_norm": 0.6525294184684753, "learning_rate": 1.3691501792610662e-05, "loss": 0.6115, "step": 7859 }, { "epoch": 1.3636363636363638, "grad_norm": 0.7265623211860657, "learning_rate": 1.368503671519276e-05, "loss": 0.4817, "step": 7860 }, { "epoch": 1.3638098542678696, "grad_norm": 0.6999167203903198, "learning_rate": 1.3678572370633708e-05, "loss": 0.6019, "step": 7861 }, { "epoch": 1.3639833448993754, "grad_norm": 0.7755110263824463, "learning_rate": 1.3672108759683694e-05, "loss": 0.3886, "step": 7862 }, { "epoch": 1.3641568355308813, "grad_norm": 0.9417155385017395, "learning_rate": 1.366564588309283e-05, "loss": 0.4704, "step": 7863 }, { "epoch": 1.3643303261623871, "grad_norm": 0.7990795373916626, "learning_rate": 1.3659183741611154e-05, "loss": 0.4494, "step": 7864 }, { "epoch": 1.3645038167938932, "grad_norm": 1.172098994255066, "learning_rate": 1.3652722335988579e-05, "loss": 0.4503, "step": 7865 }, { "epoch": 1.364677307425399, "grad_norm": 0.8514444231987, "learning_rate": 1.3646261666974976e-05, "loss": 0.5867, "step": 7866 }, { "epoch": 1.364850798056905, "grad_norm": 0.704356849193573, "learning_rate": 1.3639801735320122e-05, "loss": 0.6355, "step": 7867 }, { "epoch": 1.3650242886884107, "grad_norm": 0.6933794617652893, "learning_rate": 1.3633342541773673e-05, "loss": 0.5706, "step": 7868 }, { "epoch": 1.3651977793199168, "grad_norm": 0.7712671756744385, "learning_rate": 1.3626884087085246e-05, "loss": 0.402, "step": 7869 }, { "epoch": 1.3653712699514227, "grad_norm": 0.6848683953285217, "learning_rate": 1.3620426372004353e-05, "loss": 0.5499, "step": 7870 }, { "epoch": 1.3655447605829285, "grad_norm": 0.7330373525619507, "learning_rate": 1.3613969397280405e-05, "loss": 0.5287, "step": 7871 }, { "epoch": 1.3657182512144344, "grad_norm": 1.203633189201355, "learning_rate": 1.360751316366275e-05, "loss": 0.5663, "step": 7872 }, { "epoch": 1.3658917418459402, "grad_norm": 0.7536115646362305, "learning_rate": 1.3601057671900639e-05, "loss": 0.5173, "step": 7873 }, { "epoch": 1.3660652324774463, "grad_norm": 0.7513604164123535, "learning_rate": 1.3594602922743252e-05, "loss": 0.3983, "step": 7874 }, { "epoch": 1.3662387231089521, "grad_norm": 0.7157559394836426, "learning_rate": 1.3588148916939651e-05, "loss": 0.4396, "step": 7875 }, { "epoch": 1.366412213740458, "grad_norm": 0.6541614532470703, "learning_rate": 1.358169565523884e-05, "loss": 0.4288, "step": 7876 }, { "epoch": 1.366585704371964, "grad_norm": 1.4620234966278076, "learning_rate": 1.3575243138389733e-05, "loss": 0.517, "step": 7877 }, { "epoch": 1.3667591950034699, "grad_norm": 0.8827812671661377, "learning_rate": 1.356879136714114e-05, "loss": 0.3667, "step": 7878 }, { "epoch": 1.3669326856349757, "grad_norm": 0.8430798053741455, "learning_rate": 1.3562340342241802e-05, "loss": 0.3443, "step": 7879 }, { "epoch": 1.3671061762664816, "grad_norm": 0.7463581562042236, "learning_rate": 1.3555890064440374e-05, "loss": 0.3945, "step": 7880 }, { "epoch": 1.3672796668979874, "grad_norm": 0.8372496366500854, "learning_rate": 1.3549440534485407e-05, "loss": 0.3915, "step": 7881 }, { "epoch": 1.3674531575294935, "grad_norm": 0.6953713893890381, "learning_rate": 1.3542991753125387e-05, "loss": 0.452, "step": 7882 }, { "epoch": 1.3676266481609993, "grad_norm": 0.8361397385597229, "learning_rate": 1.3536543721108698e-05, "loss": 0.5, "step": 7883 }, { "epoch": 1.3678001387925052, "grad_norm": 0.558370053768158, "learning_rate": 1.3530096439183637e-05, "loss": 0.5347, "step": 7884 }, { "epoch": 1.3679736294240112, "grad_norm": 0.7438952326774597, "learning_rate": 1.3523649908098423e-05, "loss": 0.3577, "step": 7885 }, { "epoch": 1.368147120055517, "grad_norm": 0.6980698108673096, "learning_rate": 1.3517204128601193e-05, "loss": 0.4368, "step": 7886 }, { "epoch": 1.368320610687023, "grad_norm": 0.9308164715766907, "learning_rate": 1.351075910143997e-05, "loss": 0.4532, "step": 7887 }, { "epoch": 1.3684941013185288, "grad_norm": 0.8904773592948914, "learning_rate": 1.3504314827362715e-05, "loss": 0.4336, "step": 7888 }, { "epoch": 1.3686675919500346, "grad_norm": 0.9000870585441589, "learning_rate": 1.3497871307117291e-05, "loss": 0.4104, "step": 7889 }, { "epoch": 1.3688410825815405, "grad_norm": 0.7551040053367615, "learning_rate": 1.3491428541451487e-05, "loss": 0.5902, "step": 7890 }, { "epoch": 1.3690145732130465, "grad_norm": 1.0482600927352905, "learning_rate": 1.3484986531112977e-05, "loss": 0.6021, "step": 7891 }, { "epoch": 1.3691880638445524, "grad_norm": 0.6290242671966553, "learning_rate": 1.3478545276849373e-05, "loss": 0.5288, "step": 7892 }, { "epoch": 1.3693615544760582, "grad_norm": 0.7598239183425903, "learning_rate": 1.3472104779408195e-05, "loss": 0.535, "step": 7893 }, { "epoch": 1.3695350451075643, "grad_norm": 0.7155095338821411, "learning_rate": 1.3465665039536857e-05, "loss": 0.5695, "step": 7894 }, { "epoch": 1.3697085357390701, "grad_norm": 1.1650316715240479, "learning_rate": 1.3459226057982706e-05, "loss": 0.3976, "step": 7895 }, { "epoch": 1.369882026370576, "grad_norm": 0.7217416167259216, "learning_rate": 1.3452787835492998e-05, "loss": 0.4963, "step": 7896 }, { "epoch": 1.3700555170020818, "grad_norm": 0.7166260480880737, "learning_rate": 1.3446350372814884e-05, "loss": 0.433, "step": 7897 }, { "epoch": 1.3702290076335877, "grad_norm": 0.7416203618049622, "learning_rate": 1.3439913670695445e-05, "loss": 0.4707, "step": 7898 }, { "epoch": 1.3704024982650937, "grad_norm": 0.6370003819465637, "learning_rate": 1.3433477729881679e-05, "loss": 0.5367, "step": 7899 }, { "epoch": 1.3705759888965996, "grad_norm": 0.7328839302062988, "learning_rate": 1.3427042551120461e-05, "loss": 0.4386, "step": 7900 }, { "epoch": 1.3707494795281054, "grad_norm": 1.436739444732666, "learning_rate": 1.342060813515862e-05, "loss": 0.452, "step": 7901 }, { "epoch": 1.3709229701596115, "grad_norm": 0.6831396222114563, "learning_rate": 1.3414174482742865e-05, "loss": 0.5085, "step": 7902 }, { "epoch": 1.3710964607911174, "grad_norm": 0.6140052676200867, "learning_rate": 1.3407741594619844e-05, "loss": 0.5034, "step": 7903 }, { "epoch": 1.3712699514226232, "grad_norm": 1.0730401277542114, "learning_rate": 1.3401309471536092e-05, "loss": 0.3562, "step": 7904 }, { "epoch": 1.371443442054129, "grad_norm": 0.6170622110366821, "learning_rate": 1.3394878114238059e-05, "loss": 0.4937, "step": 7905 }, { "epoch": 1.371616932685635, "grad_norm": 0.8561860918998718, "learning_rate": 1.3388447523472122e-05, "loss": 0.4633, "step": 7906 }, { "epoch": 1.3717904233171407, "grad_norm": 0.9009454250335693, "learning_rate": 1.3382017699984551e-05, "loss": 0.5392, "step": 7907 }, { "epoch": 1.3719639139486468, "grad_norm": 1.0146112442016602, "learning_rate": 1.337558864452154e-05, "loss": 0.467, "step": 7908 }, { "epoch": 1.3721374045801527, "grad_norm": 0.7898365259170532, "learning_rate": 1.3369160357829185e-05, "loss": 0.5266, "step": 7909 }, { "epoch": 1.3723108952116585, "grad_norm": 1.2049388885498047, "learning_rate": 1.3362732840653494e-05, "loss": 0.3904, "step": 7910 }, { "epoch": 1.3724843858431646, "grad_norm": 0.6061626076698303, "learning_rate": 1.3356306093740392e-05, "loss": 0.465, "step": 7911 }, { "epoch": 1.3726578764746704, "grad_norm": 0.7990296483039856, "learning_rate": 1.3349880117835716e-05, "loss": 0.494, "step": 7912 }, { "epoch": 1.3728313671061763, "grad_norm": 0.7539100646972656, "learning_rate": 1.3343454913685195e-05, "loss": 0.3859, "step": 7913 }, { "epoch": 1.373004857737682, "grad_norm": 0.7247158288955688, "learning_rate": 1.3337030482034485e-05, "loss": 0.4126, "step": 7914 }, { "epoch": 1.373178348369188, "grad_norm": 0.939651608467102, "learning_rate": 1.3330606823629161e-05, "loss": 0.3953, "step": 7915 }, { "epoch": 1.373351839000694, "grad_norm": 0.6934583783149719, "learning_rate": 1.332418393921468e-05, "loss": 0.4112, "step": 7916 }, { "epoch": 1.3735253296321999, "grad_norm": 0.6739574670791626, "learning_rate": 1.3317761829536428e-05, "loss": 0.5436, "step": 7917 }, { "epoch": 1.3736988202637057, "grad_norm": 1.6639131307601929, "learning_rate": 1.3311340495339704e-05, "loss": 0.5269, "step": 7918 }, { "epoch": 1.3738723108952118, "grad_norm": 0.8540711402893066, "learning_rate": 1.3304919937369718e-05, "loss": 0.4225, "step": 7919 }, { "epoch": 1.3740458015267176, "grad_norm": 0.7971619367599487, "learning_rate": 1.3298500156371565e-05, "loss": 0.3648, "step": 7920 }, { "epoch": 1.3742192921582235, "grad_norm": 0.9696105122566223, "learning_rate": 1.329208115309028e-05, "loss": 0.5089, "step": 7921 }, { "epoch": 1.3743927827897293, "grad_norm": 1.033799409866333, "learning_rate": 1.32856629282708e-05, "loss": 0.5649, "step": 7922 }, { "epoch": 1.3745662734212352, "grad_norm": 0.7602754831314087, "learning_rate": 1.3279245482657953e-05, "loss": 0.5388, "step": 7923 }, { "epoch": 1.3747397640527412, "grad_norm": 0.8027068972587585, "learning_rate": 1.3272828816996498e-05, "loss": 0.3746, "step": 7924 }, { "epoch": 1.374913254684247, "grad_norm": 0.7596278786659241, "learning_rate": 1.3266412932031108e-05, "loss": 0.4908, "step": 7925 }, { "epoch": 1.375086745315753, "grad_norm": 1.0552160739898682, "learning_rate": 1.3259997828506333e-05, "loss": 0.4192, "step": 7926 }, { "epoch": 1.3752602359472588, "grad_norm": 0.7753838300704956, "learning_rate": 1.3253583507166666e-05, "loss": 0.4374, "step": 7927 }, { "epoch": 1.3754337265787648, "grad_norm": 0.9890931248664856, "learning_rate": 1.3247169968756494e-05, "loss": 0.5284, "step": 7928 }, { "epoch": 1.3756072172102707, "grad_norm": 1.1649266481399536, "learning_rate": 1.3240757214020117e-05, "loss": 0.4145, "step": 7929 }, { "epoch": 1.3757807078417765, "grad_norm": 0.7576749324798584, "learning_rate": 1.3234345243701743e-05, "loss": 0.6349, "step": 7930 }, { "epoch": 1.3759541984732824, "grad_norm": 0.7057588696479797, "learning_rate": 1.3227934058545481e-05, "loss": 0.5475, "step": 7931 }, { "epoch": 1.3761276891047882, "grad_norm": 1.1025787591934204, "learning_rate": 1.322152365929537e-05, "loss": 0.3412, "step": 7932 }, { "epoch": 1.3763011797362943, "grad_norm": 0.7550604343414307, "learning_rate": 1.321511404669533e-05, "loss": 0.4082, "step": 7933 }, { "epoch": 1.3764746703678001, "grad_norm": 0.5956621170043945, "learning_rate": 1.320870522148921e-05, "loss": 0.6117, "step": 7934 }, { "epoch": 1.376648160999306, "grad_norm": 0.9815237522125244, "learning_rate": 1.3202297184420774e-05, "loss": 0.3808, "step": 7935 }, { "epoch": 1.376821651630812, "grad_norm": 0.6306245923042297, "learning_rate": 1.3195889936233662e-05, "loss": 0.4378, "step": 7936 }, { "epoch": 1.376995142262318, "grad_norm": 0.6237532496452332, "learning_rate": 1.318948347767145e-05, "loss": 0.521, "step": 7937 }, { "epoch": 1.3771686328938237, "grad_norm": 0.72686368227005, "learning_rate": 1.3183077809477625e-05, "loss": 0.4746, "step": 7938 }, { "epoch": 1.3773421235253296, "grad_norm": 0.8104560971260071, "learning_rate": 1.3176672932395556e-05, "loss": 0.4727, "step": 7939 }, { "epoch": 1.3775156141568354, "grad_norm": 0.7111600041389465, "learning_rate": 1.3170268847168541e-05, "loss": 0.4973, "step": 7940 }, { "epoch": 1.3776891047883415, "grad_norm": 0.9007184505462646, "learning_rate": 1.3163865554539794e-05, "loss": 0.3983, "step": 7941 }, { "epoch": 1.3778625954198473, "grad_norm": 0.7875963449478149, "learning_rate": 1.315746305525241e-05, "loss": 0.5021, "step": 7942 }, { "epoch": 1.3780360860513532, "grad_norm": 1.3286610841751099, "learning_rate": 1.3151061350049408e-05, "loss": 0.4929, "step": 7943 }, { "epoch": 1.3782095766828593, "grad_norm": 1.5478802919387817, "learning_rate": 1.3144660439673727e-05, "loss": 0.4792, "step": 7944 }, { "epoch": 1.378383067314365, "grad_norm": 0.7513118386268616, "learning_rate": 1.3138260324868179e-05, "loss": 0.5482, "step": 7945 }, { "epoch": 1.378556557945871, "grad_norm": 0.686283528804779, "learning_rate": 1.3131861006375519e-05, "loss": 0.4866, "step": 7946 }, { "epoch": 1.3787300485773768, "grad_norm": 0.8788372278213501, "learning_rate": 1.3125462484938391e-05, "loss": 0.3917, "step": 7947 }, { "epoch": 1.3789035392088826, "grad_norm": 0.772336483001709, "learning_rate": 1.311906476129936e-05, "loss": 0.5665, "step": 7948 }, { "epoch": 1.3790770298403885, "grad_norm": 0.776218831539154, "learning_rate": 1.3112667836200873e-05, "loss": 0.5111, "step": 7949 }, { "epoch": 1.3792505204718946, "grad_norm": 0.7619222402572632, "learning_rate": 1.3106271710385312e-05, "loss": 0.5242, "step": 7950 }, { "epoch": 1.3794240111034004, "grad_norm": 0.7595698833465576, "learning_rate": 1.3099876384594951e-05, "loss": 0.5463, "step": 7951 }, { "epoch": 1.3795975017349063, "grad_norm": 0.759110689163208, "learning_rate": 1.3093481859571981e-05, "loss": 0.4614, "step": 7952 }, { "epoch": 1.3797709923664123, "grad_norm": 1.032598614692688, "learning_rate": 1.3087088136058481e-05, "loss": 0.4301, "step": 7953 }, { "epoch": 1.3799444829979182, "grad_norm": 0.6489108204841614, "learning_rate": 1.3080695214796464e-05, "loss": 0.5071, "step": 7954 }, { "epoch": 1.380117973629424, "grad_norm": 0.8093173503875732, "learning_rate": 1.3074303096527823e-05, "loss": 0.3673, "step": 7955 }, { "epoch": 1.3802914642609299, "grad_norm": 1.596784234046936, "learning_rate": 1.3067911781994384e-05, "loss": 0.3483, "step": 7956 }, { "epoch": 1.3804649548924357, "grad_norm": 0.7278069853782654, "learning_rate": 1.306152127193786e-05, "loss": 0.5435, "step": 7957 }, { "epoch": 1.3806384455239418, "grad_norm": 0.5162386298179626, "learning_rate": 1.3055131567099872e-05, "loss": 0.5865, "step": 7958 }, { "epoch": 1.3808119361554476, "grad_norm": 0.8442279696464539, "learning_rate": 1.3048742668221959e-05, "loss": 0.5, "step": 7959 }, { "epoch": 1.3809854267869535, "grad_norm": 0.6834975481033325, "learning_rate": 1.3042354576045559e-05, "loss": 0.4366, "step": 7960 }, { "epoch": 1.3811589174184595, "grad_norm": 1.4897080659866333, "learning_rate": 1.3035967291312029e-05, "loss": 0.4402, "step": 7961 }, { "epoch": 1.3813324080499654, "grad_norm": 1.2184573411941528, "learning_rate": 1.3029580814762596e-05, "loss": 0.5431, "step": 7962 }, { "epoch": 1.3815058986814712, "grad_norm": 1.0185627937316895, "learning_rate": 1.3023195147138434e-05, "loss": 0.4384, "step": 7963 }, { "epoch": 1.381679389312977, "grad_norm": 0.6986583471298218, "learning_rate": 1.3016810289180615e-05, "loss": 0.5607, "step": 7964 }, { "epoch": 1.381852879944483, "grad_norm": 0.6124531626701355, "learning_rate": 1.3010426241630088e-05, "loss": 0.4775, "step": 7965 }, { "epoch": 1.3820263705759888, "grad_norm": 1.0028398036956787, "learning_rate": 1.3004043005227742e-05, "loss": 0.4349, "step": 7966 }, { "epoch": 1.3821998612074948, "grad_norm": 0.9703394770622253, "learning_rate": 1.2997660580714364e-05, "loss": 0.3924, "step": 7967 }, { "epoch": 1.3823733518390007, "grad_norm": 1.0974111557006836, "learning_rate": 1.2991278968830625e-05, "loss": 0.528, "step": 7968 }, { "epoch": 1.3825468424705065, "grad_norm": 1.0687956809997559, "learning_rate": 1.298489817031713e-05, "loss": 0.3678, "step": 7969 }, { "epoch": 1.3827203331020126, "grad_norm": 0.835202157497406, "learning_rate": 1.2978518185914388e-05, "loss": 0.3934, "step": 7970 }, { "epoch": 1.3828938237335184, "grad_norm": 0.6531467437744141, "learning_rate": 1.2972139016362782e-05, "loss": 0.4285, "step": 7971 }, { "epoch": 1.3830673143650243, "grad_norm": 0.8614683747291565, "learning_rate": 1.296576066240263e-05, "loss": 0.4725, "step": 7972 }, { "epoch": 1.3832408049965301, "grad_norm": 0.8402180075645447, "learning_rate": 1.2959383124774152e-05, "loss": 0.4806, "step": 7973 }, { "epoch": 1.383414295628036, "grad_norm": 0.6812103986740112, "learning_rate": 1.2953006404217474e-05, "loss": 0.3644, "step": 7974 }, { "epoch": 1.383587786259542, "grad_norm": 0.8534111976623535, "learning_rate": 1.2946630501472607e-05, "loss": 0.365, "step": 7975 }, { "epoch": 1.3837612768910479, "grad_norm": 0.6894952058792114, "learning_rate": 1.2940255417279486e-05, "loss": 0.3879, "step": 7976 }, { "epoch": 1.3839347675225537, "grad_norm": 0.7686868906021118, "learning_rate": 1.2933881152377956e-05, "loss": 0.4373, "step": 7977 }, { "epoch": 1.3841082581540598, "grad_norm": 0.5888801217079163, "learning_rate": 1.2927507707507751e-05, "loss": 0.412, "step": 7978 }, { "epoch": 1.3842817487855656, "grad_norm": 0.8832677006721497, "learning_rate": 1.2921135083408513e-05, "loss": 0.4, "step": 7979 }, { "epoch": 1.3844552394170715, "grad_norm": 0.8359270095825195, "learning_rate": 1.2914763280819804e-05, "loss": 0.5376, "step": 7980 }, { "epoch": 1.3846287300485773, "grad_norm": 0.8548023104667664, "learning_rate": 1.2908392300481067e-05, "loss": 0.4343, "step": 7981 }, { "epoch": 1.3848022206800832, "grad_norm": 0.7461701035499573, "learning_rate": 1.2902022143131668e-05, "loss": 0.3782, "step": 7982 }, { "epoch": 1.3849757113115893, "grad_norm": 1.1255135536193848, "learning_rate": 1.2895652809510875e-05, "loss": 0.4425, "step": 7983 }, { "epoch": 1.385149201943095, "grad_norm": 0.6272336840629578, "learning_rate": 1.2889284300357847e-05, "loss": 0.5726, "step": 7984 }, { "epoch": 1.385322692574601, "grad_norm": 0.8208845853805542, "learning_rate": 1.2882916616411658e-05, "loss": 0.4878, "step": 7985 }, { "epoch": 1.3854961832061068, "grad_norm": 0.6497074365615845, "learning_rate": 1.28765497584113e-05, "loss": 0.5112, "step": 7986 }, { "epoch": 1.3856696738376129, "grad_norm": 0.8497748970985413, "learning_rate": 1.2870183727095633e-05, "loss": 0.5624, "step": 7987 }, { "epoch": 1.3858431644691187, "grad_norm": 0.7245793342590332, "learning_rate": 1.2863818523203452e-05, "loss": 0.4678, "step": 7988 }, { "epoch": 1.3860166551006246, "grad_norm": 0.7548524737358093, "learning_rate": 1.285745414747345e-05, "loss": 0.4174, "step": 7989 }, { "epoch": 1.3861901457321304, "grad_norm": 1.084741473197937, "learning_rate": 1.2851090600644223e-05, "loss": 0.3726, "step": 7990 }, { "epoch": 1.3863636363636362, "grad_norm": 0.7011428475379944, "learning_rate": 1.284472788345425e-05, "loss": 0.4595, "step": 7991 }, { "epoch": 1.3865371269951423, "grad_norm": 0.8405042290687561, "learning_rate": 1.2838365996641949e-05, "loss": 0.5168, "step": 7992 }, { "epoch": 1.3867106176266482, "grad_norm": 0.7136561870574951, "learning_rate": 1.2832004940945626e-05, "loss": 0.477, "step": 7993 }, { "epoch": 1.386884108258154, "grad_norm": 0.668769896030426, "learning_rate": 1.2825644717103472e-05, "loss": 0.5544, "step": 7994 }, { "epoch": 1.38705759888966, "grad_norm": 2.888563394546509, "learning_rate": 1.281928532585361e-05, "loss": 0.4764, "step": 7995 }, { "epoch": 1.387231089521166, "grad_norm": 0.8526228666305542, "learning_rate": 1.281292676793406e-05, "loss": 0.4258, "step": 7996 }, { "epoch": 1.3874045801526718, "grad_norm": 0.8274763822555542, "learning_rate": 1.2806569044082729e-05, "loss": 0.504, "step": 7997 }, { "epoch": 1.3875780707841776, "grad_norm": 0.8843476176261902, "learning_rate": 1.2800212155037437e-05, "loss": 0.4702, "step": 7998 }, { "epoch": 1.3877515614156835, "grad_norm": 1.0373793840408325, "learning_rate": 1.2793856101535922e-05, "loss": 0.5594, "step": 7999 }, { "epoch": 1.3879250520471895, "grad_norm": 1.3321309089660645, "learning_rate": 1.27875008843158e-05, "loss": 0.5873, "step": 8000 }, { "epoch": 1.3880985426786954, "grad_norm": 0.6675841808319092, "learning_rate": 1.2781146504114604e-05, "loss": 0.4432, "step": 8001 }, { "epoch": 1.3882720333102012, "grad_norm": 0.6967312693595886, "learning_rate": 1.2774792961669764e-05, "loss": 0.5146, "step": 8002 }, { "epoch": 1.3884455239417073, "grad_norm": 0.7670105695724487, "learning_rate": 1.2768440257718626e-05, "loss": 0.4835, "step": 8003 }, { "epoch": 1.3886190145732131, "grad_norm": 0.7322806715965271, "learning_rate": 1.2762088392998417e-05, "loss": 0.4613, "step": 8004 }, { "epoch": 1.388792505204719, "grad_norm": 0.8436444997787476, "learning_rate": 1.275573736824629e-05, "loss": 0.4445, "step": 8005 }, { "epoch": 1.3889659958362248, "grad_norm": 2.392430067062378, "learning_rate": 1.2749387184199283e-05, "loss": 0.3987, "step": 8006 }, { "epoch": 1.3891394864677307, "grad_norm": 0.6554961204528809, "learning_rate": 1.2743037841594334e-05, "loss": 0.6082, "step": 8007 }, { "epoch": 1.3893129770992365, "grad_norm": 1.2686378955841064, "learning_rate": 1.2736689341168304e-05, "loss": 0.4048, "step": 8008 }, { "epoch": 1.3894864677307426, "grad_norm": 1.6640623807907104, "learning_rate": 1.2730341683657945e-05, "loss": 0.4293, "step": 8009 }, { "epoch": 1.3896599583622484, "grad_norm": 0.7305620908737183, "learning_rate": 1.2723994869799898e-05, "loss": 0.4161, "step": 8010 }, { "epoch": 1.3898334489937543, "grad_norm": 0.7039326429367065, "learning_rate": 1.2717648900330723e-05, "loss": 0.3781, "step": 8011 }, { "epoch": 1.3900069396252603, "grad_norm": 0.8124909400939941, "learning_rate": 1.2711303775986888e-05, "loss": 0.4572, "step": 8012 }, { "epoch": 1.3901804302567662, "grad_norm": 0.8309138417243958, "learning_rate": 1.2704959497504734e-05, "loss": 0.5268, "step": 8013 }, { "epoch": 1.390353920888272, "grad_norm": 0.7094196677207947, "learning_rate": 1.2698616065620528e-05, "loss": 0.4626, "step": 8014 }, { "epoch": 1.3905274115197779, "grad_norm": 0.8486608862876892, "learning_rate": 1.269227348107045e-05, "loss": 0.4437, "step": 8015 }, { "epoch": 1.3907009021512837, "grad_norm": 0.6205337047576904, "learning_rate": 1.2685931744590536e-05, "loss": 0.4705, "step": 8016 }, { "epoch": 1.3908743927827898, "grad_norm": 0.936288058757782, "learning_rate": 1.2679590856916769e-05, "loss": 0.4498, "step": 8017 }, { "epoch": 1.3910478834142956, "grad_norm": 0.8357410430908203, "learning_rate": 1.267325081878501e-05, "loss": 0.4055, "step": 8018 }, { "epoch": 1.3912213740458015, "grad_norm": 1.7587887048721313, "learning_rate": 1.266691163093104e-05, "loss": 0.4345, "step": 8019 }, { "epoch": 1.3913948646773076, "grad_norm": 0.9359058737754822, "learning_rate": 1.2660573294090512e-05, "loss": 0.41, "step": 8020 }, { "epoch": 1.3915683553088134, "grad_norm": 0.8059520721435547, "learning_rate": 1.2654235808999007e-05, "loss": 0.4606, "step": 8021 }, { "epoch": 1.3917418459403192, "grad_norm": 0.7084975242614746, "learning_rate": 1.2647899176392003e-05, "loss": 0.4529, "step": 8022 }, { "epoch": 1.391915336571825, "grad_norm": 0.7561855912208557, "learning_rate": 1.2641563397004859e-05, "loss": 0.5632, "step": 8023 }, { "epoch": 1.392088827203331, "grad_norm": 0.7674224376678467, "learning_rate": 1.2635228471572853e-05, "loss": 0.4802, "step": 8024 }, { "epoch": 1.3922623178348368, "grad_norm": 0.6567704081535339, "learning_rate": 1.2628894400831175e-05, "loss": 0.5457, "step": 8025 }, { "epoch": 1.3924358084663429, "grad_norm": 0.653133749961853, "learning_rate": 1.2622561185514886e-05, "loss": 0.61, "step": 8026 }, { "epoch": 1.3926092990978487, "grad_norm": 1.3273907899856567, "learning_rate": 1.2616228826358965e-05, "loss": 0.3828, "step": 8027 }, { "epoch": 1.3927827897293545, "grad_norm": 0.6018370389938354, "learning_rate": 1.2609897324098297e-05, "loss": 0.4529, "step": 8028 }, { "epoch": 1.3929562803608606, "grad_norm": 1.0048967599868774, "learning_rate": 1.2603566679467654e-05, "loss": 0.4092, "step": 8029 }, { "epoch": 1.3931297709923665, "grad_norm": 0.7821022272109985, "learning_rate": 1.2597236893201712e-05, "loss": 0.4672, "step": 8030 }, { "epoch": 1.3933032616238723, "grad_norm": 0.8553732633590698, "learning_rate": 1.259090796603506e-05, "loss": 0.4114, "step": 8031 }, { "epoch": 1.3934767522553781, "grad_norm": 1.0681912899017334, "learning_rate": 1.2584579898702175e-05, "loss": 0.462, "step": 8032 }, { "epoch": 1.393650242886884, "grad_norm": 0.9106276631355286, "learning_rate": 1.2578252691937425e-05, "loss": 0.4395, "step": 8033 }, { "epoch": 1.39382373351839, "grad_norm": 0.7456284761428833, "learning_rate": 1.25719263464751e-05, "loss": 0.4952, "step": 8034 }, { "epoch": 1.393997224149896, "grad_norm": 0.5790156126022339, "learning_rate": 1.2565600863049387e-05, "loss": 0.5056, "step": 8035 }, { "epoch": 1.3941707147814018, "grad_norm": 0.7689415216445923, "learning_rate": 1.2559276242394347e-05, "loss": 0.486, "step": 8036 }, { "epoch": 1.3943442054129078, "grad_norm": 1.6128205060958862, "learning_rate": 1.255295248524397e-05, "loss": 0.3565, "step": 8037 }, { "epoch": 1.3945176960444137, "grad_norm": 0.8087097406387329, "learning_rate": 1.254662959233214e-05, "loss": 0.4202, "step": 8038 }, { "epoch": 1.3946911866759195, "grad_norm": 0.8839709758758545, "learning_rate": 1.2540307564392627e-05, "loss": 0.4677, "step": 8039 }, { "epoch": 1.3948646773074254, "grad_norm": 0.7029903531074524, "learning_rate": 1.2533986402159113e-05, "loss": 0.59, "step": 8040 }, { "epoch": 1.3950381679389312, "grad_norm": 0.5902888178825378, "learning_rate": 1.2527666106365182e-05, "loss": 0.6255, "step": 8041 }, { "epoch": 1.3952116585704373, "grad_norm": 0.8269442915916443, "learning_rate": 1.25213466777443e-05, "loss": 0.4564, "step": 8042 }, { "epoch": 1.3953851492019431, "grad_norm": 0.6043368577957153, "learning_rate": 1.2515028117029848e-05, "loss": 0.4556, "step": 8043 }, { "epoch": 1.395558639833449, "grad_norm": 2.77972674369812, "learning_rate": 1.2508710424955117e-05, "loss": 0.5302, "step": 8044 }, { "epoch": 1.3957321304649548, "grad_norm": 0.7138830423355103, "learning_rate": 1.2502393602253258e-05, "loss": 0.5239, "step": 8045 }, { "epoch": 1.3959056210964609, "grad_norm": 0.6649674773216248, "learning_rate": 1.249607764965736e-05, "loss": 0.6154, "step": 8046 }, { "epoch": 1.3960791117279667, "grad_norm": 2.444833517074585, "learning_rate": 1.2489762567900395e-05, "loss": 0.4222, "step": 8047 }, { "epoch": 1.3962526023594726, "grad_norm": 1.1484051942825317, "learning_rate": 1.2483448357715242e-05, "loss": 0.3544, "step": 8048 }, { "epoch": 1.3964260929909784, "grad_norm": 1.0488779544830322, "learning_rate": 1.2477135019834659e-05, "loss": 0.51, "step": 8049 }, { "epoch": 1.3965995836224843, "grad_norm": 1.3062036037445068, "learning_rate": 1.2470822554991321e-05, "loss": 0.3418, "step": 8050 }, { "epoch": 1.3967730742539903, "grad_norm": 0.8650899529457092, "learning_rate": 1.2464510963917803e-05, "loss": 0.4396, "step": 8051 }, { "epoch": 1.3969465648854962, "grad_norm": 0.8141747713088989, "learning_rate": 1.2458200247346569e-05, "loss": 0.4513, "step": 8052 }, { "epoch": 1.397120055517002, "grad_norm": 0.8654581904411316, "learning_rate": 1.2451890406009981e-05, "loss": 0.3401, "step": 8053 }, { "epoch": 1.397293546148508, "grad_norm": 0.8306574821472168, "learning_rate": 1.2445581440640312e-05, "loss": 0.4193, "step": 8054 }, { "epoch": 1.397467036780014, "grad_norm": 1.0073142051696777, "learning_rate": 1.2439273351969712e-05, "loss": 0.5129, "step": 8055 }, { "epoch": 1.3976405274115198, "grad_norm": 0.6483321189880371, "learning_rate": 1.243296614073025e-05, "loss": 0.6597, "step": 8056 }, { "epoch": 1.3978140180430256, "grad_norm": 0.6348764896392822, "learning_rate": 1.2426659807653896e-05, "loss": 0.4689, "step": 8057 }, { "epoch": 1.3979875086745315, "grad_norm": 0.7256723642349243, "learning_rate": 1.2420354353472483e-05, "loss": 0.6156, "step": 8058 }, { "epoch": 1.3981609993060375, "grad_norm": 0.8737062215805054, "learning_rate": 1.2414049778917782e-05, "loss": 0.4009, "step": 8059 }, { "epoch": 1.3983344899375434, "grad_norm": 0.6082401275634766, "learning_rate": 1.2407746084721444e-05, "loss": 0.4656, "step": 8060 }, { "epoch": 1.3985079805690492, "grad_norm": 0.8343032598495483, "learning_rate": 1.2401443271615028e-05, "loss": 0.4202, "step": 8061 }, { "epoch": 1.3986814712005553, "grad_norm": 4.274116039276123, "learning_rate": 1.2395141340329966e-05, "loss": 0.4369, "step": 8062 }, { "epoch": 1.3988549618320612, "grad_norm": 0.8232946991920471, "learning_rate": 1.2388840291597611e-05, "loss": 0.3423, "step": 8063 }, { "epoch": 1.399028452463567, "grad_norm": 0.7979413270950317, "learning_rate": 1.2382540126149218e-05, "loss": 0.3979, "step": 8064 }, { "epoch": 1.3992019430950728, "grad_norm": 1.2604224681854248, "learning_rate": 1.2376240844715912e-05, "loss": 0.4266, "step": 8065 }, { "epoch": 1.3993754337265787, "grad_norm": 1.2358559370040894, "learning_rate": 1.2369942448028738e-05, "loss": 0.5542, "step": 8066 }, { "epoch": 1.3995489243580845, "grad_norm": 0.8104420900344849, "learning_rate": 1.2363644936818645e-05, "loss": 0.4565, "step": 8067 }, { "epoch": 1.3997224149895906, "grad_norm": 0.9057925343513489, "learning_rate": 1.2357348311816444e-05, "loss": 0.4686, "step": 8068 }, { "epoch": 1.3998959056210964, "grad_norm": 0.8612111806869507, "learning_rate": 1.2351052573752878e-05, "loss": 0.4067, "step": 8069 }, { "epoch": 1.4000693962526023, "grad_norm": 0.8033817410469055, "learning_rate": 1.2344757723358583e-05, "loss": 0.519, "step": 8070 }, { "epoch": 1.4002428868841084, "grad_norm": 0.6826604008674622, "learning_rate": 1.2338463761364063e-05, "loss": 0.5049, "step": 8071 }, { "epoch": 1.4004163775156142, "grad_norm": 0.7676969766616821, "learning_rate": 1.2332170688499753e-05, "loss": 0.5406, "step": 8072 }, { "epoch": 1.40058986814712, "grad_norm": 0.7458460330963135, "learning_rate": 1.2325878505495971e-05, "loss": 0.5112, "step": 8073 }, { "epoch": 1.400763358778626, "grad_norm": 0.8685405254364014, "learning_rate": 1.2319587213082931e-05, "loss": 0.5762, "step": 8074 }, { "epoch": 1.4009368494101317, "grad_norm": 0.9314450621604919, "learning_rate": 1.2313296811990747e-05, "loss": 0.4232, "step": 8075 }, { "epoch": 1.4011103400416378, "grad_norm": 2.373419761657715, "learning_rate": 1.230700730294942e-05, "loss": 0.5314, "step": 8076 }, { "epoch": 1.4012838306731437, "grad_norm": 0.9044128656387329, "learning_rate": 1.2300718686688863e-05, "loss": 0.4974, "step": 8077 }, { "epoch": 1.4014573213046495, "grad_norm": 0.7750056982040405, "learning_rate": 1.229443096393887e-05, "loss": 0.535, "step": 8078 }, { "epoch": 1.4016308119361556, "grad_norm": 0.7632639408111572, "learning_rate": 1.2288144135429146e-05, "loss": 0.4065, "step": 8079 }, { "epoch": 1.4018043025676614, "grad_norm": 0.8691059350967407, "learning_rate": 1.2281858201889283e-05, "loss": 0.505, "step": 8080 }, { "epoch": 1.4019777931991673, "grad_norm": 0.6620309352874756, "learning_rate": 1.2275573164048765e-05, "loss": 0.4806, "step": 8081 }, { "epoch": 1.4021512838306731, "grad_norm": 0.7421913743019104, "learning_rate": 1.226928902263698e-05, "loss": 0.5088, "step": 8082 }, { "epoch": 1.402324774462179, "grad_norm": 0.8207002282142639, "learning_rate": 1.2263005778383222e-05, "loss": 0.4769, "step": 8083 }, { "epoch": 1.4024982650936848, "grad_norm": 0.8392714262008667, "learning_rate": 1.2256723432016648e-05, "loss": 0.5535, "step": 8084 }, { "epoch": 1.4026717557251909, "grad_norm": 0.8502095341682434, "learning_rate": 1.2250441984266343e-05, "loss": 0.5287, "step": 8085 }, { "epoch": 1.4028452463566967, "grad_norm": 0.666627824306488, "learning_rate": 1.2244161435861286e-05, "loss": 0.5157, "step": 8086 }, { "epoch": 1.4030187369882026, "grad_norm": 0.8406854271888733, "learning_rate": 1.2237881787530324e-05, "loss": 0.4941, "step": 8087 }, { "epoch": 1.4031922276197086, "grad_norm": 0.7217394113540649, "learning_rate": 1.2231603040002225e-05, "loss": 0.4692, "step": 8088 }, { "epoch": 1.4033657182512145, "grad_norm": 6.5075297355651855, "learning_rate": 1.2225325194005642e-05, "loss": 0.4331, "step": 8089 }, { "epoch": 1.4035392088827203, "grad_norm": 0.8759097456932068, "learning_rate": 1.2219048250269141e-05, "loss": 0.4757, "step": 8090 }, { "epoch": 1.4037126995142262, "grad_norm": 0.802541196346283, "learning_rate": 1.2212772209521146e-05, "loss": 0.5006, "step": 8091 }, { "epoch": 1.403886190145732, "grad_norm": 0.6343971490859985, "learning_rate": 1.2206497072490014e-05, "loss": 0.4535, "step": 8092 }, { "epoch": 1.404059680777238, "grad_norm": 1.4083843231201172, "learning_rate": 1.2200222839903983e-05, "loss": 0.4753, "step": 8093 }, { "epoch": 1.404233171408744, "grad_norm": 1.0108699798583984, "learning_rate": 1.2193949512491172e-05, "loss": 0.4808, "step": 8094 }, { "epoch": 1.4044066620402498, "grad_norm": 0.9857318997383118, "learning_rate": 1.218767709097962e-05, "loss": 0.3835, "step": 8095 }, { "epoch": 1.4045801526717558, "grad_norm": 0.6723509430885315, "learning_rate": 1.2181405576097247e-05, "loss": 0.4135, "step": 8096 }, { "epoch": 1.4047536433032617, "grad_norm": 0.8481411337852478, "learning_rate": 1.2175134968571863e-05, "loss": 0.491, "step": 8097 }, { "epoch": 1.4049271339347675, "grad_norm": 1.2493733167648315, "learning_rate": 1.2168865269131182e-05, "loss": 0.4707, "step": 8098 }, { "epoch": 1.4051006245662734, "grad_norm": 0.6661719679832458, "learning_rate": 1.2162596478502815e-05, "loss": 0.5647, "step": 8099 }, { "epoch": 1.4052741151977792, "grad_norm": 0.7146643996238708, "learning_rate": 1.2156328597414258e-05, "loss": 0.5984, "step": 8100 }, { "epoch": 1.4054476058292853, "grad_norm": 0.996522068977356, "learning_rate": 1.2150061626592908e-05, "loss": 0.5195, "step": 8101 }, { "epoch": 1.4056210964607911, "grad_norm": 1.018540382385254, "learning_rate": 1.2143795566766054e-05, "loss": 0.5179, "step": 8102 }, { "epoch": 1.405794587092297, "grad_norm": 0.928839921951294, "learning_rate": 1.2137530418660877e-05, "loss": 0.3743, "step": 8103 }, { "epoch": 1.4059680777238028, "grad_norm": 0.7133684754371643, "learning_rate": 1.2131266183004455e-05, "loss": 0.5493, "step": 8104 }, { "epoch": 1.406141568355309, "grad_norm": 0.6732192635536194, "learning_rate": 1.212500286052376e-05, "loss": 0.408, "step": 8105 }, { "epoch": 1.4063150589868147, "grad_norm": 0.6635958552360535, "learning_rate": 1.2118740451945668e-05, "loss": 0.5432, "step": 8106 }, { "epoch": 1.4064885496183206, "grad_norm": 0.9566813111305237, "learning_rate": 1.2112478957996922e-05, "loss": 0.5143, "step": 8107 }, { "epoch": 1.4066620402498264, "grad_norm": 0.7447433471679688, "learning_rate": 1.2106218379404187e-05, "loss": 0.4912, "step": 8108 }, { "epoch": 1.4068355308813323, "grad_norm": 0.7607671618461609, "learning_rate": 1.2099958716894008e-05, "loss": 0.4187, "step": 8109 }, { "epoch": 1.4070090215128384, "grad_norm": 0.8422075510025024, "learning_rate": 1.2093699971192825e-05, "loss": 0.4257, "step": 8110 }, { "epoch": 1.4071825121443442, "grad_norm": 0.897028386592865, "learning_rate": 1.2087442143026968e-05, "loss": 0.6093, "step": 8111 }, { "epoch": 1.40735600277585, "grad_norm": 0.6197953224182129, "learning_rate": 1.208118523312268e-05, "loss": 0.4557, "step": 8112 }, { "epoch": 1.4075294934073561, "grad_norm": 1.1465601921081543, "learning_rate": 1.2074929242206066e-05, "loss": 0.3566, "step": 8113 }, { "epoch": 1.407702984038862, "grad_norm": 0.7143644094467163, "learning_rate": 1.2068674171003146e-05, "loss": 0.4926, "step": 8114 }, { "epoch": 1.4078764746703678, "grad_norm": 0.6614172458648682, "learning_rate": 1.2062420020239838e-05, "loss": 0.5656, "step": 8115 }, { "epoch": 1.4080499653018737, "grad_norm": 1.2392299175262451, "learning_rate": 1.205616679064193e-05, "loss": 0.5135, "step": 8116 }, { "epoch": 1.4082234559333795, "grad_norm": 0.8379489183425903, "learning_rate": 1.204991448293512e-05, "loss": 0.3663, "step": 8117 }, { "epoch": 1.4083969465648856, "grad_norm": 0.696752667427063, "learning_rate": 1.2043663097844999e-05, "loss": 0.3864, "step": 8118 }, { "epoch": 1.4085704371963914, "grad_norm": 0.7961304783821106, "learning_rate": 1.2037412636097056e-05, "loss": 0.5007, "step": 8119 }, { "epoch": 1.4087439278278973, "grad_norm": 0.6261388659477234, "learning_rate": 1.2031163098416644e-05, "loss": 0.5752, "step": 8120 }, { "epoch": 1.4089174184594033, "grad_norm": 0.8483225107192993, "learning_rate": 1.2024914485529042e-05, "loss": 0.3861, "step": 8121 }, { "epoch": 1.4090909090909092, "grad_norm": 0.7405794858932495, "learning_rate": 1.2018666798159408e-05, "loss": 0.3491, "step": 8122 }, { "epoch": 1.409264399722415, "grad_norm": 0.8894197344779968, "learning_rate": 1.2012420037032797e-05, "loss": 0.4347, "step": 8123 }, { "epoch": 1.4094378903539209, "grad_norm": 0.7734290361404419, "learning_rate": 1.2006174202874141e-05, "loss": 0.3259, "step": 8124 }, { "epoch": 1.4096113809854267, "grad_norm": 0.8192522525787354, "learning_rate": 1.1999929296408288e-05, "loss": 0.4576, "step": 8125 }, { "epoch": 1.4097848716169326, "grad_norm": 0.8636189699172974, "learning_rate": 1.1993685318359956e-05, "loss": 0.5552, "step": 8126 }, { "epoch": 1.4099583622484386, "grad_norm": 0.7944595217704773, "learning_rate": 1.1987442269453779e-05, "loss": 0.5653, "step": 8127 }, { "epoch": 1.4101318528799445, "grad_norm": 0.7815333008766174, "learning_rate": 1.1981200150414262e-05, "loss": 0.5242, "step": 8128 }, { "epoch": 1.4103053435114503, "grad_norm": 0.762512743473053, "learning_rate": 1.1974958961965806e-05, "loss": 0.5216, "step": 8129 }, { "epoch": 1.4104788341429564, "grad_norm": 0.6927229166030884, "learning_rate": 1.1968718704832716e-05, "loss": 0.5153, "step": 8130 }, { "epoch": 1.4106523247744622, "grad_norm": 0.5905316472053528, "learning_rate": 1.1962479379739179e-05, "loss": 0.5563, "step": 8131 }, { "epoch": 1.410825815405968, "grad_norm": 0.772675096988678, "learning_rate": 1.195624098740928e-05, "loss": 0.5208, "step": 8132 }, { "epoch": 1.410999306037474, "grad_norm": 0.6897779107093811, "learning_rate": 1.1950003528566981e-05, "loss": 0.4327, "step": 8133 }, { "epoch": 1.4111727966689798, "grad_norm": 1.1361875534057617, "learning_rate": 1.1943767003936152e-05, "loss": 0.4803, "step": 8134 }, { "epoch": 1.4113462873004858, "grad_norm": 0.6257690191268921, "learning_rate": 1.193753141424056e-05, "loss": 0.5568, "step": 8135 }, { "epoch": 1.4115197779319917, "grad_norm": 0.8639114499092102, "learning_rate": 1.1931296760203831e-05, "loss": 0.5244, "step": 8136 }, { "epoch": 1.4116932685634975, "grad_norm": 0.7131174802780151, "learning_rate": 1.1925063042549519e-05, "loss": 0.4972, "step": 8137 }, { "epoch": 1.4118667591950036, "grad_norm": 0.8591305613517761, "learning_rate": 1.1918830262001055e-05, "loss": 0.4943, "step": 8138 }, { "epoch": 1.4120402498265094, "grad_norm": 0.687491238117218, "learning_rate": 1.1912598419281747e-05, "loss": 0.4995, "step": 8139 }, { "epoch": 1.4122137404580153, "grad_norm": 0.7206785082817078, "learning_rate": 1.1906367515114816e-05, "loss": 0.5199, "step": 8140 }, { "epoch": 1.4123872310895211, "grad_norm": 1.3990131616592407, "learning_rate": 1.1900137550223376e-05, "loss": 0.3428, "step": 8141 }, { "epoch": 1.412560721721027, "grad_norm": 0.663081169128418, "learning_rate": 1.1893908525330401e-05, "loss": 0.3266, "step": 8142 }, { "epoch": 1.4127342123525328, "grad_norm": 1.1218020915985107, "learning_rate": 1.1887680441158791e-05, "loss": 0.4956, "step": 8143 }, { "epoch": 1.412907702984039, "grad_norm": 0.7569759488105774, "learning_rate": 1.1881453298431323e-05, "loss": 0.3933, "step": 8144 }, { "epoch": 1.4130811936155447, "grad_norm": 0.8735617399215698, "learning_rate": 1.1875227097870652e-05, "loss": 0.4637, "step": 8145 }, { "epoch": 1.4132546842470506, "grad_norm": 0.9186040163040161, "learning_rate": 1.1869001840199345e-05, "loss": 0.3828, "step": 8146 }, { "epoch": 1.4134281748785567, "grad_norm": 0.6188430786132812, "learning_rate": 1.1862777526139848e-05, "loss": 0.431, "step": 8147 }, { "epoch": 1.4136016655100625, "grad_norm": 0.6487597227096558, "learning_rate": 1.1856554156414503e-05, "loss": 0.4985, "step": 8148 }, { "epoch": 1.4137751561415683, "grad_norm": 0.7717430591583252, "learning_rate": 1.1850331731745541e-05, "loss": 0.3992, "step": 8149 }, { "epoch": 1.4139486467730742, "grad_norm": 0.8516520857810974, "learning_rate": 1.1844110252855072e-05, "loss": 0.4482, "step": 8150 }, { "epoch": 1.41412213740458, "grad_norm": 0.9156954884529114, "learning_rate": 1.1837889720465117e-05, "loss": 0.5394, "step": 8151 }, { "epoch": 1.414295628036086, "grad_norm": 1.0913176536560059, "learning_rate": 1.1831670135297564e-05, "loss": 0.3937, "step": 8152 }, { "epoch": 1.414469118667592, "grad_norm": 1.863344430923462, "learning_rate": 1.1825451498074209e-05, "loss": 0.431, "step": 8153 }, { "epoch": 1.4146426092990978, "grad_norm": 0.9412972927093506, "learning_rate": 1.1819233809516746e-05, "loss": 0.3961, "step": 8154 }, { "epoch": 1.4148160999306039, "grad_norm": 0.7449471950531006, "learning_rate": 1.181301707034672e-05, "loss": 0.5623, "step": 8155 }, { "epoch": 1.4149895905621097, "grad_norm": 1.3430240154266357, "learning_rate": 1.1806801281285604e-05, "loss": 0.4568, "step": 8156 }, { "epoch": 1.4151630811936156, "grad_norm": 2.3827271461486816, "learning_rate": 1.1800586443054754e-05, "loss": 0.447, "step": 8157 }, { "epoch": 1.4153365718251214, "grad_norm": 1.2478195428848267, "learning_rate": 1.1794372556375392e-05, "loss": 0.4565, "step": 8158 }, { "epoch": 1.4155100624566272, "grad_norm": 0.7910694479942322, "learning_rate": 1.178815962196866e-05, "loss": 0.4298, "step": 8159 }, { "epoch": 1.4156835530881333, "grad_norm": 0.7412468194961548, "learning_rate": 1.178194764055557e-05, "loss": 0.5117, "step": 8160 }, { "epoch": 1.4158570437196392, "grad_norm": 0.6640676856040955, "learning_rate": 1.1775736612857038e-05, "loss": 0.4578, "step": 8161 }, { "epoch": 1.416030534351145, "grad_norm": 0.8816553950309753, "learning_rate": 1.176952653959385e-05, "loss": 0.5081, "step": 8162 }, { "epoch": 1.4162040249826509, "grad_norm": 0.6530722975730896, "learning_rate": 1.17633174214867e-05, "loss": 0.604, "step": 8163 }, { "epoch": 1.416377515614157, "grad_norm": 1.0684694051742554, "learning_rate": 1.1757109259256163e-05, "loss": 0.4286, "step": 8164 }, { "epoch": 1.4165510062456628, "grad_norm": 0.7710397243499756, "learning_rate": 1.17509020536227e-05, "loss": 0.5076, "step": 8165 }, { "epoch": 1.4167244968771686, "grad_norm": 1.031315565109253, "learning_rate": 1.174469580530666e-05, "loss": 0.514, "step": 8166 }, { "epoch": 1.4168979875086745, "grad_norm": 0.7741178274154663, "learning_rate": 1.1738490515028303e-05, "loss": 0.4115, "step": 8167 }, { "epoch": 1.4170714781401803, "grad_norm": 0.9059246778488159, "learning_rate": 1.1732286183507738e-05, "loss": 0.4238, "step": 8168 }, { "epoch": 1.4172449687716864, "grad_norm": 0.7780495285987854, "learning_rate": 1.1726082811464994e-05, "loss": 0.5532, "step": 8169 }, { "epoch": 1.4174184594031922, "grad_norm": 0.7769747376441956, "learning_rate": 1.1719880399619987e-05, "loss": 0.4147, "step": 8170 }, { "epoch": 1.417591950034698, "grad_norm": 0.7542340159416199, "learning_rate": 1.171367894869251e-05, "loss": 0.517, "step": 8171 }, { "epoch": 1.4177654406662041, "grad_norm": 0.9824731349945068, "learning_rate": 1.1707478459402236e-05, "loss": 0.3795, "step": 8172 }, { "epoch": 1.41793893129771, "grad_norm": 1.423109531402588, "learning_rate": 1.170127893246876e-05, "loss": 0.4789, "step": 8173 }, { "epoch": 1.4181124219292158, "grad_norm": 0.9541971683502197, "learning_rate": 1.1695080368611526e-05, "loss": 0.4475, "step": 8174 }, { "epoch": 1.4182859125607217, "grad_norm": 0.9733325839042664, "learning_rate": 1.1688882768549895e-05, "loss": 0.5002, "step": 8175 }, { "epoch": 1.4184594031922275, "grad_norm": 0.6772567629814148, "learning_rate": 1.1682686133003105e-05, "loss": 0.4119, "step": 8176 }, { "epoch": 1.4186328938237336, "grad_norm": 0.6227893829345703, "learning_rate": 1.1676490462690282e-05, "loss": 0.4996, "step": 8177 }, { "epoch": 1.4188063844552394, "grad_norm": 0.6760939955711365, "learning_rate": 1.167029575833044e-05, "loss": 0.4785, "step": 8178 }, { "epoch": 1.4189798750867453, "grad_norm": 0.7768404483795166, "learning_rate": 1.1664102020642472e-05, "loss": 0.3849, "step": 8179 }, { "epoch": 1.4191533657182513, "grad_norm": 0.6399132609367371, "learning_rate": 1.1657909250345194e-05, "loss": 0.5537, "step": 8180 }, { "epoch": 1.4193268563497572, "grad_norm": 1.3256938457489014, "learning_rate": 1.1651717448157252e-05, "loss": 0.5266, "step": 8181 }, { "epoch": 1.419500346981263, "grad_norm": 1.1580417156219482, "learning_rate": 1.1645526614797235e-05, "loss": 0.3728, "step": 8182 }, { "epoch": 1.4196738376127689, "grad_norm": 2.0219199657440186, "learning_rate": 1.1639336750983593e-05, "loss": 0.4918, "step": 8183 }, { "epoch": 1.4198473282442747, "grad_norm": 5.228805065155029, "learning_rate": 1.1633147857434658e-05, "loss": 0.4169, "step": 8184 }, { "epoch": 1.4200208188757806, "grad_norm": 0.7683346271514893, "learning_rate": 1.162695993486866e-05, "loss": 0.392, "step": 8185 }, { "epoch": 1.4201943095072866, "grad_norm": 0.7288727760314941, "learning_rate": 1.1620772984003724e-05, "loss": 0.4781, "step": 8186 }, { "epoch": 1.4203678001387925, "grad_norm": 0.7506224513053894, "learning_rate": 1.1614587005557847e-05, "loss": 0.3904, "step": 8187 }, { "epoch": 1.4205412907702983, "grad_norm": 0.6392959356307983, "learning_rate": 1.1608402000248908e-05, "loss": 0.5076, "step": 8188 }, { "epoch": 1.4207147814018044, "grad_norm": 0.8153432607650757, "learning_rate": 1.160221796879471e-05, "loss": 0.4668, "step": 8189 }, { "epoch": 1.4208882720333103, "grad_norm": 0.6652383804321289, "learning_rate": 1.1596034911912896e-05, "loss": 0.5255, "step": 8190 }, { "epoch": 1.421061762664816, "grad_norm": 0.6237618327140808, "learning_rate": 1.1589852830321024e-05, "loss": 0.5607, "step": 8191 }, { "epoch": 1.421235253296322, "grad_norm": 0.9686141014099121, "learning_rate": 1.1583671724736526e-05, "loss": 0.5532, "step": 8192 }, { "epoch": 1.4214087439278278, "grad_norm": 2.3930442333221436, "learning_rate": 1.1577491595876744e-05, "loss": 0.4435, "step": 8193 }, { "epoch": 1.4215822345593339, "grad_norm": 0.7133120894432068, "learning_rate": 1.157131244445886e-05, "loss": 0.4365, "step": 8194 }, { "epoch": 1.4217557251908397, "grad_norm": 0.7545367479324341, "learning_rate": 1.1565134271199999e-05, "loss": 0.5017, "step": 8195 }, { "epoch": 1.4219292158223455, "grad_norm": 0.6678562164306641, "learning_rate": 1.1558957076817135e-05, "loss": 0.6106, "step": 8196 }, { "epoch": 1.4221027064538516, "grad_norm": 0.6045330762863159, "learning_rate": 1.1552780862027136e-05, "loss": 0.4875, "step": 8197 }, { "epoch": 1.4222761970853575, "grad_norm": 0.8746786117553711, "learning_rate": 1.1546605627546752e-05, "loss": 0.3557, "step": 8198 }, { "epoch": 1.4224496877168633, "grad_norm": 0.7303048372268677, "learning_rate": 1.154043137409265e-05, "loss": 0.4138, "step": 8199 }, { "epoch": 1.4226231783483692, "grad_norm": 1.039693832397461, "learning_rate": 1.1534258102381332e-05, "loss": 0.5621, "step": 8200 }, { "epoch": 1.422796668979875, "grad_norm": 0.7501853108406067, "learning_rate": 1.1528085813129233e-05, "loss": 0.5427, "step": 8201 }, { "epoch": 1.4229701596113808, "grad_norm": 1.3978708982467651, "learning_rate": 1.1521914507052646e-05, "loss": 0.4719, "step": 8202 }, { "epoch": 1.423143650242887, "grad_norm": 0.7245519757270813, "learning_rate": 1.151574418486776e-05, "loss": 0.3704, "step": 8203 }, { "epoch": 1.4233171408743928, "grad_norm": 1.0005288124084473, "learning_rate": 1.1509574847290641e-05, "loss": 0.3488, "step": 8204 }, { "epoch": 1.4234906315058986, "grad_norm": 1.342585563659668, "learning_rate": 1.150340649503726e-05, "loss": 0.5226, "step": 8205 }, { "epoch": 1.4236641221374047, "grad_norm": 1.026921272277832, "learning_rate": 1.1497239128823456e-05, "loss": 0.3259, "step": 8206 }, { "epoch": 1.4238376127689105, "grad_norm": 0.8890343308448792, "learning_rate": 1.149107274936496e-05, "loss": 0.4583, "step": 8207 }, { "epoch": 1.4240111034004164, "grad_norm": 1.0013446807861328, "learning_rate": 1.1484907357377378e-05, "loss": 0.3878, "step": 8208 }, { "epoch": 1.4241845940319222, "grad_norm": 0.8156121969223022, "learning_rate": 1.1478742953576225e-05, "loss": 0.4158, "step": 8209 }, { "epoch": 1.424358084663428, "grad_norm": 0.8877331018447876, "learning_rate": 1.1472579538676883e-05, "loss": 0.3945, "step": 8210 }, { "epoch": 1.4245315752949341, "grad_norm": 0.7141983509063721, "learning_rate": 1.1466417113394624e-05, "loss": 0.5415, "step": 8211 }, { "epoch": 1.42470506592644, "grad_norm": 0.8154292702674866, "learning_rate": 1.1460255678444598e-05, "loss": 0.5631, "step": 8212 }, { "epoch": 1.4248785565579458, "grad_norm": 0.8014858365058899, "learning_rate": 1.1454095234541848e-05, "loss": 0.4602, "step": 8213 }, { "epoch": 1.4250520471894519, "grad_norm": 0.803516149520874, "learning_rate": 1.144793578240131e-05, "loss": 0.4619, "step": 8214 }, { "epoch": 1.4252255378209577, "grad_norm": 0.6020379066467285, "learning_rate": 1.1441777322737791e-05, "loss": 0.5419, "step": 8215 }, { "epoch": 1.4253990284524636, "grad_norm": 0.6872659921646118, "learning_rate": 1.1435619856265982e-05, "loss": 0.5328, "step": 8216 }, { "epoch": 1.4255725190839694, "grad_norm": 0.6719970703125, "learning_rate": 1.1429463383700465e-05, "loss": 0.529, "step": 8217 }, { "epoch": 1.4257460097154753, "grad_norm": 0.7338549494743347, "learning_rate": 1.1423307905755714e-05, "loss": 0.4821, "step": 8218 }, { "epoch": 1.4259195003469813, "grad_norm": 1.0120731592178345, "learning_rate": 1.1417153423146074e-05, "loss": 0.4324, "step": 8219 }, { "epoch": 1.4260929909784872, "grad_norm": 0.7835046648979187, "learning_rate": 1.1410999936585782e-05, "loss": 0.5813, "step": 8220 }, { "epoch": 1.426266481609993, "grad_norm": 0.609506368637085, "learning_rate": 1.1404847446788948e-05, "loss": 0.4217, "step": 8221 }, { "epoch": 1.4264399722414989, "grad_norm": 0.7258155345916748, "learning_rate": 1.1398695954469598e-05, "loss": 0.4923, "step": 8222 }, { "epoch": 1.426613462873005, "grad_norm": 1.3275835514068604, "learning_rate": 1.1392545460341588e-05, "loss": 0.4771, "step": 8223 }, { "epoch": 1.4267869535045108, "grad_norm": 0.7754116654396057, "learning_rate": 1.1386395965118715e-05, "loss": 0.4517, "step": 8224 }, { "epoch": 1.4269604441360166, "grad_norm": 0.7081223130226135, "learning_rate": 1.1380247469514628e-05, "loss": 0.5043, "step": 8225 }, { "epoch": 1.4271339347675225, "grad_norm": 1.3863879442214966, "learning_rate": 1.1374099974242867e-05, "loss": 0.5066, "step": 8226 }, { "epoch": 1.4273074253990283, "grad_norm": 0.8166904449462891, "learning_rate": 1.1367953480016848e-05, "loss": 0.3375, "step": 8227 }, { "epoch": 1.4274809160305344, "grad_norm": 1.2641361951828003, "learning_rate": 1.13618079875499e-05, "loss": 0.377, "step": 8228 }, { "epoch": 1.4276544066620402, "grad_norm": 0.9420011639595032, "learning_rate": 1.1355663497555186e-05, "loss": 0.3428, "step": 8229 }, { "epoch": 1.427827897293546, "grad_norm": 1.1718153953552246, "learning_rate": 1.1349520010745802e-05, "loss": 0.3717, "step": 8230 }, { "epoch": 1.4280013879250522, "grad_norm": 1.6591047048568726, "learning_rate": 1.1343377527834703e-05, "loss": 0.5181, "step": 8231 }, { "epoch": 1.428174878556558, "grad_norm": 1.0140023231506348, "learning_rate": 1.1337236049534726e-05, "loss": 0.3816, "step": 8232 }, { "epoch": 1.4283483691880638, "grad_norm": 1.085556149482727, "learning_rate": 1.133109557655859e-05, "loss": 0.4036, "step": 8233 }, { "epoch": 1.4285218598195697, "grad_norm": 1.0881836414337158, "learning_rate": 1.1324956109618927e-05, "loss": 0.4258, "step": 8234 }, { "epoch": 1.4286953504510755, "grad_norm": 0.7150824069976807, "learning_rate": 1.1318817649428213e-05, "loss": 0.4537, "step": 8235 }, { "epoch": 1.4288688410825816, "grad_norm": 0.6293311715126038, "learning_rate": 1.1312680196698817e-05, "loss": 0.6064, "step": 8236 }, { "epoch": 1.4290423317140875, "grad_norm": 0.8089287877082825, "learning_rate": 1.1306543752143018e-05, "loss": 0.4554, "step": 8237 }, { "epoch": 1.4292158223455933, "grad_norm": 1.0295014381408691, "learning_rate": 1.1300408316472944e-05, "loss": 0.3726, "step": 8238 }, { "epoch": 1.4293893129770994, "grad_norm": 2.3466145992279053, "learning_rate": 1.1294273890400625e-05, "loss": 0.3907, "step": 8239 }, { "epoch": 1.4295628036086052, "grad_norm": 0.6298247575759888, "learning_rate": 1.1288140474637953e-05, "loss": 0.5355, "step": 8240 }, { "epoch": 1.429736294240111, "grad_norm": 0.9479881525039673, "learning_rate": 1.1282008069896749e-05, "loss": 0.4679, "step": 8241 }, { "epoch": 1.429909784871617, "grad_norm": 0.9374826550483704, "learning_rate": 1.127587667688865e-05, "loss": 0.489, "step": 8242 }, { "epoch": 1.4300832755031228, "grad_norm": 0.7527461647987366, "learning_rate": 1.1269746296325236e-05, "loss": 0.4746, "step": 8243 }, { "epoch": 1.4302567661346286, "grad_norm": 0.6833534240722656, "learning_rate": 1.126361692891794e-05, "loss": 0.5043, "step": 8244 }, { "epoch": 1.4304302567661347, "grad_norm": 0.6030701994895935, "learning_rate": 1.1257488575378077e-05, "loss": 0.4475, "step": 8245 }, { "epoch": 1.4306037473976405, "grad_norm": 0.8309581875801086, "learning_rate": 1.1251361236416845e-05, "loss": 0.4567, "step": 8246 }, { "epoch": 1.4307772380291464, "grad_norm": 0.63411545753479, "learning_rate": 1.124523491274534e-05, "loss": 0.5254, "step": 8247 }, { "epoch": 1.4309507286606524, "grad_norm": 0.8961930274963379, "learning_rate": 1.1239109605074527e-05, "loss": 0.3545, "step": 8248 }, { "epoch": 1.4311242192921583, "grad_norm": 0.7756201028823853, "learning_rate": 1.1232985314115255e-05, "loss": 0.4612, "step": 8249 }, { "epoch": 1.4312977099236641, "grad_norm": 1.0690734386444092, "learning_rate": 1.1226862040578244e-05, "loss": 0.3627, "step": 8250 }, { "epoch": 1.43147120055517, "grad_norm": 0.6681637167930603, "learning_rate": 1.1220739785174129e-05, "loss": 0.5686, "step": 8251 }, { "epoch": 1.4316446911866758, "grad_norm": 1.3204238414764404, "learning_rate": 1.1214618548613379e-05, "loss": 0.4663, "step": 8252 }, { "epoch": 1.4318181818181819, "grad_norm": 0.7250412702560425, "learning_rate": 1.120849833160639e-05, "loss": 0.5106, "step": 8253 }, { "epoch": 1.4319916724496877, "grad_norm": 0.6920293569564819, "learning_rate": 1.1202379134863412e-05, "loss": 0.5026, "step": 8254 }, { "epoch": 1.4321651630811936, "grad_norm": 0.9779270887374878, "learning_rate": 1.1196260959094588e-05, "loss": 0.4744, "step": 8255 }, { "epoch": 1.4323386537126996, "grad_norm": 0.7436539530754089, "learning_rate": 1.1190143805009934e-05, "loss": 0.4041, "step": 8256 }, { "epoch": 1.4325121443442055, "grad_norm": 0.9834410548210144, "learning_rate": 1.118402767331936e-05, "loss": 0.5546, "step": 8257 }, { "epoch": 1.4326856349757113, "grad_norm": 0.6436958909034729, "learning_rate": 1.117791256473265e-05, "loss": 0.4659, "step": 8258 }, { "epoch": 1.4328591256072172, "grad_norm": 1.0978981256484985, "learning_rate": 1.1171798479959466e-05, "loss": 0.4434, "step": 8259 }, { "epoch": 1.433032616238723, "grad_norm": 0.8727421164512634, "learning_rate": 1.1165685419709353e-05, "loss": 0.5299, "step": 8260 }, { "epoch": 1.433206106870229, "grad_norm": 0.7201048135757446, "learning_rate": 1.1159573384691736e-05, "loss": 0.4921, "step": 8261 }, { "epoch": 1.433379597501735, "grad_norm": 1.7645478248596191, "learning_rate": 1.1153462375615934e-05, "loss": 0.4193, "step": 8262 }, { "epoch": 1.4335530881332408, "grad_norm": 0.8043879866600037, "learning_rate": 1.1147352393191135e-05, "loss": 0.4173, "step": 8263 }, { "epoch": 1.4337265787647466, "grad_norm": 0.6321791410446167, "learning_rate": 1.1141243438126403e-05, "loss": 0.5363, "step": 8264 }, { "epoch": 1.4339000693962527, "grad_norm": 1.3232882022857666, "learning_rate": 1.1135135511130685e-05, "loss": 0.3771, "step": 8265 }, { "epoch": 1.4340735600277585, "grad_norm": 1.0101134777069092, "learning_rate": 1.1129028612912832e-05, "loss": 0.3775, "step": 8266 }, { "epoch": 1.4342470506592644, "grad_norm": 0.903439462184906, "learning_rate": 1.1122922744181541e-05, "loss": 0.3851, "step": 8267 }, { "epoch": 1.4344205412907702, "grad_norm": 1.0646275281906128, "learning_rate": 1.1116817905645411e-05, "loss": 0.4194, "step": 8268 }, { "epoch": 1.434594031922276, "grad_norm": 0.6139266490936279, "learning_rate": 1.1110714098012907e-05, "loss": 0.5493, "step": 8269 }, { "epoch": 1.4347675225537821, "grad_norm": 0.7320806980133057, "learning_rate": 1.1104611321992404e-05, "loss": 0.3738, "step": 8270 }, { "epoch": 1.434941013185288, "grad_norm": 2.575880527496338, "learning_rate": 1.1098509578292109e-05, "loss": 0.4389, "step": 8271 }, { "epoch": 1.4351145038167938, "grad_norm": 0.7725823521614075, "learning_rate": 1.1092408867620155e-05, "loss": 0.5675, "step": 8272 }, { "epoch": 1.4352879944483, "grad_norm": 1.3586474657058716, "learning_rate": 1.1086309190684532e-05, "loss": 0.3756, "step": 8273 }, { "epoch": 1.4354614850798058, "grad_norm": 0.8224762678146362, "learning_rate": 1.1080210548193113e-05, "loss": 0.3847, "step": 8274 }, { "epoch": 1.4356349757113116, "grad_norm": 1.038537621498108, "learning_rate": 1.107411294085365e-05, "loss": 0.4352, "step": 8275 }, { "epoch": 1.4358084663428174, "grad_norm": 0.828065812587738, "learning_rate": 1.1068016369373784e-05, "loss": 0.4152, "step": 8276 }, { "epoch": 1.4359819569743233, "grad_norm": 0.7691086530685425, "learning_rate": 1.1061920834461026e-05, "loss": 0.463, "step": 8277 }, { "epoch": 1.4361554476058294, "grad_norm": 0.9992594718933105, "learning_rate": 1.1055826336822775e-05, "loss": 0.3401, "step": 8278 }, { "epoch": 1.4363289382373352, "grad_norm": 0.7811552286148071, "learning_rate": 1.1049732877166286e-05, "loss": 0.3811, "step": 8279 }, { "epoch": 1.436502428868841, "grad_norm": 0.9243087768554688, "learning_rate": 1.1043640456198745e-05, "loss": 0.4268, "step": 8280 }, { "epoch": 1.436675919500347, "grad_norm": 0.9331871867179871, "learning_rate": 1.103754907462715e-05, "loss": 0.4979, "step": 8281 }, { "epoch": 1.436849410131853, "grad_norm": 0.6397832632064819, "learning_rate": 1.1031458733158434e-05, "loss": 0.421, "step": 8282 }, { "epoch": 1.4370229007633588, "grad_norm": 0.9624772071838379, "learning_rate": 1.1025369432499385e-05, "loss": 0.4974, "step": 8283 }, { "epoch": 1.4371963913948647, "grad_norm": 1.1527369022369385, "learning_rate": 1.101928117335666e-05, "loss": 0.4113, "step": 8284 }, { "epoch": 1.4373698820263705, "grad_norm": 0.6153997778892517, "learning_rate": 1.101319395643683e-05, "loss": 0.5398, "step": 8285 }, { "epoch": 1.4375433726578764, "grad_norm": 0.5979682803153992, "learning_rate": 1.100710778244631e-05, "loss": 0.568, "step": 8286 }, { "epoch": 1.4377168632893824, "grad_norm": 0.8310407996177673, "learning_rate": 1.1001022652091413e-05, "loss": 0.488, "step": 8287 }, { "epoch": 1.4378903539208883, "grad_norm": 0.6937515735626221, "learning_rate": 1.0994938566078315e-05, "loss": 0.4478, "step": 8288 }, { "epoch": 1.438063844552394, "grad_norm": 1.24624764919281, "learning_rate": 1.0988855525113096e-05, "loss": 0.4032, "step": 8289 }, { "epoch": 1.4382373351839002, "grad_norm": 1.329052448272705, "learning_rate": 1.0982773529901696e-05, "loss": 0.4254, "step": 8290 }, { "epoch": 1.438410825815406, "grad_norm": 0.8210911750793457, "learning_rate": 1.0976692581149933e-05, "loss": 0.4188, "step": 8291 }, { "epoch": 1.4385843164469119, "grad_norm": 0.8221972584724426, "learning_rate": 1.0970612679563501e-05, "loss": 0.4941, "step": 8292 }, { "epoch": 1.4387578070784177, "grad_norm": 0.7580887079238892, "learning_rate": 1.0964533825848008e-05, "loss": 0.386, "step": 8293 }, { "epoch": 1.4389312977099236, "grad_norm": 0.8661448359489441, "learning_rate": 1.0958456020708875e-05, "loss": 0.3766, "step": 8294 }, { "epoch": 1.4391047883414296, "grad_norm": 1.0701017379760742, "learning_rate": 1.0952379264851464e-05, "loss": 0.4092, "step": 8295 }, { "epoch": 1.4392782789729355, "grad_norm": 0.7596119046211243, "learning_rate": 1.0946303558980981e-05, "loss": 0.5391, "step": 8296 }, { "epoch": 1.4394517696044413, "grad_norm": 0.7729180455207825, "learning_rate": 1.094022890380252e-05, "loss": 0.4447, "step": 8297 }, { "epoch": 1.4396252602359474, "grad_norm": 2.3617026805877686, "learning_rate": 1.0934155300021048e-05, "loss": 0.4341, "step": 8298 }, { "epoch": 1.4397987508674532, "grad_norm": 1.3820737600326538, "learning_rate": 1.0928082748341429e-05, "loss": 0.3708, "step": 8299 }, { "epoch": 1.439972241498959, "grad_norm": 0.9820736646652222, "learning_rate": 1.0922011249468362e-05, "loss": 0.4109, "step": 8300 }, { "epoch": 1.440145732130465, "grad_norm": 0.7744532227516174, "learning_rate": 1.0915940804106477e-05, "loss": 0.5073, "step": 8301 }, { "epoch": 1.4403192227619708, "grad_norm": 0.7360996603965759, "learning_rate": 1.0909871412960245e-05, "loss": 0.6124, "step": 8302 }, { "epoch": 1.4404927133934766, "grad_norm": 0.8232945799827576, "learning_rate": 1.090380307673403e-05, "loss": 0.5385, "step": 8303 }, { "epoch": 1.4406662040249827, "grad_norm": 0.6191974878311157, "learning_rate": 1.0897735796132056e-05, "loss": 0.6143, "step": 8304 }, { "epoch": 1.4408396946564885, "grad_norm": 1.2847371101379395, "learning_rate": 1.0891669571858458e-05, "loss": 0.4058, "step": 8305 }, { "epoch": 1.4410131852879944, "grad_norm": 0.8297262191772461, "learning_rate": 1.0885604404617221e-05, "loss": 0.3998, "step": 8306 }, { "epoch": 1.4411866759195004, "grad_norm": 0.9757704138755798, "learning_rate": 1.0879540295112212e-05, "loss": 0.3265, "step": 8307 }, { "epoch": 1.4413601665510063, "grad_norm": 0.7426693439483643, "learning_rate": 1.087347724404717e-05, "loss": 0.4854, "step": 8308 }, { "epoch": 1.4415336571825121, "grad_norm": 1.0658361911773682, "learning_rate": 1.086741525212574e-05, "loss": 0.4758, "step": 8309 }, { "epoch": 1.441707147814018, "grad_norm": 0.6992179751396179, "learning_rate": 1.086135432005141e-05, "loss": 0.4962, "step": 8310 }, { "epoch": 1.4418806384455238, "grad_norm": 0.7144151926040649, "learning_rate": 1.0855294448527548e-05, "loss": 0.4519, "step": 8311 }, { "epoch": 1.44205412907703, "grad_norm": 0.6744188070297241, "learning_rate": 1.0849235638257442e-05, "loss": 0.405, "step": 8312 }, { "epoch": 1.4422276197085357, "grad_norm": 0.6769935488700867, "learning_rate": 1.0843177889944187e-05, "loss": 0.5189, "step": 8313 }, { "epoch": 1.4424011103400416, "grad_norm": 0.5383853912353516, "learning_rate": 1.0837121204290812e-05, "loss": 0.6127, "step": 8314 }, { "epoch": 1.4425746009715477, "grad_norm": 0.7602445483207703, "learning_rate": 1.08310655820002e-05, "loss": 0.6104, "step": 8315 }, { "epoch": 1.4427480916030535, "grad_norm": 1.0592349767684937, "learning_rate": 1.0825011023775111e-05, "loss": 0.5861, "step": 8316 }, { "epoch": 1.4429215822345594, "grad_norm": 0.5987264513969421, "learning_rate": 1.0818957530318177e-05, "loss": 0.5736, "step": 8317 }, { "epoch": 1.4430950728660652, "grad_norm": 0.6851949691772461, "learning_rate": 1.0812905102331927e-05, "loss": 0.4696, "step": 8318 }, { "epoch": 1.443268563497571, "grad_norm": 0.5215808153152466, "learning_rate": 1.0806853740518743e-05, "loss": 0.5856, "step": 8319 }, { "epoch": 1.4434420541290771, "grad_norm": 5.401055335998535, "learning_rate": 1.0800803445580896e-05, "loss": 0.4019, "step": 8320 }, { "epoch": 1.443615544760583, "grad_norm": 0.598903238773346, "learning_rate": 1.079475421822052e-05, "loss": 0.4912, "step": 8321 }, { "epoch": 1.4437890353920888, "grad_norm": 1.0459188222885132, "learning_rate": 1.078870605913966e-05, "loss": 0.3865, "step": 8322 }, { "epoch": 1.4439625260235947, "grad_norm": 0.783470630645752, "learning_rate": 1.0782658969040179e-05, "loss": 0.3664, "step": 8323 }, { "epoch": 1.4441360166551007, "grad_norm": 0.9712731242179871, "learning_rate": 1.0776612948623874e-05, "loss": 0.3561, "step": 8324 }, { "epoch": 1.4443095072866066, "grad_norm": 1.0471014976501465, "learning_rate": 1.0770567998592383e-05, "loss": 0.3859, "step": 8325 }, { "epoch": 1.4444829979181124, "grad_norm": 1.1087979078292847, "learning_rate": 1.0764524119647228e-05, "loss": 0.3643, "step": 8326 }, { "epoch": 1.4446564885496183, "grad_norm": 0.8103334307670593, "learning_rate": 1.0758481312489804e-05, "loss": 0.5364, "step": 8327 }, { "epoch": 1.444829979181124, "grad_norm": 1.0769774913787842, "learning_rate": 1.0752439577821398e-05, "loss": 0.5149, "step": 8328 }, { "epoch": 1.4450034698126302, "grad_norm": 0.6957473754882812, "learning_rate": 1.0746398916343158e-05, "loss": 0.3832, "step": 8329 }, { "epoch": 1.445176960444136, "grad_norm": 1.0740876197814941, "learning_rate": 1.0740359328756105e-05, "loss": 0.3268, "step": 8330 }, { "epoch": 1.4453504510756419, "grad_norm": 1.0540927648544312, "learning_rate": 1.0734320815761143e-05, "loss": 0.3591, "step": 8331 }, { "epoch": 1.445523941707148, "grad_norm": 0.8353700637817383, "learning_rate": 1.0728283378059036e-05, "loss": 0.5391, "step": 8332 }, { "epoch": 1.4456974323386538, "grad_norm": 1.2079349756240845, "learning_rate": 1.0722247016350456e-05, "loss": 0.5454, "step": 8333 }, { "epoch": 1.4458709229701596, "grad_norm": 0.9697757959365845, "learning_rate": 1.0716211731335922e-05, "loss": 0.4517, "step": 8334 }, { "epoch": 1.4460444136016655, "grad_norm": 0.704738199710846, "learning_rate": 1.0710177523715833e-05, "loss": 0.5906, "step": 8335 }, { "epoch": 1.4462179042331713, "grad_norm": 0.8852899074554443, "learning_rate": 1.0704144394190458e-05, "loss": 0.4235, "step": 8336 }, { "epoch": 1.4463913948646774, "grad_norm": 0.9528529047966003, "learning_rate": 1.0698112343459967e-05, "loss": 0.4167, "step": 8337 }, { "epoch": 1.4465648854961832, "grad_norm": 1.396693468093872, "learning_rate": 1.0692081372224378e-05, "loss": 0.3618, "step": 8338 }, { "epoch": 1.446738376127689, "grad_norm": 0.8811051845550537, "learning_rate": 1.0686051481183593e-05, "loss": 0.5481, "step": 8339 }, { "epoch": 1.4469118667591951, "grad_norm": 0.7508476376533508, "learning_rate": 1.0680022671037376e-05, "loss": 0.4497, "step": 8340 }, { "epoch": 1.447085357390701, "grad_norm": 1.0145187377929688, "learning_rate": 1.0673994942485404e-05, "loss": 0.5071, "step": 8341 }, { "epoch": 1.4472588480222068, "grad_norm": 0.7528433799743652, "learning_rate": 1.0667968296227169e-05, "loss": 0.4768, "step": 8342 }, { "epoch": 1.4474323386537127, "grad_norm": 1.0493375062942505, "learning_rate": 1.0661942732962096e-05, "loss": 0.3898, "step": 8343 }, { "epoch": 1.4476058292852185, "grad_norm": 0.8907434940338135, "learning_rate": 1.0655918253389452e-05, "loss": 0.4526, "step": 8344 }, { "epoch": 1.4477793199167244, "grad_norm": 1.0421587228775024, "learning_rate": 1.0649894858208381e-05, "loss": 0.4113, "step": 8345 }, { "epoch": 1.4479528105482304, "grad_norm": 0.9041519165039062, "learning_rate": 1.06438725481179e-05, "loss": 0.3333, "step": 8346 }, { "epoch": 1.4481263011797363, "grad_norm": 0.6785004734992981, "learning_rate": 1.0637851323816918e-05, "loss": 0.4388, "step": 8347 }, { "epoch": 1.4482997918112421, "grad_norm": 0.6762832403182983, "learning_rate": 1.06318311860042e-05, "loss": 0.4838, "step": 8348 }, { "epoch": 1.4484732824427482, "grad_norm": 0.6441954970359802, "learning_rate": 1.0625812135378388e-05, "loss": 0.59, "step": 8349 }, { "epoch": 1.448646773074254, "grad_norm": 0.781283438205719, "learning_rate": 1.0619794172637995e-05, "loss": 0.4589, "step": 8350 }, { "epoch": 1.44882026370576, "grad_norm": 1.0100747346878052, "learning_rate": 1.0613777298481431e-05, "loss": 0.3864, "step": 8351 }, { "epoch": 1.4489937543372657, "grad_norm": 0.6544135808944702, "learning_rate": 1.0607761513606935e-05, "loss": 0.5074, "step": 8352 }, { "epoch": 1.4491672449687716, "grad_norm": 1.044804573059082, "learning_rate": 1.0601746818712666e-05, "loss": 0.5249, "step": 8353 }, { "epoch": 1.4493407356002777, "grad_norm": 0.859351396560669, "learning_rate": 1.0595733214496633e-05, "loss": 0.5231, "step": 8354 }, { "epoch": 1.4495142262317835, "grad_norm": 0.7557289600372314, "learning_rate": 1.0589720701656714e-05, "loss": 0.4941, "step": 8355 }, { "epoch": 1.4496877168632893, "grad_norm": 0.5786538124084473, "learning_rate": 1.0583709280890668e-05, "loss": 0.5795, "step": 8356 }, { "epoch": 1.4498612074947954, "grad_norm": 1.1952705383300781, "learning_rate": 1.057769895289614e-05, "loss": 0.3909, "step": 8357 }, { "epoch": 1.4500346981263013, "grad_norm": 0.8314424157142639, "learning_rate": 1.0571689718370629e-05, "loss": 0.4208, "step": 8358 }, { "epoch": 1.450208188757807, "grad_norm": 0.8545066118240356, "learning_rate": 1.05656815780115e-05, "loss": 0.5289, "step": 8359 }, { "epoch": 1.450381679389313, "grad_norm": 0.5665410161018372, "learning_rate": 1.0559674532516033e-05, "loss": 0.5892, "step": 8360 }, { "epoch": 1.4505551700208188, "grad_norm": 0.7000844478607178, "learning_rate": 1.0553668582581324e-05, "loss": 0.4115, "step": 8361 }, { "epoch": 1.4507286606523246, "grad_norm": 0.8924859166145325, "learning_rate": 1.0547663728904392e-05, "loss": 0.3994, "step": 8362 }, { "epoch": 1.4509021512838307, "grad_norm": 0.938998281955719, "learning_rate": 1.0541659972182088e-05, "loss": 0.5281, "step": 8363 }, { "epoch": 1.4510756419153366, "grad_norm": 0.7375452518463135, "learning_rate": 1.0535657313111183e-05, "loss": 0.4556, "step": 8364 }, { "epoch": 1.4512491325468424, "grad_norm": 0.7703595757484436, "learning_rate": 1.0529655752388254e-05, "loss": 0.545, "step": 8365 }, { "epoch": 1.4514226231783485, "grad_norm": 0.7838109731674194, "learning_rate": 1.0523655290709825e-05, "loss": 0.5122, "step": 8366 }, { "epoch": 1.4515961138098543, "grad_norm": 0.7033941745758057, "learning_rate": 1.051765592877224e-05, "loss": 0.4962, "step": 8367 }, { "epoch": 1.4517696044413602, "grad_norm": 0.6633094549179077, "learning_rate": 1.0511657667271731e-05, "loss": 0.5167, "step": 8368 }, { "epoch": 1.451943095072866, "grad_norm": 0.8117217421531677, "learning_rate": 1.0505660506904398e-05, "loss": 0.554, "step": 8369 }, { "epoch": 1.4521165857043719, "grad_norm": 1.3190199136734009, "learning_rate": 1.0499664448366245e-05, "loss": 0.3636, "step": 8370 }, { "epoch": 1.452290076335878, "grad_norm": 0.8043152093887329, "learning_rate": 1.0493669492353082e-05, "loss": 0.3423, "step": 8371 }, { "epoch": 1.4524635669673838, "grad_norm": 1.0273507833480835, "learning_rate": 1.0487675639560664e-05, "loss": 0.317, "step": 8372 }, { "epoch": 1.4526370575988896, "grad_norm": 0.7748756408691406, "learning_rate": 1.048168289068457e-05, "loss": 0.4421, "step": 8373 }, { "epoch": 1.4528105482303957, "grad_norm": 1.1627253293991089, "learning_rate": 1.0475691246420267e-05, "loss": 0.5059, "step": 8374 }, { "epoch": 1.4529840388619015, "grad_norm": 0.6672224998474121, "learning_rate": 1.0469700707463089e-05, "loss": 0.5135, "step": 8375 }, { "epoch": 1.4531575294934074, "grad_norm": 0.732526957988739, "learning_rate": 1.0463711274508253e-05, "loss": 0.5013, "step": 8376 }, { "epoch": 1.4533310201249132, "grad_norm": 0.9877192378044128, "learning_rate": 1.0457722948250837e-05, "loss": 0.5612, "step": 8377 }, { "epoch": 1.453504510756419, "grad_norm": 0.9189408421516418, "learning_rate": 1.045173572938579e-05, "loss": 0.3949, "step": 8378 }, { "epoch": 1.4536780013879251, "grad_norm": 0.7369690537452698, "learning_rate": 1.0445749618607932e-05, "loss": 0.5655, "step": 8379 }, { "epoch": 1.453851492019431, "grad_norm": 1.3301423788070679, "learning_rate": 1.0439764616611972e-05, "loss": 0.3816, "step": 8380 }, { "epoch": 1.4540249826509368, "grad_norm": 0.7214325070381165, "learning_rate": 1.0433780724092466e-05, "loss": 0.3775, "step": 8381 }, { "epoch": 1.4541984732824427, "grad_norm": 1.0076757669448853, "learning_rate": 1.0427797941743854e-05, "loss": 0.5013, "step": 8382 }, { "epoch": 1.4543719639139487, "grad_norm": 0.5548766851425171, "learning_rate": 1.0421816270260447e-05, "loss": 0.6019, "step": 8383 }, { "epoch": 1.4545454545454546, "grad_norm": 0.7440147995948792, "learning_rate": 1.041583571033641e-05, "loss": 0.603, "step": 8384 }, { "epoch": 1.4547189451769604, "grad_norm": 0.7528059482574463, "learning_rate": 1.0409856262665818e-05, "loss": 0.4189, "step": 8385 }, { "epoch": 1.4548924358084663, "grad_norm": 0.8353288769721985, "learning_rate": 1.0403877927942582e-05, "loss": 0.5216, "step": 8386 }, { "epoch": 1.4550659264399721, "grad_norm": 0.7426737546920776, "learning_rate": 1.0397900706860493e-05, "loss": 0.4143, "step": 8387 }, { "epoch": 1.4552394170714782, "grad_norm": 0.7790663242340088, "learning_rate": 1.0391924600113211e-05, "loss": 0.4835, "step": 8388 }, { "epoch": 1.455412907702984, "grad_norm": 0.8680485486984253, "learning_rate": 1.0385949608394283e-05, "loss": 0.5001, "step": 8389 }, { "epoch": 1.4555863983344899, "grad_norm": 0.7560135126113892, "learning_rate": 1.0379975732397096e-05, "loss": 0.4662, "step": 8390 }, { "epoch": 1.455759888965996, "grad_norm": 0.6263082027435303, "learning_rate": 1.0374002972814941e-05, "loss": 0.5312, "step": 8391 }, { "epoch": 1.4559333795975018, "grad_norm": 1.1819356679916382, "learning_rate": 1.0368031330340948e-05, "loss": 0.3561, "step": 8392 }, { "epoch": 1.4561068702290076, "grad_norm": 0.944335401058197, "learning_rate": 1.0362060805668162e-05, "loss": 0.5069, "step": 8393 }, { "epoch": 1.4562803608605135, "grad_norm": 0.9007725715637207, "learning_rate": 1.0356091399489431e-05, "loss": 0.4227, "step": 8394 }, { "epoch": 1.4564538514920193, "grad_norm": 0.6720866560935974, "learning_rate": 1.0350123112497541e-05, "loss": 0.4968, "step": 8395 }, { "epoch": 1.4566273421235254, "grad_norm": 0.7603023648262024, "learning_rate": 1.0344155945385106e-05, "loss": 0.3953, "step": 8396 }, { "epoch": 1.4568008327550312, "grad_norm": 0.8988627791404724, "learning_rate": 1.0338189898844626e-05, "loss": 0.4646, "step": 8397 }, { "epoch": 1.456974323386537, "grad_norm": 0.9700048565864563, "learning_rate": 1.0332224973568458e-05, "loss": 0.3648, "step": 8398 }, { "epoch": 1.4571478140180432, "grad_norm": 0.9551883339881897, "learning_rate": 1.0326261170248863e-05, "loss": 0.4601, "step": 8399 }, { "epoch": 1.457321304649549, "grad_norm": 0.7829773426055908, "learning_rate": 1.0320298489577913e-05, "loss": 0.4734, "step": 8400 }, { "epoch": 1.4574947952810549, "grad_norm": 0.7974841594696045, "learning_rate": 1.0314336932247615e-05, "loss": 0.3765, "step": 8401 }, { "epoch": 1.4576682859125607, "grad_norm": 0.8945306539535522, "learning_rate": 1.03083764989498e-05, "loss": 0.5234, "step": 8402 }, { "epoch": 1.4578417765440665, "grad_norm": 0.5802792906761169, "learning_rate": 1.0302417190376184e-05, "loss": 0.4392, "step": 8403 }, { "epoch": 1.4580152671755724, "grad_norm": 1.2883274555206299, "learning_rate": 1.0296459007218345e-05, "loss": 0.4371, "step": 8404 }, { "epoch": 1.4581887578070785, "grad_norm": 0.8092826008796692, "learning_rate": 1.0290501950167755e-05, "loss": 0.5762, "step": 8405 }, { "epoch": 1.4583622484385843, "grad_norm": 0.9017000198364258, "learning_rate": 1.0284546019915727e-05, "loss": 0.4286, "step": 8406 }, { "epoch": 1.4585357390700902, "grad_norm": 0.9883742332458496, "learning_rate": 1.0278591217153444e-05, "loss": 0.3618, "step": 8407 }, { "epoch": 1.4587092297015962, "grad_norm": 0.8880689144134521, "learning_rate": 1.0272637542571988e-05, "loss": 0.3259, "step": 8408 }, { "epoch": 1.458882720333102, "grad_norm": 0.6883126497268677, "learning_rate": 1.026668499686228e-05, "loss": 0.5259, "step": 8409 }, { "epoch": 1.459056210964608, "grad_norm": 0.6304786205291748, "learning_rate": 1.026073358071512e-05, "loss": 0.5785, "step": 8410 }, { "epoch": 1.4592297015961138, "grad_norm": 0.9625796675682068, "learning_rate": 1.0254783294821166e-05, "loss": 0.442, "step": 8411 }, { "epoch": 1.4594031922276196, "grad_norm": 0.7708973288536072, "learning_rate": 1.0248834139870985e-05, "loss": 0.5096, "step": 8412 }, { "epoch": 1.4595766828591257, "grad_norm": 0.6290324926376343, "learning_rate": 1.0242886116554947e-05, "loss": 0.5823, "step": 8413 }, { "epoch": 1.4597501734906315, "grad_norm": 0.7336987257003784, "learning_rate": 1.0236939225563351e-05, "loss": 0.6217, "step": 8414 }, { "epoch": 1.4599236641221374, "grad_norm": 0.8475745320320129, "learning_rate": 1.0230993467586336e-05, "loss": 0.3936, "step": 8415 }, { "epoch": 1.4600971547536434, "grad_norm": 0.76064532995224, "learning_rate": 1.0225048843313914e-05, "loss": 0.4185, "step": 8416 }, { "epoch": 1.4602706453851493, "grad_norm": 0.7234097719192505, "learning_rate": 1.0219105353435951e-05, "loss": 0.4302, "step": 8417 }, { "epoch": 1.4604441360166551, "grad_norm": 2.1421239376068115, "learning_rate": 1.021316299864223e-05, "loss": 0.3957, "step": 8418 }, { "epoch": 1.460617626648161, "grad_norm": 0.7063815593719482, "learning_rate": 1.0207221779622327e-05, "loss": 0.4518, "step": 8419 }, { "epoch": 1.4607911172796668, "grad_norm": 0.7108697891235352, "learning_rate": 1.0201281697065757e-05, "loss": 0.4972, "step": 8420 }, { "epoch": 1.4609646079111727, "grad_norm": 0.9781704545021057, "learning_rate": 1.0195342751661856e-05, "loss": 0.3401, "step": 8421 }, { "epoch": 1.4611380985426787, "grad_norm": 0.6981971859931946, "learning_rate": 1.0189404944099867e-05, "loss": 0.4618, "step": 8422 }, { "epoch": 1.4613115891741846, "grad_norm": 0.8739231824874878, "learning_rate": 1.0183468275068853e-05, "loss": 0.3482, "step": 8423 }, { "epoch": 1.4614850798056904, "grad_norm": 1.831498146057129, "learning_rate": 1.017753274525779e-05, "loss": 0.4212, "step": 8424 }, { "epoch": 1.4616585704371965, "grad_norm": 0.7566888928413391, "learning_rate": 1.01715983553555e-05, "loss": 0.4188, "step": 8425 }, { "epoch": 1.4618320610687023, "grad_norm": 2.4486005306243896, "learning_rate": 1.016566510605067e-05, "loss": 0.3857, "step": 8426 }, { "epoch": 1.4620055517002082, "grad_norm": 0.7806375622749329, "learning_rate": 1.0159732998031857e-05, "loss": 0.3994, "step": 8427 }, { "epoch": 1.462179042331714, "grad_norm": 0.7226378321647644, "learning_rate": 1.0153802031987504e-05, "loss": 0.5568, "step": 8428 }, { "epoch": 1.4623525329632199, "grad_norm": 0.7658690810203552, "learning_rate": 1.01478722086059e-05, "loss": 0.4004, "step": 8429 }, { "epoch": 1.462526023594726, "grad_norm": 0.7795974612236023, "learning_rate": 1.0141943528575205e-05, "loss": 0.4243, "step": 8430 }, { "epoch": 1.4626995142262318, "grad_norm": 0.6728429198265076, "learning_rate": 1.0136015992583449e-05, "loss": 0.4589, "step": 8431 }, { "epoch": 1.4628730048577376, "grad_norm": 0.896666944026947, "learning_rate": 1.0130089601318525e-05, "loss": 0.4868, "step": 8432 }, { "epoch": 1.4630464954892437, "grad_norm": 0.9360397458076477, "learning_rate": 1.0124164355468208e-05, "loss": 0.41, "step": 8433 }, { "epoch": 1.4632199861207495, "grad_norm": 0.8924311399459839, "learning_rate": 1.0118240255720128e-05, "loss": 0.4838, "step": 8434 }, { "epoch": 1.4633934767522554, "grad_norm": 0.8122832775115967, "learning_rate": 1.011231730276178e-05, "loss": 0.4663, "step": 8435 }, { "epoch": 1.4635669673837612, "grad_norm": 0.73357093334198, "learning_rate": 1.0106395497280524e-05, "loss": 0.5358, "step": 8436 }, { "epoch": 1.463740458015267, "grad_norm": 0.5891599059104919, "learning_rate": 1.0100474839963605e-05, "loss": 0.4646, "step": 8437 }, { "epoch": 1.4639139486467732, "grad_norm": 0.6430240273475647, "learning_rate": 1.0094555331498118e-05, "loss": 0.4349, "step": 8438 }, { "epoch": 1.464087439278279, "grad_norm": 1.0415815114974976, "learning_rate": 1.0088636972571027e-05, "loss": 0.4403, "step": 8439 }, { "epoch": 1.4642609299097848, "grad_norm": 0.7313327789306641, "learning_rate": 1.0082719763869153e-05, "loss": 0.51, "step": 8440 }, { "epoch": 1.4644344205412907, "grad_norm": 0.6586468815803528, "learning_rate": 1.0076803706079224e-05, "loss": 0.3967, "step": 8441 }, { "epoch": 1.4646079111727968, "grad_norm": 0.6369693279266357, "learning_rate": 1.0070888799887772e-05, "loss": 0.5192, "step": 8442 }, { "epoch": 1.4647814018043026, "grad_norm": 0.8158454895019531, "learning_rate": 1.0064975045981254e-05, "loss": 0.5299, "step": 8443 }, { "epoch": 1.4649548924358085, "grad_norm": 0.5639089345932007, "learning_rate": 1.0059062445045957e-05, "loss": 0.5651, "step": 8444 }, { "epoch": 1.4651283830673143, "grad_norm": 0.6220554709434509, "learning_rate": 1.0053150997768046e-05, "loss": 0.4736, "step": 8445 }, { "epoch": 1.4653018736988201, "grad_norm": 1.0816049575805664, "learning_rate": 1.0047240704833544e-05, "loss": 0.405, "step": 8446 }, { "epoch": 1.4654753643303262, "grad_norm": 1.5779757499694824, "learning_rate": 1.0041331566928365e-05, "loss": 0.5348, "step": 8447 }, { "epoch": 1.465648854961832, "grad_norm": 0.7839391827583313, "learning_rate": 1.0035423584738262e-05, "loss": 0.4214, "step": 8448 }, { "epoch": 1.465822345593338, "grad_norm": 0.9254438281059265, "learning_rate": 1.0029516758948863e-05, "loss": 0.5283, "step": 8449 }, { "epoch": 1.465995836224844, "grad_norm": 0.7670827507972717, "learning_rate": 1.0023611090245653e-05, "loss": 0.53, "step": 8450 }, { "epoch": 1.4661693268563498, "grad_norm": 0.9193515181541443, "learning_rate": 1.0017706579314016e-05, "loss": 0.5215, "step": 8451 }, { "epoch": 1.4663428174878557, "grad_norm": 0.755422055721283, "learning_rate": 1.0011803226839148e-05, "loss": 0.4807, "step": 8452 }, { "epoch": 1.4665163081193615, "grad_norm": 0.7575535774230957, "learning_rate": 1.000590103350616e-05, "loss": 0.4962, "step": 8453 }, { "epoch": 1.4666897987508674, "grad_norm": 0.8128533959388733, "learning_rate": 1.0000000000000006e-05, "loss": 0.5104, "step": 8454 }, { "epoch": 1.4668632893823734, "grad_norm": 0.6965898275375366, "learning_rate": 9.994100127005492e-06, "loss": 0.5612, "step": 8455 }, { "epoch": 1.4670367800138793, "grad_norm": 0.7350049614906311, "learning_rate": 9.988201415207327e-06, "loss": 0.4757, "step": 8456 }, { "epoch": 1.4672102706453851, "grad_norm": 0.7166589498519897, "learning_rate": 9.982303865290055e-06, "loss": 0.3915, "step": 8457 }, { "epoch": 1.4673837612768912, "grad_norm": 0.5868585705757141, "learning_rate": 9.976407477938092e-06, "loss": 0.538, "step": 8458 }, { "epoch": 1.467557251908397, "grad_norm": 0.6436400413513184, "learning_rate": 9.970512253835713e-06, "loss": 0.427, "step": 8459 }, { "epoch": 1.4677307425399029, "grad_norm": 0.7440900206565857, "learning_rate": 9.96461819366709e-06, "loss": 0.3804, "step": 8460 }, { "epoch": 1.4679042331714087, "grad_norm": 1.111206293106079, "learning_rate": 9.958725298116204e-06, "loss": 0.3649, "step": 8461 }, { "epoch": 1.4680777238029146, "grad_norm": 0.9442750215530396, "learning_rate": 9.952833567866954e-06, "loss": 0.4076, "step": 8462 }, { "epoch": 1.4682512144344204, "grad_norm": 1.032470703125, "learning_rate": 9.946943003603067e-06, "loss": 0.4896, "step": 8463 }, { "epoch": 1.4684247050659265, "grad_norm": 0.8886162042617798, "learning_rate": 9.941053606008176e-06, "loss": 0.314, "step": 8464 }, { "epoch": 1.4685981956974323, "grad_norm": 0.9300706386566162, "learning_rate": 9.935165375765718e-06, "loss": 0.3347, "step": 8465 }, { "epoch": 1.4687716863289382, "grad_norm": 0.73177570104599, "learning_rate": 9.929278313559054e-06, "loss": 0.4554, "step": 8466 }, { "epoch": 1.4689451769604442, "grad_norm": 0.6601850986480713, "learning_rate": 9.923392420071376e-06, "loss": 0.5767, "step": 8467 }, { "epoch": 1.46911866759195, "grad_norm": 1.0624281167984009, "learning_rate": 9.917507695985752e-06, "loss": 0.4116, "step": 8468 }, { "epoch": 1.469292158223456, "grad_norm": 1.227677345275879, "learning_rate": 9.911624141985096e-06, "loss": 0.323, "step": 8469 }, { "epoch": 1.4694656488549618, "grad_norm": 0.7922270894050598, "learning_rate": 9.905741758752234e-06, "loss": 0.371, "step": 8470 }, { "epoch": 1.4696391394864676, "grad_norm": 0.6427243947982788, "learning_rate": 9.899860546969785e-06, "loss": 0.3859, "step": 8471 }, { "epoch": 1.4698126301179737, "grad_norm": 0.7310827970504761, "learning_rate": 9.893980507320295e-06, "loss": 0.4584, "step": 8472 }, { "epoch": 1.4699861207494795, "grad_norm": 0.6126799583435059, "learning_rate": 9.888101640486146e-06, "loss": 0.4736, "step": 8473 }, { "epoch": 1.4701596113809854, "grad_norm": 0.5600416660308838, "learning_rate": 9.882223947149583e-06, "loss": 0.6259, "step": 8474 }, { "epoch": 1.4703331020124915, "grad_norm": 0.8366650938987732, "learning_rate": 9.876347427992712e-06, "loss": 0.5151, "step": 8475 }, { "epoch": 1.4705065926439973, "grad_norm": 0.9180997014045715, "learning_rate": 9.870472083697526e-06, "loss": 0.4323, "step": 8476 }, { "epoch": 1.4706800832755031, "grad_norm": 0.7303565740585327, "learning_rate": 9.864597914945859e-06, "loss": 0.4814, "step": 8477 }, { "epoch": 1.470853573907009, "grad_norm": 1.0823837518692017, "learning_rate": 9.858724922419413e-06, "loss": 0.4498, "step": 8478 }, { "epoch": 1.4710270645385148, "grad_norm": 0.7902705073356628, "learning_rate": 9.852853106799752e-06, "loss": 0.4369, "step": 8479 }, { "epoch": 1.4712005551700207, "grad_norm": 1.0457099676132202, "learning_rate": 9.846982468768316e-06, "loss": 0.4037, "step": 8480 }, { "epoch": 1.4713740458015268, "grad_norm": 0.9382363557815552, "learning_rate": 9.841113009006395e-06, "loss": 0.5115, "step": 8481 }, { "epoch": 1.4715475364330326, "grad_norm": 0.6638898849487305, "learning_rate": 9.83524472819515e-06, "loss": 0.5068, "step": 8482 }, { "epoch": 1.4717210270645384, "grad_norm": 0.670421838760376, "learning_rate": 9.829377627015595e-06, "loss": 0.5934, "step": 8483 }, { "epoch": 1.4718945176960445, "grad_norm": 0.8659542798995972, "learning_rate": 9.823511706148612e-06, "loss": 0.5544, "step": 8484 }, { "epoch": 1.4720680083275504, "grad_norm": 0.6149436831474304, "learning_rate": 9.81764696627496e-06, "loss": 0.5089, "step": 8485 }, { "epoch": 1.4722414989590562, "grad_norm": 0.7267339825630188, "learning_rate": 9.811783408075244e-06, "loss": 0.3921, "step": 8486 }, { "epoch": 1.472414989590562, "grad_norm": 0.8999056816101074, "learning_rate": 9.805921032229935e-06, "loss": 0.4633, "step": 8487 }, { "epoch": 1.472588480222068, "grad_norm": 0.96303790807724, "learning_rate": 9.800059839419358e-06, "loss": 0.4055, "step": 8488 }, { "epoch": 1.472761970853574, "grad_norm": 0.785336434841156, "learning_rate": 9.794199830323741e-06, "loss": 0.4161, "step": 8489 }, { "epoch": 1.4729354614850798, "grad_norm": 4.219667911529541, "learning_rate": 9.788341005623107e-06, "loss": 0.4529, "step": 8490 }, { "epoch": 1.4731089521165857, "grad_norm": 0.7926281094551086, "learning_rate": 9.782483365997409e-06, "loss": 0.3579, "step": 8491 }, { "epoch": 1.4732824427480917, "grad_norm": 0.8731950521469116, "learning_rate": 9.776626912126413e-06, "loss": 0.3644, "step": 8492 }, { "epoch": 1.4734559333795976, "grad_norm": 0.8293495774269104, "learning_rate": 9.770771644689792e-06, "loss": 0.5417, "step": 8493 }, { "epoch": 1.4736294240111034, "grad_norm": 0.7870762944221497, "learning_rate": 9.764917564367025e-06, "loss": 0.3804, "step": 8494 }, { "epoch": 1.4738029146426093, "grad_norm": 0.8626502752304077, "learning_rate": 9.75906467183751e-06, "loss": 0.5247, "step": 8495 }, { "epoch": 1.473976405274115, "grad_norm": 0.7849081754684448, "learning_rate": 9.753212967780472e-06, "loss": 0.5177, "step": 8496 }, { "epoch": 1.4741498959056212, "grad_norm": 1.1207414865493774, "learning_rate": 9.747362452875009e-06, "loss": 0.4625, "step": 8497 }, { "epoch": 1.474323386537127, "grad_norm": 0.8799973130226135, "learning_rate": 9.741513127800072e-06, "loss": 0.4646, "step": 8498 }, { "epoch": 1.4744968771686329, "grad_norm": 0.6825425028800964, "learning_rate": 9.735664993234499e-06, "loss": 0.5953, "step": 8499 }, { "epoch": 1.4746703678001387, "grad_norm": 0.7112365961074829, "learning_rate": 9.729818049856963e-06, "loss": 0.3888, "step": 8500 }, { "epoch": 1.4748438584316448, "grad_norm": 1.3058369159698486, "learning_rate": 9.72397229834601e-06, "loss": 0.4067, "step": 8501 }, { "epoch": 1.4750173490631506, "grad_norm": 0.6349422931671143, "learning_rate": 9.718127739380043e-06, "loss": 0.5435, "step": 8502 }, { "epoch": 1.4751908396946565, "grad_norm": 0.765207052230835, "learning_rate": 9.712284373637329e-06, "loss": 0.3588, "step": 8503 }, { "epoch": 1.4753643303261623, "grad_norm": 0.6961727738380432, "learning_rate": 9.706442201796007e-06, "loss": 0.4391, "step": 8504 }, { "epoch": 1.4755378209576682, "grad_norm": 0.7369248867034912, "learning_rate": 9.700601224534061e-06, "loss": 0.5142, "step": 8505 }, { "epoch": 1.4757113115891742, "grad_norm": 0.7529562711715698, "learning_rate": 9.694761442529345e-06, "loss": 0.3741, "step": 8506 }, { "epoch": 1.47588480222068, "grad_norm": 0.8767305612564087, "learning_rate": 9.688922856459563e-06, "loss": 0.3862, "step": 8507 }, { "epoch": 1.476058292852186, "grad_norm": 1.0340005159378052, "learning_rate": 9.683085467002306e-06, "loss": 0.4139, "step": 8508 }, { "epoch": 1.476231783483692, "grad_norm": 0.6615685820579529, "learning_rate": 9.677249274835003e-06, "loss": 0.4683, "step": 8509 }, { "epoch": 1.4764052741151978, "grad_norm": 0.6835951209068298, "learning_rate": 9.67141428063495e-06, "loss": 0.4952, "step": 8510 }, { "epoch": 1.4765787647467037, "grad_norm": 0.7067520618438721, "learning_rate": 9.665580485079297e-06, "loss": 0.4879, "step": 8511 }, { "epoch": 1.4767522553782095, "grad_norm": 0.8401545882225037, "learning_rate": 9.659747888845087e-06, "loss": 0.4802, "step": 8512 }, { "epoch": 1.4769257460097154, "grad_norm": 0.8900430202484131, "learning_rate": 9.653916492609168e-06, "loss": 0.4176, "step": 8513 }, { "epoch": 1.4770992366412214, "grad_norm": 0.8312796354293823, "learning_rate": 9.648086297048302e-06, "loss": 0.3748, "step": 8514 }, { "epoch": 1.4772727272727273, "grad_norm": 0.7063842415809631, "learning_rate": 9.642257302839085e-06, "loss": 0.4597, "step": 8515 }, { "epoch": 1.4774462179042331, "grad_norm": 1.0829441547393799, "learning_rate": 9.636429510657974e-06, "loss": 0.5219, "step": 8516 }, { "epoch": 1.4776197085357392, "grad_norm": 0.7372325658798218, "learning_rate": 9.63060292118129e-06, "loss": 0.4382, "step": 8517 }, { "epoch": 1.477793199167245, "grad_norm": 0.7412036061286926, "learning_rate": 9.624777535085233e-06, "loss": 0.386, "step": 8518 }, { "epoch": 1.477966689798751, "grad_norm": 0.8647828102111816, "learning_rate": 9.61895335304582e-06, "loss": 0.4579, "step": 8519 }, { "epoch": 1.4781401804302567, "grad_norm": 1.84585702419281, "learning_rate": 9.61313037573897e-06, "loss": 0.453, "step": 8520 }, { "epoch": 1.4783136710617626, "grad_norm": 0.5941470265388489, "learning_rate": 9.607308603840437e-06, "loss": 0.4928, "step": 8521 }, { "epoch": 1.4784871616932684, "grad_norm": 0.8481560349464417, "learning_rate": 9.601488038025869e-06, "loss": 0.4034, "step": 8522 }, { "epoch": 1.4786606523247745, "grad_norm": 0.9535527229309082, "learning_rate": 9.59566867897071e-06, "loss": 0.3519, "step": 8523 }, { "epoch": 1.4788341429562804, "grad_norm": 0.7511053085327148, "learning_rate": 9.589850527350337e-06, "loss": 0.4575, "step": 8524 }, { "epoch": 1.4790076335877862, "grad_norm": 1.0763275623321533, "learning_rate": 9.584033583839938e-06, "loss": 0.491, "step": 8525 }, { "epoch": 1.4791811242192923, "grad_norm": 1.197890281677246, "learning_rate": 9.578217849114579e-06, "loss": 0.3983, "step": 8526 }, { "epoch": 1.479354614850798, "grad_norm": 0.7234625816345215, "learning_rate": 9.572403323849175e-06, "loss": 0.3718, "step": 8527 }, { "epoch": 1.479528105482304, "grad_norm": 0.7402774691581726, "learning_rate": 9.566590008718524e-06, "loss": 0.5287, "step": 8528 }, { "epoch": 1.4797015961138098, "grad_norm": 0.7728129625320435, "learning_rate": 9.560777904397258e-06, "loss": 0.5115, "step": 8529 }, { "epoch": 1.4798750867453156, "grad_norm": 0.915511965751648, "learning_rate": 9.554967011559874e-06, "loss": 0.4138, "step": 8530 }, { "epoch": 1.4800485773768217, "grad_norm": 1.1457018852233887, "learning_rate": 9.549157330880753e-06, "loss": 0.3912, "step": 8531 }, { "epoch": 1.4802220680083276, "grad_norm": 0.864634096622467, "learning_rate": 9.54334886303409e-06, "loss": 0.4006, "step": 8532 }, { "epoch": 1.4803955586398334, "grad_norm": 0.7768258452415466, "learning_rate": 9.537541608693982e-06, "loss": 0.5658, "step": 8533 }, { "epoch": 1.4805690492713395, "grad_norm": 0.7090941667556763, "learning_rate": 9.53173556853435e-06, "loss": 0.4668, "step": 8534 }, { "epoch": 1.4807425399028453, "grad_norm": 1.6957459449768066, "learning_rate": 9.52593074322902e-06, "loss": 0.5034, "step": 8535 }, { "epoch": 1.4809160305343512, "grad_norm": 0.8803064227104187, "learning_rate": 9.520127133451619e-06, "loss": 0.4086, "step": 8536 }, { "epoch": 1.481089521165857, "grad_norm": 0.7819961905479431, "learning_rate": 9.514324739875684e-06, "loss": 0.4628, "step": 8537 }, { "epoch": 1.4812630117973629, "grad_norm": 0.6098188757896423, "learning_rate": 9.508523563174578e-06, "loss": 0.5854, "step": 8538 }, { "epoch": 1.4814365024288687, "grad_norm": 0.7409568428993225, "learning_rate": 9.50272360402154e-06, "loss": 0.389, "step": 8539 }, { "epoch": 1.4816099930603748, "grad_norm": 0.8341870307922363, "learning_rate": 9.496924863089652e-06, "loss": 0.394, "step": 8540 }, { "epoch": 1.4817834836918806, "grad_norm": 0.759617269039154, "learning_rate": 9.491127341051884e-06, "loss": 0.4193, "step": 8541 }, { "epoch": 1.4819569743233865, "grad_norm": 1.3312665224075317, "learning_rate": 9.485331038581021e-06, "loss": 0.4683, "step": 8542 }, { "epoch": 1.4821304649548925, "grad_norm": 0.7491121888160706, "learning_rate": 9.479535956349751e-06, "loss": 0.4744, "step": 8543 }, { "epoch": 1.4823039555863984, "grad_norm": 0.7190536856651306, "learning_rate": 9.473742095030588e-06, "loss": 0.431, "step": 8544 }, { "epoch": 1.4824774462179042, "grad_norm": 1.0622676610946655, "learning_rate": 9.467949455295925e-06, "loss": 0.4563, "step": 8545 }, { "epoch": 1.48265093684941, "grad_norm": 0.8268857598304749, "learning_rate": 9.46215803781799e-06, "loss": 0.4945, "step": 8546 }, { "epoch": 1.482824427480916, "grad_norm": 0.7199467420578003, "learning_rate": 9.456367843268903e-06, "loss": 0.4816, "step": 8547 }, { "epoch": 1.482997918112422, "grad_norm": 0.6562536954879761, "learning_rate": 9.450578872320613e-06, "loss": 0.5212, "step": 8548 }, { "epoch": 1.4831714087439278, "grad_norm": 1.1030664443969727, "learning_rate": 9.444791125644937e-06, "loss": 0.362, "step": 8549 }, { "epoch": 1.4833448993754337, "grad_norm": 0.7876076698303223, "learning_rate": 9.439004603913542e-06, "loss": 0.4969, "step": 8550 }, { "epoch": 1.4835183900069397, "grad_norm": 1.0008448362350464, "learning_rate": 9.433219307797977e-06, "loss": 0.3895, "step": 8551 }, { "epoch": 1.4836918806384456, "grad_norm": 0.7976511120796204, "learning_rate": 9.427435237969624e-06, "loss": 0.5105, "step": 8552 }, { "epoch": 1.4838653712699514, "grad_norm": 0.8462479114532471, "learning_rate": 9.421652395099732e-06, "loss": 0.462, "step": 8553 }, { "epoch": 1.4840388619014573, "grad_norm": 0.7487668991088867, "learning_rate": 9.415870779859405e-06, "loss": 0.4177, "step": 8554 }, { "epoch": 1.4842123525329631, "grad_norm": 0.5780131220817566, "learning_rate": 9.410090392919598e-06, "loss": 0.5214, "step": 8555 }, { "epoch": 1.4843858431644692, "grad_norm": 3.076767683029175, "learning_rate": 9.404311234951148e-06, "loss": 0.4138, "step": 8556 }, { "epoch": 1.484559333795975, "grad_norm": 0.7333472967147827, "learning_rate": 9.39853330662473e-06, "loss": 0.4052, "step": 8557 }, { "epoch": 1.484732824427481, "grad_norm": 0.7892191410064697, "learning_rate": 9.392756608610871e-06, "loss": 0.5435, "step": 8558 }, { "epoch": 1.4849063150589867, "grad_norm": 0.7384252548217773, "learning_rate": 9.386981141579961e-06, "loss": 0.3242, "step": 8559 }, { "epoch": 1.4850798056904928, "grad_norm": 0.9419588446617126, "learning_rate": 9.381206906202268e-06, "loss": 0.3159, "step": 8560 }, { "epoch": 1.4852532963219987, "grad_norm": 0.7519877552986145, "learning_rate": 9.375433903147877e-06, "loss": 0.5704, "step": 8561 }, { "epoch": 1.4854267869535045, "grad_norm": 0.8963461518287659, "learning_rate": 9.369662133086768e-06, "loss": 0.3737, "step": 8562 }, { "epoch": 1.4856002775850103, "grad_norm": 1.7488105297088623, "learning_rate": 9.363891596688745e-06, "loss": 0.3864, "step": 8563 }, { "epoch": 1.4857737682165162, "grad_norm": 0.8128525018692017, "learning_rate": 9.358122294623514e-06, "loss": 0.4784, "step": 8564 }, { "epoch": 1.4859472588480223, "grad_norm": 0.6044059991836548, "learning_rate": 9.352354227560575e-06, "loss": 0.5121, "step": 8565 }, { "epoch": 1.486120749479528, "grad_norm": 2.582504987716675, "learning_rate": 9.34658739616934e-06, "loss": 0.5166, "step": 8566 }, { "epoch": 1.486294240111034, "grad_norm": 0.9345373511314392, "learning_rate": 9.340821801119053e-06, "loss": 0.4183, "step": 8567 }, { "epoch": 1.48646773074254, "grad_norm": 3.323610305786133, "learning_rate": 9.335057443078817e-06, "loss": 0.5702, "step": 8568 }, { "epoch": 1.4866412213740459, "grad_norm": 0.6034752726554871, "learning_rate": 9.329294322717584e-06, "loss": 0.4974, "step": 8569 }, { "epoch": 1.4868147120055517, "grad_norm": 0.6210371851921082, "learning_rate": 9.323532440704196e-06, "loss": 0.3853, "step": 8570 }, { "epoch": 1.4869882026370576, "grad_norm": 1.0118930339813232, "learning_rate": 9.31777179770729e-06, "loss": 0.4394, "step": 8571 }, { "epoch": 1.4871616932685634, "grad_norm": 0.7408242225646973, "learning_rate": 9.312012394395423e-06, "loss": 0.4777, "step": 8572 }, { "epoch": 1.4873351839000695, "grad_norm": 0.700859785079956, "learning_rate": 9.30625423143697e-06, "loss": 0.5621, "step": 8573 }, { "epoch": 1.4875086745315753, "grad_norm": 0.7190759181976318, "learning_rate": 9.300497309500176e-06, "loss": 0.3806, "step": 8574 }, { "epoch": 1.4876821651630812, "grad_norm": 0.6803382039070129, "learning_rate": 9.29474162925313e-06, "loss": 0.4049, "step": 8575 }, { "epoch": 1.4878556557945872, "grad_norm": 0.657941460609436, "learning_rate": 9.288987191363799e-06, "loss": 0.4789, "step": 8576 }, { "epoch": 1.488029146426093, "grad_norm": 0.6799991130828857, "learning_rate": 9.283233996499984e-06, "loss": 0.6385, "step": 8577 }, { "epoch": 1.488202637057599, "grad_norm": 0.7778498530387878, "learning_rate": 9.277482045329344e-06, "loss": 0.4995, "step": 8578 }, { "epoch": 1.4883761276891048, "grad_norm": 1.108154296875, "learning_rate": 9.271731338519415e-06, "loss": 0.55, "step": 8579 }, { "epoch": 1.4885496183206106, "grad_norm": 0.638132631778717, "learning_rate": 9.265981876737566e-06, "loss": 0.4076, "step": 8580 }, { "epoch": 1.4887231089521165, "grad_norm": 0.942566990852356, "learning_rate": 9.260233660651025e-06, "loss": 0.4186, "step": 8581 }, { "epoch": 1.4888965995836225, "grad_norm": 0.8701571822166443, "learning_rate": 9.254486690926878e-06, "loss": 0.3516, "step": 8582 }, { "epoch": 1.4890700902151284, "grad_norm": 0.7635884881019592, "learning_rate": 9.248740968232084e-06, "loss": 0.3903, "step": 8583 }, { "epoch": 1.4892435808466342, "grad_norm": 0.742935836315155, "learning_rate": 9.242996493233414e-06, "loss": 0.4371, "step": 8584 }, { "epoch": 1.4894170714781403, "grad_norm": 0.7536942958831787, "learning_rate": 9.237253266597544e-06, "loss": 0.3566, "step": 8585 }, { "epoch": 1.4895905621096461, "grad_norm": 0.9712985157966614, "learning_rate": 9.23151128899097e-06, "loss": 0.4584, "step": 8586 }, { "epoch": 1.489764052741152, "grad_norm": 1.0012733936309814, "learning_rate": 9.225770561080062e-06, "loss": 0.4162, "step": 8587 }, { "epoch": 1.4899375433726578, "grad_norm": 0.856743335723877, "learning_rate": 9.220031083531026e-06, "loss": 0.491, "step": 8588 }, { "epoch": 1.4901110340041637, "grad_norm": 0.7103423476219177, "learning_rate": 9.214292857009961e-06, "loss": 0.5468, "step": 8589 }, { "epoch": 1.4902845246356697, "grad_norm": 0.6725903749465942, "learning_rate": 9.208555882182762e-06, "loss": 0.5104, "step": 8590 }, { "epoch": 1.4904580152671756, "grad_norm": 0.6370904445648193, "learning_rate": 9.202820159715234e-06, "loss": 0.5353, "step": 8591 }, { "epoch": 1.4906315058986814, "grad_norm": 0.7135043740272522, "learning_rate": 9.197085690273e-06, "loss": 0.4349, "step": 8592 }, { "epoch": 1.4908049965301875, "grad_norm": 0.8648028373718262, "learning_rate": 9.191352474521572e-06, "loss": 0.5393, "step": 8593 }, { "epoch": 1.4909784871616933, "grad_norm": 0.8293433785438538, "learning_rate": 9.185620513126275e-06, "loss": 0.423, "step": 8594 }, { "epoch": 1.4911519777931992, "grad_norm": 0.7110719084739685, "learning_rate": 9.179889806752322e-06, "loss": 0.4988, "step": 8595 }, { "epoch": 1.491325468424705, "grad_norm": 0.7662829160690308, "learning_rate": 9.174160356064765e-06, "loss": 0.4418, "step": 8596 }, { "epoch": 1.4914989590562109, "grad_norm": 0.8652961850166321, "learning_rate": 9.168432161728515e-06, "loss": 0.369, "step": 8597 }, { "epoch": 1.4916724496877167, "grad_norm": 0.8832250833511353, "learning_rate": 9.162705224408326e-06, "loss": 0.3846, "step": 8598 }, { "epoch": 1.4918459403192228, "grad_norm": 0.8304434418678284, "learning_rate": 9.15697954476883e-06, "loss": 0.4884, "step": 8599 }, { "epoch": 1.4920194309507286, "grad_norm": 0.7447237372398376, "learning_rate": 9.151255123474493e-06, "loss": 0.4503, "step": 8600 }, { "epoch": 1.4921929215822345, "grad_norm": 0.7981603741645813, "learning_rate": 9.14553196118964e-06, "loss": 0.4208, "step": 8601 }, { "epoch": 1.4923664122137406, "grad_norm": 0.7032742500305176, "learning_rate": 9.139810058578451e-06, "loss": 0.4658, "step": 8602 }, { "epoch": 1.4925399028452464, "grad_norm": 0.7949456572532654, "learning_rate": 9.134089416304951e-06, "loss": 0.4136, "step": 8603 }, { "epoch": 1.4927133934767522, "grad_norm": 0.7762060165405273, "learning_rate": 9.128370035033046e-06, "loss": 0.4445, "step": 8604 }, { "epoch": 1.492886884108258, "grad_norm": 0.7838228344917297, "learning_rate": 9.122651915426464e-06, "loss": 0.5227, "step": 8605 }, { "epoch": 1.493060374739764, "grad_norm": 0.682068943977356, "learning_rate": 9.116935058148801e-06, "loss": 0.4818, "step": 8606 }, { "epoch": 1.49323386537127, "grad_norm": 0.6018600463867188, "learning_rate": 9.111219463863501e-06, "loss": 0.5979, "step": 8607 }, { "epoch": 1.4934073560027759, "grad_norm": 0.7784172296524048, "learning_rate": 9.105505133233876e-06, "loss": 0.4379, "step": 8608 }, { "epoch": 1.4935808466342817, "grad_norm": 0.692943811416626, "learning_rate": 9.099792066923077e-06, "loss": 0.4481, "step": 8609 }, { "epoch": 1.4937543372657878, "grad_norm": 0.7734472751617432, "learning_rate": 9.094080265594108e-06, "loss": 0.5063, "step": 8610 }, { "epoch": 1.4939278278972936, "grad_norm": 1.248750925064087, "learning_rate": 9.088369729909823e-06, "loss": 0.5038, "step": 8611 }, { "epoch": 1.4941013185287995, "grad_norm": 0.6195592284202576, "learning_rate": 9.082660460532961e-06, "loss": 0.5079, "step": 8612 }, { "epoch": 1.4942748091603053, "grad_norm": 1.2637933492660522, "learning_rate": 9.07695245812606e-06, "loss": 0.3878, "step": 8613 }, { "epoch": 1.4944482997918112, "grad_norm": 0.6877127289772034, "learning_rate": 9.071245723351563e-06, "loss": 0.4902, "step": 8614 }, { "epoch": 1.4946217904233172, "grad_norm": 1.2176055908203125, "learning_rate": 9.065540256871733e-06, "loss": 0.5112, "step": 8615 }, { "epoch": 1.494795281054823, "grad_norm": 1.0032249689102173, "learning_rate": 9.059836059348696e-06, "loss": 0.4208, "step": 8616 }, { "epoch": 1.494968771686329, "grad_norm": 0.6759541034698486, "learning_rate": 9.054133131444427e-06, "loss": 0.5089, "step": 8617 }, { "epoch": 1.4951422623178348, "grad_norm": 0.9625713229179382, "learning_rate": 9.048431473820776e-06, "loss": 0.443, "step": 8618 }, { "epoch": 1.4953157529493408, "grad_norm": 0.6422067880630493, "learning_rate": 9.042731087139398e-06, "loss": 0.4221, "step": 8619 }, { "epoch": 1.4954892435808467, "grad_norm": 0.674849808216095, "learning_rate": 9.037031972061854e-06, "loss": 0.4745, "step": 8620 }, { "epoch": 1.4956627342123525, "grad_norm": 0.791410505771637, "learning_rate": 9.031334129249515e-06, "loss": 0.3588, "step": 8621 }, { "epoch": 1.4958362248438584, "grad_norm": 0.7717758417129517, "learning_rate": 9.02563755936365e-06, "loss": 0.4924, "step": 8622 }, { "epoch": 1.4960097154753642, "grad_norm": 0.800813615322113, "learning_rate": 9.019942263065316e-06, "loss": 0.559, "step": 8623 }, { "epoch": 1.4961832061068703, "grad_norm": 0.6531893014907837, "learning_rate": 9.014248241015484e-06, "loss": 0.583, "step": 8624 }, { "epoch": 1.4963566967383761, "grad_norm": 0.6513347029685974, "learning_rate": 9.008555493874944e-06, "loss": 0.5826, "step": 8625 }, { "epoch": 1.496530187369882, "grad_norm": 3.2361068725585938, "learning_rate": 9.00286402230434e-06, "loss": 0.3813, "step": 8626 }, { "epoch": 1.496703678001388, "grad_norm": 0.7543614506721497, "learning_rate": 8.99717382696419e-06, "loss": 0.3718, "step": 8627 }, { "epoch": 1.4968771686328939, "grad_norm": 1.0834203958511353, "learning_rate": 8.991484908514835e-06, "loss": 0.4679, "step": 8628 }, { "epoch": 1.4970506592643997, "grad_norm": 0.8064327836036682, "learning_rate": 8.985797267616485e-06, "loss": 0.51, "step": 8629 }, { "epoch": 1.4972241498959056, "grad_norm": 0.9959091544151306, "learning_rate": 8.980110904929189e-06, "loss": 0.3596, "step": 8630 }, { "epoch": 1.4973976405274114, "grad_norm": 0.8047711849212646, "learning_rate": 8.974425821112877e-06, "loss": 0.5043, "step": 8631 }, { "epoch": 1.4975711311589175, "grad_norm": 0.778866171836853, "learning_rate": 8.968742016827283e-06, "loss": 0.5012, "step": 8632 }, { "epoch": 1.4977446217904233, "grad_norm": 1.1322171688079834, "learning_rate": 8.963059492732039e-06, "loss": 0.3735, "step": 8633 }, { "epoch": 1.4979181124219292, "grad_norm": 0.9971731901168823, "learning_rate": 8.957378249486592e-06, "loss": 0.3782, "step": 8634 }, { "epoch": 1.4980916030534353, "grad_norm": 0.7050626277923584, "learning_rate": 8.951698287750283e-06, "loss": 0.6083, "step": 8635 }, { "epoch": 1.498265093684941, "grad_norm": 0.870461642742157, "learning_rate": 8.946019608182245e-06, "loss": 0.459, "step": 8636 }, { "epoch": 1.498438584316447, "grad_norm": 0.8623781204223633, "learning_rate": 8.940342211441522e-06, "loss": 0.3542, "step": 8637 }, { "epoch": 1.4986120749479528, "grad_norm": 1.6813430786132812, "learning_rate": 8.93466609818697e-06, "loss": 0.3652, "step": 8638 }, { "epoch": 1.4987855655794586, "grad_norm": 0.8649514317512512, "learning_rate": 8.928991269077311e-06, "loss": 0.4236, "step": 8639 }, { "epoch": 1.4989590562109645, "grad_norm": 0.8307508826255798, "learning_rate": 8.92331772477111e-06, "loss": 0.5244, "step": 8640 }, { "epoch": 1.4991325468424705, "grad_norm": 0.8284005522727966, "learning_rate": 8.917645465926807e-06, "loss": 0.3849, "step": 8641 }, { "epoch": 1.4993060374739764, "grad_norm": 1.0204592943191528, "learning_rate": 8.91197449320265e-06, "loss": 0.3115, "step": 8642 }, { "epoch": 1.4994795281054822, "grad_norm": 1.2717472314834595, "learning_rate": 8.906304807256775e-06, "loss": 0.3308, "step": 8643 }, { "epoch": 1.4996530187369883, "grad_norm": 1.0788602828979492, "learning_rate": 8.900636408747156e-06, "loss": 0.363, "step": 8644 }, { "epoch": 1.4998265093684942, "grad_norm": 0.6880956292152405, "learning_rate": 8.894969298331617e-06, "loss": 0.4625, "step": 8645 }, { "epoch": 1.5, "grad_norm": 0.6167536377906799, "learning_rate": 8.889303476667823e-06, "loss": 0.4945, "step": 8646 }, { "epoch": 1.5001734906315058, "grad_norm": 0.7947840094566345, "learning_rate": 8.883638944413313e-06, "loss": 0.4093, "step": 8647 }, { "epoch": 1.5003469812630117, "grad_norm": 0.6716247200965881, "learning_rate": 8.877975702225457e-06, "loss": 0.5023, "step": 8648 }, { "epoch": 1.5005204718945175, "grad_norm": 1.0151311159133911, "learning_rate": 8.872313750761482e-06, "loss": 0.4816, "step": 8649 }, { "epoch": 1.5006939625260236, "grad_norm": 0.9199911952018738, "learning_rate": 8.866653090678452e-06, "loss": 0.5542, "step": 8650 }, { "epoch": 1.5008674531575295, "grad_norm": 0.9588578343391418, "learning_rate": 8.860993722633312e-06, "loss": 0.4092, "step": 8651 }, { "epoch": 1.5010409437890355, "grad_norm": 1.4194256067276, "learning_rate": 8.855335647282833e-06, "loss": 0.3568, "step": 8652 }, { "epoch": 1.5012144344205414, "grad_norm": 0.8696090579032898, "learning_rate": 8.849678865283635e-06, "loss": 0.3328, "step": 8653 }, { "epoch": 1.5013879250520472, "grad_norm": 1.2177104949951172, "learning_rate": 8.844023377292198e-06, "loss": 0.5056, "step": 8654 }, { "epoch": 1.501561415683553, "grad_norm": 0.9737895727157593, "learning_rate": 8.838369183964841e-06, "loss": 0.3463, "step": 8655 }, { "epoch": 1.501734906315059, "grad_norm": 1.043319821357727, "learning_rate": 8.832716285957754e-06, "loss": 0.3523, "step": 8656 }, { "epoch": 1.5019083969465647, "grad_norm": 1.0447510480880737, "learning_rate": 8.827064683926954e-06, "loss": 0.3477, "step": 8657 }, { "epoch": 1.5020818875780708, "grad_norm": 1.3222448825836182, "learning_rate": 8.821414378528314e-06, "loss": 0.3904, "step": 8658 }, { "epoch": 1.5022553782095767, "grad_norm": 0.7870173454284668, "learning_rate": 8.815765370417557e-06, "loss": 0.4387, "step": 8659 }, { "epoch": 1.5024288688410827, "grad_norm": 0.757055938243866, "learning_rate": 8.810117660250275e-06, "loss": 0.5533, "step": 8660 }, { "epoch": 1.5026023594725886, "grad_norm": 0.9041140079498291, "learning_rate": 8.80447124868186e-06, "loss": 0.3798, "step": 8661 }, { "epoch": 1.5027758501040944, "grad_norm": 1.1670129299163818, "learning_rate": 8.798826136367613e-06, "loss": 0.3694, "step": 8662 }, { "epoch": 1.5029493407356003, "grad_norm": 0.834130048751831, "learning_rate": 8.793182323962635e-06, "loss": 0.3368, "step": 8663 }, { "epoch": 1.5031228313671061, "grad_norm": 0.6511291861534119, "learning_rate": 8.787539812121924e-06, "loss": 0.4883, "step": 8664 }, { "epoch": 1.503296321998612, "grad_norm": 0.7934287786483765, "learning_rate": 8.781898601500265e-06, "loss": 0.4287, "step": 8665 }, { "epoch": 1.5034698126301178, "grad_norm": 0.7822073698043823, "learning_rate": 8.776258692752355e-06, "loss": 0.4736, "step": 8666 }, { "epoch": 1.5036433032616239, "grad_norm": 0.7018622756004333, "learning_rate": 8.7706200865327e-06, "loss": 0.5229, "step": 8667 }, { "epoch": 1.5038167938931297, "grad_norm": 0.7407475709915161, "learning_rate": 8.76498278349567e-06, "loss": 0.4767, "step": 8668 }, { "epoch": 1.5039902845246358, "grad_norm": 0.7079533338546753, "learning_rate": 8.75934678429547e-06, "loss": 0.6644, "step": 8669 }, { "epoch": 1.5041637751561416, "grad_norm": 0.6935982704162598, "learning_rate": 8.753712089586184e-06, "loss": 0.5565, "step": 8670 }, { "epoch": 1.5043372657876475, "grad_norm": 0.6961166262626648, "learning_rate": 8.748078700021714e-06, "loss": 0.3792, "step": 8671 }, { "epoch": 1.5045107564191533, "grad_norm": 0.8431174755096436, "learning_rate": 8.74244661625582e-06, "loss": 0.459, "step": 8672 }, { "epoch": 1.5046842470506592, "grad_norm": 0.6829946637153625, "learning_rate": 8.736815838942114e-06, "loss": 0.4969, "step": 8673 }, { "epoch": 1.504857737682165, "grad_norm": 0.9456247687339783, "learning_rate": 8.731186368734049e-06, "loss": 0.349, "step": 8674 }, { "epoch": 1.505031228313671, "grad_norm": 0.8031119704246521, "learning_rate": 8.725558206284944e-06, "loss": 0.4606, "step": 8675 }, { "epoch": 1.505204718945177, "grad_norm": 0.6166511178016663, "learning_rate": 8.719931352247944e-06, "loss": 0.4719, "step": 8676 }, { "epoch": 1.505378209576683, "grad_norm": 1.3053230047225952, "learning_rate": 8.714305807276056e-06, "loss": 0.4058, "step": 8677 }, { "epoch": 1.5055517002081888, "grad_norm": 0.7026805877685547, "learning_rate": 8.708681572022122e-06, "loss": 0.4995, "step": 8678 }, { "epoch": 1.5057251908396947, "grad_norm": 0.8382639288902283, "learning_rate": 8.703058647138855e-06, "loss": 0.4978, "step": 8679 }, { "epoch": 1.5058986814712005, "grad_norm": 0.9075166583061218, "learning_rate": 8.697437033278797e-06, "loss": 0.3594, "step": 8680 }, { "epoch": 1.5060721721027064, "grad_norm": 1.3929052352905273, "learning_rate": 8.691816731094341e-06, "loss": 0.5269, "step": 8681 }, { "epoch": 1.5062456627342122, "grad_norm": 1.178870677947998, "learning_rate": 8.686197741237722e-06, "loss": 0.4796, "step": 8682 }, { "epoch": 1.5064191533657183, "grad_norm": 0.7761985063552856, "learning_rate": 8.680580064361053e-06, "loss": 0.3702, "step": 8683 }, { "epoch": 1.5065926439972241, "grad_norm": 0.7987775802612305, "learning_rate": 8.674963701116243e-06, "loss": 0.5496, "step": 8684 }, { "epoch": 1.5067661346287302, "grad_norm": 0.9020431637763977, "learning_rate": 8.669348652155101e-06, "loss": 0.4366, "step": 8685 }, { "epoch": 1.506939625260236, "grad_norm": 0.9262181520462036, "learning_rate": 8.663734918129247e-06, "loss": 0.4659, "step": 8686 }, { "epoch": 1.507113115891742, "grad_norm": 0.819610059261322, "learning_rate": 8.658122499690166e-06, "loss": 0.4401, "step": 8687 }, { "epoch": 1.5072866065232478, "grad_norm": 0.8673333525657654, "learning_rate": 8.652511397489181e-06, "loss": 0.3316, "step": 8688 }, { "epoch": 1.5074600971547536, "grad_norm": 0.950929582118988, "learning_rate": 8.646901612177484e-06, "loss": 0.4189, "step": 8689 }, { "epoch": 1.5076335877862594, "grad_norm": 0.7968010306358337, "learning_rate": 8.641293144406067e-06, "loss": 0.4417, "step": 8690 }, { "epoch": 1.5078070784177653, "grad_norm": 1.0768781900405884, "learning_rate": 8.635685994825824e-06, "loss": 0.3964, "step": 8691 }, { "epoch": 1.5079805690492714, "grad_norm": 0.8976142406463623, "learning_rate": 8.630080164087456e-06, "loss": 0.3446, "step": 8692 }, { "epoch": 1.5081540596807772, "grad_norm": 3.6228787899017334, "learning_rate": 8.624475652841549e-06, "loss": 0.3589, "step": 8693 }, { "epoch": 1.5083275503122833, "grad_norm": 0.6739755868911743, "learning_rate": 8.618872461738483e-06, "loss": 0.4677, "step": 8694 }, { "epoch": 1.5085010409437891, "grad_norm": 0.9304718971252441, "learning_rate": 8.613270591428537e-06, "loss": 0.4839, "step": 8695 }, { "epoch": 1.508674531575295, "grad_norm": 2.7039785385131836, "learning_rate": 8.607670042561807e-06, "loss": 0.5592, "step": 8696 }, { "epoch": 1.5088480222068008, "grad_norm": 0.783592700958252, "learning_rate": 8.602070815788241e-06, "loss": 0.4778, "step": 8697 }, { "epoch": 1.5090215128383067, "grad_norm": 0.6445301175117493, "learning_rate": 8.596472911757633e-06, "loss": 0.5728, "step": 8698 }, { "epoch": 1.5091950034698125, "grad_norm": 0.8287010788917542, "learning_rate": 8.59087633111964e-06, "loss": 0.4379, "step": 8699 }, { "epoch": 1.5093684941013186, "grad_norm": 0.8014488816261292, "learning_rate": 8.58528107452374e-06, "loss": 0.4491, "step": 8700 }, { "epoch": 1.5095419847328244, "grad_norm": 0.7545841932296753, "learning_rate": 8.579687142619267e-06, "loss": 0.4193, "step": 8701 }, { "epoch": 1.5097154753643305, "grad_norm": 0.8282356262207031, "learning_rate": 8.574094536055423e-06, "loss": 0.4635, "step": 8702 }, { "epoch": 1.5098889659958363, "grad_norm": 1.024894118309021, "learning_rate": 8.568503255481204e-06, "loss": 0.4114, "step": 8703 }, { "epoch": 1.5100624566273422, "grad_norm": 0.867028534412384, "learning_rate": 8.562913301545513e-06, "loss": 0.5336, "step": 8704 }, { "epoch": 1.510235947258848, "grad_norm": 0.6761842966079712, "learning_rate": 8.557324674897061e-06, "loss": 0.3893, "step": 8705 }, { "epoch": 1.5104094378903539, "grad_norm": 0.8481341004371643, "learning_rate": 8.55173737618441e-06, "loss": 0.4073, "step": 8706 }, { "epoch": 1.5105829285218597, "grad_norm": 0.7954474091529846, "learning_rate": 8.54615140605597e-06, "loss": 0.4332, "step": 8707 }, { "epoch": 1.5107564191533656, "grad_norm": 0.8756697773933411, "learning_rate": 8.540566765160016e-06, "loss": 0.4514, "step": 8708 }, { "epoch": 1.5109299097848716, "grad_norm": 2.885711669921875, "learning_rate": 8.53498345414464e-06, "loss": 0.5441, "step": 8709 }, { "epoch": 1.5111034004163775, "grad_norm": 0.6980843544006348, "learning_rate": 8.529401473657795e-06, "loss": 0.453, "step": 8710 }, { "epoch": 1.5112768910478835, "grad_norm": 1.0166019201278687, "learning_rate": 8.523820824347266e-06, "loss": 0.4214, "step": 8711 }, { "epoch": 1.5114503816793894, "grad_norm": 0.7295387387275696, "learning_rate": 8.518241506860719e-06, "loss": 0.3721, "step": 8712 }, { "epoch": 1.5116238723108952, "grad_norm": 0.9436020851135254, "learning_rate": 8.512663521845607e-06, "loss": 0.5176, "step": 8713 }, { "epoch": 1.511797362942401, "grad_norm": 0.7077387571334839, "learning_rate": 8.507086869949287e-06, "loss": 0.4146, "step": 8714 }, { "epoch": 1.511970853573907, "grad_norm": 1.8903543949127197, "learning_rate": 8.50151155181893e-06, "loss": 0.5031, "step": 8715 }, { "epoch": 1.5121443442054128, "grad_norm": 0.7740080952644348, "learning_rate": 8.495937568101551e-06, "loss": 0.5549, "step": 8716 }, { "epoch": 1.5123178348369188, "grad_norm": 0.7523136138916016, "learning_rate": 8.49036491944402e-06, "loss": 0.4861, "step": 8717 }, { "epoch": 1.5124913254684247, "grad_norm": 0.785311758518219, "learning_rate": 8.484793606493054e-06, "loss": 0.351, "step": 8718 }, { "epoch": 1.5126648160999308, "grad_norm": 1.0966126918792725, "learning_rate": 8.47922362989521e-06, "loss": 0.4734, "step": 8719 }, { "epoch": 1.5128383067314366, "grad_norm": 0.9584879875183105, "learning_rate": 8.473654990296887e-06, "loss": 0.3885, "step": 8720 }, { "epoch": 1.5130117973629424, "grad_norm": 1.2425463199615479, "learning_rate": 8.468087688344329e-06, "loss": 0.4983, "step": 8721 }, { "epoch": 1.5131852879944483, "grad_norm": 0.9456865191459656, "learning_rate": 8.462521724683637e-06, "loss": 0.3493, "step": 8722 }, { "epoch": 1.5133587786259541, "grad_norm": 1.2924953699111938, "learning_rate": 8.456957099960743e-06, "loss": 0.3552, "step": 8723 }, { "epoch": 1.51353226925746, "grad_norm": 0.617445707321167, "learning_rate": 8.451393814821427e-06, "loss": 0.571, "step": 8724 }, { "epoch": 1.5137057598889658, "grad_norm": 0.6202148199081421, "learning_rate": 8.445831869911317e-06, "loss": 0.5447, "step": 8725 }, { "epoch": 1.513879250520472, "grad_norm": 0.9100032448768616, "learning_rate": 8.440271265875875e-06, "loss": 0.3827, "step": 8726 }, { "epoch": 1.5140527411519777, "grad_norm": 1.0329697132110596, "learning_rate": 8.434712003360427e-06, "loss": 0.408, "step": 8727 }, { "epoch": 1.5142262317834838, "grad_norm": 0.7562122344970703, "learning_rate": 8.42915408301013e-06, "loss": 0.5271, "step": 8728 }, { "epoch": 1.5143997224149897, "grad_norm": 0.7683006525039673, "learning_rate": 8.423597505469983e-06, "loss": 0.4318, "step": 8729 }, { "epoch": 1.5145732130464955, "grad_norm": 1.2715370655059814, "learning_rate": 8.418042271384828e-06, "loss": 0.6125, "step": 8730 }, { "epoch": 1.5147467036780013, "grad_norm": 0.8350828289985657, "learning_rate": 8.412488381399378e-06, "loss": 0.4047, "step": 8731 }, { "epoch": 1.5149201943095072, "grad_norm": 0.763030469417572, "learning_rate": 8.406935836158138e-06, "loss": 0.4677, "step": 8732 }, { "epoch": 1.515093684941013, "grad_norm": 0.8802626132965088, "learning_rate": 8.401384636305509e-06, "loss": 0.3602, "step": 8733 }, { "epoch": 1.515267175572519, "grad_norm": 0.8387312293052673, "learning_rate": 8.39583478248571e-06, "loss": 0.413, "step": 8734 }, { "epoch": 1.515440666204025, "grad_norm": 0.7499859929084778, "learning_rate": 8.390286275342805e-06, "loss": 0.4001, "step": 8735 }, { "epoch": 1.515614156835531, "grad_norm": 0.7287644147872925, "learning_rate": 8.3847391155207e-06, "loss": 0.4792, "step": 8736 }, { "epoch": 1.5157876474670369, "grad_norm": 0.6044920086860657, "learning_rate": 8.379193303663162e-06, "loss": 0.6472, "step": 8737 }, { "epoch": 1.5159611380985427, "grad_norm": 0.676855742931366, "learning_rate": 8.373648840413781e-06, "loss": 0.5596, "step": 8738 }, { "epoch": 1.5161346287300486, "grad_norm": 0.6197137236595154, "learning_rate": 8.368105726416e-06, "loss": 0.496, "step": 8739 }, { "epoch": 1.5163081193615544, "grad_norm": 0.7422799468040466, "learning_rate": 8.362563962313095e-06, "loss": 0.5498, "step": 8740 }, { "epoch": 1.5164816099930603, "grad_norm": 0.7818398475646973, "learning_rate": 8.357023548748217e-06, "loss": 0.5175, "step": 8741 }, { "epoch": 1.5166551006245663, "grad_norm": 1.221691370010376, "learning_rate": 8.35148448636431e-06, "loss": 0.3844, "step": 8742 }, { "epoch": 1.5168285912560722, "grad_norm": 0.6863657236099243, "learning_rate": 8.345946775804209e-06, "loss": 0.5272, "step": 8743 }, { "epoch": 1.5170020818875782, "grad_norm": 0.6155330538749695, "learning_rate": 8.340410417710562e-06, "loss": 0.5604, "step": 8744 }, { "epoch": 1.517175572519084, "grad_norm": 0.8316579461097717, "learning_rate": 8.334875412725874e-06, "loss": 0.3724, "step": 8745 }, { "epoch": 1.51734906315059, "grad_norm": 0.7292289733886719, "learning_rate": 8.32934176149248e-06, "loss": 0.4319, "step": 8746 }, { "epoch": 1.5175225537820958, "grad_norm": 1.0372445583343506, "learning_rate": 8.323809464652579e-06, "loss": 0.3347, "step": 8747 }, { "epoch": 1.5176960444136016, "grad_norm": 1.7314422130584717, "learning_rate": 8.318278522848198e-06, "loss": 0.3685, "step": 8748 }, { "epoch": 1.5178695350451075, "grad_norm": 0.6570568084716797, "learning_rate": 8.312748936721196e-06, "loss": 0.4059, "step": 8749 }, { "epoch": 1.5180430256766133, "grad_norm": 0.5814393162727356, "learning_rate": 8.307220706913308e-06, "loss": 0.5522, "step": 8750 }, { "epoch": 1.5182165163081194, "grad_norm": 1.0725975036621094, "learning_rate": 8.30169383406608e-06, "loss": 0.4053, "step": 8751 }, { "epoch": 1.5183900069396252, "grad_norm": 0.7847479581832886, "learning_rate": 8.296168318820914e-06, "loss": 0.4827, "step": 8752 }, { "epoch": 1.5185634975711313, "grad_norm": 0.8318679928779602, "learning_rate": 8.290644161819046e-06, "loss": 0.4396, "step": 8753 }, { "epoch": 1.5187369882026371, "grad_norm": 0.5768228769302368, "learning_rate": 8.28512136370158e-06, "loss": 0.496, "step": 8754 }, { "epoch": 1.518910478834143, "grad_norm": 0.9376530647277832, "learning_rate": 8.279599925109415e-06, "loss": 0.5413, "step": 8755 }, { "epoch": 1.5190839694656488, "grad_norm": 0.6636176705360413, "learning_rate": 8.274079846683346e-06, "loss": 0.4565, "step": 8756 }, { "epoch": 1.5192574600971547, "grad_norm": 0.828438401222229, "learning_rate": 8.268561129063975e-06, "loss": 0.4106, "step": 8757 }, { "epoch": 1.5194309507286605, "grad_norm": 0.7038265466690063, "learning_rate": 8.263043772891752e-06, "loss": 0.379, "step": 8758 }, { "epoch": 1.5196044413601666, "grad_norm": 0.6734606623649597, "learning_rate": 8.257527778806968e-06, "loss": 0.5026, "step": 8759 }, { "epoch": 1.5197779319916724, "grad_norm": 0.6494773030281067, "learning_rate": 8.252013147449785e-06, "loss": 0.4474, "step": 8760 }, { "epoch": 1.5199514226231785, "grad_norm": 0.595797061920166, "learning_rate": 8.246499879460149e-06, "loss": 0.5162, "step": 8761 }, { "epoch": 1.5201249132546844, "grad_norm": 1.0151066780090332, "learning_rate": 8.240987975477903e-06, "loss": 0.4301, "step": 8762 }, { "epoch": 1.5202984038861902, "grad_norm": 0.668760359287262, "learning_rate": 8.2354774361427e-06, "loss": 0.4299, "step": 8763 }, { "epoch": 1.520471894517696, "grad_norm": 1.0509576797485352, "learning_rate": 8.229968262094064e-06, "loss": 0.4872, "step": 8764 }, { "epoch": 1.520645385149202, "grad_norm": 0.7104885578155518, "learning_rate": 8.224460453971307e-06, "loss": 0.5177, "step": 8765 }, { "epoch": 1.5208188757807077, "grad_norm": 0.9281945824623108, "learning_rate": 8.218954012413647e-06, "loss": 0.439, "step": 8766 }, { "epoch": 1.5209923664122136, "grad_norm": 0.9999685287475586, "learning_rate": 8.2134489380601e-06, "loss": 0.5006, "step": 8767 }, { "epoch": 1.5211658570437196, "grad_norm": 1.2470293045043945, "learning_rate": 8.207945231549539e-06, "loss": 0.5039, "step": 8768 }, { "epoch": 1.5213393476752255, "grad_norm": 0.6648445725440979, "learning_rate": 8.202442893520666e-06, "loss": 0.4855, "step": 8769 }, { "epoch": 1.5215128383067316, "grad_norm": 0.7445406913757324, "learning_rate": 8.19694192461205e-06, "loss": 0.5251, "step": 8770 }, { "epoch": 1.5216863289382374, "grad_norm": 0.9122549295425415, "learning_rate": 8.191442325462075e-06, "loss": 0.3891, "step": 8771 }, { "epoch": 1.5218598195697433, "grad_norm": 0.8672484755516052, "learning_rate": 8.185944096708982e-06, "loss": 0.4661, "step": 8772 }, { "epoch": 1.522033310201249, "grad_norm": 1.122320532798767, "learning_rate": 8.180447238990838e-06, "loss": 0.4647, "step": 8773 }, { "epoch": 1.522206800832755, "grad_norm": 0.8131636381149292, "learning_rate": 8.17495175294556e-06, "loss": 0.5242, "step": 8774 }, { "epoch": 1.5223802914642608, "grad_norm": 0.5619968771934509, "learning_rate": 8.169457639210916e-06, "loss": 0.5197, "step": 8775 }, { "epoch": 1.5225537820957669, "grad_norm": 1.2358834743499756, "learning_rate": 8.1639648984245e-06, "loss": 0.4531, "step": 8776 }, { "epoch": 1.5227272727272727, "grad_norm": 1.154571294784546, "learning_rate": 8.158473531223748e-06, "loss": 0.3551, "step": 8777 }, { "epoch": 1.5229007633587788, "grad_norm": 0.7980679869651794, "learning_rate": 8.152983538245933e-06, "loss": 0.4188, "step": 8778 }, { "epoch": 1.5230742539902846, "grad_norm": 0.8363901376724243, "learning_rate": 8.147494920128192e-06, "loss": 0.4165, "step": 8779 }, { "epoch": 1.5232477446217905, "grad_norm": 0.8565142750740051, "learning_rate": 8.142007677507475e-06, "loss": 0.4725, "step": 8780 }, { "epoch": 1.5234212352532963, "grad_norm": 0.6684045791625977, "learning_rate": 8.136521811020584e-06, "loss": 0.4329, "step": 8781 }, { "epoch": 1.5235947258848022, "grad_norm": 0.7264367341995239, "learning_rate": 8.131037321304154e-06, "loss": 0.4521, "step": 8782 }, { "epoch": 1.523768216516308, "grad_norm": 0.7267224788665771, "learning_rate": 8.125554208994688e-06, "loss": 0.3289, "step": 8783 }, { "epoch": 1.523941707147814, "grad_norm": 0.7770065069198608, "learning_rate": 8.120072474728476e-06, "loss": 0.5278, "step": 8784 }, { "epoch": 1.52411519777932, "grad_norm": 0.8099251985549927, "learning_rate": 8.114592119141704e-06, "loss": 0.4939, "step": 8785 }, { "epoch": 1.5242886884108258, "grad_norm": 0.9952824711799622, "learning_rate": 8.10911314287037e-06, "loss": 0.5514, "step": 8786 }, { "epoch": 1.5244621790423318, "grad_norm": 0.8321950435638428, "learning_rate": 8.103635546550305e-06, "loss": 0.4597, "step": 8787 }, { "epoch": 1.5246356696738377, "grad_norm": 0.8479383587837219, "learning_rate": 8.098159330817192e-06, "loss": 0.36, "step": 8788 }, { "epoch": 1.5248091603053435, "grad_norm": 0.7072434425354004, "learning_rate": 8.092684496306573e-06, "loss": 0.4403, "step": 8789 }, { "epoch": 1.5249826509368494, "grad_norm": 0.6384987235069275, "learning_rate": 8.087211043653777e-06, "loss": 0.6069, "step": 8790 }, { "epoch": 1.5251561415683552, "grad_norm": 0.8090570569038391, "learning_rate": 8.08173897349403e-06, "loss": 0.4767, "step": 8791 }, { "epoch": 1.525329632199861, "grad_norm": 0.794032871723175, "learning_rate": 8.076268286462352e-06, "loss": 0.4619, "step": 8792 }, { "epoch": 1.5255031228313671, "grad_norm": 0.839773952960968, "learning_rate": 8.070798983193651e-06, "loss": 0.3959, "step": 8793 }, { "epoch": 1.525676613462873, "grad_norm": 0.9397363662719727, "learning_rate": 8.065331064322612e-06, "loss": 0.4071, "step": 8794 }, { "epoch": 1.525850104094379, "grad_norm": 1.0764880180358887, "learning_rate": 8.059864530483816e-06, "loss": 0.4209, "step": 8795 }, { "epoch": 1.526023594725885, "grad_norm": 0.7488556504249573, "learning_rate": 8.054399382311657e-06, "loss": 0.6305, "step": 8796 }, { "epoch": 1.5261970853573907, "grad_norm": 0.7319439649581909, "learning_rate": 8.048935620440359e-06, "loss": 0.5018, "step": 8797 }, { "epoch": 1.5263705759888966, "grad_norm": 3.2551815509796143, "learning_rate": 8.043473245504017e-06, "loss": 0.5421, "step": 8798 }, { "epoch": 1.5265440666204024, "grad_norm": 0.7339943051338196, "learning_rate": 8.038012258136536e-06, "loss": 0.4573, "step": 8799 }, { "epoch": 1.5267175572519083, "grad_norm": 0.7562931776046753, "learning_rate": 8.032552658971672e-06, "loss": 0.4332, "step": 8800 }, { "epoch": 1.5268910478834143, "grad_norm": 1.1799920797348022, "learning_rate": 8.02709444864301e-06, "loss": 0.4946, "step": 8801 }, { "epoch": 1.5270645385149202, "grad_norm": 1.3458267450332642, "learning_rate": 8.021637627784e-06, "loss": 0.3823, "step": 8802 }, { "epoch": 1.5272380291464263, "grad_norm": 0.8133507966995239, "learning_rate": 8.01618219702789e-06, "loss": 0.4359, "step": 8803 }, { "epoch": 1.527411519777932, "grad_norm": 0.8063660860061646, "learning_rate": 8.010728157007805e-06, "loss": 0.4825, "step": 8804 }, { "epoch": 1.527585010409438, "grad_norm": 0.7743210196495056, "learning_rate": 8.005275508356689e-06, "loss": 0.5447, "step": 8805 }, { "epoch": 1.5277585010409438, "grad_norm": 0.9252443909645081, "learning_rate": 7.999824251707324e-06, "loss": 0.4867, "step": 8806 }, { "epoch": 1.5279319916724496, "grad_norm": 0.9197413921356201, "learning_rate": 7.994374387692335e-06, "loss": 0.3779, "step": 8807 }, { "epoch": 1.5281054823039555, "grad_norm": 0.9694995284080505, "learning_rate": 7.98892591694419e-06, "loss": 0.4221, "step": 8808 }, { "epoch": 1.5282789729354613, "grad_norm": 0.7210115194320679, "learning_rate": 7.98347884009519e-06, "loss": 0.5466, "step": 8809 }, { "epoch": 1.5284524635669674, "grad_norm": 0.7455983757972717, "learning_rate": 7.978033157777473e-06, "loss": 0.3805, "step": 8810 }, { "epoch": 1.5286259541984732, "grad_norm": 0.7473982572555542, "learning_rate": 7.97258887062301e-06, "loss": 0.449, "step": 8811 }, { "epoch": 1.5287994448299793, "grad_norm": 0.7718564867973328, "learning_rate": 7.967145979263637e-06, "loss": 0.4937, "step": 8812 }, { "epoch": 1.5289729354614852, "grad_norm": 0.6857679486274719, "learning_rate": 7.961704484330979e-06, "loss": 0.5232, "step": 8813 }, { "epoch": 1.529146426092991, "grad_norm": 0.7407565116882324, "learning_rate": 7.956264386456551e-06, "loss": 0.4028, "step": 8814 }, { "epoch": 1.5293199167244969, "grad_norm": 0.9997866749763489, "learning_rate": 7.950825686271674e-06, "loss": 0.4673, "step": 8815 }, { "epoch": 1.5294934073560027, "grad_norm": 0.706325352191925, "learning_rate": 7.945388384407518e-06, "loss": 0.4778, "step": 8816 }, { "epoch": 1.5296668979875085, "grad_norm": 0.9359931349754333, "learning_rate": 7.939952481495076e-06, "loss": 0.3251, "step": 8817 }, { "epoch": 1.5298403886190146, "grad_norm": 5.180793285369873, "learning_rate": 7.934517978165211e-06, "loss": 0.382, "step": 8818 }, { "epoch": 1.5300138792505205, "grad_norm": 0.7217835187911987, "learning_rate": 7.929084875048594e-06, "loss": 0.4458, "step": 8819 }, { "epoch": 1.5301873698820265, "grad_norm": 0.6770364046096802, "learning_rate": 7.92365317277574e-06, "loss": 0.5565, "step": 8820 }, { "epoch": 1.5303608605135324, "grad_norm": 0.9632218480110168, "learning_rate": 7.918222871977004e-06, "loss": 0.4833, "step": 8821 }, { "epoch": 1.5305343511450382, "grad_norm": 0.9420800805091858, "learning_rate": 7.912793973282584e-06, "loss": 0.4531, "step": 8822 }, { "epoch": 1.530707841776544, "grad_norm": 0.8090986609458923, "learning_rate": 7.90736647732251e-06, "loss": 0.4592, "step": 8823 }, { "epoch": 1.53088133240805, "grad_norm": 0.8491949439048767, "learning_rate": 7.90194038472665e-06, "loss": 0.3547, "step": 8824 }, { "epoch": 1.5310548230395558, "grad_norm": 0.8736350536346436, "learning_rate": 7.896515696124703e-06, "loss": 0.3782, "step": 8825 }, { "epoch": 1.5312283136710616, "grad_norm": 0.7773745656013489, "learning_rate": 7.891092412146204e-06, "loss": 0.4202, "step": 8826 }, { "epoch": 1.5314018043025677, "grad_norm": 0.7690825462341309, "learning_rate": 7.885670533420548e-06, "loss": 0.4293, "step": 8827 }, { "epoch": 1.5315752949340735, "grad_norm": 2.417379379272461, "learning_rate": 7.880250060576943e-06, "loss": 0.3743, "step": 8828 }, { "epoch": 1.5317487855655796, "grad_norm": 0.6922433376312256, "learning_rate": 7.874830994244443e-06, "loss": 0.5067, "step": 8829 }, { "epoch": 1.5319222761970854, "grad_norm": 0.7780335545539856, "learning_rate": 7.869413335051926e-06, "loss": 0.535, "step": 8830 }, { "epoch": 1.5320957668285913, "grad_norm": 0.9590572714805603, "learning_rate": 7.863997083628139e-06, "loss": 0.5477, "step": 8831 }, { "epoch": 1.5322692574600971, "grad_norm": 0.8635021448135376, "learning_rate": 7.85858224060162e-06, "loss": 0.4388, "step": 8832 }, { "epoch": 1.532442748091603, "grad_norm": 0.629139244556427, "learning_rate": 7.853168806600784e-06, "loss": 0.5092, "step": 8833 }, { "epoch": 1.5326162387231088, "grad_norm": 1.41582453250885, "learning_rate": 7.847756782253864e-06, "loss": 0.456, "step": 8834 }, { "epoch": 1.5327897293546149, "grad_norm": 1.408156156539917, "learning_rate": 7.842346168188927e-06, "loss": 0.4902, "step": 8835 }, { "epoch": 1.5329632199861207, "grad_norm": 0.7910613417625427, "learning_rate": 7.836936965033881e-06, "loss": 0.4017, "step": 8836 }, { "epoch": 1.5331367106176268, "grad_norm": 0.842945396900177, "learning_rate": 7.831529173416476e-06, "loss": 0.4017, "step": 8837 }, { "epoch": 1.5333102012491326, "grad_norm": 0.7382654547691345, "learning_rate": 7.826122793964293e-06, "loss": 0.3668, "step": 8838 }, { "epoch": 1.5334836918806385, "grad_norm": 0.6733341813087463, "learning_rate": 7.820717827304743e-06, "loss": 0.48, "step": 8839 }, { "epoch": 1.5336571825121443, "grad_norm": 1.4515665769577026, "learning_rate": 7.815314274065074e-06, "loss": 0.3545, "step": 8840 }, { "epoch": 1.5338306731436502, "grad_norm": 0.868407130241394, "learning_rate": 7.809912134872395e-06, "loss": 0.3721, "step": 8841 }, { "epoch": 1.534004163775156, "grad_norm": 0.7506738305091858, "learning_rate": 7.804511410353603e-06, "loss": 0.4023, "step": 8842 }, { "epoch": 1.534177654406662, "grad_norm": 0.7305961847305298, "learning_rate": 7.79911210113548e-06, "loss": 0.5459, "step": 8843 }, { "epoch": 1.534351145038168, "grad_norm": 0.6782276630401611, "learning_rate": 7.793714207844616e-06, "loss": 0.4285, "step": 8844 }, { "epoch": 1.5345246356696738, "grad_norm": 0.6881431341171265, "learning_rate": 7.788317731107431e-06, "loss": 0.5073, "step": 8845 }, { "epoch": 1.5346981263011799, "grad_norm": 0.6842974424362183, "learning_rate": 7.782922671550213e-06, "loss": 0.4827, "step": 8846 }, { "epoch": 1.5348716169326857, "grad_norm": 0.9670971632003784, "learning_rate": 7.777529029799053e-06, "loss": 0.5076, "step": 8847 }, { "epoch": 1.5350451075641915, "grad_norm": 0.9345824718475342, "learning_rate": 7.772136806479891e-06, "loss": 0.5463, "step": 8848 }, { "epoch": 1.5352185981956974, "grad_norm": 0.8835152983665466, "learning_rate": 7.766746002218495e-06, "loss": 0.4781, "step": 8849 }, { "epoch": 1.5353920888272032, "grad_norm": 0.7977238297462463, "learning_rate": 7.761356617640485e-06, "loss": 0.3837, "step": 8850 }, { "epoch": 1.535565579458709, "grad_norm": 0.5723086595535278, "learning_rate": 7.755968653371302e-06, "loss": 0.4725, "step": 8851 }, { "epoch": 1.5357390700902152, "grad_norm": 0.766066312789917, "learning_rate": 7.750582110036225e-06, "loss": 0.3985, "step": 8852 }, { "epoch": 1.535912560721721, "grad_norm": 0.8079668879508972, "learning_rate": 7.745196988260357e-06, "loss": 0.5059, "step": 8853 }, { "epoch": 1.536086051353227, "grad_norm": 1.4065872430801392, "learning_rate": 7.739813288668677e-06, "loss": 0.3893, "step": 8854 }, { "epoch": 1.536259541984733, "grad_norm": 0.8298816680908203, "learning_rate": 7.734431011885932e-06, "loss": 0.5157, "step": 8855 }, { "epoch": 1.5364330326162388, "grad_norm": 0.7164503931999207, "learning_rate": 7.72905015853677e-06, "loss": 0.4236, "step": 8856 }, { "epoch": 1.5366065232477446, "grad_norm": 0.7510810494422913, "learning_rate": 7.723670729245636e-06, "loss": 0.4879, "step": 8857 }, { "epoch": 1.5367800138792505, "grad_norm": 1.4038009643554688, "learning_rate": 7.718292724636815e-06, "loss": 0.4797, "step": 8858 }, { "epoch": 1.5369535045107563, "grad_norm": 0.6696471571922302, "learning_rate": 7.712916145334432e-06, "loss": 0.6, "step": 8859 }, { "epoch": 1.5371269951422624, "grad_norm": 1.284012794494629, "learning_rate": 7.70754099196246e-06, "loss": 0.5154, "step": 8860 }, { "epoch": 1.5373004857737682, "grad_norm": 1.2538352012634277, "learning_rate": 7.702167265144664e-06, "loss": 0.4362, "step": 8861 }, { "epoch": 1.5374739764052743, "grad_norm": 0.7628593444824219, "learning_rate": 7.696794965504695e-06, "loss": 0.4001, "step": 8862 }, { "epoch": 1.5376474670367801, "grad_norm": 1.3229341506958008, "learning_rate": 7.691424093666007e-06, "loss": 0.5374, "step": 8863 }, { "epoch": 1.537820957668286, "grad_norm": 0.7463794350624084, "learning_rate": 7.686054650251893e-06, "loss": 0.5352, "step": 8864 }, { "epoch": 1.5379944482997918, "grad_norm": 0.8578668236732483, "learning_rate": 7.68068663588548e-06, "loss": 0.3442, "step": 8865 }, { "epoch": 1.5381679389312977, "grad_norm": 0.9100556373596191, "learning_rate": 7.675320051189746e-06, "loss": 0.3804, "step": 8866 }, { "epoch": 1.5383414295628035, "grad_norm": 0.9146038889884949, "learning_rate": 7.66995489678748e-06, "loss": 0.4378, "step": 8867 }, { "epoch": 1.5385149201943094, "grad_norm": 0.9123438000679016, "learning_rate": 7.664591173301315e-06, "loss": 0.4573, "step": 8868 }, { "epoch": 1.5386884108258154, "grad_norm": 0.6044626832008362, "learning_rate": 7.65922888135371e-06, "loss": 0.5498, "step": 8869 }, { "epoch": 1.5388619014573213, "grad_norm": 0.7664366960525513, "learning_rate": 7.65386802156698e-06, "loss": 0.3813, "step": 8870 }, { "epoch": 1.5390353920888273, "grad_norm": 0.5493521690368652, "learning_rate": 7.648508594563256e-06, "loss": 0.4487, "step": 8871 }, { "epoch": 1.5392088827203332, "grad_norm": 0.8492365479469299, "learning_rate": 7.64315060096449e-06, "loss": 0.4982, "step": 8872 }, { "epoch": 1.539382373351839, "grad_norm": 0.7712075114250183, "learning_rate": 7.637794041392512e-06, "loss": 0.4443, "step": 8873 }, { "epoch": 1.5395558639833449, "grad_norm": 1.18946373462677, "learning_rate": 7.632438916468928e-06, "loss": 0.3827, "step": 8874 }, { "epoch": 1.5397293546148507, "grad_norm": 0.9963278770446777, "learning_rate": 7.627085226815223e-06, "loss": 0.3531, "step": 8875 }, { "epoch": 1.5399028452463566, "grad_norm": 0.8705530166625977, "learning_rate": 7.621732973052696e-06, "loss": 0.4442, "step": 8876 }, { "epoch": 1.5400763358778626, "grad_norm": 0.7073082327842712, "learning_rate": 7.616382155802482e-06, "loss": 0.4034, "step": 8877 }, { "epoch": 1.5402498265093685, "grad_norm": 0.617682695388794, "learning_rate": 7.611032775685541e-06, "loss": 0.3966, "step": 8878 }, { "epoch": 1.5404233171408745, "grad_norm": 0.7195889353752136, "learning_rate": 7.605684833322686e-06, "loss": 0.5928, "step": 8879 }, { "epoch": 1.5405968077723804, "grad_norm": 0.8288052082061768, "learning_rate": 7.600338329334554e-06, "loss": 0.4183, "step": 8880 }, { "epoch": 1.5407702984038862, "grad_norm": 0.8420101404190063, "learning_rate": 7.5949932643416036e-06, "loss": 0.4926, "step": 8881 }, { "epoch": 1.540943789035392, "grad_norm": 0.6889872550964355, "learning_rate": 7.5896496389641336e-06, "loss": 0.597, "step": 8882 }, { "epoch": 1.541117279666898, "grad_norm": 0.6035048961639404, "learning_rate": 7.584307453822297e-06, "loss": 0.4974, "step": 8883 }, { "epoch": 1.5412907702984038, "grad_norm": 0.8604782223701477, "learning_rate": 7.5789667095360355e-06, "loss": 0.3951, "step": 8884 }, { "epoch": 1.5414642609299096, "grad_norm": 0.8894672393798828, "learning_rate": 7.5736274067251655e-06, "loss": 0.5442, "step": 8885 }, { "epoch": 1.5416377515614157, "grad_norm": 0.9153072834014893, "learning_rate": 7.568289546009316e-06, "loss": 0.4735, "step": 8886 }, { "epoch": 1.5418112421929215, "grad_norm": 1.0435465574264526, "learning_rate": 7.562953128007951e-06, "loss": 0.4873, "step": 8887 }, { "epoch": 1.5419847328244276, "grad_norm": 0.8539049029350281, "learning_rate": 7.557618153340358e-06, "loss": 0.4045, "step": 8888 }, { "epoch": 1.5421582234559335, "grad_norm": 1.1509201526641846, "learning_rate": 7.5522846226256855e-06, "loss": 0.4175, "step": 8889 }, { "epoch": 1.5423317140874393, "grad_norm": 0.8957439064979553, "learning_rate": 7.546952536482888e-06, "loss": 0.4139, "step": 8890 }, { "epoch": 1.5425052047189451, "grad_norm": 0.7355961799621582, "learning_rate": 7.541621895530757e-06, "loss": 0.4144, "step": 8891 }, { "epoch": 1.542678695350451, "grad_norm": 1.029009461402893, "learning_rate": 7.536292700387924e-06, "loss": 0.5231, "step": 8892 }, { "epoch": 1.5428521859819568, "grad_norm": 0.7891607284545898, "learning_rate": 7.530964951672836e-06, "loss": 0.448, "step": 8893 }, { "epoch": 1.543025676613463, "grad_norm": 0.8846710324287415, "learning_rate": 7.5256386500038055e-06, "loss": 0.4792, "step": 8894 }, { "epoch": 1.5431991672449688, "grad_norm": 0.9755485653877258, "learning_rate": 7.520313795998943e-06, "loss": 0.4188, "step": 8895 }, { "epoch": 1.5433726578764748, "grad_norm": 0.7196856737136841, "learning_rate": 7.5149903902762066e-06, "loss": 0.3968, "step": 8896 }, { "epoch": 1.5435461485079807, "grad_norm": 0.8937692642211914, "learning_rate": 7.5096684334533765e-06, "loss": 0.3875, "step": 8897 }, { "epoch": 1.5437196391394865, "grad_norm": 0.7358959317207336, "learning_rate": 7.504347926148086e-06, "loss": 0.4679, "step": 8898 }, { "epoch": 1.5438931297709924, "grad_norm": 0.9264071583747864, "learning_rate": 7.499028868977778e-06, "loss": 0.5386, "step": 8899 }, { "epoch": 1.5440666204024982, "grad_norm": 0.8067649006843567, "learning_rate": 7.4937112625597375e-06, "loss": 0.4251, "step": 8900 }, { "epoch": 1.544240111034004, "grad_norm": 0.9063412547111511, "learning_rate": 7.48839510751107e-06, "loss": 0.4148, "step": 8901 }, { "epoch": 1.5444136016655101, "grad_norm": 0.7851597666740417, "learning_rate": 7.483080404448744e-06, "loss": 0.4479, "step": 8902 }, { "epoch": 1.544587092297016, "grad_norm": 1.094315528869629, "learning_rate": 7.477767153989508e-06, "loss": 0.3554, "step": 8903 }, { "epoch": 1.5447605829285218, "grad_norm": 0.879088819026947, "learning_rate": 7.472455356749992e-06, "loss": 0.5227, "step": 8904 }, { "epoch": 1.5449340735600279, "grad_norm": 0.6144444346427917, "learning_rate": 7.4671450133466285e-06, "loss": 0.5631, "step": 8905 }, { "epoch": 1.5451075641915337, "grad_norm": 0.6775422692298889, "learning_rate": 7.461836124395692e-06, "loss": 0.4895, "step": 8906 }, { "epoch": 1.5452810548230396, "grad_norm": 0.8490018248558044, "learning_rate": 7.456528690513274e-06, "loss": 0.4784, "step": 8907 }, { "epoch": 1.5454545454545454, "grad_norm": 0.8370487093925476, "learning_rate": 7.451222712315325e-06, "loss": 0.4385, "step": 8908 }, { "epoch": 1.5456280360860513, "grad_norm": 0.7490215301513672, "learning_rate": 7.445918190417603e-06, "loss": 0.3976, "step": 8909 }, { "epoch": 1.545801526717557, "grad_norm": 0.7567493319511414, "learning_rate": 7.440615125435702e-06, "loss": 0.4846, "step": 8910 }, { "epoch": 1.5459750173490632, "grad_norm": 1.395071268081665, "learning_rate": 7.435313517985043e-06, "loss": 0.3939, "step": 8911 }, { "epoch": 1.546148507980569, "grad_norm": 1.2977927923202515, "learning_rate": 7.430013368680908e-06, "loss": 0.4623, "step": 8912 }, { "epoch": 1.546321998612075, "grad_norm": 0.7360656261444092, "learning_rate": 7.424714678138351e-06, "loss": 0.4561, "step": 8913 }, { "epoch": 1.546495489243581, "grad_norm": 0.6926393508911133, "learning_rate": 7.419417446972319e-06, "loss": 0.4658, "step": 8914 }, { "epoch": 1.5466689798750868, "grad_norm": 0.8517128229141235, "learning_rate": 7.41412167579755e-06, "loss": 0.5496, "step": 8915 }, { "epoch": 1.5468424705065926, "grad_norm": 2.9771463871002197, "learning_rate": 7.408827365228625e-06, "loss": 0.5194, "step": 8916 }, { "epoch": 1.5470159611380985, "grad_norm": 0.8298816084861755, "learning_rate": 7.403534515879951e-06, "loss": 0.4966, "step": 8917 }, { "epoch": 1.5471894517696043, "grad_norm": 0.7964062690734863, "learning_rate": 7.3982431283657805e-06, "loss": 0.4346, "step": 8918 }, { "epoch": 1.5473629424011104, "grad_norm": 0.8984449505805969, "learning_rate": 7.392953203300177e-06, "loss": 0.4348, "step": 8919 }, { "epoch": 1.5475364330326162, "grad_norm": 0.8992365598678589, "learning_rate": 7.38766474129704e-06, "loss": 0.4463, "step": 8920 }, { "epoch": 1.5477099236641223, "grad_norm": 0.724549412727356, "learning_rate": 7.3823777429701125e-06, "loss": 0.4171, "step": 8921 }, { "epoch": 1.5478834142956281, "grad_norm": 0.8949272632598877, "learning_rate": 7.37709220893295e-06, "loss": 0.5023, "step": 8922 }, { "epoch": 1.548056904927134, "grad_norm": 0.8588585257530212, "learning_rate": 7.371808139798944e-06, "loss": 0.3899, "step": 8923 }, { "epoch": 1.5482303955586398, "grad_norm": 0.6372013092041016, "learning_rate": 7.3665255361813125e-06, "loss": 0.4033, "step": 8924 }, { "epoch": 1.5484038861901457, "grad_norm": 0.9637125730514526, "learning_rate": 7.361244398693128e-06, "loss": 0.363, "step": 8925 }, { "epoch": 1.5485773768216515, "grad_norm": 0.5929570198059082, "learning_rate": 7.355964727947242e-06, "loss": 0.3713, "step": 8926 }, { "epoch": 1.5487508674531574, "grad_norm": 0.9674267768859863, "learning_rate": 7.350686524556392e-06, "loss": 0.4985, "step": 8927 }, { "epoch": 1.5489243580846634, "grad_norm": 0.7561858296394348, "learning_rate": 7.3454097891331085e-06, "loss": 0.527, "step": 8928 }, { "epoch": 1.5490978487161693, "grad_norm": 0.7522987127304077, "learning_rate": 7.340134522289766e-06, "loss": 0.3781, "step": 8929 }, { "epoch": 1.5492713393476754, "grad_norm": 0.8440223336219788, "learning_rate": 7.334860724638555e-06, "loss": 0.3956, "step": 8930 }, { "epoch": 1.5494448299791812, "grad_norm": 0.9758820533752441, "learning_rate": 7.329588396791529e-06, "loss": 0.3809, "step": 8931 }, { "epoch": 1.549618320610687, "grad_norm": 0.6462194323539734, "learning_rate": 7.3243175393605215e-06, "loss": 0.4856, "step": 8932 }, { "epoch": 1.549791811242193, "grad_norm": 0.7615544199943542, "learning_rate": 7.31904815295724e-06, "loss": 0.5265, "step": 8933 }, { "epoch": 1.5499653018736987, "grad_norm": 0.737786054611206, "learning_rate": 7.313780238193195e-06, "loss": 0.5164, "step": 8934 }, { "epoch": 1.5501387925052046, "grad_norm": 1.0981014966964722, "learning_rate": 7.308513795679737e-06, "loss": 0.485, "step": 8935 }, { "epoch": 1.5503122831367107, "grad_norm": 1.1777385473251343, "learning_rate": 7.303248826028036e-06, "loss": 0.4087, "step": 8936 }, { "epoch": 1.5504857737682165, "grad_norm": 0.7753485441207886, "learning_rate": 7.297985329849106e-06, "loss": 0.4117, "step": 8937 }, { "epoch": 1.5506592643997226, "grad_norm": 1.2526369094848633, "learning_rate": 7.292723307753784e-06, "loss": 0.476, "step": 8938 }, { "epoch": 1.5508327550312284, "grad_norm": 0.814134955406189, "learning_rate": 7.287462760352726e-06, "loss": 0.5787, "step": 8939 }, { "epoch": 1.5510062456627343, "grad_norm": 0.7823880910873413, "learning_rate": 7.282203688256422e-06, "loss": 0.5027, "step": 8940 }, { "epoch": 1.55117973629424, "grad_norm": 0.5917524099349976, "learning_rate": 7.276946092075205e-06, "loss": 0.4935, "step": 8941 }, { "epoch": 1.551353226925746, "grad_norm": 1.1361775398254395, "learning_rate": 7.27168997241922e-06, "loss": 0.4821, "step": 8942 }, { "epoch": 1.5515267175572518, "grad_norm": 0.9524984955787659, "learning_rate": 7.266435329898447e-06, "loss": 0.4102, "step": 8943 }, { "epoch": 1.5517002081887576, "grad_norm": 0.6795886158943176, "learning_rate": 7.261182165122689e-06, "loss": 0.4523, "step": 8944 }, { "epoch": 1.5518736988202637, "grad_norm": 0.6872532963752747, "learning_rate": 7.255930478701578e-06, "loss": 0.5601, "step": 8945 }, { "epoch": 1.5520471894517696, "grad_norm": 1.181846261024475, "learning_rate": 7.250680271244593e-06, "loss": 0.4717, "step": 8946 }, { "epoch": 1.5522206800832756, "grad_norm": 0.7594887614250183, "learning_rate": 7.245431543361016e-06, "loss": 0.4582, "step": 8947 }, { "epoch": 1.5523941707147815, "grad_norm": 1.0892949104309082, "learning_rate": 7.240184295659971e-06, "loss": 0.3826, "step": 8948 }, { "epoch": 1.5525676613462873, "grad_norm": 0.7497468590736389, "learning_rate": 7.234938528750402e-06, "loss": 0.4995, "step": 8949 }, { "epoch": 1.5527411519777932, "grad_norm": 0.7543135285377502, "learning_rate": 7.229694243241097e-06, "loss": 0.4598, "step": 8950 }, { "epoch": 1.552914642609299, "grad_norm": 0.8440332412719727, "learning_rate": 7.224451439740654e-06, "loss": 0.5125, "step": 8951 }, { "epoch": 1.5530881332408049, "grad_norm": 0.9373429417610168, "learning_rate": 7.219210118857509e-06, "loss": 0.5015, "step": 8952 }, { "epoch": 1.553261623872311, "grad_norm": 0.8574715256690979, "learning_rate": 7.213970281199913e-06, "loss": 0.3992, "step": 8953 }, { "epoch": 1.5534351145038168, "grad_norm": 1.7464864253997803, "learning_rate": 7.208731927375982e-06, "loss": 0.344, "step": 8954 }, { "epoch": 1.5536086051353228, "grad_norm": 0.7708041071891785, "learning_rate": 7.203495057993599e-06, "loss": 0.3959, "step": 8955 }, { "epoch": 1.5537820957668287, "grad_norm": 0.8572695851325989, "learning_rate": 7.198259673660535e-06, "loss": 0.4666, "step": 8956 }, { "epoch": 1.5539555863983345, "grad_norm": 0.9208317399024963, "learning_rate": 7.19302577498435e-06, "loss": 0.3711, "step": 8957 }, { "epoch": 1.5541290770298404, "grad_norm": 0.8072161078453064, "learning_rate": 7.187793362572451e-06, "loss": 0.4554, "step": 8958 }, { "epoch": 1.5543025676613462, "grad_norm": 0.6844647526741028, "learning_rate": 7.1825624370320505e-06, "loss": 0.3895, "step": 8959 }, { "epoch": 1.554476058292852, "grad_norm": 0.5709481835365295, "learning_rate": 7.17733299897023e-06, "loss": 0.6091, "step": 8960 }, { "epoch": 1.5546495489243581, "grad_norm": 0.6680960059165955, "learning_rate": 7.172105048993842e-06, "loss": 0.5458, "step": 8961 }, { "epoch": 1.554823039555864, "grad_norm": 0.6700970530509949, "learning_rate": 7.166878587709618e-06, "loss": 0.4829, "step": 8962 }, { "epoch": 1.5549965301873698, "grad_norm": 0.7285874485969543, "learning_rate": 7.1616536157240865e-06, "loss": 0.4807, "step": 8963 }, { "epoch": 1.555170020818876, "grad_norm": 0.7929092645645142, "learning_rate": 7.156430133643613e-06, "loss": 0.4189, "step": 8964 }, { "epoch": 1.5553435114503817, "grad_norm": 1.0222623348236084, "learning_rate": 7.151208142074382e-06, "loss": 0.5461, "step": 8965 }, { "epoch": 1.5555170020818876, "grad_norm": 0.7129715085029602, "learning_rate": 7.145987641622423e-06, "loss": 0.6161, "step": 8966 }, { "epoch": 1.5556904927133934, "grad_norm": 0.897832453250885, "learning_rate": 7.1407686328935774e-06, "loss": 0.4286, "step": 8967 }, { "epoch": 1.5558639833448993, "grad_norm": 0.5735976696014404, "learning_rate": 7.1355511164935085e-06, "loss": 0.4882, "step": 8968 }, { "epoch": 1.5560374739764051, "grad_norm": 0.7005171775817871, "learning_rate": 7.130335093027731e-06, "loss": 0.4387, "step": 8969 }, { "epoch": 1.5562109646079112, "grad_norm": 0.9279868602752686, "learning_rate": 7.125120563101562e-06, "loss": 0.42, "step": 8970 }, { "epoch": 1.556384455239417, "grad_norm": 0.9232456088066101, "learning_rate": 7.1199075273201515e-06, "loss": 0.5034, "step": 8971 }, { "epoch": 1.556557945870923, "grad_norm": 0.8271253705024719, "learning_rate": 7.114695986288476e-06, "loss": 0.5229, "step": 8972 }, { "epoch": 1.556731436502429, "grad_norm": 0.8400318026542664, "learning_rate": 7.109485940611358e-06, "loss": 0.41, "step": 8973 }, { "epoch": 1.5569049271339348, "grad_norm": 0.868891179561615, "learning_rate": 7.104277390893404e-06, "loss": 0.5298, "step": 8974 }, { "epoch": 1.5570784177654406, "grad_norm": 0.691235363483429, "learning_rate": 7.099070337739094e-06, "loss": 0.5015, "step": 8975 }, { "epoch": 1.5572519083969465, "grad_norm": 0.6555090546607971, "learning_rate": 7.0938647817527014e-06, "loss": 0.5098, "step": 8976 }, { "epoch": 1.5574253990284523, "grad_norm": 0.8201124668121338, "learning_rate": 7.088660723538339e-06, "loss": 0.3494, "step": 8977 }, { "epoch": 1.5575988896599584, "grad_norm": 1.1224524974822998, "learning_rate": 7.083458163699939e-06, "loss": 0.3679, "step": 8978 }, { "epoch": 1.5577723802914643, "grad_norm": 0.7555831074714661, "learning_rate": 7.0782571028412774e-06, "loss": 0.3779, "step": 8979 }, { "epoch": 1.5579458709229703, "grad_norm": 0.7846662402153015, "learning_rate": 7.073057541565933e-06, "loss": 0.4219, "step": 8980 }, { "epoch": 1.5581193615544762, "grad_norm": 0.8801530003547668, "learning_rate": 7.067859480477326e-06, "loss": 0.4624, "step": 8981 }, { "epoch": 1.558292852185982, "grad_norm": 0.9899958968162537, "learning_rate": 7.062662920178689e-06, "loss": 0.4891, "step": 8982 }, { "epoch": 1.5584663428174879, "grad_norm": 1.2896184921264648, "learning_rate": 7.057467861273106e-06, "loss": 0.4803, "step": 8983 }, { "epoch": 1.5586398334489937, "grad_norm": 0.8861548900604248, "learning_rate": 7.052274304363449e-06, "loss": 0.4966, "step": 8984 }, { "epoch": 1.5588133240804996, "grad_norm": 0.8386437892913818, "learning_rate": 7.0470822500524504e-06, "loss": 0.3698, "step": 8985 }, { "epoch": 1.5589868147120054, "grad_norm": 0.657148003578186, "learning_rate": 7.041891698942649e-06, "loss": 0.4625, "step": 8986 }, { "epoch": 1.5591603053435115, "grad_norm": 0.688218891620636, "learning_rate": 7.036702651636416e-06, "loss": 0.554, "step": 8987 }, { "epoch": 1.5593337959750173, "grad_norm": 0.76467365026474, "learning_rate": 7.03151510873594e-06, "loss": 0.4149, "step": 8988 }, { "epoch": 1.5595072866065234, "grad_norm": 0.6468123197555542, "learning_rate": 7.0263290708432515e-06, "loss": 0.5258, "step": 8989 }, { "epoch": 1.5596807772380292, "grad_norm": 0.8571133017539978, "learning_rate": 7.021144538560194e-06, "loss": 0.554, "step": 8990 }, { "epoch": 1.559854267869535, "grad_norm": 0.6320785880088806, "learning_rate": 7.015961512488434e-06, "loss": 0.4454, "step": 8991 }, { "epoch": 1.560027758501041, "grad_norm": 0.7255579233169556, "learning_rate": 7.010779993229471e-06, "loss": 0.3877, "step": 8992 }, { "epoch": 1.5602012491325468, "grad_norm": 0.8437013626098633, "learning_rate": 7.005599981384618e-06, "loss": 0.459, "step": 8993 }, { "epoch": 1.5603747397640526, "grad_norm": 0.7917951345443726, "learning_rate": 7.000421477555038e-06, "loss": 0.5112, "step": 8994 }, { "epoch": 1.5605482303955587, "grad_norm": 0.7351046800613403, "learning_rate": 6.9952444823416894e-06, "loss": 0.5686, "step": 8995 }, { "epoch": 1.5607217210270645, "grad_norm": 0.6726951003074646, "learning_rate": 6.9900689963453734e-06, "loss": 0.6199, "step": 8996 }, { "epoch": 1.5608952116585706, "grad_norm": 0.7336606383323669, "learning_rate": 6.9848950201667045e-06, "loss": 0.478, "step": 8997 }, { "epoch": 1.5610687022900764, "grad_norm": 0.9243895411491394, "learning_rate": 6.9797225544061385e-06, "loss": 0.4578, "step": 8998 }, { "epoch": 1.5612421929215823, "grad_norm": 0.7041562795639038, "learning_rate": 6.974551599663944e-06, "loss": 0.4706, "step": 8999 }, { "epoch": 1.5614156835530881, "grad_norm": 0.7184816598892212, "learning_rate": 6.969382156540212e-06, "loss": 0.5055, "step": 9000 }, { "epoch": 1.561589174184594, "grad_norm": 0.6358767151832581, "learning_rate": 6.96421422563486e-06, "loss": 0.3832, "step": 9001 }, { "epoch": 1.5617626648160998, "grad_norm": 0.9298535585403442, "learning_rate": 6.9590478075476475e-06, "loss": 0.3671, "step": 9002 }, { "epoch": 1.5619361554476057, "grad_norm": 0.8680828213691711, "learning_rate": 6.953882902878122e-06, "loss": 0.4718, "step": 9003 }, { "epoch": 1.5621096460791117, "grad_norm": 0.783250093460083, "learning_rate": 6.9487195122256925e-06, "loss": 0.3511, "step": 9004 }, { "epoch": 1.5622831367106176, "grad_norm": 0.7922610640525818, "learning_rate": 6.943557636189571e-06, "loss": 0.4148, "step": 9005 }, { "epoch": 1.5624566273421236, "grad_norm": 0.6474125385284424, "learning_rate": 6.9383972753688e-06, "loss": 0.54, "step": 9006 }, { "epoch": 1.5626301179736295, "grad_norm": 0.6304345726966858, "learning_rate": 6.933238430362239e-06, "loss": 0.5142, "step": 9007 }, { "epoch": 1.5628036086051353, "grad_norm": 0.7479380369186401, "learning_rate": 6.928081101768589e-06, "loss": 0.4434, "step": 9008 }, { "epoch": 1.5629770992366412, "grad_norm": 0.6971745491027832, "learning_rate": 6.922925290186362e-06, "loss": 0.387, "step": 9009 }, { "epoch": 1.563150589868147, "grad_norm": 0.7164735794067383, "learning_rate": 6.9177709962138905e-06, "loss": 0.3875, "step": 9010 }, { "epoch": 1.5633240804996529, "grad_norm": 0.6971276998519897, "learning_rate": 6.912618220449332e-06, "loss": 0.4354, "step": 9011 }, { "epoch": 1.563497571131159, "grad_norm": 1.1500219106674194, "learning_rate": 6.907466963490692e-06, "loss": 0.4811, "step": 9012 }, { "epoch": 1.5636710617626648, "grad_norm": 0.7223638892173767, "learning_rate": 6.9023172259357555e-06, "loss": 0.4069, "step": 9013 }, { "epoch": 1.5638445523941709, "grad_norm": 0.9820303320884705, "learning_rate": 6.897169008382172e-06, "loss": 0.3965, "step": 9014 }, { "epoch": 1.5640180430256767, "grad_norm": 0.6734370589256287, "learning_rate": 6.892022311427393e-06, "loss": 0.552, "step": 9015 }, { "epoch": 1.5641915336571826, "grad_norm": 0.7944628596305847, "learning_rate": 6.88687713566869e-06, "loss": 0.4819, "step": 9016 }, { "epoch": 1.5643650242886884, "grad_norm": 0.9793569445610046, "learning_rate": 6.881733481703186e-06, "loss": 0.4395, "step": 9017 }, { "epoch": 1.5645385149201942, "grad_norm": 0.8730150461196899, "learning_rate": 6.876591350127795e-06, "loss": 0.4686, "step": 9018 }, { "epoch": 1.5647120055517, "grad_norm": 0.9695137739181519, "learning_rate": 6.87145074153927e-06, "loss": 0.4552, "step": 9019 }, { "epoch": 1.5648854961832062, "grad_norm": 0.7851256132125854, "learning_rate": 6.866311656534177e-06, "loss": 0.4071, "step": 9020 }, { "epoch": 1.565058986814712, "grad_norm": 0.7857309579849243, "learning_rate": 6.861174095708934e-06, "loss": 0.3802, "step": 9021 }, { "epoch": 1.5652324774462179, "grad_norm": 0.8684077858924866, "learning_rate": 6.856038059659731e-06, "loss": 0.3909, "step": 9022 }, { "epoch": 1.565405968077724, "grad_norm": 0.9386990666389465, "learning_rate": 6.850903548982637e-06, "loss": 0.4572, "step": 9023 }, { "epoch": 1.5655794587092298, "grad_norm": 0.9225912690162659, "learning_rate": 6.8457705642734994e-06, "loss": 0.4894, "step": 9024 }, { "epoch": 1.5657529493407356, "grad_norm": 1.114875316619873, "learning_rate": 6.840639106128031e-06, "loss": 0.3694, "step": 9025 }, { "epoch": 1.5659264399722415, "grad_norm": 1.1497195959091187, "learning_rate": 6.835509175141713e-06, "loss": 0.5614, "step": 9026 }, { "epoch": 1.5660999306037473, "grad_norm": 0.636439323425293, "learning_rate": 6.830380771909901e-06, "loss": 0.5386, "step": 9027 }, { "epoch": 1.5662734212352531, "grad_norm": 0.8100028038024902, "learning_rate": 6.825253897027746e-06, "loss": 0.4321, "step": 9028 }, { "epoch": 1.5664469118667592, "grad_norm": 0.8574302792549133, "learning_rate": 6.82012855109023e-06, "loss": 0.4034, "step": 9029 }, { "epoch": 1.566620402498265, "grad_norm": 0.8456642627716064, "learning_rate": 6.815004734692146e-06, "loss": 0.4025, "step": 9030 }, { "epoch": 1.5667938931297711, "grad_norm": 1.075279951095581, "learning_rate": 6.8098824484281375e-06, "loss": 0.4342, "step": 9031 }, { "epoch": 1.566967383761277, "grad_norm": 0.8899574279785156, "learning_rate": 6.804761692892627e-06, "loss": 0.4513, "step": 9032 }, { "epoch": 1.5671408743927828, "grad_norm": 0.7178186774253845, "learning_rate": 6.799642468679908e-06, "loss": 0.5368, "step": 9033 }, { "epoch": 1.5673143650242887, "grad_norm": 0.8426361680030823, "learning_rate": 6.794524776384059e-06, "loss": 0.3991, "step": 9034 }, { "epoch": 1.5674878556557945, "grad_norm": 1.0742685794830322, "learning_rate": 6.7894086165989985e-06, "loss": 0.4621, "step": 9035 }, { "epoch": 1.5676613462873004, "grad_norm": 0.7841826677322388, "learning_rate": 6.784293989918454e-06, "loss": 0.3975, "step": 9036 }, { "epoch": 1.5678348369188064, "grad_norm": 0.732846736907959, "learning_rate": 6.779180896935997e-06, "loss": 0.5145, "step": 9037 }, { "epoch": 1.5680083275503123, "grad_norm": 0.9075619578361511, "learning_rate": 6.774069338245002e-06, "loss": 0.4452, "step": 9038 }, { "epoch": 1.5681818181818183, "grad_norm": 0.7694520950317383, "learning_rate": 6.7689593144386745e-06, "loss": 0.5419, "step": 9039 }, { "epoch": 1.5683553088133242, "grad_norm": 0.8426071405410767, "learning_rate": 6.763850826110025e-06, "loss": 0.3242, "step": 9040 }, { "epoch": 1.56852879944483, "grad_norm": 0.9523208141326904, "learning_rate": 6.758743873851921e-06, "loss": 0.327, "step": 9041 }, { "epoch": 1.5687022900763359, "grad_norm": 0.9337752461433411, "learning_rate": 6.753638458257017e-06, "loss": 0.4467, "step": 9042 }, { "epoch": 1.5688757807078417, "grad_norm": 0.6437515616416931, "learning_rate": 6.748534579917807e-06, "loss": 0.5056, "step": 9043 }, { "epoch": 1.5690492713393476, "grad_norm": 0.7449395656585693, "learning_rate": 6.743432239426599e-06, "loss": 0.4554, "step": 9044 }, { "epoch": 1.5692227619708534, "grad_norm": 0.8015483617782593, "learning_rate": 6.7383314373755184e-06, "loss": 0.3591, "step": 9045 }, { "epoch": 1.5693962526023595, "grad_norm": 0.6457865834236145, "learning_rate": 6.733232174356537e-06, "loss": 0.494, "step": 9046 }, { "epoch": 1.5695697432338653, "grad_norm": 0.7131983637809753, "learning_rate": 6.728134450961419e-06, "loss": 0.5073, "step": 9047 }, { "epoch": 1.5697432338653714, "grad_norm": 0.7374870181083679, "learning_rate": 6.723038267781763e-06, "loss": 0.53, "step": 9048 }, { "epoch": 1.5699167244968772, "grad_norm": 0.7074459195137024, "learning_rate": 6.71794362540898e-06, "loss": 0.5764, "step": 9049 }, { "epoch": 1.570090215128383, "grad_norm": 0.799924910068512, "learning_rate": 6.712850524434329e-06, "loss": 0.5123, "step": 9050 }, { "epoch": 1.570263705759889, "grad_norm": 0.7831860780715942, "learning_rate": 6.707758965448843e-06, "loss": 0.4191, "step": 9051 }, { "epoch": 1.5704371963913948, "grad_norm": 0.810056209564209, "learning_rate": 6.7026689490434275e-06, "loss": 0.3857, "step": 9052 }, { "epoch": 1.5706106870229006, "grad_norm": 0.9071383476257324, "learning_rate": 6.6975804758087645e-06, "loss": 0.3643, "step": 9053 }, { "epoch": 1.5707841776544067, "grad_norm": 0.8107170462608337, "learning_rate": 6.692493546335404e-06, "loss": 0.3964, "step": 9054 }, { "epoch": 1.5709576682859125, "grad_norm": 1.23489511013031, "learning_rate": 6.687408161213657e-06, "loss": 0.4023, "step": 9055 }, { "epoch": 1.5711311589174186, "grad_norm": 0.7870251536369324, "learning_rate": 6.682324321033715e-06, "loss": 0.4589, "step": 9056 }, { "epoch": 1.5713046495489245, "grad_norm": 0.6991247534751892, "learning_rate": 6.677242026385553e-06, "loss": 0.4662, "step": 9057 }, { "epoch": 1.5714781401804303, "grad_norm": 0.8230313062667847, "learning_rate": 6.672161277858977e-06, "loss": 0.3594, "step": 9058 }, { "epoch": 1.5716516308119362, "grad_norm": 0.966713547706604, "learning_rate": 6.667082076043609e-06, "loss": 0.419, "step": 9059 }, { "epoch": 1.571825121443442, "grad_norm": 0.7259449362754822, "learning_rate": 6.662004421528909e-06, "loss": 0.5421, "step": 9060 }, { "epoch": 1.5719986120749478, "grad_norm": 1.3742544651031494, "learning_rate": 6.656928314904136e-06, "loss": 0.4172, "step": 9061 }, { "epoch": 1.5721721027064537, "grad_norm": 0.7404026985168457, "learning_rate": 6.651853756758382e-06, "loss": 0.5085, "step": 9062 }, { "epoch": 1.5723455933379598, "grad_norm": 0.9047279953956604, "learning_rate": 6.646780747680552e-06, "loss": 0.4746, "step": 9063 }, { "epoch": 1.5725190839694656, "grad_norm": 1.2601579427719116, "learning_rate": 6.641709288259368e-06, "loss": 0.5427, "step": 9064 }, { "epoch": 1.5726925746009717, "grad_norm": 0.6506098508834839, "learning_rate": 6.636639379083396e-06, "loss": 0.5492, "step": 9065 }, { "epoch": 1.5728660652324775, "grad_norm": 1.5154086351394653, "learning_rate": 6.6315710207409925e-06, "loss": 0.3955, "step": 9066 }, { "epoch": 1.5730395558639834, "grad_norm": 1.506515383720398, "learning_rate": 6.6265042138203505e-06, "loss": 0.3898, "step": 9067 }, { "epoch": 1.5732130464954892, "grad_norm": 0.9609709978103638, "learning_rate": 6.621438958909472e-06, "loss": 0.4889, "step": 9068 }, { "epoch": 1.573386537126995, "grad_norm": 0.9973898530006409, "learning_rate": 6.616375256596197e-06, "loss": 0.329, "step": 9069 }, { "epoch": 1.573560027758501, "grad_norm": 0.8524582386016846, "learning_rate": 6.6113131074681694e-06, "loss": 0.489, "step": 9070 }, { "epoch": 1.573733518390007, "grad_norm": 0.7121954560279846, "learning_rate": 6.606252512112856e-06, "loss": 0.3952, "step": 9071 }, { "epoch": 1.5739070090215128, "grad_norm": 0.7621756196022034, "learning_rate": 6.6011934711175395e-06, "loss": 0.5177, "step": 9072 }, { "epoch": 1.5740804996530189, "grad_norm": 0.810859739780426, "learning_rate": 6.596135985069347e-06, "loss": 0.4792, "step": 9073 }, { "epoch": 1.5742539902845247, "grad_norm": 0.7965677380561829, "learning_rate": 6.591080054555177e-06, "loss": 0.4496, "step": 9074 }, { "epoch": 1.5744274809160306, "grad_norm": 0.7577996253967285, "learning_rate": 6.586025680161799e-06, "loss": 0.5212, "step": 9075 }, { "epoch": 1.5746009715475364, "grad_norm": 0.7506447434425354, "learning_rate": 6.580972862475769e-06, "loss": 0.4076, "step": 9076 }, { "epoch": 1.5747744621790423, "grad_norm": 1.142518401145935, "learning_rate": 6.575921602083477e-06, "loss": 0.5144, "step": 9077 }, { "epoch": 1.5749479528105481, "grad_norm": 0.9225130677223206, "learning_rate": 6.570871899571119e-06, "loss": 0.4324, "step": 9078 }, { "epoch": 1.5751214434420542, "grad_norm": 0.6513336896896362, "learning_rate": 6.565823755524732e-06, "loss": 0.4908, "step": 9079 }, { "epoch": 1.57529493407356, "grad_norm": 0.613537073135376, "learning_rate": 6.56077717053015e-06, "loss": 0.5026, "step": 9080 }, { "epoch": 1.5754684247050659, "grad_norm": 0.7019462585449219, "learning_rate": 6.555732145173037e-06, "loss": 0.5442, "step": 9081 }, { "epoch": 1.575641915336572, "grad_norm": 0.9109891057014465, "learning_rate": 6.550688680038871e-06, "loss": 0.3792, "step": 9082 }, { "epoch": 1.5758154059680778, "grad_norm": 0.7040951251983643, "learning_rate": 6.545646775712964e-06, "loss": 0.5659, "step": 9083 }, { "epoch": 1.5759888965995836, "grad_norm": 0.7814924120903015, "learning_rate": 6.5406064327804165e-06, "loss": 0.4725, "step": 9084 }, { "epoch": 1.5761623872310895, "grad_norm": 1.5051835775375366, "learning_rate": 6.53556765182618e-06, "loss": 0.4648, "step": 9085 }, { "epoch": 1.5763358778625953, "grad_norm": 0.7355524897575378, "learning_rate": 6.5305304334350075e-06, "loss": 0.5163, "step": 9086 }, { "epoch": 1.5765093684941012, "grad_norm": 0.8153585195541382, "learning_rate": 6.525494778191473e-06, "loss": 0.418, "step": 9087 }, { "epoch": 1.5766828591256072, "grad_norm": 0.714627742767334, "learning_rate": 6.520460686679964e-06, "loss": 0.3392, "step": 9088 }, { "epoch": 1.576856349757113, "grad_norm": 0.6588917374610901, "learning_rate": 6.515428159484707e-06, "loss": 0.4119, "step": 9089 }, { "epoch": 1.5770298403886192, "grad_norm": 0.7315980792045593, "learning_rate": 6.510397197189724e-06, "loss": 0.3981, "step": 9090 }, { "epoch": 1.577203331020125, "grad_norm": 0.8116651177406311, "learning_rate": 6.505367800378856e-06, "loss": 0.3419, "step": 9091 }, { "epoch": 1.5773768216516308, "grad_norm": 0.7682046294212341, "learning_rate": 6.500339969635794e-06, "loss": 0.4075, "step": 9092 }, { "epoch": 1.5775503122831367, "grad_norm": 0.7009580731391907, "learning_rate": 6.495313705543997e-06, "loss": 0.3578, "step": 9093 }, { "epoch": 1.5777238029146425, "grad_norm": 0.8438929915428162, "learning_rate": 6.490289008686786e-06, "loss": 0.4176, "step": 9094 }, { "epoch": 1.5778972935461484, "grad_norm": 0.7142999768257141, "learning_rate": 6.485265879647269e-06, "loss": 0.5607, "step": 9095 }, { "epoch": 1.5780707841776545, "grad_norm": 0.7572633624076843, "learning_rate": 6.480244319008411e-06, "loss": 0.5898, "step": 9096 }, { "epoch": 1.5782442748091603, "grad_norm": 0.48166990280151367, "learning_rate": 6.475224327352938e-06, "loss": 0.5698, "step": 9097 }, { "epoch": 1.5784177654406664, "grad_norm": 0.8895741105079651, "learning_rate": 6.470205905263449e-06, "loss": 0.5183, "step": 9098 }, { "epoch": 1.5785912560721722, "grad_norm": 0.8576816916465759, "learning_rate": 6.4651890533223294e-06, "loss": 0.4637, "step": 9099 }, { "epoch": 1.578764746703678, "grad_norm": 0.9381229281425476, "learning_rate": 6.460173772111791e-06, "loss": 0.4706, "step": 9100 }, { "epoch": 1.578938237335184, "grad_norm": 0.8738219738006592, "learning_rate": 6.455160062213857e-06, "loss": 0.3947, "step": 9101 }, { "epoch": 1.5791117279666897, "grad_norm": 0.873576819896698, "learning_rate": 6.450147924210395e-06, "loss": 0.4266, "step": 9102 }, { "epoch": 1.5792852185981956, "grad_norm": 1.0661113262176514, "learning_rate": 6.44513735868304e-06, "loss": 0.4674, "step": 9103 }, { "epoch": 1.5794587092297014, "grad_norm": 0.8318861126899719, "learning_rate": 6.440128366213297e-06, "loss": 0.5085, "step": 9104 }, { "epoch": 1.5796321998612075, "grad_norm": 1.5399738550186157, "learning_rate": 6.435120947382456e-06, "loss": 0.4467, "step": 9105 }, { "epoch": 1.5798056904927134, "grad_norm": 0.8369775414466858, "learning_rate": 6.430115102771637e-06, "loss": 0.4502, "step": 9106 }, { "epoch": 1.5799791811242194, "grad_norm": 1.0189540386199951, "learning_rate": 6.425110832961765e-06, "loss": 0.3452, "step": 9107 }, { "epoch": 1.5801526717557253, "grad_norm": 1.0672706365585327, "learning_rate": 6.420108138533607e-06, "loss": 0.3255, "step": 9108 }, { "epoch": 1.5803261623872311, "grad_norm": 0.7228703498840332, "learning_rate": 6.415107020067721e-06, "loss": 0.3575, "step": 9109 }, { "epoch": 1.580499653018737, "grad_norm": 0.7414281368255615, "learning_rate": 6.410107478144496e-06, "loss": 0.3776, "step": 9110 }, { "epoch": 1.5806731436502428, "grad_norm": 1.7083401679992676, "learning_rate": 6.405109513344126e-06, "loss": 0.4216, "step": 9111 }, { "epoch": 1.5808466342817487, "grad_norm": 0.7078840732574463, "learning_rate": 6.400113126246645e-06, "loss": 0.5028, "step": 9112 }, { "epoch": 1.5810201249132547, "grad_norm": 1.244852900505066, "learning_rate": 6.395118317431883e-06, "loss": 0.3494, "step": 9113 }, { "epoch": 1.5811936155447606, "grad_norm": 1.4125138521194458, "learning_rate": 6.390125087479493e-06, "loss": 0.3657, "step": 9114 }, { "epoch": 1.5813671061762666, "grad_norm": 0.834786593914032, "learning_rate": 6.385133436968946e-06, "loss": 0.4061, "step": 9115 }, { "epoch": 1.5815405968077725, "grad_norm": 1.4661144018173218, "learning_rate": 6.380143366479521e-06, "loss": 0.4791, "step": 9116 }, { "epoch": 1.5817140874392783, "grad_norm": 0.7873803377151489, "learning_rate": 6.375154876590335e-06, "loss": 0.5126, "step": 9117 }, { "epoch": 1.5818875780707842, "grad_norm": 0.863389790058136, "learning_rate": 6.370167967880303e-06, "loss": 0.3486, "step": 9118 }, { "epoch": 1.58206106870229, "grad_norm": 0.9513432383537292, "learning_rate": 6.365182640928158e-06, "loss": 0.3654, "step": 9119 }, { "epoch": 1.5822345593337959, "grad_norm": 0.6658217906951904, "learning_rate": 6.360198896312451e-06, "loss": 0.4583, "step": 9120 }, { "epoch": 1.5824080499653017, "grad_norm": 0.8609620332717896, "learning_rate": 6.355216734611567e-06, "loss": 0.4985, "step": 9121 }, { "epoch": 1.5825815405968078, "grad_norm": 0.9660351872444153, "learning_rate": 6.350236156403666e-06, "loss": 0.4331, "step": 9122 }, { "epoch": 1.5827550312283136, "grad_norm": 0.7391103506088257, "learning_rate": 6.345257162266773e-06, "loss": 0.5355, "step": 9123 }, { "epoch": 1.5829285218598197, "grad_norm": 1.0405844449996948, "learning_rate": 6.3402797527786904e-06, "loss": 0.4579, "step": 9124 }, { "epoch": 1.5831020124913255, "grad_norm": 0.7017146348953247, "learning_rate": 6.335303928517071e-06, "loss": 0.3473, "step": 9125 }, { "epoch": 1.5832755031228314, "grad_norm": 0.8399113416671753, "learning_rate": 6.330329690059342e-06, "loss": 0.4696, "step": 9126 }, { "epoch": 1.5834489937543372, "grad_norm": 0.8594346046447754, "learning_rate": 6.3253570379827864e-06, "loss": 0.351, "step": 9127 }, { "epoch": 1.583622484385843, "grad_norm": 1.07136070728302, "learning_rate": 6.32038597286448e-06, "loss": 0.4559, "step": 9128 }, { "epoch": 1.583795975017349, "grad_norm": 0.8304911851882935, "learning_rate": 6.315416495281323e-06, "loss": 0.4612, "step": 9129 }, { "epoch": 1.583969465648855, "grad_norm": 0.9867230653762817, "learning_rate": 6.31044860581002e-06, "loss": 0.4049, "step": 9130 }, { "epoch": 1.5841429562803608, "grad_norm": 0.960951030254364, "learning_rate": 6.305482305027122e-06, "loss": 0.4299, "step": 9131 }, { "epoch": 1.584316446911867, "grad_norm": 0.8653309345245361, "learning_rate": 6.300517593508944e-06, "loss": 0.46, "step": 9132 }, { "epoch": 1.5844899375433728, "grad_norm": 0.7783135175704956, "learning_rate": 6.295554471831671e-06, "loss": 0.4358, "step": 9133 }, { "epoch": 1.5846634281748786, "grad_norm": 1.077301263809204, "learning_rate": 6.290592940571269e-06, "loss": 0.3718, "step": 9134 }, { "epoch": 1.5848369188063844, "grad_norm": 1.4728044271469116, "learning_rate": 6.2856330003035324e-06, "loss": 0.4374, "step": 9135 }, { "epoch": 1.5850104094378903, "grad_norm": 0.8031530976295471, "learning_rate": 6.280674651604059e-06, "loss": 0.6, "step": 9136 }, { "epoch": 1.5851839000693961, "grad_norm": 0.8494324088096619, "learning_rate": 6.275717895048285e-06, "loss": 0.5836, "step": 9137 }, { "epoch": 1.5853573907009022, "grad_norm": 0.7558203935623169, "learning_rate": 6.270762731211442e-06, "loss": 0.46, "step": 9138 }, { "epoch": 1.585530881332408, "grad_norm": 0.5446569323539734, "learning_rate": 6.2658091606685745e-06, "loss": 0.5767, "step": 9139 }, { "epoch": 1.585704371963914, "grad_norm": 0.7430805563926697, "learning_rate": 6.260857183994564e-06, "loss": 0.5164, "step": 9140 }, { "epoch": 1.58587786259542, "grad_norm": 1.3108949661254883, "learning_rate": 6.2559068017640865e-06, "loss": 0.3945, "step": 9141 }, { "epoch": 1.5860513532269258, "grad_norm": 0.6558985114097595, "learning_rate": 6.25095801455164e-06, "loss": 0.4688, "step": 9142 }, { "epoch": 1.5862248438584317, "grad_norm": 0.7553314566612244, "learning_rate": 6.246010822931532e-06, "loss": 0.4707, "step": 9143 }, { "epoch": 1.5863983344899375, "grad_norm": 0.7920790910720825, "learning_rate": 6.241065227477905e-06, "loss": 0.4611, "step": 9144 }, { "epoch": 1.5865718251214433, "grad_norm": 1.4178071022033691, "learning_rate": 6.2361212287646774e-06, "loss": 0.3625, "step": 9145 }, { "epoch": 1.5867453157529492, "grad_norm": 0.9829283356666565, "learning_rate": 6.231178827365627e-06, "loss": 0.4968, "step": 9146 }, { "epoch": 1.5869188063844553, "grad_norm": 0.7507148385047913, "learning_rate": 6.2262380238543185e-06, "loss": 0.4103, "step": 9147 }, { "epoch": 1.587092297015961, "grad_norm": 0.830384373664856, "learning_rate": 6.221298818804136e-06, "loss": 0.3536, "step": 9148 }, { "epoch": 1.5872657876474672, "grad_norm": 0.6961411237716675, "learning_rate": 6.216361212788276e-06, "loss": 0.5941, "step": 9149 }, { "epoch": 1.587439278278973, "grad_norm": 0.6326931715011597, "learning_rate": 6.211425206379769e-06, "loss": 0.4996, "step": 9150 }, { "epoch": 1.5876127689104789, "grad_norm": 0.8804559707641602, "learning_rate": 6.206490800151421e-06, "loss": 0.4768, "step": 9151 }, { "epoch": 1.5877862595419847, "grad_norm": 0.803973376750946, "learning_rate": 6.201557994675895e-06, "loss": 0.4097, "step": 9152 }, { "epoch": 1.5879597501734906, "grad_norm": 0.6925197839736938, "learning_rate": 6.196626790525635e-06, "loss": 0.4462, "step": 9153 }, { "epoch": 1.5881332408049964, "grad_norm": 0.6980529427528381, "learning_rate": 6.191697188272933e-06, "loss": 0.3606, "step": 9154 }, { "epoch": 1.5883067314365025, "grad_norm": 0.8955292701721191, "learning_rate": 6.186769188489852e-06, "loss": 0.5315, "step": 9155 }, { "epoch": 1.5884802220680083, "grad_norm": 0.6994858980178833, "learning_rate": 6.181842791748307e-06, "loss": 0.5776, "step": 9156 }, { "epoch": 1.5886537126995144, "grad_norm": 0.7811257243156433, "learning_rate": 6.1769179986200065e-06, "loss": 0.4552, "step": 9157 }, { "epoch": 1.5888272033310202, "grad_norm": 0.7694811224937439, "learning_rate": 6.17199480967648e-06, "loss": 0.4657, "step": 9158 }, { "epoch": 1.589000693962526, "grad_norm": 0.8114852905273438, "learning_rate": 6.167073225489062e-06, "loss": 0.3735, "step": 9159 }, { "epoch": 1.589174184594032, "grad_norm": 0.7839207053184509, "learning_rate": 6.162153246628921e-06, "loss": 0.3959, "step": 9160 }, { "epoch": 1.5893476752255378, "grad_norm": 0.5622174739837646, "learning_rate": 6.157234873667021e-06, "loss": 0.5399, "step": 9161 }, { "epoch": 1.5895211658570436, "grad_norm": 0.7764237523078918, "learning_rate": 6.152318107174144e-06, "loss": 0.4785, "step": 9162 }, { "epoch": 1.5896946564885495, "grad_norm": 0.7497486472129822, "learning_rate": 6.1474029477208864e-06, "loss": 0.5824, "step": 9163 }, { "epoch": 1.5898681471200555, "grad_norm": 1.0032864809036255, "learning_rate": 6.142489395877651e-06, "loss": 0.3083, "step": 9164 }, { "epoch": 1.5900416377515614, "grad_norm": 0.8805416226387024, "learning_rate": 6.137577452214676e-06, "loss": 0.4022, "step": 9165 }, { "epoch": 1.5902151283830674, "grad_norm": 0.8548498749732971, "learning_rate": 6.132667117301989e-06, "loss": 0.4919, "step": 9166 }, { "epoch": 1.5903886190145733, "grad_norm": 1.2921521663665771, "learning_rate": 6.127758391709442e-06, "loss": 0.3438, "step": 9167 }, { "epoch": 1.5905621096460791, "grad_norm": 0.9614955186843872, "learning_rate": 6.122851276006692e-06, "loss": 0.3766, "step": 9168 }, { "epoch": 1.590735600277585, "grad_norm": 1.2059763669967651, "learning_rate": 6.117945770763228e-06, "loss": 0.4922, "step": 9169 }, { "epoch": 1.5909090909090908, "grad_norm": 0.8131858706474304, "learning_rate": 6.113041876548333e-06, "loss": 0.4535, "step": 9170 }, { "epoch": 1.5910825815405967, "grad_norm": 0.736976683139801, "learning_rate": 6.1081395939311086e-06, "loss": 0.5316, "step": 9171 }, { "epoch": 1.5912560721721027, "grad_norm": 0.8057016134262085, "learning_rate": 6.103238923480468e-06, "loss": 0.4275, "step": 9172 }, { "epoch": 1.5914295628036086, "grad_norm": 0.9016966819763184, "learning_rate": 6.098339865765153e-06, "loss": 0.4287, "step": 9173 }, { "epoch": 1.5916030534351147, "grad_norm": 0.9611093401908875, "learning_rate": 6.093442421353683e-06, "loss": 0.5229, "step": 9174 }, { "epoch": 1.5917765440666205, "grad_norm": 0.6343430280685425, "learning_rate": 6.088546590814432e-06, "loss": 0.5574, "step": 9175 }, { "epoch": 1.5919500346981263, "grad_norm": 1.0098336935043335, "learning_rate": 6.083652374715561e-06, "loss": 0.608, "step": 9176 }, { "epoch": 1.5921235253296322, "grad_norm": 0.6942441463470459, "learning_rate": 6.078759773625045e-06, "loss": 0.4966, "step": 9177 }, { "epoch": 1.592297015961138, "grad_norm": 0.7739158272743225, "learning_rate": 6.073868788110673e-06, "loss": 0.4349, "step": 9178 }, { "epoch": 1.5924705065926439, "grad_norm": 0.6788617968559265, "learning_rate": 6.068979418740067e-06, "loss": 0.4912, "step": 9179 }, { "epoch": 1.5926439972241497, "grad_norm": 0.70098876953125, "learning_rate": 6.064091666080621e-06, "loss": 0.4116, "step": 9180 }, { "epoch": 1.5928174878556558, "grad_norm": 0.8632174730300903, "learning_rate": 6.059205530699581e-06, "loss": 0.4497, "step": 9181 }, { "epoch": 1.5929909784871616, "grad_norm": 0.9082633256912231, "learning_rate": 6.054321013163978e-06, "loss": 0.3707, "step": 9182 }, { "epoch": 1.5931644691186677, "grad_norm": 0.8024830222129822, "learning_rate": 6.049438114040682e-06, "loss": 0.4788, "step": 9183 }, { "epoch": 1.5933379597501736, "grad_norm": 0.7636101245880127, "learning_rate": 6.044556833896338e-06, "loss": 0.4121, "step": 9184 }, { "epoch": 1.5935114503816794, "grad_norm": 0.7734701633453369, "learning_rate": 6.03967717329744e-06, "loss": 0.3143, "step": 9185 }, { "epoch": 1.5936849410131853, "grad_norm": 0.7777693867683411, "learning_rate": 6.034799132810274e-06, "loss": 0.4668, "step": 9186 }, { "epoch": 1.593858431644691, "grad_norm": 0.7053585052490234, "learning_rate": 6.029922713000935e-06, "loss": 0.5001, "step": 9187 }, { "epoch": 1.594031922276197, "grad_norm": 0.8492000102996826, "learning_rate": 6.025047914435349e-06, "loss": 0.4404, "step": 9188 }, { "epoch": 1.594205412907703, "grad_norm": 0.7375931739807129, "learning_rate": 6.020174737679236e-06, "loss": 0.3701, "step": 9189 }, { "epoch": 1.5943789035392089, "grad_norm": 0.7666688561439514, "learning_rate": 6.015303183298135e-06, "loss": 0.3666, "step": 9190 }, { "epoch": 1.594552394170715, "grad_norm": 0.9085474610328674, "learning_rate": 6.01043325185739e-06, "loss": 0.4756, "step": 9191 }, { "epoch": 1.5947258848022208, "grad_norm": 0.7535820007324219, "learning_rate": 6.005564943922179e-06, "loss": 0.5159, "step": 9192 }, { "epoch": 1.5948993754337266, "grad_norm": 0.8561382293701172, "learning_rate": 6.000698260057452e-06, "loss": 0.3917, "step": 9193 }, { "epoch": 1.5950728660652325, "grad_norm": 0.6852325201034546, "learning_rate": 5.995833200828007e-06, "loss": 0.5533, "step": 9194 }, { "epoch": 1.5952463566967383, "grad_norm": 0.9255613088607788, "learning_rate": 5.990969766798433e-06, "loss": 0.4999, "step": 9195 }, { "epoch": 1.5954198473282442, "grad_norm": 0.8838304877281189, "learning_rate": 5.9861079585331535e-06, "loss": 0.3906, "step": 9196 }, { "epoch": 1.5955933379597502, "grad_norm": 0.8277800679206848, "learning_rate": 5.981247776596364e-06, "loss": 0.4397, "step": 9197 }, { "epoch": 1.595766828591256, "grad_norm": 0.7583249807357788, "learning_rate": 5.97638922155211e-06, "loss": 0.5585, "step": 9198 }, { "epoch": 1.595940319222762, "grad_norm": 0.7804730534553528, "learning_rate": 5.971532293964226e-06, "loss": 0.4011, "step": 9199 }, { "epoch": 1.596113809854268, "grad_norm": 0.7163670063018799, "learning_rate": 5.9666769943963675e-06, "loss": 0.4969, "step": 9200 }, { "epoch": 1.5962873004857738, "grad_norm": 0.7205650210380554, "learning_rate": 5.96182332341199e-06, "loss": 0.4364, "step": 9201 }, { "epoch": 1.5964607911172797, "grad_norm": 0.7018092274665833, "learning_rate": 5.956971281574386e-06, "loss": 0.5375, "step": 9202 }, { "epoch": 1.5966342817487855, "grad_norm": 0.6795221567153931, "learning_rate": 5.952120869446614e-06, "loss": 0.5612, "step": 9203 }, { "epoch": 1.5968077723802914, "grad_norm": 0.771239697933197, "learning_rate": 5.947272087591591e-06, "loss": 0.5967, "step": 9204 }, { "epoch": 1.5969812630117972, "grad_norm": 0.7565046548843384, "learning_rate": 5.942424936572017e-06, "loss": 0.4929, "step": 9205 }, { "epoch": 1.5971547536433033, "grad_norm": 0.8558209538459778, "learning_rate": 5.9375794169504095e-06, "loss": 0.494, "step": 9206 }, { "epoch": 1.5973282442748091, "grad_norm": 0.6613254547119141, "learning_rate": 5.932735529289091e-06, "loss": 0.4871, "step": 9207 }, { "epoch": 1.5975017349063152, "grad_norm": 0.8535557985305786, "learning_rate": 5.927893274150214e-06, "loss": 0.4371, "step": 9208 }, { "epoch": 1.597675225537821, "grad_norm": 0.6975634694099426, "learning_rate": 5.9230526520957154e-06, "loss": 0.4816, "step": 9209 }, { "epoch": 1.5978487161693269, "grad_norm": 0.8697851896286011, "learning_rate": 5.918213663687362e-06, "loss": 0.4951, "step": 9210 }, { "epoch": 1.5980222068008327, "grad_norm": 0.7481193542480469, "learning_rate": 5.913376309486716e-06, "loss": 0.5833, "step": 9211 }, { "epoch": 1.5981956974323386, "grad_norm": 0.6353183388710022, "learning_rate": 5.908540590055168e-06, "loss": 0.4703, "step": 9212 }, { "epoch": 1.5983691880638444, "grad_norm": 0.9413015246391296, "learning_rate": 5.903706505953906e-06, "loss": 0.3647, "step": 9213 }, { "epoch": 1.5985426786953505, "grad_norm": 0.8919071555137634, "learning_rate": 5.898874057743926e-06, "loss": 0.4026, "step": 9214 }, { "epoch": 1.5987161693268563, "grad_norm": 1.060607671737671, "learning_rate": 5.894043245986045e-06, "loss": 0.5665, "step": 9215 }, { "epoch": 1.5988896599583624, "grad_norm": 0.8731929063796997, "learning_rate": 5.889214071240876e-06, "loss": 0.4706, "step": 9216 }, { "epoch": 1.5990631505898683, "grad_norm": 0.7572540640830994, "learning_rate": 5.884386534068864e-06, "loss": 0.4324, "step": 9217 }, { "epoch": 1.599236641221374, "grad_norm": 0.6952676773071289, "learning_rate": 5.879560635030242e-06, "loss": 0.4614, "step": 9218 }, { "epoch": 1.59941013185288, "grad_norm": 0.8549386858940125, "learning_rate": 5.87473637468506e-06, "loss": 0.4067, "step": 9219 }, { "epoch": 1.5995836224843858, "grad_norm": 0.8678174018859863, "learning_rate": 5.869913753593175e-06, "loss": 0.4244, "step": 9220 }, { "epoch": 1.5997571131158916, "grad_norm": 0.8675947785377502, "learning_rate": 5.865092772314276e-06, "loss": 0.3524, "step": 9221 }, { "epoch": 1.5999306037473975, "grad_norm": 1.4709620475769043, "learning_rate": 5.860273431407821e-06, "loss": 0.4775, "step": 9222 }, { "epoch": 1.6001040943789036, "grad_norm": 0.8937550187110901, "learning_rate": 5.855455731433115e-06, "loss": 0.3348, "step": 9223 }, { "epoch": 1.6002775850104094, "grad_norm": 0.6757932901382446, "learning_rate": 5.8506396729492455e-06, "loss": 0.4922, "step": 9224 }, { "epoch": 1.6004510756419155, "grad_norm": 0.9056023359298706, "learning_rate": 5.845825256515145e-06, "loss": 0.5139, "step": 9225 }, { "epoch": 1.6006245662734213, "grad_norm": 1.0853079557418823, "learning_rate": 5.841012482689501e-06, "loss": 0.4449, "step": 9226 }, { "epoch": 1.6007980569049272, "grad_norm": 0.7655895948410034, "learning_rate": 5.836201352030863e-06, "loss": 0.5815, "step": 9227 }, { "epoch": 1.600971547536433, "grad_norm": 1.3326359987258911, "learning_rate": 5.831391865097564e-06, "loss": 0.4553, "step": 9228 }, { "epoch": 1.6011450381679388, "grad_norm": 0.7713728547096252, "learning_rate": 5.82658402244775e-06, "loss": 0.527, "step": 9229 }, { "epoch": 1.6013185287994447, "grad_norm": 10.568388938903809, "learning_rate": 5.821777824639365e-06, "loss": 0.5093, "step": 9230 }, { "epoch": 1.6014920194309508, "grad_norm": 0.7469318509101868, "learning_rate": 5.816973272230196e-06, "loss": 0.4376, "step": 9231 }, { "epoch": 1.6016655100624566, "grad_norm": 1.014448881149292, "learning_rate": 5.812170365777801e-06, "loss": 0.4911, "step": 9232 }, { "epoch": 1.6018390006939627, "grad_norm": 0.9525623917579651, "learning_rate": 5.807369105839569e-06, "loss": 0.3952, "step": 9233 }, { "epoch": 1.6020124913254685, "grad_norm": 1.4405517578125, "learning_rate": 5.802569492972687e-06, "loss": 0.5205, "step": 9234 }, { "epoch": 1.6021859819569744, "grad_norm": 0.782208263874054, "learning_rate": 5.797771527734155e-06, "loss": 0.3975, "step": 9235 }, { "epoch": 1.6023594725884802, "grad_norm": 0.7666157484054565, "learning_rate": 5.792975210680793e-06, "loss": 0.3898, "step": 9236 }, { "epoch": 1.602532963219986, "grad_norm": 0.8280892968177795, "learning_rate": 5.7881805423692105e-06, "loss": 0.3287, "step": 9237 }, { "epoch": 1.602706453851492, "grad_norm": 0.8459081053733826, "learning_rate": 5.783387523355839e-06, "loss": 0.5415, "step": 9238 }, { "epoch": 1.6028799444829978, "grad_norm": 0.8862437009811401, "learning_rate": 5.778596154196902e-06, "loss": 0.4484, "step": 9239 }, { "epoch": 1.6030534351145038, "grad_norm": 0.9152216911315918, "learning_rate": 5.773806435448459e-06, "loss": 0.4476, "step": 9240 }, { "epoch": 1.6032269257460097, "grad_norm": 1.9208900928497314, "learning_rate": 5.769018367666357e-06, "loss": 0.4103, "step": 9241 }, { "epoch": 1.6034004163775157, "grad_norm": 0.8232020735740662, "learning_rate": 5.764231951406256e-06, "loss": 0.3483, "step": 9242 }, { "epoch": 1.6035739070090216, "grad_norm": 1.0295816659927368, "learning_rate": 5.759447187223617e-06, "loss": 0.4777, "step": 9243 }, { "epoch": 1.6037473976405274, "grad_norm": 0.7637811899185181, "learning_rate": 5.75466407567374e-06, "loss": 0.411, "step": 9244 }, { "epoch": 1.6039208882720333, "grad_norm": 0.6895084381103516, "learning_rate": 5.749882617311682e-06, "loss": 0.5614, "step": 9245 }, { "epoch": 1.6040943789035391, "grad_norm": 0.8308559060096741, "learning_rate": 5.745102812692358e-06, "loss": 0.3559, "step": 9246 }, { "epoch": 1.604267869535045, "grad_norm": 1.082716464996338, "learning_rate": 5.740324662370462e-06, "loss": 0.345, "step": 9247 }, { "epoch": 1.604441360166551, "grad_norm": 1.375120759010315, "learning_rate": 5.735548166900506e-06, "loss": 0.3909, "step": 9248 }, { "epoch": 1.6046148507980569, "grad_norm": 0.8588771820068359, "learning_rate": 5.7307733268368e-06, "loss": 0.3923, "step": 9249 }, { "epoch": 1.604788341429563, "grad_norm": 0.931693434715271, "learning_rate": 5.72600014273349e-06, "loss": 0.3853, "step": 9250 }, { "epoch": 1.6049618320610688, "grad_norm": 1.8362689018249512, "learning_rate": 5.721228615144481e-06, "loss": 0.4402, "step": 9251 }, { "epoch": 1.6051353226925746, "grad_norm": 0.8088194131851196, "learning_rate": 5.716458744623536e-06, "loss": 0.4089, "step": 9252 }, { "epoch": 1.6053088133240805, "grad_norm": 0.77403724193573, "learning_rate": 5.711690531724192e-06, "loss": 0.3812, "step": 9253 }, { "epoch": 1.6054823039555863, "grad_norm": 0.6571146845817566, "learning_rate": 5.706923976999825e-06, "loss": 0.5425, "step": 9254 }, { "epoch": 1.6056557945870922, "grad_norm": 0.7870723605155945, "learning_rate": 5.702159081003571e-06, "loss": 0.374, "step": 9255 }, { "epoch": 1.6058292852185982, "grad_norm": 1.3615376949310303, "learning_rate": 5.697395844288423e-06, "loss": 0.3523, "step": 9256 }, { "epoch": 1.606002775850104, "grad_norm": 0.7027696371078491, "learning_rate": 5.692634267407151e-06, "loss": 0.4547, "step": 9257 }, { "epoch": 1.60617626648161, "grad_norm": 0.6760240197181702, "learning_rate": 5.687874350912346e-06, "loss": 0.5397, "step": 9258 }, { "epoch": 1.606349757113116, "grad_norm": 0.6329642534255981, "learning_rate": 5.683116095356391e-06, "loss": 0.4252, "step": 9259 }, { "epoch": 1.6065232477446219, "grad_norm": 0.8432753086090088, "learning_rate": 5.678359501291504e-06, "loss": 0.4285, "step": 9260 }, { "epoch": 1.6066967383761277, "grad_norm": 0.7299416661262512, "learning_rate": 5.673604569269684e-06, "loss": 0.4967, "step": 9261 }, { "epoch": 1.6068702290076335, "grad_norm": 0.7422299981117249, "learning_rate": 5.668851299842739e-06, "loss": 0.4343, "step": 9262 }, { "epoch": 1.6070437196391394, "grad_norm": 1.866463541984558, "learning_rate": 5.664099693562315e-06, "loss": 0.4298, "step": 9263 }, { "epoch": 1.6072172102706452, "grad_norm": 0.8768714666366577, "learning_rate": 5.659349750979814e-06, "loss": 0.3596, "step": 9264 }, { "epoch": 1.6073907009021513, "grad_norm": 0.7241788506507874, "learning_rate": 5.6546014726464906e-06, "loss": 0.4443, "step": 9265 }, { "epoch": 1.6075641915336571, "grad_norm": 0.7192736864089966, "learning_rate": 5.6498548591133725e-06, "loss": 0.3469, "step": 9266 }, { "epoch": 1.6077376821651632, "grad_norm": 0.7598190903663635, "learning_rate": 5.645109910931335e-06, "loss": 0.4667, "step": 9267 }, { "epoch": 1.607911172796669, "grad_norm": 0.7664093375205994, "learning_rate": 5.6403666286510065e-06, "loss": 0.5514, "step": 9268 }, { "epoch": 1.608084663428175, "grad_norm": 0.7578974366188049, "learning_rate": 5.635625012822869e-06, "loss": 0.3986, "step": 9269 }, { "epoch": 1.6082581540596808, "grad_norm": 0.7644089460372925, "learning_rate": 5.630885063997187e-06, "loss": 0.3786, "step": 9270 }, { "epoch": 1.6084316446911866, "grad_norm": 0.731870174407959, "learning_rate": 5.626146782724036e-06, "loss": 0.4619, "step": 9271 }, { "epoch": 1.6086051353226924, "grad_norm": 0.6918858289718628, "learning_rate": 5.621410169553292e-06, "loss": 0.5197, "step": 9272 }, { "epoch": 1.6087786259541985, "grad_norm": 1.2198553085327148, "learning_rate": 5.616675225034667e-06, "loss": 0.5334, "step": 9273 }, { "epoch": 1.6089521165857044, "grad_norm": 0.6842505931854248, "learning_rate": 5.6119419497176275e-06, "loss": 0.4431, "step": 9274 }, { "epoch": 1.6091256072172104, "grad_norm": 0.720130980014801, "learning_rate": 5.607210344151497e-06, "loss": 0.3934, "step": 9275 }, { "epoch": 1.6092990978487163, "grad_norm": 1.2143728733062744, "learning_rate": 5.6024804088853775e-06, "loss": 0.4303, "step": 9276 }, { "epoch": 1.6094725884802221, "grad_norm": 0.8116788268089294, "learning_rate": 5.597752144468185e-06, "loss": 0.4598, "step": 9277 }, { "epoch": 1.609646079111728, "grad_norm": 0.9019766449928284, "learning_rate": 5.59302555144863e-06, "loss": 0.4512, "step": 9278 }, { "epoch": 1.6098195697432338, "grad_norm": 0.8071489930152893, "learning_rate": 5.588300630375252e-06, "loss": 0.3835, "step": 9279 }, { "epoch": 1.6099930603747397, "grad_norm": 1.1320544481277466, "learning_rate": 5.5835773817963814e-06, "loss": 0.4166, "step": 9280 }, { "epoch": 1.6101665510062455, "grad_norm": 0.8258172869682312, "learning_rate": 5.57885580626015e-06, "loss": 0.4205, "step": 9281 }, { "epoch": 1.6103400416377516, "grad_norm": 0.739963948726654, "learning_rate": 5.574135904314504e-06, "loss": 0.4767, "step": 9282 }, { "epoch": 1.6105135322692574, "grad_norm": 0.7183013558387756, "learning_rate": 5.5694176765072005e-06, "loss": 0.5292, "step": 9283 }, { "epoch": 1.6106870229007635, "grad_norm": 1.0741134881973267, "learning_rate": 5.5647011233857915e-06, "loss": 0.4429, "step": 9284 }, { "epoch": 1.6108605135322693, "grad_norm": 0.8927547335624695, "learning_rate": 5.559986245497637e-06, "loss": 0.3763, "step": 9285 }, { "epoch": 1.6110340041637752, "grad_norm": 0.9037680625915527, "learning_rate": 5.555273043389906e-06, "loss": 0.4192, "step": 9286 }, { "epoch": 1.611207494795281, "grad_norm": 0.8425066471099854, "learning_rate": 5.55056151760956e-06, "loss": 0.4099, "step": 9287 }, { "epoch": 1.6113809854267869, "grad_norm": 0.7884397506713867, "learning_rate": 5.545851668703397e-06, "loss": 0.4019, "step": 9288 }, { "epoch": 1.6115544760582927, "grad_norm": 0.6788613200187683, "learning_rate": 5.541143497217989e-06, "loss": 0.4137, "step": 9289 }, { "epoch": 1.6117279666897988, "grad_norm": 0.6453644037246704, "learning_rate": 5.536437003699724e-06, "loss": 0.5056, "step": 9290 }, { "epoch": 1.6119014573213046, "grad_norm": 0.7041041254997253, "learning_rate": 5.531732188694794e-06, "loss": 0.5112, "step": 9291 }, { "epoch": 1.6120749479528107, "grad_norm": 0.8735839128494263, "learning_rate": 5.527029052749216e-06, "loss": 0.3748, "step": 9292 }, { "epoch": 1.6122484385843165, "grad_norm": 1.1022486686706543, "learning_rate": 5.522327596408766e-06, "loss": 0.3633, "step": 9293 }, { "epoch": 1.6124219292158224, "grad_norm": 0.8820821642875671, "learning_rate": 5.517627820219076e-06, "loss": 0.3546, "step": 9294 }, { "epoch": 1.6125954198473282, "grad_norm": 0.8135870695114136, "learning_rate": 5.512929724725544e-06, "loss": 0.5264, "step": 9295 }, { "epoch": 1.612768910478834, "grad_norm": 1.2711671590805054, "learning_rate": 5.508233310473412e-06, "loss": 0.4166, "step": 9296 }, { "epoch": 1.61294240111034, "grad_norm": 1.0111753940582275, "learning_rate": 5.503538578007679e-06, "loss": 0.474, "step": 9297 }, { "epoch": 1.6131158917418458, "grad_norm": 0.9204239249229431, "learning_rate": 5.498845527873193e-06, "loss": 0.6093, "step": 9298 }, { "epoch": 1.6132893823733518, "grad_norm": 0.8759139776229858, "learning_rate": 5.494154160614578e-06, "loss": 0.4791, "step": 9299 }, { "epoch": 1.6134628730048577, "grad_norm": 1.0911972522735596, "learning_rate": 5.489464476776276e-06, "loss": 0.3347, "step": 9300 }, { "epoch": 1.6136363636363638, "grad_norm": 0.7725734114646912, "learning_rate": 5.484776476902525e-06, "loss": 0.4252, "step": 9301 }, { "epoch": 1.6138098542678696, "grad_norm": 0.7725723385810852, "learning_rate": 5.480090161537388e-06, "loss": 0.4652, "step": 9302 }, { "epoch": 1.6139833448993754, "grad_norm": 0.825690507888794, "learning_rate": 5.475405531224696e-06, "loss": 0.3943, "step": 9303 }, { "epoch": 1.6141568355308813, "grad_norm": 0.8373029828071594, "learning_rate": 5.470722586508122e-06, "loss": 0.377, "step": 9304 }, { "epoch": 1.6143303261623871, "grad_norm": 1.112805962562561, "learning_rate": 5.466041327931122e-06, "loss": 0.3735, "step": 9305 }, { "epoch": 1.614503816793893, "grad_norm": 1.0779210329055786, "learning_rate": 5.4613617560369625e-06, "loss": 0.4561, "step": 9306 }, { "epoch": 1.614677307425399, "grad_norm": 0.8767843246459961, "learning_rate": 5.456683871368704e-06, "loss": 0.4266, "step": 9307 }, { "epoch": 1.614850798056905, "grad_norm": 0.8819618225097656, "learning_rate": 5.452007674469235e-06, "loss": 0.4556, "step": 9308 }, { "epoch": 1.615024288688411, "grad_norm": 0.8689743876457214, "learning_rate": 5.447333165881228e-06, "loss": 0.4421, "step": 9309 }, { "epoch": 1.6151977793199168, "grad_norm": 0.9669204354286194, "learning_rate": 5.442660346147157e-06, "loss": 0.5767, "step": 9310 }, { "epoch": 1.6153712699514227, "grad_norm": 0.6955989003181458, "learning_rate": 5.437989215809323e-06, "loss": 0.5322, "step": 9311 }, { "epoch": 1.6155447605829285, "grad_norm": 0.7663390040397644, "learning_rate": 5.433319775409807e-06, "loss": 0.527, "step": 9312 }, { "epoch": 1.6157182512144344, "grad_norm": 1.5488319396972656, "learning_rate": 5.428652025490506e-06, "loss": 0.5129, "step": 9313 }, { "epoch": 1.6158917418459402, "grad_norm": 1.026512622833252, "learning_rate": 5.4239859665931105e-06, "loss": 0.4865, "step": 9314 }, { "epoch": 1.6160652324774463, "grad_norm": 0.9221273064613342, "learning_rate": 5.41932159925914e-06, "loss": 0.4036, "step": 9315 }, { "epoch": 1.6162387231089521, "grad_norm": 1.0832653045654297, "learning_rate": 5.4146589240298745e-06, "loss": 0.4113, "step": 9316 }, { "epoch": 1.6164122137404582, "grad_norm": 1.9201949834823608, "learning_rate": 5.409997941446443e-06, "loss": 0.5138, "step": 9317 }, { "epoch": 1.616585704371964, "grad_norm": 1.0195437669754028, "learning_rate": 5.405338652049749e-06, "loss": 0.4425, "step": 9318 }, { "epoch": 1.6167591950034699, "grad_norm": 0.7928690910339355, "learning_rate": 5.400681056380515e-06, "loss": 0.4218, "step": 9319 }, { "epoch": 1.6169326856349757, "grad_norm": 0.8504543900489807, "learning_rate": 5.396025154979247e-06, "loss": 0.4165, "step": 9320 }, { "epoch": 1.6171061762664816, "grad_norm": 0.7713993191719055, "learning_rate": 5.39137094838629e-06, "loss": 0.5248, "step": 9321 }, { "epoch": 1.6172796668979874, "grad_norm": 0.5526061654090881, "learning_rate": 5.386718437141743e-06, "loss": 0.574, "step": 9322 }, { "epoch": 1.6174531575294933, "grad_norm": 0.8415552973747253, "learning_rate": 5.382067621785556e-06, "loss": 0.4799, "step": 9323 }, { "epoch": 1.6176266481609993, "grad_norm": 0.9677339792251587, "learning_rate": 5.37741850285745e-06, "loss": 0.3543, "step": 9324 }, { "epoch": 1.6178001387925052, "grad_norm": 0.7718323469161987, "learning_rate": 5.372771080896977e-06, "loss": 0.4779, "step": 9325 }, { "epoch": 1.6179736294240112, "grad_norm": 0.6921229362487793, "learning_rate": 5.368125356443452e-06, "loss": 0.3804, "step": 9326 }, { "epoch": 1.618147120055517, "grad_norm": 0.6649701595306396, "learning_rate": 5.3634813300360355e-06, "loss": 0.5355, "step": 9327 }, { "epoch": 1.618320610687023, "grad_norm": 0.6825523972511292, "learning_rate": 5.358839002213665e-06, "loss": 0.4819, "step": 9328 }, { "epoch": 1.6184941013185288, "grad_norm": 0.7959592938423157, "learning_rate": 5.354198373515087e-06, "loss": 0.4883, "step": 9329 }, { "epoch": 1.6186675919500346, "grad_norm": 0.6361215710639954, "learning_rate": 5.349559444478849e-06, "loss": 0.5021, "step": 9330 }, { "epoch": 1.6188410825815405, "grad_norm": 1.068855881690979, "learning_rate": 5.344922215643316e-06, "loss": 0.4326, "step": 9331 }, { "epoch": 1.6190145732130465, "grad_norm": 0.7132313251495361, "learning_rate": 5.3402866875466344e-06, "loss": 0.5419, "step": 9332 }, { "epoch": 1.6191880638445524, "grad_norm": 1.0609829425811768, "learning_rate": 5.335652860726765e-06, "loss": 0.4026, "step": 9333 }, { "epoch": 1.6193615544760585, "grad_norm": 0.8046808838844299, "learning_rate": 5.331020735721469e-06, "loss": 0.453, "step": 9334 }, { "epoch": 1.6195350451075643, "grad_norm": 1.76338791847229, "learning_rate": 5.326390313068303e-06, "loss": 0.3383, "step": 9335 }, { "epoch": 1.6197085357390701, "grad_norm": 0.7001862525939941, "learning_rate": 5.321761593304646e-06, "loss": 0.4744, "step": 9336 }, { "epoch": 1.619882026370576, "grad_norm": 0.6396026015281677, "learning_rate": 5.317134576967658e-06, "loss": 0.5968, "step": 9337 }, { "epoch": 1.6200555170020818, "grad_norm": 0.6747438907623291, "learning_rate": 5.312509264594312e-06, "loss": 0.4683, "step": 9338 }, { "epoch": 1.6202290076335877, "grad_norm": 1.0268335342407227, "learning_rate": 5.307885656721374e-06, "loss": 0.3291, "step": 9339 }, { "epoch": 1.6204024982650935, "grad_norm": 0.9441016316413879, "learning_rate": 5.303263753885433e-06, "loss": 0.4294, "step": 9340 }, { "epoch": 1.6205759888965996, "grad_norm": 0.9289868474006653, "learning_rate": 5.298643556622858e-06, "loss": 0.4991, "step": 9341 }, { "epoch": 1.6207494795281054, "grad_norm": 1.0585331916809082, "learning_rate": 5.294025065469827e-06, "loss": 0.5472, "step": 9342 }, { "epoch": 1.6209229701596115, "grad_norm": 0.7627704739570618, "learning_rate": 5.2894082809623185e-06, "loss": 0.5371, "step": 9343 }, { "epoch": 1.6210964607911174, "grad_norm": 0.6469327211380005, "learning_rate": 5.284793203636132e-06, "loss": 0.5803, "step": 9344 }, { "epoch": 1.6212699514226232, "grad_norm": 0.8668105006217957, "learning_rate": 5.280179834026828e-06, "loss": 0.4592, "step": 9345 }, { "epoch": 1.621443442054129, "grad_norm": 1.0167548656463623, "learning_rate": 5.2755681726698134e-06, "loss": 0.372, "step": 9346 }, { "epoch": 1.621616932685635, "grad_norm": 0.915577232837677, "learning_rate": 5.270958220100269e-06, "loss": 0.5203, "step": 9347 }, { "epoch": 1.6217904233171407, "grad_norm": 0.7445098161697388, "learning_rate": 5.2663499768531865e-06, "loss": 0.5134, "step": 9348 }, { "epoch": 1.6219639139486468, "grad_norm": 0.9516087174415588, "learning_rate": 5.26174344346335e-06, "loss": 0.5135, "step": 9349 }, { "epoch": 1.6221374045801527, "grad_norm": 0.8416820764541626, "learning_rate": 5.257138620465374e-06, "loss": 0.389, "step": 9350 }, { "epoch": 1.6223108952116587, "grad_norm": 0.6412305235862732, "learning_rate": 5.252535508393628e-06, "loss": 0.5183, "step": 9351 }, { "epoch": 1.6224843858431646, "grad_norm": 0.8845618963241577, "learning_rate": 5.247934107782324e-06, "loss": 0.4965, "step": 9352 }, { "epoch": 1.6226578764746704, "grad_norm": 1.1905583143234253, "learning_rate": 5.243334419165453e-06, "loss": 0.4443, "step": 9353 }, { "epoch": 1.6228313671061763, "grad_norm": 1.2265573740005493, "learning_rate": 5.238736443076828e-06, "loss": 0.5354, "step": 9354 }, { "epoch": 1.623004857737682, "grad_norm": 0.8113061189651489, "learning_rate": 5.234140180050029e-06, "loss": 0.3723, "step": 9355 }, { "epoch": 1.623178348369188, "grad_norm": 0.6684175729751587, "learning_rate": 5.2295456306184715e-06, "loss": 0.4166, "step": 9356 }, { "epoch": 1.6233518390006938, "grad_norm": 0.6276340484619141, "learning_rate": 5.2249527953153545e-06, "loss": 0.4747, "step": 9357 }, { "epoch": 1.6235253296321999, "grad_norm": 0.6321830153465271, "learning_rate": 5.220361674673677e-06, "loss": 0.4192, "step": 9358 }, { "epoch": 1.6236988202637057, "grad_norm": 0.7691701054573059, "learning_rate": 5.215772269226255e-06, "loss": 0.3533, "step": 9359 }, { "epoch": 1.6238723108952118, "grad_norm": 0.7306829690933228, "learning_rate": 5.211184579505688e-06, "loss": 0.3625, "step": 9360 }, { "epoch": 1.6240458015267176, "grad_norm": 0.8247324824333191, "learning_rate": 5.206598606044384e-06, "loss": 0.3588, "step": 9361 }, { "epoch": 1.6242192921582235, "grad_norm": 0.9606726169586182, "learning_rate": 5.2020143493745425e-06, "loss": 0.3992, "step": 9362 }, { "epoch": 1.6243927827897293, "grad_norm": 0.766910970211029, "learning_rate": 5.1974318100281905e-06, "loss": 0.5186, "step": 9363 }, { "epoch": 1.6245662734212352, "grad_norm": 0.795785665512085, "learning_rate": 5.1928509885371124e-06, "loss": 0.5034, "step": 9364 }, { "epoch": 1.624739764052741, "grad_norm": 0.7035911679267883, "learning_rate": 5.188271885432938e-06, "loss": 0.5208, "step": 9365 }, { "epoch": 1.624913254684247, "grad_norm": 0.8098400831222534, "learning_rate": 5.183694501247072e-06, "loss": 0.4284, "step": 9366 }, { "epoch": 1.625086745315753, "grad_norm": 0.9330823421478271, "learning_rate": 5.179118836510721e-06, "loss": 0.4572, "step": 9367 }, { "epoch": 1.625260235947259, "grad_norm": 0.8530337810516357, "learning_rate": 5.174544891754896e-06, "loss": 0.5416, "step": 9368 }, { "epoch": 1.6254337265787648, "grad_norm": 0.7967491149902344, "learning_rate": 5.169972667510414e-06, "loss": 0.4255, "step": 9369 }, { "epoch": 1.6256072172102707, "grad_norm": 0.7864426970481873, "learning_rate": 5.165402164307884e-06, "loss": 0.5245, "step": 9370 }, { "epoch": 1.6257807078417765, "grad_norm": 0.9544775485992432, "learning_rate": 5.160833382677721e-06, "loss": 0.4849, "step": 9371 }, { "epoch": 1.6259541984732824, "grad_norm": 0.9821032285690308, "learning_rate": 5.15626632315013e-06, "loss": 0.4447, "step": 9372 }, { "epoch": 1.6261276891047882, "grad_norm": 0.8680276870727539, "learning_rate": 5.151700986255137e-06, "loss": 0.4697, "step": 9373 }, { "epoch": 1.6263011797362943, "grad_norm": 0.8093870878219604, "learning_rate": 5.147137372522537e-06, "loss": 0.418, "step": 9374 }, { "epoch": 1.6264746703678001, "grad_norm": 1.3203651905059814, "learning_rate": 5.142575482481957e-06, "loss": 0.4877, "step": 9375 }, { "epoch": 1.6266481609993062, "grad_norm": 0.8356888294219971, "learning_rate": 5.138015316662803e-06, "loss": 0.4918, "step": 9376 }, { "epoch": 1.626821651630812, "grad_norm": 0.8403335809707642, "learning_rate": 5.1334568755942915e-06, "loss": 0.4158, "step": 9377 }, { "epoch": 1.626995142262318, "grad_norm": 0.6890471577644348, "learning_rate": 5.128900159805425e-06, "loss": 0.4746, "step": 9378 }, { "epoch": 1.6271686328938237, "grad_norm": 0.9023023247718811, "learning_rate": 5.124345169825031e-06, "loss": 0.4921, "step": 9379 }, { "epoch": 1.6273421235253296, "grad_norm": 0.777741551399231, "learning_rate": 5.119791906181713e-06, "loss": 0.4172, "step": 9380 }, { "epoch": 1.6275156141568354, "grad_norm": 0.7545634508132935, "learning_rate": 5.115240369403882e-06, "loss": 0.4095, "step": 9381 }, { "epoch": 1.6276891047883413, "grad_norm": 0.7467777132987976, "learning_rate": 5.110690560019744e-06, "loss": 0.4971, "step": 9382 }, { "epoch": 1.6278625954198473, "grad_norm": 0.7398180365562439, "learning_rate": 5.106142478557323e-06, "loss": 0.4148, "step": 9383 }, { "epoch": 1.6280360860513532, "grad_norm": 0.757480263710022, "learning_rate": 5.1015961255444235e-06, "loss": 0.3639, "step": 9384 }, { "epoch": 1.6282095766828593, "grad_norm": 0.8431456685066223, "learning_rate": 5.097051501508652e-06, "loss": 0.4777, "step": 9385 }, { "epoch": 1.628383067314365, "grad_norm": 0.9009338021278381, "learning_rate": 5.09250860697742e-06, "loss": 0.36, "step": 9386 }, { "epoch": 1.628556557945871, "grad_norm": 0.7599311470985413, "learning_rate": 5.087967442477928e-06, "loss": 0.3723, "step": 9387 }, { "epoch": 1.6287300485773768, "grad_norm": 0.7761737108230591, "learning_rate": 5.083428008537197e-06, "loss": 0.372, "step": 9388 }, { "epoch": 1.6289035392088826, "grad_norm": 0.7962995767593384, "learning_rate": 5.078890305682027e-06, "loss": 0.4002, "step": 9389 }, { "epoch": 1.6290770298403885, "grad_norm": 0.9658076167106628, "learning_rate": 5.074354334439022e-06, "loss": 0.5925, "step": 9390 }, { "epoch": 1.6292505204718946, "grad_norm": 0.8104629516601562, "learning_rate": 5.069820095334583e-06, "loss": 0.4004, "step": 9391 }, { "epoch": 1.6294240111034004, "grad_norm": 1.1162132024765015, "learning_rate": 5.065287588894933e-06, "loss": 0.4384, "step": 9392 }, { "epoch": 1.6295975017349065, "grad_norm": 1.3077670335769653, "learning_rate": 5.060756815646046e-06, "loss": 0.3755, "step": 9393 }, { "epoch": 1.6297709923664123, "grad_norm": 2.617023468017578, "learning_rate": 5.056227776113747e-06, "loss": 0.395, "step": 9394 }, { "epoch": 1.6299444829979182, "grad_norm": 0.7315855622291565, "learning_rate": 5.05170047082362e-06, "loss": 0.554, "step": 9395 }, { "epoch": 1.630117973629424, "grad_norm": 0.748149037361145, "learning_rate": 5.0471749003010835e-06, "loss": 0.3365, "step": 9396 }, { "epoch": 1.6302914642609299, "grad_norm": 0.771815836429596, "learning_rate": 5.0426510650713116e-06, "loss": 0.3942, "step": 9397 }, { "epoch": 1.6304649548924357, "grad_norm": 0.8909497857093811, "learning_rate": 5.038128965659317e-06, "loss": 0.3345, "step": 9398 }, { "epoch": 1.6306384455239415, "grad_norm": 0.802179753780365, "learning_rate": 5.033608602589892e-06, "loss": 0.4484, "step": 9399 }, { "epoch": 1.6308119361554476, "grad_norm": 0.7441188097000122, "learning_rate": 5.029089976387627e-06, "loss": 0.4084, "step": 9400 }, { "epoch": 1.6309854267869535, "grad_norm": 0.7388148903846741, "learning_rate": 5.0245730875769095e-06, "loss": 0.4401, "step": 9401 }, { "epoch": 1.6311589174184595, "grad_norm": 0.9957874417304993, "learning_rate": 5.020057936681939e-06, "loss": 0.417, "step": 9402 }, { "epoch": 1.6313324080499654, "grad_norm": 0.7852291464805603, "learning_rate": 5.0155445242267006e-06, "loss": 0.3761, "step": 9403 }, { "epoch": 1.6315058986814712, "grad_norm": 0.7357715964317322, "learning_rate": 5.011032850734983e-06, "loss": 0.399, "step": 9404 }, { "epoch": 1.631679389312977, "grad_norm": 0.8344091773033142, "learning_rate": 5.006522916730368e-06, "loss": 0.3878, "step": 9405 }, { "epoch": 1.631852879944483, "grad_norm": 0.6356186866760254, "learning_rate": 5.00201472273623e-06, "loss": 0.4965, "step": 9406 }, { "epoch": 1.6320263705759888, "grad_norm": 0.9192460179328918, "learning_rate": 4.9975082692757705e-06, "loss": 0.5393, "step": 9407 }, { "epoch": 1.6321998612074948, "grad_norm": 1.1186772584915161, "learning_rate": 4.993003556871954e-06, "loss": 0.3128, "step": 9408 }, { "epoch": 1.6323733518390007, "grad_norm": 0.6981306672096252, "learning_rate": 4.9885005860475626e-06, "loss": 0.5186, "step": 9409 }, { "epoch": 1.6325468424705067, "grad_norm": 1.1612515449523926, "learning_rate": 4.983999357325164e-06, "loss": 0.5011, "step": 9410 }, { "epoch": 1.6327203331020126, "grad_norm": 1.1904067993164062, "learning_rate": 4.9794998712271425e-06, "loss": 0.5631, "step": 9411 }, { "epoch": 1.6328938237335184, "grad_norm": 0.772208034992218, "learning_rate": 4.975002128275666e-06, "loss": 0.3475, "step": 9412 }, { "epoch": 1.6330673143650243, "grad_norm": 0.7802537083625793, "learning_rate": 4.970506128992696e-06, "loss": 0.5374, "step": 9413 }, { "epoch": 1.6332408049965301, "grad_norm": 1.3811105489730835, "learning_rate": 4.966011873900001e-06, "loss": 0.3776, "step": 9414 }, { "epoch": 1.633414295628036, "grad_norm": 1.1440554857254028, "learning_rate": 4.961519363519154e-06, "loss": 0.4086, "step": 9415 }, { "epoch": 1.6335877862595418, "grad_norm": 0.6254736185073853, "learning_rate": 4.957028598371498e-06, "loss": 0.5498, "step": 9416 }, { "epoch": 1.6337612768910479, "grad_norm": 1.0763905048370361, "learning_rate": 4.9525395789782085e-06, "loss": 0.4874, "step": 9417 }, { "epoch": 1.6339347675225537, "grad_norm": 0.9219430088996887, "learning_rate": 4.948052305860233e-06, "loss": 0.3763, "step": 9418 }, { "epoch": 1.6341082581540598, "grad_norm": 0.8166424632072449, "learning_rate": 4.943566779538327e-06, "loss": 0.4628, "step": 9419 }, { "epoch": 1.6342817487855656, "grad_norm": 0.7532215714454651, "learning_rate": 4.939083000533036e-06, "loss": 0.394, "step": 9420 }, { "epoch": 1.6344552394170715, "grad_norm": 1.005659818649292, "learning_rate": 4.934600969364722e-06, "loss": 0.4742, "step": 9421 }, { "epoch": 1.6346287300485773, "grad_norm": 0.8567238450050354, "learning_rate": 4.93012068655351e-06, "loss": 0.6027, "step": 9422 }, { "epoch": 1.6348022206800832, "grad_norm": 0.9618515372276306, "learning_rate": 4.925642152619357e-06, "loss": 0.4766, "step": 9423 }, { "epoch": 1.634975711311589, "grad_norm": 0.6993759274482727, "learning_rate": 4.92116536808199e-06, "loss": 0.4626, "step": 9424 }, { "epoch": 1.635149201943095, "grad_norm": 0.8960062861442566, "learning_rate": 4.9166903334609675e-06, "loss": 0.5129, "step": 9425 }, { "epoch": 1.635322692574601, "grad_norm": 0.8389983177185059, "learning_rate": 4.912217049275594e-06, "loss": 0.4299, "step": 9426 }, { "epoch": 1.635496183206107, "grad_norm": 0.9389564394950867, "learning_rate": 4.907745516045017e-06, "loss": 0.4811, "step": 9427 }, { "epoch": 1.6356696738376129, "grad_norm": 0.8085852861404419, "learning_rate": 4.903275734288162e-06, "loss": 0.5209, "step": 9428 }, { "epoch": 1.6358431644691187, "grad_norm": 0.7433094382286072, "learning_rate": 4.898807704523747e-06, "loss": 0.5425, "step": 9429 }, { "epoch": 1.6360166551006246, "grad_norm": 1.0063952207565308, "learning_rate": 4.8943414272702886e-06, "loss": 0.365, "step": 9430 }, { "epoch": 1.6361901457321304, "grad_norm": 0.7665280103683472, "learning_rate": 4.889876903046116e-06, "loss": 0.5081, "step": 9431 }, { "epoch": 1.6363636363636362, "grad_norm": 0.8327005505561829, "learning_rate": 4.885414132369335e-06, "loss": 0.4341, "step": 9432 }, { "epoch": 1.6365371269951423, "grad_norm": 0.8074477314949036, "learning_rate": 4.8809531157578495e-06, "loss": 0.3812, "step": 9433 }, { "epoch": 1.6367106176266482, "grad_norm": 2.5853981971740723, "learning_rate": 4.876493853729385e-06, "loss": 0.3887, "step": 9434 }, { "epoch": 1.6368841082581542, "grad_norm": 0.6790578961372375, "learning_rate": 4.872036346801419e-06, "loss": 0.4586, "step": 9435 }, { "epoch": 1.63705759888966, "grad_norm": 1.1151888370513916, "learning_rate": 4.867580595491268e-06, "loss": 0.3708, "step": 9436 }, { "epoch": 1.637231089521166, "grad_norm": 0.6660562753677368, "learning_rate": 4.863126600316021e-06, "loss": 0.4998, "step": 9437 }, { "epoch": 1.6374045801526718, "grad_norm": 0.679646372795105, "learning_rate": 4.858674361792571e-06, "loss": 0.4925, "step": 9438 }, { "epoch": 1.6375780707841776, "grad_norm": 1.0015981197357178, "learning_rate": 4.854223880437599e-06, "loss": 0.4862, "step": 9439 }, { "epoch": 1.6377515614156835, "grad_norm": 0.7962157726287842, "learning_rate": 4.849775156767598e-06, "loss": 0.4476, "step": 9440 }, { "epoch": 1.6379250520471893, "grad_norm": 0.7867410182952881, "learning_rate": 4.845328191298846e-06, "loss": 0.5082, "step": 9441 }, { "epoch": 1.6380985426786954, "grad_norm": 0.6451948881149292, "learning_rate": 4.840882984547415e-06, "loss": 0.6041, "step": 9442 }, { "epoch": 1.6382720333102012, "grad_norm": 0.6247390508651733, "learning_rate": 4.8364395370291715e-06, "loss": 0.5873, "step": 9443 }, { "epoch": 1.6384455239417073, "grad_norm": 0.6129417419433594, "learning_rate": 4.8319978492598e-06, "loss": 0.6053, "step": 9444 }, { "epoch": 1.6386190145732131, "grad_norm": 0.6954910159111023, "learning_rate": 4.827557921754742e-06, "loss": 0.5193, "step": 9445 }, { "epoch": 1.638792505204719, "grad_norm": 0.8252256512641907, "learning_rate": 4.823119755029271e-06, "loss": 0.4645, "step": 9446 }, { "epoch": 1.6389659958362248, "grad_norm": 1.2125977277755737, "learning_rate": 4.818683349598439e-06, "loss": 0.4218, "step": 9447 }, { "epoch": 1.6391394864677307, "grad_norm": 0.9081424474716187, "learning_rate": 4.814248705977092e-06, "loss": 0.3681, "step": 9448 }, { "epoch": 1.6393129770992365, "grad_norm": 0.6138148307800293, "learning_rate": 4.8098158246798734e-06, "loss": 0.4502, "step": 9449 }, { "epoch": 1.6394864677307426, "grad_norm": 0.7577382922172546, "learning_rate": 4.805384706221232e-06, "loss": 0.442, "step": 9450 }, { "epoch": 1.6396599583622484, "grad_norm": 0.8864315152168274, "learning_rate": 4.800955351115402e-06, "loss": 0.3993, "step": 9451 }, { "epoch": 1.6398334489937545, "grad_norm": 0.8313661217689514, "learning_rate": 4.796527759876415e-06, "loss": 0.4368, "step": 9452 }, { "epoch": 1.6400069396252603, "grad_norm": 0.8117265105247498, "learning_rate": 4.79210193301809e-06, "loss": 0.4047, "step": 9453 }, { "epoch": 1.6401804302567662, "grad_norm": 1.246932864189148, "learning_rate": 4.787677871054062e-06, "loss": 0.4875, "step": 9454 }, { "epoch": 1.640353920888272, "grad_norm": 0.7654351592063904, "learning_rate": 4.783255574497742e-06, "loss": 0.4834, "step": 9455 }, { "epoch": 1.6405274115197779, "grad_norm": 0.8894326686859131, "learning_rate": 4.7788350438623465e-06, "loss": 0.4487, "step": 9456 }, { "epoch": 1.6407009021512837, "grad_norm": 0.6546960473060608, "learning_rate": 4.774416279660879e-06, "loss": 0.4564, "step": 9457 }, { "epoch": 1.6408743927827896, "grad_norm": 0.6331251859664917, "learning_rate": 4.769999282406137e-06, "loss": 0.5345, "step": 9458 }, { "epoch": 1.6410478834142956, "grad_norm": 0.828997015953064, "learning_rate": 4.76558405261073e-06, "loss": 0.5928, "step": 9459 }, { "epoch": 1.6412213740458015, "grad_norm": 0.7962414622306824, "learning_rate": 4.7611705907870474e-06, "loss": 0.3768, "step": 9460 }, { "epoch": 1.6413948646773076, "grad_norm": 0.6648374795913696, "learning_rate": 4.7567588974472734e-06, "loss": 0.5637, "step": 9461 }, { "epoch": 1.6415683553088134, "grad_norm": 0.6690629124641418, "learning_rate": 4.7523489731033845e-06, "loss": 0.5367, "step": 9462 }, { "epoch": 1.6417418459403192, "grad_norm": 0.6988204121589661, "learning_rate": 4.747940818267178e-06, "loss": 0.4329, "step": 9463 }, { "epoch": 1.641915336571825, "grad_norm": 0.8069101572036743, "learning_rate": 4.743534433450199e-06, "loss": 0.4016, "step": 9464 }, { "epoch": 1.642088827203331, "grad_norm": 0.6000931262969971, "learning_rate": 4.739129819163832e-06, "loss": 0.4399, "step": 9465 }, { "epoch": 1.6422623178348368, "grad_norm": 0.7521392703056335, "learning_rate": 4.734726975919233e-06, "loss": 0.5347, "step": 9466 }, { "epoch": 1.6424358084663429, "grad_norm": 0.826542854309082, "learning_rate": 4.730325904227355e-06, "loss": 0.4824, "step": 9467 }, { "epoch": 1.6426092990978487, "grad_norm": 0.8546525239944458, "learning_rate": 4.725926604598942e-06, "loss": 0.4591, "step": 9468 }, { "epoch": 1.6427827897293548, "grad_norm": 0.6947259306907654, "learning_rate": 4.721529077544551e-06, "loss": 0.4568, "step": 9469 }, { "epoch": 1.6429562803608606, "grad_norm": 0.7343437671661377, "learning_rate": 4.7171333235745145e-06, "loss": 0.4131, "step": 9470 }, { "epoch": 1.6431297709923665, "grad_norm": 0.7316738367080688, "learning_rate": 4.712739343198962e-06, "loss": 0.3978, "step": 9471 }, { "epoch": 1.6433032616238723, "grad_norm": 0.8545839190483093, "learning_rate": 4.708347136927818e-06, "loss": 0.4841, "step": 9472 }, { "epoch": 1.6434767522553781, "grad_norm": 1.1487629413604736, "learning_rate": 4.703956705270818e-06, "loss": 0.4713, "step": 9473 }, { "epoch": 1.643650242886884, "grad_norm": 0.7485223412513733, "learning_rate": 4.699568048737453e-06, "loss": 0.4142, "step": 9474 }, { "epoch": 1.64382373351839, "grad_norm": 0.6850613355636597, "learning_rate": 4.695181167837051e-06, "loss": 0.4752, "step": 9475 }, { "epoch": 1.643997224149896, "grad_norm": 0.861230194568634, "learning_rate": 4.690796063078709e-06, "loss": 0.4969, "step": 9476 }, { "epoch": 1.6441707147814018, "grad_norm": 0.9509091973304749, "learning_rate": 4.686412734971322e-06, "loss": 0.4342, "step": 9477 }, { "epoch": 1.6443442054129078, "grad_norm": 0.9492801427841187, "learning_rate": 4.6820311840235745e-06, "loss": 0.4766, "step": 9478 }, { "epoch": 1.6445176960444137, "grad_norm": 0.8114972710609436, "learning_rate": 4.677651410743964e-06, "loss": 0.4592, "step": 9479 }, { "epoch": 1.6446911866759195, "grad_norm": 1.0280996561050415, "learning_rate": 4.67327341564076e-06, "loss": 0.5789, "step": 9480 }, { "epoch": 1.6448646773074254, "grad_norm": 0.7430195808410645, "learning_rate": 4.668897199222031e-06, "loss": 0.4691, "step": 9481 }, { "epoch": 1.6450381679389312, "grad_norm": 0.6867184638977051, "learning_rate": 4.6645227619956515e-06, "loss": 0.4368, "step": 9482 }, { "epoch": 1.645211658570437, "grad_norm": 0.9730817675590515, "learning_rate": 4.660150104469274e-06, "loss": 0.3169, "step": 9483 }, { "epoch": 1.6453851492019431, "grad_norm": 0.8736039400100708, "learning_rate": 4.655779227150352e-06, "loss": 0.4745, "step": 9484 }, { "epoch": 1.645558639833449, "grad_norm": 0.8217387795448303, "learning_rate": 4.6514101305461255e-06, "loss": 0.3871, "step": 9485 }, { "epoch": 1.645732130464955, "grad_norm": 0.6346628665924072, "learning_rate": 4.647042815163649e-06, "loss": 0.4198, "step": 9486 }, { "epoch": 1.6459056210964609, "grad_norm": 0.8603171110153198, "learning_rate": 4.6426772815097306e-06, "loss": 0.3154, "step": 9487 }, { "epoch": 1.6460791117279667, "grad_norm": 0.7276177406311035, "learning_rate": 4.638313530091016e-06, "loss": 0.4436, "step": 9488 }, { "epoch": 1.6462526023594726, "grad_norm": 0.9174731373786926, "learning_rate": 4.633951561413916e-06, "loss": 0.5026, "step": 9489 }, { "epoch": 1.6464260929909784, "grad_norm": 1.3416101932525635, "learning_rate": 4.629591375984641e-06, "loss": 0.3343, "step": 9490 }, { "epoch": 1.6465995836224843, "grad_norm": 0.9114309549331665, "learning_rate": 4.625232974309193e-06, "loss": 0.4585, "step": 9491 }, { "epoch": 1.6467730742539903, "grad_norm": 1.5631992816925049, "learning_rate": 4.620876356893385e-06, "loss": 0.3553, "step": 9492 }, { "epoch": 1.6469465648854962, "grad_norm": 0.6930509209632874, "learning_rate": 4.616521524242783e-06, "loss": 0.4706, "step": 9493 }, { "epoch": 1.6471200555170022, "grad_norm": 0.9006015658378601, "learning_rate": 4.612168476862789e-06, "loss": 0.4557, "step": 9494 }, { "epoch": 1.647293546148508, "grad_norm": 0.8068966865539551, "learning_rate": 4.607817215258574e-06, "loss": 0.4913, "step": 9495 }, { "epoch": 1.647467036780014, "grad_norm": 1.1427596807479858, "learning_rate": 4.603467739935108e-06, "loss": 0.3982, "step": 9496 }, { "epoch": 1.6476405274115198, "grad_norm": 0.8430987596511841, "learning_rate": 4.599120051397144e-06, "loss": 0.4786, "step": 9497 }, { "epoch": 1.6478140180430256, "grad_norm": 0.7175557613372803, "learning_rate": 4.594774150149251e-06, "loss": 0.4966, "step": 9498 }, { "epoch": 1.6479875086745315, "grad_norm": 1.0722674131393433, "learning_rate": 4.5904300366957675e-06, "loss": 0.4788, "step": 9499 }, { "epoch": 1.6481609993060373, "grad_norm": 2.1372292041778564, "learning_rate": 4.586087711540832e-06, "loss": 0.4724, "step": 9500 }, { "epoch": 1.6483344899375434, "grad_norm": 1.5587410926818848, "learning_rate": 4.581747175188376e-06, "loss": 0.3839, "step": 9501 }, { "epoch": 1.6485079805690492, "grad_norm": 0.8519799709320068, "learning_rate": 4.57740842814213e-06, "loss": 0.4861, "step": 9502 }, { "epoch": 1.6486814712005553, "grad_norm": 0.7175669074058533, "learning_rate": 4.573071470905608e-06, "loss": 0.4009, "step": 9503 }, { "epoch": 1.6488549618320612, "grad_norm": 0.7031004428863525, "learning_rate": 4.568736303982115e-06, "loss": 0.3728, "step": 9504 }, { "epoch": 1.649028452463567, "grad_norm": 0.7492570281028748, "learning_rate": 4.564402927874758e-06, "loss": 0.3357, "step": 9505 }, { "epoch": 1.6492019430950728, "grad_norm": 1.0007660388946533, "learning_rate": 4.560071343086421e-06, "loss": 0.3837, "step": 9506 }, { "epoch": 1.6493754337265787, "grad_norm": 0.9471139311790466, "learning_rate": 4.555741550119801e-06, "loss": 0.3647, "step": 9507 }, { "epoch": 1.6495489243580845, "grad_norm": 0.7544403076171875, "learning_rate": 4.55141354947737e-06, "loss": 0.4247, "step": 9508 }, { "epoch": 1.6497224149895906, "grad_norm": 0.7976730465888977, "learning_rate": 4.547087341661398e-06, "loss": 0.4744, "step": 9509 }, { "epoch": 1.6498959056210964, "grad_norm": 0.9931909441947937, "learning_rate": 4.542762927173941e-06, "loss": 0.4789, "step": 9510 }, { "epoch": 1.6500693962526025, "grad_norm": 0.6307627558708191, "learning_rate": 4.53844030651686e-06, "loss": 0.5188, "step": 9511 }, { "epoch": 1.6502428868841084, "grad_norm": 0.6200315952301025, "learning_rate": 4.534119480191801e-06, "loss": 0.5017, "step": 9512 }, { "epoch": 1.6504163775156142, "grad_norm": 0.7837086915969849, "learning_rate": 4.5298004487001966e-06, "loss": 0.3384, "step": 9513 }, { "epoch": 1.65058986814712, "grad_norm": 0.7162777185440063, "learning_rate": 4.525483212543273e-06, "loss": 0.5287, "step": 9514 }, { "epoch": 1.650763358778626, "grad_norm": 0.9284862279891968, "learning_rate": 4.521167772222064e-06, "loss": 0.5698, "step": 9515 }, { "epoch": 1.6509368494101317, "grad_norm": 0.8446031212806702, "learning_rate": 4.516854128237358e-06, "loss": 0.5477, "step": 9516 }, { "epoch": 1.6511103400416376, "grad_norm": 0.8378081321716309, "learning_rate": 4.51254228108978e-06, "loss": 0.36, "step": 9517 }, { "epoch": 1.6512838306731437, "grad_norm": 0.9502765536308289, "learning_rate": 4.5082322312797166e-06, "loss": 0.3824, "step": 9518 }, { "epoch": 1.6514573213046495, "grad_norm": 1.0102424621582031, "learning_rate": 4.503923979307352e-06, "loss": 0.5515, "step": 9519 }, { "epoch": 1.6516308119361556, "grad_norm": 0.9823939800262451, "learning_rate": 4.499617525672664e-06, "loss": 0.4515, "step": 9520 }, { "epoch": 1.6518043025676614, "grad_norm": 0.9717512726783752, "learning_rate": 4.4953128708754326e-06, "loss": 0.5508, "step": 9521 }, { "epoch": 1.6519777931991673, "grad_norm": 0.9089481234550476, "learning_rate": 4.491010015415198e-06, "loss": 0.4867, "step": 9522 }, { "epoch": 1.6521512838306731, "grad_norm": 1.0057765245437622, "learning_rate": 4.486708959791328e-06, "loss": 0.4462, "step": 9523 }, { "epoch": 1.652324774462179, "grad_norm": 1.0206904411315918, "learning_rate": 4.4824097045029615e-06, "loss": 0.4706, "step": 9524 }, { "epoch": 1.6524982650936848, "grad_norm": 0.6364613175392151, "learning_rate": 4.478112250049029e-06, "loss": 0.5292, "step": 9525 }, { "epoch": 1.6526717557251909, "grad_norm": 0.7264257669448853, "learning_rate": 4.473816596928251e-06, "loss": 0.59, "step": 9526 }, { "epoch": 1.6528452463566967, "grad_norm": 0.6859540939331055, "learning_rate": 4.469522745639154e-06, "loss": 0.6169, "step": 9527 }, { "epoch": 1.6530187369882028, "grad_norm": 0.7884160876274109, "learning_rate": 4.465230696680038e-06, "loss": 0.4731, "step": 9528 }, { "epoch": 1.6531922276197086, "grad_norm": 1.012871265411377, "learning_rate": 4.460940450548998e-06, "loss": 0.4489, "step": 9529 }, { "epoch": 1.6533657182512145, "grad_norm": 0.7458043694496155, "learning_rate": 4.456652007743929e-06, "loss": 0.5337, "step": 9530 }, { "epoch": 1.6535392088827203, "grad_norm": 0.8405235409736633, "learning_rate": 4.452365368762508e-06, "loss": 0.389, "step": 9531 }, { "epoch": 1.6537126995142262, "grad_norm": 0.6050131916999817, "learning_rate": 4.448080534102202e-06, "loss": 0.4435, "step": 9532 }, { "epoch": 1.653886190145732, "grad_norm": 0.6795126795768738, "learning_rate": 4.4437975042602635e-06, "loss": 0.5243, "step": 9533 }, { "epoch": 1.654059680777238, "grad_norm": 0.6958543062210083, "learning_rate": 4.439516279733764e-06, "loss": 0.5288, "step": 9534 }, { "epoch": 1.654233171408744, "grad_norm": 0.7578051090240479, "learning_rate": 4.435236861019521e-06, "loss": 0.4407, "step": 9535 }, { "epoch": 1.6544066620402498, "grad_norm": 0.9487757086753845, "learning_rate": 4.430959248614184e-06, "loss": 0.53, "step": 9536 }, { "epoch": 1.6545801526717558, "grad_norm": 0.9098021388053894, "learning_rate": 4.4266834430141654e-06, "loss": 0.3459, "step": 9537 }, { "epoch": 1.6547536433032617, "grad_norm": 0.800282895565033, "learning_rate": 4.42240944471568e-06, "loss": 0.408, "step": 9538 }, { "epoch": 1.6549271339347675, "grad_norm": 0.8329261541366577, "learning_rate": 4.418137254214725e-06, "loss": 0.3662, "step": 9539 }, { "epoch": 1.6551006245662734, "grad_norm": 0.733981192111969, "learning_rate": 4.413866872007104e-06, "loss": 0.4084, "step": 9540 }, { "epoch": 1.6552741151977792, "grad_norm": 0.7053385376930237, "learning_rate": 4.409598298588394e-06, "loss": 0.4844, "step": 9541 }, { "epoch": 1.655447605829285, "grad_norm": 1.4506841897964478, "learning_rate": 4.405331534453967e-06, "loss": 0.5758, "step": 9542 }, { "epoch": 1.6556210964607911, "grad_norm": 0.8209178447723389, "learning_rate": 4.4010665800989804e-06, "loss": 0.5397, "step": 9543 }, { "epoch": 1.655794587092297, "grad_norm": 0.6431795358657837, "learning_rate": 4.396803436018406e-06, "loss": 0.4479, "step": 9544 }, { "epoch": 1.655968077723803, "grad_norm": 0.8899828791618347, "learning_rate": 4.3925421027069645e-06, "loss": 0.4127, "step": 9545 }, { "epoch": 1.656141568355309, "grad_norm": 0.8157262802124023, "learning_rate": 4.3882825806592024e-06, "loss": 0.5422, "step": 9546 }, { "epoch": 1.6563150589868147, "grad_norm": 0.9176727533340454, "learning_rate": 4.3840248703694365e-06, "loss": 0.3475, "step": 9547 }, { "epoch": 1.6564885496183206, "grad_norm": 0.991773247718811, "learning_rate": 4.379768972331784e-06, "loss": 0.3473, "step": 9548 }, { "epoch": 1.6566620402498264, "grad_norm": 0.974721372127533, "learning_rate": 4.375514887040135e-06, "loss": 0.3997, "step": 9549 }, { "epoch": 1.6568355308813323, "grad_norm": 0.856035053730011, "learning_rate": 4.371262614988196e-06, "loss": 0.4032, "step": 9550 }, { "epoch": 1.6570090215128384, "grad_norm": 0.7112841010093689, "learning_rate": 4.367012156669441e-06, "loss": 0.5157, "step": 9551 }, { "epoch": 1.6571825121443442, "grad_norm": 0.8579465746879578, "learning_rate": 4.362763512577144e-06, "loss": 0.4253, "step": 9552 }, { "epoch": 1.6573560027758503, "grad_norm": 0.7902611494064331, "learning_rate": 4.358516683204355e-06, "loss": 0.534, "step": 9553 }, { "epoch": 1.6575294934073561, "grad_norm": 0.7621585726737976, "learning_rate": 4.354271669043934e-06, "loss": 0.4409, "step": 9554 }, { "epoch": 1.657702984038862, "grad_norm": 0.9942328333854675, "learning_rate": 4.350028470588521e-06, "loss": 0.3877, "step": 9555 }, { "epoch": 1.6578764746703678, "grad_norm": 0.6779175996780396, "learning_rate": 4.345787088330537e-06, "loss": 0.5796, "step": 9556 }, { "epoch": 1.6580499653018737, "grad_norm": 1.2362260818481445, "learning_rate": 4.341547522762202e-06, "loss": 0.5586, "step": 9557 }, { "epoch": 1.6582234559333795, "grad_norm": 0.7966768145561218, "learning_rate": 4.33730977437552e-06, "loss": 0.5646, "step": 9558 }, { "epoch": 1.6583969465648853, "grad_norm": 0.6846951842308044, "learning_rate": 4.333073843662292e-06, "loss": 0.3647, "step": 9559 }, { "epoch": 1.6585704371963914, "grad_norm": 0.6259403824806213, "learning_rate": 4.328839731114101e-06, "loss": 0.4735, "step": 9560 }, { "epoch": 1.6587439278278973, "grad_norm": 0.6614114046096802, "learning_rate": 4.324607437222319e-06, "loss": 0.4196, "step": 9561 }, { "epoch": 1.6589174184594033, "grad_norm": 0.6352244019508362, "learning_rate": 4.3203769624781055e-06, "loss": 0.3795, "step": 9562 }, { "epoch": 1.6590909090909092, "grad_norm": 1.7433141469955444, "learning_rate": 4.316148307372425e-06, "loss": 0.4055, "step": 9563 }, { "epoch": 1.659264399722415, "grad_norm": 1.985318899154663, "learning_rate": 4.311921472395999e-06, "loss": 0.3379, "step": 9564 }, { "epoch": 1.6594378903539209, "grad_norm": 0.8454378843307495, "learning_rate": 4.307696458039372e-06, "loss": 0.3607, "step": 9565 }, { "epoch": 1.6596113809854267, "grad_norm": 0.9035350680351257, "learning_rate": 4.303473264792857e-06, "loss": 0.5366, "step": 9566 }, { "epoch": 1.6597848716169326, "grad_norm": 0.7647597193717957, "learning_rate": 4.2992518931465566e-06, "loss": 0.3423, "step": 9567 }, { "epoch": 1.6599583622484386, "grad_norm": 0.8340458273887634, "learning_rate": 4.295032343590366e-06, "loss": 0.2598, "step": 9568 }, { "epoch": 1.6601318528799445, "grad_norm": 0.92600417137146, "learning_rate": 4.290814616613976e-06, "loss": 0.4066, "step": 9569 }, { "epoch": 1.6603053435114505, "grad_norm": 1.1044412851333618, "learning_rate": 4.286598712706858e-06, "loss": 0.3838, "step": 9570 }, { "epoch": 1.6604788341429564, "grad_norm": 0.8176655769348145, "learning_rate": 4.282384632358265e-06, "loss": 0.3781, "step": 9571 }, { "epoch": 1.6606523247744622, "grad_norm": 0.7669593691825867, "learning_rate": 4.278172376057246e-06, "loss": 0.4214, "step": 9572 }, { "epoch": 1.660825815405968, "grad_norm": 0.7503926753997803, "learning_rate": 4.2739619442926525e-06, "loss": 0.4418, "step": 9573 }, { "epoch": 1.660999306037474, "grad_norm": 1.3540635108947754, "learning_rate": 4.269753337553091e-06, "loss": 0.4642, "step": 9574 }, { "epoch": 1.6611727966689798, "grad_norm": 0.8061459064483643, "learning_rate": 4.265546556326989e-06, "loss": 0.3364, "step": 9575 }, { "epoch": 1.6613462873004856, "grad_norm": 0.8524975180625916, "learning_rate": 4.2613416011025424e-06, "loss": 0.4283, "step": 9576 }, { "epoch": 1.6615197779319917, "grad_norm": 0.636336624622345, "learning_rate": 4.257138472367737e-06, "loss": 0.5293, "step": 9577 }, { "epoch": 1.6616932685634975, "grad_norm": 0.6362683773040771, "learning_rate": 4.25293717061036e-06, "loss": 0.6263, "step": 9578 }, { "epoch": 1.6618667591950036, "grad_norm": 0.8132637739181519, "learning_rate": 4.248737696317975e-06, "loss": 0.3624, "step": 9579 }, { "epoch": 1.6620402498265094, "grad_norm": 0.7631382346153259, "learning_rate": 4.244540049977934e-06, "loss": 0.4004, "step": 9580 }, { "epoch": 1.6622137404580153, "grad_norm": 0.7176964282989502, "learning_rate": 4.240344232077373e-06, "loss": 0.5193, "step": 9581 }, { "epoch": 1.6623872310895211, "grad_norm": 0.6607311964035034, "learning_rate": 4.236150243103234e-06, "loss": 0.5862, "step": 9582 }, { "epoch": 1.662560721721027, "grad_norm": 0.7352809309959412, "learning_rate": 4.231958083542229e-06, "loss": 0.3513, "step": 9583 }, { "epoch": 1.6627342123525328, "grad_norm": 1.154639720916748, "learning_rate": 4.227767753880861e-06, "loss": 0.3141, "step": 9584 }, { "epoch": 1.662907702984039, "grad_norm": 0.6323665380477905, "learning_rate": 4.223579254605414e-06, "loss": 0.5624, "step": 9585 }, { "epoch": 1.6630811936155447, "grad_norm": 0.7658765912055969, "learning_rate": 4.2193925862019934e-06, "loss": 0.4747, "step": 9586 }, { "epoch": 1.6632546842470508, "grad_norm": 0.5467133522033691, "learning_rate": 4.2152077491564385e-06, "loss": 0.5022, "step": 9587 }, { "epoch": 1.6634281748785567, "grad_norm": 0.7082794904708862, "learning_rate": 4.211024743954424e-06, "loss": 0.5679, "step": 9588 }, { "epoch": 1.6636016655100625, "grad_norm": 0.6206352114677429, "learning_rate": 4.206843571081383e-06, "loss": 0.605, "step": 9589 }, { "epoch": 1.6637751561415683, "grad_norm": 0.8550276756286621, "learning_rate": 4.2026642310225505e-06, "loss": 0.5267, "step": 9590 }, { "epoch": 1.6639486467730742, "grad_norm": 0.8094297647476196, "learning_rate": 4.198486724262935e-06, "loss": 0.5392, "step": 9591 }, { "epoch": 1.66412213740458, "grad_norm": 0.7623730897903442, "learning_rate": 4.194311051287359e-06, "loss": 0.5349, "step": 9592 }, { "epoch": 1.664295628036086, "grad_norm": 0.6538176536560059, "learning_rate": 4.190137212580392e-06, "loss": 0.4421, "step": 9593 }, { "epoch": 1.664469118667592, "grad_norm": 0.8178464770317078, "learning_rate": 4.185965208626428e-06, "loss": 0.3848, "step": 9594 }, { "epoch": 1.6646426092990978, "grad_norm": 0.7703167796134949, "learning_rate": 4.181795039909631e-06, "loss": 0.4844, "step": 9595 }, { "epoch": 1.6648160999306039, "grad_norm": 0.9850491285324097, "learning_rate": 4.177626706913948e-06, "loss": 0.3661, "step": 9596 }, { "epoch": 1.6649895905621097, "grad_norm": 0.7138931751251221, "learning_rate": 4.173460210123119e-06, "loss": 0.5471, "step": 9597 }, { "epoch": 1.6651630811936156, "grad_norm": 1.267538070678711, "learning_rate": 4.16929555002068e-06, "loss": 0.4827, "step": 9598 }, { "epoch": 1.6653365718251214, "grad_norm": 0.7268630862236023, "learning_rate": 4.16513272708994e-06, "loss": 0.5835, "step": 9599 }, { "epoch": 1.6655100624566272, "grad_norm": 0.7588167786598206, "learning_rate": 4.160971741813995e-06, "loss": 0.3947, "step": 9600 }, { "epoch": 1.665683553088133, "grad_norm": 0.8451988101005554, "learning_rate": 4.156812594675732e-06, "loss": 0.5444, "step": 9601 }, { "epoch": 1.6658570437196392, "grad_norm": 0.7650555968284607, "learning_rate": 4.152655286157834e-06, "loss": 0.515, "step": 9602 }, { "epoch": 1.666030534351145, "grad_norm": 0.8789478540420532, "learning_rate": 4.148499816742755e-06, "loss": 0.5022, "step": 9603 }, { "epoch": 1.666204024982651, "grad_norm": 0.8302474021911621, "learning_rate": 4.144346186912738e-06, "loss": 0.459, "step": 9604 }, { "epoch": 1.666377515614157, "grad_norm": 0.7248766422271729, "learning_rate": 4.140194397149833e-06, "loss": 0.438, "step": 9605 }, { "epoch": 1.6665510062456628, "grad_norm": 0.8582567572593689, "learning_rate": 4.136044447935837e-06, "loss": 0.4225, "step": 9606 }, { "epoch": 1.6667244968771686, "grad_norm": 0.750859260559082, "learning_rate": 4.1318963397523725e-06, "loss": 0.4366, "step": 9607 }, { "epoch": 1.6668979875086745, "grad_norm": 0.7951454520225525, "learning_rate": 4.127750073080829e-06, "loss": 0.3839, "step": 9608 }, { "epoch": 1.6670714781401803, "grad_norm": 0.9483717679977417, "learning_rate": 4.123605648402385e-06, "loss": 0.3567, "step": 9609 }, { "epoch": 1.6672449687716864, "grad_norm": 0.8451215624809265, "learning_rate": 4.119463066197997e-06, "loss": 0.3776, "step": 9610 }, { "epoch": 1.6674184594031922, "grad_norm": 2.248885154724121, "learning_rate": 4.115322326948432e-06, "loss": 0.3769, "step": 9611 }, { "epoch": 1.6675919500346983, "grad_norm": 1.1927193403244019, "learning_rate": 4.111183431134223e-06, "loss": 0.401, "step": 9612 }, { "epoch": 1.6677654406662041, "grad_norm": 0.731179416179657, "learning_rate": 4.1070463792356865e-06, "loss": 0.511, "step": 9613 }, { "epoch": 1.66793893129771, "grad_norm": 1.3155031204223633, "learning_rate": 4.102911171732933e-06, "loss": 0.3981, "step": 9614 }, { "epoch": 1.6681124219292158, "grad_norm": 1.7868298292160034, "learning_rate": 4.098777809105871e-06, "loss": 0.4999, "step": 9615 }, { "epoch": 1.6682859125607217, "grad_norm": 0.7645481824874878, "learning_rate": 4.094646291834166e-06, "loss": 0.4681, "step": 9616 }, { "epoch": 1.6684594031922275, "grad_norm": 0.8161604404449463, "learning_rate": 4.090516620397294e-06, "loss": 0.465, "step": 9617 }, { "epoch": 1.6686328938237334, "grad_norm": 0.7998003959655762, "learning_rate": 4.086388795274508e-06, "loss": 0.3854, "step": 9618 }, { "epoch": 1.6688063844552394, "grad_norm": 0.6143330335617065, "learning_rate": 4.082262816944845e-06, "loss": 0.5396, "step": 9619 }, { "epoch": 1.6689798750867453, "grad_norm": 0.7949010729789734, "learning_rate": 4.078138685887125e-06, "loss": 0.4173, "step": 9620 }, { "epoch": 1.6691533657182513, "grad_norm": 1.2927134037017822, "learning_rate": 4.074016402579968e-06, "loss": 0.3998, "step": 9621 }, { "epoch": 1.6693268563497572, "grad_norm": 0.8502987027168274, "learning_rate": 4.069895967501765e-06, "loss": 0.3896, "step": 9622 }, { "epoch": 1.669500346981263, "grad_norm": 0.5281307101249695, "learning_rate": 4.065777381130698e-06, "loss": 0.5065, "step": 9623 }, { "epoch": 1.6696738376127689, "grad_norm": 0.7240021824836731, "learning_rate": 4.0616606439447315e-06, "loss": 0.5621, "step": 9624 }, { "epoch": 1.6698473282442747, "grad_norm": 0.9518048167228699, "learning_rate": 4.057545756421615e-06, "loss": 0.3801, "step": 9625 }, { "epoch": 1.6700208188757806, "grad_norm": 1.1966935396194458, "learning_rate": 4.053432719038895e-06, "loss": 0.4609, "step": 9626 }, { "epoch": 1.6701943095072866, "grad_norm": 0.8296990394592285, "learning_rate": 4.049321532273889e-06, "loss": 0.3398, "step": 9627 }, { "epoch": 1.6703678001387925, "grad_norm": 1.3707387447357178, "learning_rate": 4.045212196603705e-06, "loss": 0.4018, "step": 9628 }, { "epoch": 1.6705412907702986, "grad_norm": 0.9826870560646057, "learning_rate": 4.041104712505233e-06, "loss": 0.4464, "step": 9629 }, { "epoch": 1.6707147814018044, "grad_norm": 0.7787246108055115, "learning_rate": 4.03699908045516e-06, "loss": 0.4161, "step": 9630 }, { "epoch": 1.6708882720333103, "grad_norm": 0.9037700295448303, "learning_rate": 4.0328953009299425e-06, "loss": 0.5441, "step": 9631 }, { "epoch": 1.671061762664816, "grad_norm": 0.8366997241973877, "learning_rate": 4.028793374405833e-06, "loss": 0.4343, "step": 9632 }, { "epoch": 1.671235253296322, "grad_norm": 1.0335955619812012, "learning_rate": 4.024693301358855e-06, "loss": 0.4542, "step": 9633 }, { "epoch": 1.6714087439278278, "grad_norm": 0.6714066863059998, "learning_rate": 4.020595082264847e-06, "loss": 0.5325, "step": 9634 }, { "epoch": 1.6715822345593336, "grad_norm": 1.3859325647354126, "learning_rate": 4.016498717599387e-06, "loss": 0.4736, "step": 9635 }, { "epoch": 1.6717557251908397, "grad_norm": 0.7687572836875916, "learning_rate": 4.012404207837881e-06, "loss": 0.32, "step": 9636 }, { "epoch": 1.6719292158223455, "grad_norm": 0.8668257594108582, "learning_rate": 4.008311553455497e-06, "loss": 0.457, "step": 9637 }, { "epoch": 1.6721027064538516, "grad_norm": 0.7919766902923584, "learning_rate": 4.0042207549271905e-06, "loss": 0.4264, "step": 9638 }, { "epoch": 1.6722761970853575, "grad_norm": 0.951241672039032, "learning_rate": 4.0001318127276985e-06, "loss": 0.4434, "step": 9639 }, { "epoch": 1.6724496877168633, "grad_norm": 0.6608285903930664, "learning_rate": 3.996044727331558e-06, "loss": 0.472, "step": 9640 }, { "epoch": 1.6726231783483692, "grad_norm": 0.6451571583747864, "learning_rate": 3.991959499213076e-06, "loss": 0.5497, "step": 9641 }, { "epoch": 1.672796668979875, "grad_norm": 0.7177254557609558, "learning_rate": 3.987876128846349e-06, "loss": 0.5411, "step": 9642 }, { "epoch": 1.6729701596113808, "grad_norm": 1.3236669301986694, "learning_rate": 3.983794616705248e-06, "loss": 0.4778, "step": 9643 }, { "epoch": 1.673143650242887, "grad_norm": 0.9966208338737488, "learning_rate": 3.979714963263455e-06, "loss": 0.3341, "step": 9644 }, { "epoch": 1.6733171408743928, "grad_norm": 0.7492838501930237, "learning_rate": 3.975637168994397e-06, "loss": 0.4531, "step": 9645 }, { "epoch": 1.6734906315058988, "grad_norm": 0.9830329418182373, "learning_rate": 3.971561234371324e-06, "loss": 0.5131, "step": 9646 }, { "epoch": 1.6736641221374047, "grad_norm": 0.7363716959953308, "learning_rate": 3.967487159867245e-06, "loss": 0.4955, "step": 9647 }, { "epoch": 1.6738376127689105, "grad_norm": 0.6564109921455383, "learning_rate": 3.963414945954962e-06, "loss": 0.4585, "step": 9648 }, { "epoch": 1.6740111034004164, "grad_norm": 0.6819577813148499, "learning_rate": 3.959344593107057e-06, "loss": 0.5862, "step": 9649 }, { "epoch": 1.6741845940319222, "grad_norm": 0.705721914768219, "learning_rate": 3.955276101795908e-06, "loss": 0.4185, "step": 9650 }, { "epoch": 1.674358084663428, "grad_norm": 0.8432149291038513, "learning_rate": 3.951209472493664e-06, "loss": 0.4398, "step": 9651 }, { "epoch": 1.6745315752949341, "grad_norm": 1.7393169403076172, "learning_rate": 3.947144705672257e-06, "loss": 0.4418, "step": 9652 }, { "epoch": 1.67470506592644, "grad_norm": 1.0593560934066772, "learning_rate": 3.943081801803421e-06, "loss": 0.3892, "step": 9653 }, { "epoch": 1.6748785565579458, "grad_norm": 1.1183923482894897, "learning_rate": 3.939020761358641e-06, "loss": 0.5154, "step": 9654 }, { "epoch": 1.6750520471894519, "grad_norm": 0.9049838781356812, "learning_rate": 3.934961584809222e-06, "loss": 0.3672, "step": 9655 }, { "epoch": 1.6752255378209577, "grad_norm": 0.9599663615226746, "learning_rate": 3.930904272626226e-06, "loss": 0.5461, "step": 9656 }, { "epoch": 1.6753990284524636, "grad_norm": 0.94817054271698, "learning_rate": 3.926848825280524e-06, "loss": 0.4105, "step": 9657 }, { "epoch": 1.6755725190839694, "grad_norm": 1.129248857498169, "learning_rate": 3.922795243242734e-06, "loss": 0.348, "step": 9658 }, { "epoch": 1.6757460097154753, "grad_norm": 0.8643382787704468, "learning_rate": 3.918743526983295e-06, "loss": 0.3235, "step": 9659 }, { "epoch": 1.6759195003469811, "grad_norm": 0.9845125675201416, "learning_rate": 3.914693676972408e-06, "loss": 0.4991, "step": 9660 }, { "epoch": 1.6760929909784872, "grad_norm": 0.7317501306533813, "learning_rate": 3.9106456936800615e-06, "loss": 0.5369, "step": 9661 }, { "epoch": 1.676266481609993, "grad_norm": 1.0669113397598267, "learning_rate": 3.906599577576027e-06, "loss": 0.5198, "step": 9662 }, { "epoch": 1.676439972241499, "grad_norm": 0.9549260139465332, "learning_rate": 3.902555329129874e-06, "loss": 0.4857, "step": 9663 }, { "epoch": 1.676613462873005, "grad_norm": 0.8042954802513123, "learning_rate": 3.898512948810922e-06, "loss": 0.4109, "step": 9664 }, { "epoch": 1.6767869535045108, "grad_norm": 0.9220744967460632, "learning_rate": 3.894472437088308e-06, "loss": 0.353, "step": 9665 }, { "epoch": 1.6769604441360166, "grad_norm": 0.864961564540863, "learning_rate": 3.890433794430934e-06, "loss": 0.3972, "step": 9666 }, { "epoch": 1.6771339347675225, "grad_norm": 0.926474928855896, "learning_rate": 3.886397021307493e-06, "loss": 0.4734, "step": 9667 }, { "epoch": 1.6773074253990283, "grad_norm": 0.7888760566711426, "learning_rate": 3.882362118186445e-06, "loss": 0.4186, "step": 9668 }, { "epoch": 1.6774809160305344, "grad_norm": 1.256698489189148, "learning_rate": 3.878329085536061e-06, "loss": 0.3407, "step": 9669 }, { "epoch": 1.6776544066620402, "grad_norm": 1.0274312496185303, "learning_rate": 3.87429792382437e-06, "loss": 0.4354, "step": 9670 }, { "epoch": 1.6778278972935463, "grad_norm": 0.7899155020713806, "learning_rate": 3.870268633519198e-06, "loss": 0.506, "step": 9671 }, { "epoch": 1.6780013879250522, "grad_norm": 0.8052603006362915, "learning_rate": 3.86624121508814e-06, "loss": 0.3835, "step": 9672 }, { "epoch": 1.678174878556558, "grad_norm": 1.2355273962020874, "learning_rate": 3.862215668998592e-06, "loss": 0.2889, "step": 9673 }, { "epoch": 1.6783483691880638, "grad_norm": 1.290600061416626, "learning_rate": 3.858191995717722e-06, "loss": 0.407, "step": 9674 }, { "epoch": 1.6785218598195697, "grad_norm": 0.8202452659606934, "learning_rate": 3.854170195712479e-06, "loss": 0.3003, "step": 9675 }, { "epoch": 1.6786953504510755, "grad_norm": 0.9715308547019958, "learning_rate": 3.850150269449597e-06, "loss": 0.3814, "step": 9676 }, { "epoch": 1.6788688410825814, "grad_norm": 0.7318767309188843, "learning_rate": 3.846132217395593e-06, "loss": 0.4712, "step": 9677 }, { "epoch": 1.6790423317140875, "grad_norm": 0.7196683287620544, "learning_rate": 3.84211604001677e-06, "loss": 0.5443, "step": 9678 }, { "epoch": 1.6792158223455933, "grad_norm": 0.7301241159439087, "learning_rate": 3.83810173777921e-06, "loss": 0.4612, "step": 9679 }, { "epoch": 1.6793893129770994, "grad_norm": 0.8351182341575623, "learning_rate": 3.834089311148774e-06, "loss": 0.3381, "step": 9680 }, { "epoch": 1.6795628036086052, "grad_norm": 0.6372829079627991, "learning_rate": 3.830078760591107e-06, "loss": 0.4675, "step": 9681 }, { "epoch": 1.679736294240111, "grad_norm": 0.8103852868080139, "learning_rate": 3.826070086571651e-06, "loss": 0.3896, "step": 9682 }, { "epoch": 1.679909784871617, "grad_norm": 0.9043280482292175, "learning_rate": 3.822063289555597e-06, "loss": 0.4155, "step": 9683 }, { "epoch": 1.6800832755031228, "grad_norm": 0.7413236498832703, "learning_rate": 3.818058370007956e-06, "loss": 0.4149, "step": 9684 }, { "epoch": 1.6802567661346286, "grad_norm": 1.0025862455368042, "learning_rate": 3.814055328393491e-06, "loss": 0.4901, "step": 9685 }, { "epoch": 1.6804302567661347, "grad_norm": 0.679567813873291, "learning_rate": 3.810054165176775e-06, "loss": 0.5236, "step": 9686 }, { "epoch": 1.6806037473976405, "grad_norm": 0.6744206547737122, "learning_rate": 3.8060548808221277e-06, "loss": 0.4021, "step": 9687 }, { "epoch": 1.6807772380291466, "grad_norm": 0.9984070658683777, "learning_rate": 3.802057475793688e-06, "loss": 0.494, "step": 9688 }, { "epoch": 1.6809507286606524, "grad_norm": 0.649757444858551, "learning_rate": 3.7980619505553516e-06, "loss": 0.4397, "step": 9689 }, { "epoch": 1.6811242192921583, "grad_norm": 0.7462676167488098, "learning_rate": 3.794068305570804e-06, "loss": 0.4412, "step": 9690 }, { "epoch": 1.6812977099236641, "grad_norm": 0.6842268109321594, "learning_rate": 3.790076541303509e-06, "loss": 0.53, "step": 9691 }, { "epoch": 1.68147120055517, "grad_norm": 0.9055030345916748, "learning_rate": 3.78608665821673e-06, "loss": 0.4002, "step": 9692 }, { "epoch": 1.6816446911866758, "grad_norm": 0.6715995669364929, "learning_rate": 3.7820986567734787e-06, "loss": 0.4067, "step": 9693 }, { "epoch": 1.6818181818181817, "grad_norm": 0.6809877157211304, "learning_rate": 3.778112537436578e-06, "loss": 0.3868, "step": 9694 }, { "epoch": 1.6819916724496877, "grad_norm": 0.6496911644935608, "learning_rate": 3.7741283006686204e-06, "loss": 0.4899, "step": 9695 }, { "epoch": 1.6821651630811936, "grad_norm": 0.783438503742218, "learning_rate": 3.7701459469319824e-06, "loss": 0.5194, "step": 9696 }, { "epoch": 1.6823386537126996, "grad_norm": 1.6692430973052979, "learning_rate": 3.7661654766888124e-06, "loss": 0.4019, "step": 9697 }, { "epoch": 1.6825121443442055, "grad_norm": 0.7749941349029541, "learning_rate": 3.7621868904010585e-06, "loss": 0.5715, "step": 9698 }, { "epoch": 1.6826856349757113, "grad_norm": 0.7720735669136047, "learning_rate": 3.758210188530438e-06, "loss": 0.4672, "step": 9699 }, { "epoch": 1.6828591256072172, "grad_norm": 0.7236547470092773, "learning_rate": 3.7542353715384462e-06, "loss": 0.4265, "step": 9700 }, { "epoch": 1.683032616238723, "grad_norm": 0.9045554995536804, "learning_rate": 3.750262439886374e-06, "loss": 0.6182, "step": 9701 }, { "epoch": 1.6832061068702289, "grad_norm": 0.8127009272575378, "learning_rate": 3.7462913940352797e-06, "loss": 0.3979, "step": 9702 }, { "epoch": 1.683379597501735, "grad_norm": 1.0219383239746094, "learning_rate": 3.7423222344460096e-06, "loss": 0.4023, "step": 9703 }, { "epoch": 1.6835530881332408, "grad_norm": 0.6961208581924438, "learning_rate": 3.7383549615791826e-06, "loss": 0.489, "step": 9704 }, { "epoch": 1.6837265787647469, "grad_norm": 0.7545673251152039, "learning_rate": 3.734389575895221e-06, "loss": 0.5376, "step": 9705 }, { "epoch": 1.6839000693962527, "grad_norm": 0.666425347328186, "learning_rate": 3.7304260778542924e-06, "loss": 0.5713, "step": 9706 }, { "epoch": 1.6840735600277585, "grad_norm": 0.7239332795143127, "learning_rate": 3.726464467916382e-06, "loss": 0.4697, "step": 9707 }, { "epoch": 1.6842470506592644, "grad_norm": 0.8578612208366394, "learning_rate": 3.722504746541229e-06, "loss": 0.4288, "step": 9708 }, { "epoch": 1.6844205412907702, "grad_norm": 0.9409584403038025, "learning_rate": 3.71854691418837e-06, "loss": 0.428, "step": 9709 }, { "epoch": 1.684594031922276, "grad_norm": 0.6740388870239258, "learning_rate": 3.714590971317107e-06, "loss": 0.4722, "step": 9710 }, { "epoch": 1.6847675225537821, "grad_norm": 0.7901949286460876, "learning_rate": 3.710636918386543e-06, "loss": 0.4281, "step": 9711 }, { "epoch": 1.684941013185288, "grad_norm": 0.7378773093223572, "learning_rate": 3.706684755855545e-06, "loss": 0.4368, "step": 9712 }, { "epoch": 1.6851145038167938, "grad_norm": 0.9257503747940063, "learning_rate": 3.7027344841827684e-06, "loss": 0.3464, "step": 9713 }, { "epoch": 1.6852879944483, "grad_norm": 0.755611777305603, "learning_rate": 3.698786103826639e-06, "loss": 0.4982, "step": 9714 }, { "epoch": 1.6854614850798058, "grad_norm": 0.6503063440322876, "learning_rate": 3.694839615245387e-06, "loss": 0.5034, "step": 9715 }, { "epoch": 1.6856349757113116, "grad_norm": 0.7667842507362366, "learning_rate": 3.690895018896987e-06, "loss": 0.5358, "step": 9716 }, { "epoch": 1.6858084663428174, "grad_norm": 0.6968609094619751, "learning_rate": 3.6869523152392296e-06, "loss": 0.5353, "step": 9717 }, { "epoch": 1.6859819569743233, "grad_norm": 0.944675624370575, "learning_rate": 3.6830115047296633e-06, "loss": 0.413, "step": 9718 }, { "epoch": 1.6861554476058291, "grad_norm": 0.8377568125724792, "learning_rate": 3.679072587825625e-06, "loss": 0.5443, "step": 9719 }, { "epoch": 1.6863289382373352, "grad_norm": 0.794916033744812, "learning_rate": 3.675135564984227e-06, "loss": 0.3719, "step": 9720 }, { "epoch": 1.686502428868841, "grad_norm": 0.7275283336639404, "learning_rate": 3.671200436662372e-06, "loss": 0.4502, "step": 9721 }, { "epoch": 1.6866759195003471, "grad_norm": 0.7036992311477661, "learning_rate": 3.6672672033167333e-06, "loss": 0.512, "step": 9722 }, { "epoch": 1.686849410131853, "grad_norm": 0.757016122341156, "learning_rate": 3.6633358654037676e-06, "loss": 0.5358, "step": 9723 }, { "epoch": 1.6870229007633588, "grad_norm": 4.114889144897461, "learning_rate": 3.6594064233797123e-06, "loss": 0.5155, "step": 9724 }, { "epoch": 1.6871963913948647, "grad_norm": 1.4096416234970093, "learning_rate": 3.6554788777005758e-06, "loss": 0.4038, "step": 9725 }, { "epoch": 1.6873698820263705, "grad_norm": 0.6038339138031006, "learning_rate": 3.6515532288221646e-06, "loss": 0.4409, "step": 9726 }, { "epoch": 1.6875433726578764, "grad_norm": 0.6808177828788757, "learning_rate": 3.647629477200052e-06, "loss": 0.5103, "step": 9727 }, { "epoch": 1.6877168632893824, "grad_norm": 0.8416544198989868, "learning_rate": 3.643707623289592e-06, "loss": 0.4237, "step": 9728 }, { "epoch": 1.6878903539208883, "grad_norm": 0.6936036348342896, "learning_rate": 3.6397876675459175e-06, "loss": 0.3955, "step": 9729 }, { "epoch": 1.6880638445523943, "grad_norm": 0.9723979234695435, "learning_rate": 3.635869610423952e-06, "loss": 0.5072, "step": 9730 }, { "epoch": 1.6882373351839002, "grad_norm": 0.7053537368774414, "learning_rate": 3.6319534523783872e-06, "loss": 0.4186, "step": 9731 }, { "epoch": 1.688410825815406, "grad_norm": 2.3634746074676514, "learning_rate": 3.628039193863695e-06, "loss": 0.3661, "step": 9732 }, { "epoch": 1.6885843164469119, "grad_norm": 0.8253313899040222, "learning_rate": 3.6241268353341276e-06, "loss": 0.4602, "step": 9733 }, { "epoch": 1.6887578070784177, "grad_norm": 0.7930001020431519, "learning_rate": 3.6202163772437326e-06, "loss": 0.4257, "step": 9734 }, { "epoch": 1.6889312977099236, "grad_norm": 0.8238438963890076, "learning_rate": 3.616307820046303e-06, "loss": 0.4066, "step": 9735 }, { "epoch": 1.6891047883414294, "grad_norm": 0.9378172755241394, "learning_rate": 3.6124011641954473e-06, "loss": 0.5157, "step": 9736 }, { "epoch": 1.6892782789729355, "grad_norm": 0.7956205606460571, "learning_rate": 3.6084964101445307e-06, "loss": 0.3901, "step": 9737 }, { "epoch": 1.6894517696044413, "grad_norm": 0.742908239364624, "learning_rate": 3.6045935583467053e-06, "loss": 0.4529, "step": 9738 }, { "epoch": 1.6896252602359474, "grad_norm": 0.7737570405006409, "learning_rate": 3.6006926092548988e-06, "loss": 0.5082, "step": 9739 }, { "epoch": 1.6897987508674532, "grad_norm": 0.5851488709449768, "learning_rate": 3.5967935633218277e-06, "loss": 0.5751, "step": 9740 }, { "epoch": 1.689972241498959, "grad_norm": 0.9748550057411194, "learning_rate": 3.5928964209999784e-06, "loss": 0.4352, "step": 9741 }, { "epoch": 1.690145732130465, "grad_norm": 0.7415540218353271, "learning_rate": 3.589001182741616e-06, "loss": 0.3856, "step": 9742 }, { "epoch": 1.6903192227619708, "grad_norm": 0.9179946184158325, "learning_rate": 3.5851078489987834e-06, "loss": 0.4203, "step": 9743 }, { "epoch": 1.6904927133934766, "grad_norm": 0.8719099164009094, "learning_rate": 3.5812164202233236e-06, "loss": 0.4686, "step": 9744 }, { "epoch": 1.6906662040249827, "grad_norm": 0.8644099831581116, "learning_rate": 3.5773268968668194e-06, "loss": 0.5646, "step": 9745 }, { "epoch": 1.6908396946564885, "grad_norm": 1.2237269878387451, "learning_rate": 3.5734392793806704e-06, "loss": 0.3469, "step": 9746 }, { "epoch": 1.6910131852879946, "grad_norm": 0.8852053880691528, "learning_rate": 3.569553568216033e-06, "loss": 0.4629, "step": 9747 }, { "epoch": 1.6911866759195004, "grad_norm": 0.9039144515991211, "learning_rate": 3.5656697638238447e-06, "loss": 0.3802, "step": 9748 }, { "epoch": 1.6913601665510063, "grad_norm": 0.9759804010391235, "learning_rate": 3.5617878666548354e-06, "loss": 0.4327, "step": 9749 }, { "epoch": 1.6915336571825121, "grad_norm": 0.80108243227005, "learning_rate": 3.5579078771594988e-06, "loss": 0.402, "step": 9750 }, { "epoch": 1.691707147814018, "grad_norm": 0.8020294308662415, "learning_rate": 3.55402979578811e-06, "loss": 0.5035, "step": 9751 }, { "epoch": 1.6918806384455238, "grad_norm": 0.6639646887779236, "learning_rate": 3.550153622990724e-06, "loss": 0.5635, "step": 9752 }, { "epoch": 1.6920541290770297, "grad_norm": 0.697598934173584, "learning_rate": 3.546279359217186e-06, "loss": 0.4956, "step": 9753 }, { "epoch": 1.6922276197085357, "grad_norm": 0.7204028367996216, "learning_rate": 3.542407004917092e-06, "loss": 0.4203, "step": 9754 }, { "epoch": 1.6924011103400416, "grad_norm": 0.6753879189491272, "learning_rate": 3.538536560539847e-06, "loss": 0.5222, "step": 9755 }, { "epoch": 1.6925746009715477, "grad_norm": 1.0081450939178467, "learning_rate": 3.5346680265346113e-06, "loss": 0.4051, "step": 9756 }, { "epoch": 1.6927480916030535, "grad_norm": 0.9310484528541565, "learning_rate": 3.530801403350346e-06, "loss": 0.4008, "step": 9757 }, { "epoch": 1.6929215822345594, "grad_norm": 1.2470906972885132, "learning_rate": 3.5269366914357585e-06, "loss": 0.5457, "step": 9758 }, { "epoch": 1.6930950728660652, "grad_norm": 1.6500725746154785, "learning_rate": 3.523073891239368e-06, "loss": 0.6094, "step": 9759 }, { "epoch": 1.693268563497571, "grad_norm": 0.8279609084129333, "learning_rate": 3.5192130032094517e-06, "loss": 0.5032, "step": 9760 }, { "epoch": 1.693442054129077, "grad_norm": 0.6731119751930237, "learning_rate": 3.515354027794069e-06, "loss": 0.5054, "step": 9761 }, { "epoch": 1.693615544760583, "grad_norm": 1.3643908500671387, "learning_rate": 3.511496965441057e-06, "loss": 0.5199, "step": 9762 }, { "epoch": 1.6937890353920888, "grad_norm": 0.8825380802154541, "learning_rate": 3.507641816598044e-06, "loss": 0.4775, "step": 9763 }, { "epoch": 1.6939625260235949, "grad_norm": 5.034497261047363, "learning_rate": 3.503788581712406e-06, "loss": 0.4719, "step": 9764 }, { "epoch": 1.6941360166551007, "grad_norm": 0.7237112522125244, "learning_rate": 3.49993726123133e-06, "loss": 0.5272, "step": 9765 }, { "epoch": 1.6943095072866066, "grad_norm": 0.682573676109314, "learning_rate": 3.4960878556017597e-06, "loss": 0.473, "step": 9766 }, { "epoch": 1.6944829979181124, "grad_norm": 1.1272791624069214, "learning_rate": 3.492240365270425e-06, "loss": 0.3773, "step": 9767 }, { "epoch": 1.6946564885496183, "grad_norm": 0.7569313645362854, "learning_rate": 3.488394790683829e-06, "loss": 0.4498, "step": 9768 }, { "epoch": 1.694829979181124, "grad_norm": 0.8451586961746216, "learning_rate": 3.4845511322882587e-06, "loss": 0.3754, "step": 9769 }, { "epoch": 1.6950034698126302, "grad_norm": 0.864126443862915, "learning_rate": 3.480709390529777e-06, "loss": 0.3108, "step": 9770 }, { "epoch": 1.695176960444136, "grad_norm": 2.0130836963653564, "learning_rate": 3.476869565854217e-06, "loss": 0.4581, "step": 9771 }, { "epoch": 1.6953504510756419, "grad_norm": 0.8672719597816467, "learning_rate": 3.473031658707193e-06, "loss": 0.3718, "step": 9772 }, { "epoch": 1.695523941707148, "grad_norm": 0.7420392036437988, "learning_rate": 3.469195669534109e-06, "loss": 0.548, "step": 9773 }, { "epoch": 1.6956974323386538, "grad_norm": 0.6630168557167053, "learning_rate": 3.465361598780128e-06, "loss": 0.4626, "step": 9774 }, { "epoch": 1.6958709229701596, "grad_norm": 1.0279978513717651, "learning_rate": 3.4615294468902017e-06, "loss": 0.5593, "step": 9775 }, { "epoch": 1.6960444136016655, "grad_norm": 0.9407429099082947, "learning_rate": 3.4576992143090517e-06, "loss": 0.3635, "step": 9776 }, { "epoch": 1.6962179042331713, "grad_norm": 0.8444896340370178, "learning_rate": 3.45387090148118e-06, "loss": 0.5234, "step": 9777 }, { "epoch": 1.6963913948646772, "grad_norm": 0.9997003078460693, "learning_rate": 3.450044508850876e-06, "loss": 0.4099, "step": 9778 }, { "epoch": 1.6965648854961832, "grad_norm": 0.8258613348007202, "learning_rate": 3.446220036862191e-06, "loss": 0.4784, "step": 9779 }, { "epoch": 1.696738376127689, "grad_norm": 0.7915644645690918, "learning_rate": 3.4423974859589594e-06, "loss": 0.489, "step": 9780 }, { "epoch": 1.6969118667591951, "grad_norm": 1.6192443370819092, "learning_rate": 3.4385768565847876e-06, "loss": 0.3755, "step": 9781 }, { "epoch": 1.697085357390701, "grad_norm": 0.7252005934715271, "learning_rate": 3.4347581491830796e-06, "loss": 0.3982, "step": 9782 }, { "epoch": 1.6972588480222068, "grad_norm": 0.8437852263450623, "learning_rate": 3.4309413641969802e-06, "loss": 0.4167, "step": 9783 }, { "epoch": 1.6974323386537127, "grad_norm": 0.7831665277481079, "learning_rate": 3.427126502069449e-06, "loss": 0.436, "step": 9784 }, { "epoch": 1.6976058292852185, "grad_norm": 0.9040247201919556, "learning_rate": 3.4233135632431913e-06, "loss": 0.5322, "step": 9785 }, { "epoch": 1.6977793199167244, "grad_norm": 0.8263068199157715, "learning_rate": 3.4195025481607224e-06, "loss": 0.52, "step": 9786 }, { "epoch": 1.6979528105482304, "grad_norm": 0.5854077339172363, "learning_rate": 3.415693457264291e-06, "loss": 0.4957, "step": 9787 }, { "epoch": 1.6981263011797363, "grad_norm": 0.7523157000541687, "learning_rate": 3.411886290995965e-06, "loss": 0.4803, "step": 9788 }, { "epoch": 1.6982997918112424, "grad_norm": 1.736129641532898, "learning_rate": 3.4080810497975626e-06, "loss": 0.5338, "step": 9789 }, { "epoch": 1.6984732824427482, "grad_norm": 0.8919421434402466, "learning_rate": 3.4042777341106903e-06, "loss": 0.432, "step": 9790 }, { "epoch": 1.698646773074254, "grad_norm": 0.7974951863288879, "learning_rate": 3.4004763443767175e-06, "loss": 0.49, "step": 9791 }, { "epoch": 1.69882026370576, "grad_norm": 1.1867344379425049, "learning_rate": 3.3966768810368132e-06, "loss": 0.4305, "step": 9792 }, { "epoch": 1.6989937543372657, "grad_norm": 0.9479686617851257, "learning_rate": 3.392879344531903e-06, "loss": 0.3116, "step": 9793 }, { "epoch": 1.6991672449687716, "grad_norm": 0.8172270655632019, "learning_rate": 3.3890837353026964e-06, "loss": 0.5779, "step": 9794 }, { "epoch": 1.6993407356002774, "grad_norm": 0.6842443943023682, "learning_rate": 3.385290053789676e-06, "loss": 0.4427, "step": 9795 }, { "epoch": 1.6995142262317835, "grad_norm": 1.7373539209365845, "learning_rate": 3.3814983004331014e-06, "loss": 0.4446, "step": 9796 }, { "epoch": 1.6996877168632893, "grad_norm": 0.6328423619270325, "learning_rate": 3.3777084756730183e-06, "loss": 0.6019, "step": 9797 }, { "epoch": 1.6998612074947954, "grad_norm": 0.8261460661888123, "learning_rate": 3.373920579949237e-06, "loss": 0.4897, "step": 9798 }, { "epoch": 1.7000346981263013, "grad_norm": 0.8764965534210205, "learning_rate": 3.3701346137013435e-06, "loss": 0.4509, "step": 9799 }, { "epoch": 1.700208188757807, "grad_norm": 0.7964087724685669, "learning_rate": 3.3663505773687023e-06, "loss": 0.4847, "step": 9800 }, { "epoch": 1.700381679389313, "grad_norm": 0.984257698059082, "learning_rate": 3.3625684713904617e-06, "loss": 0.4481, "step": 9801 }, { "epoch": 1.7005551700208188, "grad_norm": 0.7451890110969543, "learning_rate": 3.3587882962055374e-06, "loss": 0.396, "step": 9802 }, { "epoch": 1.7007286606523246, "grad_norm": 0.7505077719688416, "learning_rate": 3.355010052252623e-06, "loss": 0.3909, "step": 9803 }, { "epoch": 1.7009021512838307, "grad_norm": 0.7706956267356873, "learning_rate": 3.3512337399701813e-06, "loss": 0.4438, "step": 9804 }, { "epoch": 1.7010756419153366, "grad_norm": 0.784045398235321, "learning_rate": 3.3474593597964746e-06, "loss": 0.6011, "step": 9805 }, { "epoch": 1.7012491325468426, "grad_norm": 0.5819199085235596, "learning_rate": 3.3436869121695013e-06, "loss": 0.5012, "step": 9806 }, { "epoch": 1.7014226231783485, "grad_norm": 1.122066855430603, "learning_rate": 3.3399163975270786e-06, "loss": 0.5092, "step": 9807 }, { "epoch": 1.7015961138098543, "grad_norm": 0.7156979441642761, "learning_rate": 3.3361478163067673e-06, "loss": 0.3828, "step": 9808 }, { "epoch": 1.7017696044413602, "grad_norm": 0.9581041932106018, "learning_rate": 3.332381168945922e-06, "loss": 0.3796, "step": 9809 }, { "epoch": 1.701943095072866, "grad_norm": 0.644724428653717, "learning_rate": 3.328616455881657e-06, "loss": 0.5094, "step": 9810 }, { "epoch": 1.7021165857043719, "grad_norm": 0.6522907018661499, "learning_rate": 3.324853677550888e-06, "loss": 0.5576, "step": 9811 }, { "epoch": 1.7022900763358777, "grad_norm": 0.9530681371688843, "learning_rate": 3.3210928343902716e-06, "loss": 0.432, "step": 9812 }, { "epoch": 1.7024635669673838, "grad_norm": 0.8103886246681213, "learning_rate": 3.31733392683627e-06, "loss": 0.429, "step": 9813 }, { "epoch": 1.7026370575988896, "grad_norm": 0.8654118180274963, "learning_rate": 3.3135769553251017e-06, "loss": 0.5895, "step": 9814 }, { "epoch": 1.7028105482303957, "grad_norm": 0.8727630376815796, "learning_rate": 3.3098219202927815e-06, "loss": 0.395, "step": 9815 }, { "epoch": 1.7029840388619015, "grad_norm": 2.0842878818511963, "learning_rate": 3.3060688221750637e-06, "loss": 0.416, "step": 9816 }, { "epoch": 1.7031575294934074, "grad_norm": 0.7195104956626892, "learning_rate": 3.302317661407519e-06, "loss": 0.5002, "step": 9817 }, { "epoch": 1.7033310201249132, "grad_norm": 0.9616917967796326, "learning_rate": 3.2985684384254648e-06, "loss": 0.4795, "step": 9818 }, { "epoch": 1.703504510756419, "grad_norm": 2.126901388168335, "learning_rate": 3.294821153664003e-06, "loss": 0.3903, "step": 9819 }, { "epoch": 1.703678001387925, "grad_norm": 0.8265792727470398, "learning_rate": 3.2910758075580085e-06, "loss": 0.4197, "step": 9820 }, { "epoch": 1.703851492019431, "grad_norm": 1.0048558712005615, "learning_rate": 3.28733240054214e-06, "loss": 0.3671, "step": 9821 }, { "epoch": 1.7040249826509368, "grad_norm": 1.233837604522705, "learning_rate": 3.283590933050822e-06, "loss": 0.4093, "step": 9822 }, { "epoch": 1.704198473282443, "grad_norm": 0.674347460269928, "learning_rate": 3.2798514055182486e-06, "loss": 0.5133, "step": 9823 }, { "epoch": 1.7043719639139487, "grad_norm": 1.1877583265304565, "learning_rate": 3.2761138183784126e-06, "loss": 0.3771, "step": 9824 }, { "epoch": 1.7045454545454546, "grad_norm": 0.7793569564819336, "learning_rate": 3.2723781720650473e-06, "loss": 0.3204, "step": 9825 }, { "epoch": 1.7047189451769604, "grad_norm": 0.6692693829536438, "learning_rate": 3.2686444670116878e-06, "loss": 0.4117, "step": 9826 }, { "epoch": 1.7048924358084663, "grad_norm": 0.8593830466270447, "learning_rate": 3.2649127036516325e-06, "loss": 0.3987, "step": 9827 }, { "epoch": 1.7050659264399721, "grad_norm": 0.677654504776001, "learning_rate": 3.261182882417966e-06, "loss": 0.5596, "step": 9828 }, { "epoch": 1.7052394170714782, "grad_norm": 0.7616249322891235, "learning_rate": 3.2574550037435214e-06, "loss": 0.4303, "step": 9829 }, { "epoch": 1.705412907702984, "grad_norm": 1.898205280303955, "learning_rate": 3.253729068060938e-06, "loss": 0.4346, "step": 9830 }, { "epoch": 1.7055863983344899, "grad_norm": 0.7222949862480164, "learning_rate": 3.250005075802607e-06, "loss": 0.4666, "step": 9831 }, { "epoch": 1.705759888965996, "grad_norm": 1.2181752920150757, "learning_rate": 3.2462830274007073e-06, "loss": 0.3671, "step": 9832 }, { "epoch": 1.7059333795975018, "grad_norm": 0.9278891086578369, "learning_rate": 3.242562923287178e-06, "loss": 0.4618, "step": 9833 }, { "epoch": 1.7061068702290076, "grad_norm": 0.6020893454551697, "learning_rate": 3.2388447638937583e-06, "loss": 0.4412, "step": 9834 }, { "epoch": 1.7062803608605135, "grad_norm": 0.7284871339797974, "learning_rate": 3.2351285496519247e-06, "loss": 0.392, "step": 9835 }, { "epoch": 1.7064538514920193, "grad_norm": 1.2857002019882202, "learning_rate": 3.2314142809929617e-06, "loss": 0.3588, "step": 9836 }, { "epoch": 1.7066273421235252, "grad_norm": 0.7840943336486816, "learning_rate": 3.227701958347911e-06, "loss": 0.4854, "step": 9837 }, { "epoch": 1.7068008327550312, "grad_norm": 1.0641329288482666, "learning_rate": 3.223991582147592e-06, "loss": 0.3261, "step": 9838 }, { "epoch": 1.706974323386537, "grad_norm": 1.1004409790039062, "learning_rate": 3.220283152822592e-06, "loss": 0.4625, "step": 9839 }, { "epoch": 1.7071478140180432, "grad_norm": 1.3531230688095093, "learning_rate": 3.216576670803291e-06, "loss": 0.3833, "step": 9840 }, { "epoch": 1.707321304649549, "grad_norm": 0.7847244739532471, "learning_rate": 3.2128721365198223e-06, "loss": 0.4813, "step": 9841 }, { "epoch": 1.7074947952810549, "grad_norm": 0.9855622053146362, "learning_rate": 3.2091695504021047e-06, "loss": 0.4823, "step": 9842 }, { "epoch": 1.7076682859125607, "grad_norm": 0.7081166505813599, "learning_rate": 3.205468912879821e-06, "loss": 0.464, "step": 9843 }, { "epoch": 1.7078417765440665, "grad_norm": 0.9425329566001892, "learning_rate": 3.2017702243824434e-06, "loss": 0.4929, "step": 9844 }, { "epoch": 1.7080152671755724, "grad_norm": 0.6763478517532349, "learning_rate": 3.198073485339204e-06, "loss": 0.3898, "step": 9845 }, { "epoch": 1.7081887578070785, "grad_norm": 0.6645935773849487, "learning_rate": 3.1943786961791166e-06, "loss": 0.5366, "step": 9846 }, { "epoch": 1.7083622484385843, "grad_norm": 0.9655393958091736, "learning_rate": 3.190685857330964e-06, "loss": 0.438, "step": 9847 }, { "epoch": 1.7085357390700904, "grad_norm": 0.9493473768234253, "learning_rate": 3.1869949692232982e-06, "loss": 0.4033, "step": 9848 }, { "epoch": 1.7087092297015962, "grad_norm": 0.7750371098518372, "learning_rate": 3.1833060322844633e-06, "loss": 0.3461, "step": 9849 }, { "epoch": 1.708882720333102, "grad_norm": 0.7906068563461304, "learning_rate": 3.179619046942557e-06, "loss": 0.5339, "step": 9850 }, { "epoch": 1.709056210964608, "grad_norm": 0.8526831865310669, "learning_rate": 3.1759340136254614e-06, "loss": 0.4376, "step": 9851 }, { "epoch": 1.7092297015961138, "grad_norm": 0.7395734786987305, "learning_rate": 3.172250932760823e-06, "loss": 0.5119, "step": 9852 }, { "epoch": 1.7094031922276196, "grad_norm": 1.161130428314209, "learning_rate": 3.1685698047760806e-06, "loss": 0.4056, "step": 9853 }, { "epoch": 1.7095766828591255, "grad_norm": 0.977079451084137, "learning_rate": 3.164890630098416e-06, "loss": 0.4141, "step": 9854 }, { "epoch": 1.7097501734906315, "grad_norm": 0.8077842593193054, "learning_rate": 3.1612134091548153e-06, "loss": 0.3906, "step": 9855 }, { "epoch": 1.7099236641221374, "grad_norm": 0.7422673106193542, "learning_rate": 3.1575381423720142e-06, "loss": 0.4556, "step": 9856 }, { "epoch": 1.7100971547536434, "grad_norm": 0.8558520078659058, "learning_rate": 3.153864830176547e-06, "loss": 0.3735, "step": 9857 }, { "epoch": 1.7102706453851493, "grad_norm": 0.9983931183815002, "learning_rate": 3.150193472994687e-06, "loss": 0.3532, "step": 9858 }, { "epoch": 1.7104441360166551, "grad_norm": 1.005107045173645, "learning_rate": 3.1465240712525124e-06, "loss": 0.3118, "step": 9859 }, { "epoch": 1.710617626648161, "grad_norm": 1.0908174514770508, "learning_rate": 3.142856625375856e-06, "loss": 0.4779, "step": 9860 }, { "epoch": 1.7107911172796668, "grad_norm": 1.317470908164978, "learning_rate": 3.139191135790334e-06, "loss": 0.3978, "step": 9861 }, { "epoch": 1.7109646079111727, "grad_norm": 0.7371059060096741, "learning_rate": 3.13552760292132e-06, "loss": 0.4893, "step": 9862 }, { "epoch": 1.7111380985426787, "grad_norm": 1.1593574285507202, "learning_rate": 3.131866027193988e-06, "loss": 0.4475, "step": 9863 }, { "epoch": 1.7113115891741846, "grad_norm": 1.3534239530563354, "learning_rate": 3.1282064090332522e-06, "loss": 0.4206, "step": 9864 }, { "epoch": 1.7114850798056906, "grad_norm": 0.6876006126403809, "learning_rate": 3.1245487488638247e-06, "loss": 0.449, "step": 9865 }, { "epoch": 1.7116585704371965, "grad_norm": 0.7555912733078003, "learning_rate": 3.1208930471101786e-06, "loss": 0.4205, "step": 9866 }, { "epoch": 1.7118320610687023, "grad_norm": 0.6664054989814758, "learning_rate": 3.1172393041965644e-06, "loss": 0.4686, "step": 9867 }, { "epoch": 1.7120055517002082, "grad_norm": 0.8035487532615662, "learning_rate": 3.1135875205469946e-06, "loss": 0.3636, "step": 9868 }, { "epoch": 1.712179042331714, "grad_norm": 1.3106471300125122, "learning_rate": 3.1099376965852744e-06, "loss": 0.4021, "step": 9869 }, { "epoch": 1.7123525329632199, "grad_norm": 0.740695059299469, "learning_rate": 3.1062898327349656e-06, "loss": 0.4259, "step": 9870 }, { "epoch": 1.7125260235947257, "grad_norm": 0.9075673222541809, "learning_rate": 3.102643929419402e-06, "loss": 0.4011, "step": 9871 }, { "epoch": 1.7126995142262318, "grad_norm": 0.7000410556793213, "learning_rate": 3.098999987061706e-06, "loss": 0.4046, "step": 9872 }, { "epoch": 1.7128730048577376, "grad_norm": 0.7891289591789246, "learning_rate": 3.0953580060847545e-06, "loss": 0.4453, "step": 9873 }, { "epoch": 1.7130464954892437, "grad_norm": 0.9291444420814514, "learning_rate": 3.0917179869112023e-06, "loss": 0.4176, "step": 9874 }, { "epoch": 1.7132199861207495, "grad_norm": 1.2433059215545654, "learning_rate": 3.0880799299634767e-06, "loss": 0.4355, "step": 9875 }, { "epoch": 1.7133934767522554, "grad_norm": 0.7945812940597534, "learning_rate": 3.084443835663791e-06, "loss": 0.4093, "step": 9876 }, { "epoch": 1.7135669673837612, "grad_norm": 0.7113139033317566, "learning_rate": 3.080809704434098e-06, "loss": 0.4581, "step": 9877 }, { "epoch": 1.713740458015267, "grad_norm": 0.7200466394424438, "learning_rate": 3.077177536696159e-06, "loss": 0.4387, "step": 9878 }, { "epoch": 1.713913948646773, "grad_norm": 0.5357228517532349, "learning_rate": 3.0735473328714873e-06, "loss": 0.5604, "step": 9879 }, { "epoch": 1.714087439278279, "grad_norm": 0.7950665950775146, "learning_rate": 3.0699190933813683e-06, "loss": 0.5728, "step": 9880 }, { "epoch": 1.7142609299097848, "grad_norm": 0.5989641547203064, "learning_rate": 3.066292818646863e-06, "loss": 0.4639, "step": 9881 }, { "epoch": 1.714434420541291, "grad_norm": 0.85533607006073, "learning_rate": 3.0626685090888177e-06, "loss": 0.4712, "step": 9882 }, { "epoch": 1.7146079111727968, "grad_norm": 0.8431836366653442, "learning_rate": 3.0590461651278168e-06, "loss": 0.5385, "step": 9883 }, { "epoch": 1.7147814018043026, "grad_norm": 4.216972827911377, "learning_rate": 3.0554257871842543e-06, "loss": 0.4375, "step": 9884 }, { "epoch": 1.7149548924358085, "grad_norm": 0.6002256870269775, "learning_rate": 3.0518073756782683e-06, "loss": 0.5396, "step": 9885 }, { "epoch": 1.7151283830673143, "grad_norm": 0.7627999186515808, "learning_rate": 3.0481909310297954e-06, "loss": 0.4133, "step": 9886 }, { "epoch": 1.7153018736988201, "grad_norm": 0.7516846656799316, "learning_rate": 3.0445764536585076e-06, "loss": 0.5165, "step": 9887 }, { "epoch": 1.7154753643303262, "grad_norm": 0.8690098524093628, "learning_rate": 3.0409639439838833e-06, "loss": 0.4739, "step": 9888 }, { "epoch": 1.715648854961832, "grad_norm": 0.7863034009933472, "learning_rate": 3.037353402425154e-06, "loss": 0.402, "step": 9889 }, { "epoch": 1.715822345593338, "grad_norm": 0.7509139180183411, "learning_rate": 3.0337448294013307e-06, "loss": 0.4033, "step": 9890 }, { "epoch": 1.715995836224844, "grad_norm": 0.8073070645332336, "learning_rate": 3.0301382253311828e-06, "loss": 0.3865, "step": 9891 }, { "epoch": 1.7161693268563498, "grad_norm": 1.1924837827682495, "learning_rate": 3.0265335906332717e-06, "loss": 0.3853, "step": 9892 }, { "epoch": 1.7163428174878557, "grad_norm": 2.007868528366089, "learning_rate": 3.0229309257259154e-06, "loss": 0.4081, "step": 9893 }, { "epoch": 1.7165163081193615, "grad_norm": 0.9190699458122253, "learning_rate": 3.019330231027209e-06, "loss": 0.3104, "step": 9894 }, { "epoch": 1.7166897987508674, "grad_norm": 0.7925846576690674, "learning_rate": 3.015731506955015e-06, "loss": 0.4226, "step": 9895 }, { "epoch": 1.7168632893823732, "grad_norm": 1.215623140335083, "learning_rate": 3.012134753926965e-06, "loss": 0.4392, "step": 9896 }, { "epoch": 1.7170367800138793, "grad_norm": 0.7585077285766602, "learning_rate": 3.008539972360478e-06, "loss": 0.4553, "step": 9897 }, { "epoch": 1.7172102706453851, "grad_norm": 0.9052568674087524, "learning_rate": 3.0049471626727246e-06, "loss": 0.5319, "step": 9898 }, { "epoch": 1.7173837612768912, "grad_norm": 0.7209457159042358, "learning_rate": 3.0013563252806576e-06, "loss": 0.4357, "step": 9899 }, { "epoch": 1.717557251908397, "grad_norm": 0.6793199777603149, "learning_rate": 2.997767460600991e-06, "loss": 0.5719, "step": 9900 }, { "epoch": 1.7177307425399029, "grad_norm": 0.8123597502708435, "learning_rate": 2.9941805690502246e-06, "loss": 0.4197, "step": 9901 }, { "epoch": 1.7179042331714087, "grad_norm": 0.9759712219238281, "learning_rate": 2.990595651044621e-06, "loss": 0.3523, "step": 9902 }, { "epoch": 1.7180777238029146, "grad_norm": 2.39614200592041, "learning_rate": 2.9870127070002117e-06, "loss": 0.3889, "step": 9903 }, { "epoch": 1.7182512144344204, "grad_norm": 1.0239982604980469, "learning_rate": 2.9834317373327983e-06, "loss": 0.4694, "step": 9904 }, { "epoch": 1.7184247050659265, "grad_norm": 0.8857870697975159, "learning_rate": 2.979852742457967e-06, "loss": 0.5314, "step": 9905 }, { "epoch": 1.7185981956974323, "grad_norm": 0.7471193671226501, "learning_rate": 2.976275722791051e-06, "loss": 0.473, "step": 9906 }, { "epoch": 1.7187716863289384, "grad_norm": 1.022916555404663, "learning_rate": 2.972700678747176e-06, "loss": 0.5603, "step": 9907 }, { "epoch": 1.7189451769604442, "grad_norm": 0.7705941200256348, "learning_rate": 2.9691276107412293e-06, "loss": 0.4449, "step": 9908 }, { "epoch": 1.71911866759195, "grad_norm": 1.0875569581985474, "learning_rate": 2.9655565191878668e-06, "loss": 0.3584, "step": 9909 }, { "epoch": 1.719292158223456, "grad_norm": 1.3497709035873413, "learning_rate": 2.961987404501516e-06, "loss": 0.3804, "step": 9910 }, { "epoch": 1.7194656488549618, "grad_norm": 0.6637280583381653, "learning_rate": 2.9584202670963892e-06, "loss": 0.5167, "step": 9911 }, { "epoch": 1.7196391394864676, "grad_norm": 0.9440620541572571, "learning_rate": 2.9548551073864386e-06, "loss": 0.4071, "step": 9912 }, { "epoch": 1.7198126301179735, "grad_norm": 0.7562226057052612, "learning_rate": 2.95129192578542e-06, "loss": 0.5015, "step": 9913 }, { "epoch": 1.7199861207494795, "grad_norm": 0.8880428075790405, "learning_rate": 2.947730722706832e-06, "loss": 0.5117, "step": 9914 }, { "epoch": 1.7201596113809854, "grad_norm": 1.0197423696517944, "learning_rate": 2.9441714985639747e-06, "loss": 0.3461, "step": 9915 }, { "epoch": 1.7203331020124915, "grad_norm": 0.7170447707176208, "learning_rate": 2.94061425376988e-06, "loss": 0.3707, "step": 9916 }, { "epoch": 1.7205065926439973, "grad_norm": 1.3667303323745728, "learning_rate": 2.9370589887373825e-06, "loss": 0.4254, "step": 9917 }, { "epoch": 1.7206800832755031, "grad_norm": 0.9315698742866516, "learning_rate": 2.9335057038790715e-06, "loss": 0.4695, "step": 9918 }, { "epoch": 1.720853573907009, "grad_norm": 0.7081165909767151, "learning_rate": 2.9299543996073067e-06, "loss": 0.4189, "step": 9919 }, { "epoch": 1.7210270645385148, "grad_norm": 0.7056809067726135, "learning_rate": 2.9264050763342267e-06, "loss": 0.5752, "step": 9920 }, { "epoch": 1.7212005551700207, "grad_norm": 0.8265276551246643, "learning_rate": 2.9228577344717357e-06, "loss": 0.4545, "step": 9921 }, { "epoch": 1.7213740458015268, "grad_norm": 0.6816700100898743, "learning_rate": 2.9193123744315e-06, "loss": 0.4418, "step": 9922 }, { "epoch": 1.7215475364330326, "grad_norm": 1.0332481861114502, "learning_rate": 2.9157689966249636e-06, "loss": 0.3752, "step": 9923 }, { "epoch": 1.7217210270645387, "grad_norm": 0.7489287257194519, "learning_rate": 2.912227601463351e-06, "loss": 0.5599, "step": 9924 }, { "epoch": 1.7218945176960445, "grad_norm": 1.5686593055725098, "learning_rate": 2.9086881893576267e-06, "loss": 0.5272, "step": 9925 }, { "epoch": 1.7220680083275504, "grad_norm": 0.6984260678291321, "learning_rate": 2.9051507607185603e-06, "loss": 0.4105, "step": 9926 }, { "epoch": 1.7222414989590562, "grad_norm": 0.6304365396499634, "learning_rate": 2.9016153159566607e-06, "loss": 0.5432, "step": 9927 }, { "epoch": 1.722414989590562, "grad_norm": 0.7879930734634399, "learning_rate": 2.8980818554822376e-06, "loss": 0.4963, "step": 9928 }, { "epoch": 1.722588480222068, "grad_norm": 0.7842549085617065, "learning_rate": 2.894550379705332e-06, "loss": 0.4963, "step": 9929 }, { "epoch": 1.7227619708535737, "grad_norm": 0.8720807433128357, "learning_rate": 2.8910208890357916e-06, "loss": 0.3947, "step": 9930 }, { "epoch": 1.7229354614850798, "grad_norm": 0.8417780995368958, "learning_rate": 2.8874933838832154e-06, "loss": 0.399, "step": 9931 }, { "epoch": 1.7231089521165857, "grad_norm": 1.0623208284378052, "learning_rate": 2.883967864656969e-06, "loss": 0.3695, "step": 9932 }, { "epoch": 1.7232824427480917, "grad_norm": 0.6795383095741272, "learning_rate": 2.8804443317661925e-06, "loss": 0.4988, "step": 9933 }, { "epoch": 1.7234559333795976, "grad_norm": 0.8090497255325317, "learning_rate": 2.876922785619809e-06, "loss": 0.4906, "step": 9934 }, { "epoch": 1.7236294240111034, "grad_norm": 0.9351137280464172, "learning_rate": 2.873403226626479e-06, "loss": 0.4972, "step": 9935 }, { "epoch": 1.7238029146426093, "grad_norm": 0.7210533022880554, "learning_rate": 2.8698856551946664e-06, "loss": 0.415, "step": 9936 }, { "epoch": 1.723976405274115, "grad_norm": 1.0768041610717773, "learning_rate": 2.866370071732585e-06, "loss": 0.3759, "step": 9937 }, { "epoch": 1.724149895905621, "grad_norm": 0.7763087749481201, "learning_rate": 2.8628564766482193e-06, "loss": 0.458, "step": 9938 }, { "epoch": 1.724323386537127, "grad_norm": 0.7806427478790283, "learning_rate": 2.859344870349323e-06, "loss": 0.4099, "step": 9939 }, { "epoch": 1.7244968771686329, "grad_norm": 0.7402631044387817, "learning_rate": 2.855835253243433e-06, "loss": 0.4496, "step": 9940 }, { "epoch": 1.724670367800139, "grad_norm": 0.6902532577514648, "learning_rate": 2.8523276257378406e-06, "loss": 0.525, "step": 9941 }, { "epoch": 1.7248438584316448, "grad_norm": 1.1441080570220947, "learning_rate": 2.848821988239605e-06, "loss": 0.47, "step": 9942 }, { "epoch": 1.7250173490631506, "grad_norm": 0.7120885252952576, "learning_rate": 2.8453183411555606e-06, "loss": 0.5054, "step": 9943 }, { "epoch": 1.7251908396946565, "grad_norm": 0.7540342211723328, "learning_rate": 2.8418166848923158e-06, "loss": 0.452, "step": 9944 }, { "epoch": 1.7253643303261623, "grad_norm": 0.5520671606063843, "learning_rate": 2.838317019856238e-06, "loss": 0.4268, "step": 9945 }, { "epoch": 1.7255378209576682, "grad_norm": 0.9635013937950134, "learning_rate": 2.834819346453468e-06, "loss": 0.4554, "step": 9946 }, { "epoch": 1.7257113115891742, "grad_norm": 0.7378911375999451, "learning_rate": 2.8313236650899135e-06, "loss": 0.3519, "step": 9947 }, { "epoch": 1.72588480222068, "grad_norm": 0.885434627532959, "learning_rate": 2.827829976171248e-06, "loss": 0.3899, "step": 9948 }, { "epoch": 1.726058292852186, "grad_norm": 0.8526363968849182, "learning_rate": 2.8243382801029295e-06, "loss": 0.434, "step": 9949 }, { "epoch": 1.726231783483692, "grad_norm": 0.9503956437110901, "learning_rate": 2.820848577290165e-06, "loss": 0.403, "step": 9950 }, { "epoch": 1.7264052741151978, "grad_norm": 1.0041067600250244, "learning_rate": 2.8173608681379417e-06, "loss": 0.4014, "step": 9951 }, { "epoch": 1.7265787647467037, "grad_norm": 1.171455979347229, "learning_rate": 2.8138751530510065e-06, "loss": 0.3354, "step": 9952 }, { "epoch": 1.7267522553782095, "grad_norm": 0.7163133025169373, "learning_rate": 2.8103914324338965e-06, "loss": 0.4609, "step": 9953 }, { "epoch": 1.7269257460097154, "grad_norm": 0.6427908539772034, "learning_rate": 2.806909706690881e-06, "loss": 0.535, "step": 9954 }, { "epoch": 1.7270992366412212, "grad_norm": 0.9116261005401611, "learning_rate": 2.8034299762260308e-06, "loss": 0.4227, "step": 9955 }, { "epoch": 1.7272727272727273, "grad_norm": 0.5795822143554688, "learning_rate": 2.799952241443167e-06, "loss": 0.4703, "step": 9956 }, { "epoch": 1.7274462179042331, "grad_norm": 0.8556695580482483, "learning_rate": 2.796476502745895e-06, "loss": 0.4191, "step": 9957 }, { "epoch": 1.7276197085357392, "grad_norm": 0.7606567144393921, "learning_rate": 2.7930027605375644e-06, "loss": 0.5043, "step": 9958 }, { "epoch": 1.727793199167245, "grad_norm": 0.8193055391311646, "learning_rate": 2.7895310152213163e-06, "loss": 0.4131, "step": 9959 }, { "epoch": 1.727966689798751, "grad_norm": 0.9150891900062561, "learning_rate": 2.7860612672000485e-06, "loss": 0.4764, "step": 9960 }, { "epoch": 1.7281401804302567, "grad_norm": 0.7192273139953613, "learning_rate": 2.7825935168764284e-06, "loss": 0.6002, "step": 9961 }, { "epoch": 1.7283136710617626, "grad_norm": 0.724804699420929, "learning_rate": 2.7791277646528893e-06, "loss": 0.4231, "step": 9962 }, { "epoch": 1.7284871616932684, "grad_norm": 0.8425358533859253, "learning_rate": 2.7756640109316423e-06, "loss": 0.5256, "step": 9963 }, { "epoch": 1.7286606523247745, "grad_norm": 1.4396371841430664, "learning_rate": 2.77220225611466e-06, "loss": 0.4001, "step": 9964 }, { "epoch": 1.7288341429562804, "grad_norm": 0.9159610271453857, "learning_rate": 2.768742500603678e-06, "loss": 0.5305, "step": 9965 }, { "epoch": 1.7290076335877864, "grad_norm": 0.6820814609527588, "learning_rate": 2.7652847448002074e-06, "loss": 0.417, "step": 9966 }, { "epoch": 1.7291811242192923, "grad_norm": 0.7331205010414124, "learning_rate": 2.7618289891055217e-06, "loss": 0.5649, "step": 9967 }, { "epoch": 1.729354614850798, "grad_norm": 1.05643892288208, "learning_rate": 2.7583752339206714e-06, "loss": 0.4611, "step": 9968 }, { "epoch": 1.729528105482304, "grad_norm": 0.6232126951217651, "learning_rate": 2.754923479646465e-06, "loss": 0.5616, "step": 9969 }, { "epoch": 1.7297015961138098, "grad_norm": 0.6674892902374268, "learning_rate": 2.7514737266834845e-06, "loss": 0.4, "step": 9970 }, { "epoch": 1.7298750867453156, "grad_norm": 0.8359811902046204, "learning_rate": 2.7480259754320716e-06, "loss": 0.4115, "step": 9971 }, { "epoch": 1.7300485773768215, "grad_norm": 0.9754602909088135, "learning_rate": 2.7445802262923505e-06, "loss": 0.426, "step": 9972 }, { "epoch": 1.7302220680083276, "grad_norm": 0.8265777230262756, "learning_rate": 2.7411364796642015e-06, "loss": 0.4432, "step": 9973 }, { "epoch": 1.7303955586398334, "grad_norm": 0.7472012042999268, "learning_rate": 2.737694735947276e-06, "loss": 0.5466, "step": 9974 }, { "epoch": 1.7305690492713395, "grad_norm": 0.790481448173523, "learning_rate": 2.7342549955409836e-06, "loss": 0.4149, "step": 9975 }, { "epoch": 1.7307425399028453, "grad_norm": 0.7875306606292725, "learning_rate": 2.730817258844529e-06, "loss": 0.473, "step": 9976 }, { "epoch": 1.7309160305343512, "grad_norm": 1.0280705690383911, "learning_rate": 2.727381526256845e-06, "loss": 0.4008, "step": 9977 }, { "epoch": 1.731089521165857, "grad_norm": 0.7789735198020935, "learning_rate": 2.723947798176665e-06, "loss": 0.4231, "step": 9978 }, { "epoch": 1.7312630117973629, "grad_norm": 0.6488164663314819, "learning_rate": 2.720516075002473e-06, "loss": 0.6138, "step": 9979 }, { "epoch": 1.7314365024288687, "grad_norm": 0.6671362519264221, "learning_rate": 2.7170863571325257e-06, "loss": 0.5438, "step": 9980 }, { "epoch": 1.7316099930603748, "grad_norm": 1.200859546661377, "learning_rate": 2.7136586449648407e-06, "loss": 0.3715, "step": 9981 }, { "epoch": 1.7317834836918806, "grad_norm": 0.6614787578582764, "learning_rate": 2.7102329388972215e-06, "loss": 0.3636, "step": 9982 }, { "epoch": 1.7319569743233867, "grad_norm": 0.8215885758399963, "learning_rate": 2.7068092393272082e-06, "loss": 0.4867, "step": 9983 }, { "epoch": 1.7321304649548925, "grad_norm": 0.7617893815040588, "learning_rate": 2.7033875466521363e-06, "loss": 0.4686, "step": 9984 }, { "epoch": 1.7323039555863984, "grad_norm": 0.7937578558921814, "learning_rate": 2.6999678612690907e-06, "loss": 0.3956, "step": 9985 }, { "epoch": 1.7324774462179042, "grad_norm": 0.9495329260826111, "learning_rate": 2.696550183574942e-06, "loss": 0.4323, "step": 9986 }, { "epoch": 1.73265093684941, "grad_norm": 1.5091499090194702, "learning_rate": 2.6931345139663e-06, "loss": 0.4475, "step": 9987 }, { "epoch": 1.732824427480916, "grad_norm": 0.668420135974884, "learning_rate": 2.6897208528395656e-06, "loss": 0.5051, "step": 9988 }, { "epoch": 1.7329979181124218, "grad_norm": 0.6214038729667664, "learning_rate": 2.6863092005908973e-06, "loss": 0.5253, "step": 9989 }, { "epoch": 1.7331714087439278, "grad_norm": 0.9628726840019226, "learning_rate": 2.682899557616223e-06, "loss": 0.4208, "step": 9990 }, { "epoch": 1.7333448993754337, "grad_norm": 0.6523237228393555, "learning_rate": 2.679491924311226e-06, "loss": 0.4209, "step": 9991 }, { "epoch": 1.7335183900069397, "grad_norm": 0.9754306077957153, "learning_rate": 2.676086301071381e-06, "loss": 0.3916, "step": 9992 }, { "epoch": 1.7336918806384456, "grad_norm": 0.8062222003936768, "learning_rate": 2.6726826882919055e-06, "loss": 0.4349, "step": 9993 }, { "epoch": 1.7338653712699514, "grad_norm": 1.4655934572219849, "learning_rate": 2.66928108636779e-06, "loss": 0.4634, "step": 9994 }, { "epoch": 1.7340388619014573, "grad_norm": 0.993941605091095, "learning_rate": 2.6658814956938073e-06, "loss": 0.4526, "step": 9995 }, { "epoch": 1.7342123525329631, "grad_norm": 0.8276480436325073, "learning_rate": 2.662483916664467e-06, "loss": 0.3713, "step": 9996 }, { "epoch": 1.734385843164469, "grad_norm": 0.7622029781341553, "learning_rate": 2.6590883496740727e-06, "loss": 0.4706, "step": 9997 }, { "epoch": 1.734559333795975, "grad_norm": 0.816926121711731, "learning_rate": 2.6556947951166836e-06, "loss": 0.3597, "step": 9998 }, { "epoch": 1.734732824427481, "grad_norm": 0.6731540560722351, "learning_rate": 2.6523032533861236e-06, "loss": 0.4859, "step": 9999 }, { "epoch": 1.734906315058987, "grad_norm": 1.0628679990768433, "learning_rate": 2.648913724875981e-06, "loss": 0.3247, "step": 10000 }, { "epoch": 1.7350798056904928, "grad_norm": 0.7253689765930176, "learning_rate": 2.6455262099796233e-06, "loss": 0.5372, "step": 10001 }, { "epoch": 1.7352532963219987, "grad_norm": 0.8236063122749329, "learning_rate": 2.6421407090901707e-06, "loss": 0.529, "step": 10002 }, { "epoch": 1.7354267869535045, "grad_norm": 1.1921359300613403, "learning_rate": 2.6387572226005143e-06, "loss": 0.3976, "step": 10003 }, { "epoch": 1.7356002775850103, "grad_norm": 1.595541000366211, "learning_rate": 2.635375750903306e-06, "loss": 0.383, "step": 10004 }, { "epoch": 1.7357737682165162, "grad_norm": 0.7787811756134033, "learning_rate": 2.631996294390986e-06, "loss": 0.5839, "step": 10005 }, { "epoch": 1.7359472588480223, "grad_norm": 0.6764829158782959, "learning_rate": 2.628618853455727e-06, "loss": 0.5826, "step": 10006 }, { "epoch": 1.736120749479528, "grad_norm": 0.8212352991104126, "learning_rate": 2.625243428489492e-06, "loss": 0.442, "step": 10007 }, { "epoch": 1.7362942401110342, "grad_norm": 0.8655128479003906, "learning_rate": 2.621870019884005e-06, "loss": 0.394, "step": 10008 }, { "epoch": 1.73646773074254, "grad_norm": 0.7016631960868835, "learning_rate": 2.6184986280307525e-06, "loss": 0.5529, "step": 10009 }, { "epoch": 1.7366412213740459, "grad_norm": 0.855135440826416, "learning_rate": 2.6151292533209826e-06, "loss": 0.6355, "step": 10010 }, { "epoch": 1.7368147120055517, "grad_norm": 0.900076150894165, "learning_rate": 2.6117618961457235e-06, "loss": 0.4408, "step": 10011 }, { "epoch": 1.7369882026370576, "grad_norm": 0.9842840433120728, "learning_rate": 2.6083965568957603e-06, "loss": 0.4554, "step": 10012 }, { "epoch": 1.7371616932685634, "grad_norm": 0.6928418874740601, "learning_rate": 2.6050332359616403e-06, "loss": 0.4578, "step": 10013 }, { "epoch": 1.7373351839000692, "grad_norm": 0.6585573554039001, "learning_rate": 2.601671933733678e-06, "loss": 0.5532, "step": 10014 }, { "epoch": 1.7375086745315753, "grad_norm": 0.6021066904067993, "learning_rate": 2.598312650601964e-06, "loss": 0.5166, "step": 10015 }, { "epoch": 1.7376821651630812, "grad_norm": 0.6157493591308594, "learning_rate": 2.594955386956346e-06, "loss": 0.5314, "step": 10016 }, { "epoch": 1.7378556557945872, "grad_norm": 0.9439457058906555, "learning_rate": 2.5916001431864346e-06, "loss": 0.3905, "step": 10017 }, { "epoch": 1.738029146426093, "grad_norm": 0.974572479724884, "learning_rate": 2.588246919681614e-06, "loss": 0.3445, "step": 10018 }, { "epoch": 1.738202637057599, "grad_norm": 1.8008818626403809, "learning_rate": 2.5848957168310195e-06, "loss": 0.3837, "step": 10019 }, { "epoch": 1.7383761276891048, "grad_norm": 0.8484526872634888, "learning_rate": 2.5815465350235756e-06, "loss": 0.5154, "step": 10020 }, { "epoch": 1.7385496183206106, "grad_norm": 0.746911346912384, "learning_rate": 2.5781993746479537e-06, "loss": 0.5085, "step": 10021 }, { "epoch": 1.7387231089521165, "grad_norm": 0.7924544811248779, "learning_rate": 2.5748542360925944e-06, "loss": 0.4045, "step": 10022 }, { "epoch": 1.7388965995836225, "grad_norm": 1.0243325233459473, "learning_rate": 2.5715111197457e-06, "loss": 0.4777, "step": 10023 }, { "epoch": 1.7390700902151284, "grad_norm": 0.621435821056366, "learning_rate": 2.568170025995258e-06, "loss": 0.531, "step": 10024 }, { "epoch": 1.7392435808466344, "grad_norm": 0.9693120718002319, "learning_rate": 2.5648309552289875e-06, "loss": 0.5636, "step": 10025 }, { "epoch": 1.7394170714781403, "grad_norm": 0.7685256600379944, "learning_rate": 2.561493907834405e-06, "loss": 0.45, "step": 10026 }, { "epoch": 1.7395905621096461, "grad_norm": 0.6334861516952515, "learning_rate": 2.5581588841987693e-06, "loss": 0.5999, "step": 10027 }, { "epoch": 1.739764052741152, "grad_norm": 1.0231965780258179, "learning_rate": 2.5548258847091266e-06, "loss": 0.464, "step": 10028 }, { "epoch": 1.7399375433726578, "grad_norm": 0.672338604927063, "learning_rate": 2.551494909752261e-06, "loss": 0.5079, "step": 10029 }, { "epoch": 1.7401110340041637, "grad_norm": 0.807580828666687, "learning_rate": 2.548165959714748e-06, "loss": 0.3598, "step": 10030 }, { "epoch": 1.7402845246356695, "grad_norm": 0.6821241974830627, "learning_rate": 2.544839034982909e-06, "loss": 0.366, "step": 10031 }, { "epoch": 1.7404580152671756, "grad_norm": 0.8526586890220642, "learning_rate": 2.54151413594284e-06, "loss": 0.4384, "step": 10032 }, { "epoch": 1.7406315058986814, "grad_norm": 0.8350969552993774, "learning_rate": 2.538191262980394e-06, "loss": 0.3817, "step": 10033 }, { "epoch": 1.7408049965301875, "grad_norm": 0.7892290949821472, "learning_rate": 2.534870416481208e-06, "loss": 0.5518, "step": 10034 }, { "epoch": 1.7409784871616933, "grad_norm": 0.6795223951339722, "learning_rate": 2.5315515968306503e-06, "loss": 0.4832, "step": 10035 }, { "epoch": 1.7411519777931992, "grad_norm": 1.3044379949569702, "learning_rate": 2.5282348044138915e-06, "loss": 0.3311, "step": 10036 }, { "epoch": 1.741325468424705, "grad_norm": 1.0554447174072266, "learning_rate": 2.5249200396158414e-06, "loss": 0.4771, "step": 10037 }, { "epoch": 1.7414989590562109, "grad_norm": 0.812095046043396, "learning_rate": 2.521607302821183e-06, "loss": 0.4941, "step": 10038 }, { "epoch": 1.7416724496877167, "grad_norm": 0.6278399229049683, "learning_rate": 2.51829659441436e-06, "loss": 0.5004, "step": 10039 }, { "epoch": 1.7418459403192228, "grad_norm": 0.9077810645103455, "learning_rate": 2.514987914779592e-06, "loss": 0.404, "step": 10040 }, { "epoch": 1.7420194309507286, "grad_norm": 0.7014707326889038, "learning_rate": 2.5116812643008494e-06, "loss": 0.2863, "step": 10041 }, { "epoch": 1.7421929215822347, "grad_norm": 0.9283987283706665, "learning_rate": 2.5083766433618695e-06, "loss": 0.352, "step": 10042 }, { "epoch": 1.7423664122137406, "grad_norm": 0.7386503219604492, "learning_rate": 2.5050740523461682e-06, "loss": 0.4692, "step": 10043 }, { "epoch": 1.7425399028452464, "grad_norm": 0.58295738697052, "learning_rate": 2.5017734916370073e-06, "loss": 0.6099, "step": 10044 }, { "epoch": 1.7427133934767522, "grad_norm": 0.9074196219444275, "learning_rate": 2.498474961617421e-06, "loss": 0.428, "step": 10045 }, { "epoch": 1.742886884108258, "grad_norm": 0.7043365240097046, "learning_rate": 2.495178462670207e-06, "loss": 0.4751, "step": 10046 }, { "epoch": 1.743060374739764, "grad_norm": 0.8902522325515747, "learning_rate": 2.4918839951779374e-06, "loss": 0.3989, "step": 10047 }, { "epoch": 1.7432338653712698, "grad_norm": 1.1538342237472534, "learning_rate": 2.4885915595229215e-06, "loss": 0.3883, "step": 10048 }, { "epoch": 1.7434073560027759, "grad_norm": 0.8350592255592346, "learning_rate": 2.4853011560872653e-06, "loss": 0.4956, "step": 10049 }, { "epoch": 1.7435808466342817, "grad_norm": 0.6407992839813232, "learning_rate": 2.4820127852528163e-06, "loss": 0.5217, "step": 10050 }, { "epoch": 1.7437543372657878, "grad_norm": 1.2211695909500122, "learning_rate": 2.4787264474011984e-06, "loss": 0.3939, "step": 10051 }, { "epoch": 1.7439278278972936, "grad_norm": 0.8848746418952942, "learning_rate": 2.4754421429137887e-06, "loss": 0.3485, "step": 10052 }, { "epoch": 1.7441013185287995, "grad_norm": 0.84989333152771, "learning_rate": 2.4721598721717465e-06, "loss": 0.4356, "step": 10053 }, { "epoch": 1.7442748091603053, "grad_norm": 0.8318312764167786, "learning_rate": 2.468879635555965e-06, "loss": 0.4052, "step": 10054 }, { "epoch": 1.7444482997918112, "grad_norm": 0.8529197573661804, "learning_rate": 2.4656014334471357e-06, "loss": 0.5172, "step": 10055 }, { "epoch": 1.744621790423317, "grad_norm": 0.7869355082511902, "learning_rate": 2.462325266225687e-06, "loss": 0.5625, "step": 10056 }, { "epoch": 1.744795281054823, "grad_norm": 0.6938173770904541, "learning_rate": 2.4590511342718348e-06, "loss": 0.448, "step": 10057 }, { "epoch": 1.744968771686329, "grad_norm": 1.1824657917022705, "learning_rate": 2.455779037965529e-06, "loss": 0.4856, "step": 10058 }, { "epoch": 1.745142262317835, "grad_norm": 1.080678939819336, "learning_rate": 2.452508977686514e-06, "loss": 0.3717, "step": 10059 }, { "epoch": 1.7453157529493408, "grad_norm": 0.6940746903419495, "learning_rate": 2.4492409538142803e-06, "loss": 0.4337, "step": 10060 }, { "epoch": 1.7454892435808467, "grad_norm": 0.7876454591751099, "learning_rate": 2.445974966728082e-06, "loss": 0.4869, "step": 10061 }, { "epoch": 1.7456627342123525, "grad_norm": 0.839835524559021, "learning_rate": 2.44271101680694e-06, "loss": 0.4744, "step": 10062 }, { "epoch": 1.7458362248438584, "grad_norm": 2.489712953567505, "learning_rate": 2.4394491044296474e-06, "loss": 0.4675, "step": 10063 }, { "epoch": 1.7460097154753642, "grad_norm": 0.7875381112098694, "learning_rate": 2.436189229974748e-06, "loss": 0.5355, "step": 10064 }, { "epoch": 1.7461832061068703, "grad_norm": 0.8252909183502197, "learning_rate": 2.4329313938205546e-06, "loss": 0.3831, "step": 10065 }, { "epoch": 1.7463566967383761, "grad_norm": 0.6469065546989441, "learning_rate": 2.4296755963451424e-06, "loss": 0.4752, "step": 10066 }, { "epoch": 1.7465301873698822, "grad_norm": 1.6683177947998047, "learning_rate": 2.426421837926345e-06, "loss": 0.4642, "step": 10067 }, { "epoch": 1.746703678001388, "grad_norm": 0.6783648729324341, "learning_rate": 2.423170118941778e-06, "loss": 0.351, "step": 10068 }, { "epoch": 1.7468771686328939, "grad_norm": 0.6830014586448669, "learning_rate": 2.4199204397687968e-06, "loss": 0.4288, "step": 10069 }, { "epoch": 1.7470506592643997, "grad_norm": 0.9472129940986633, "learning_rate": 2.4166728007845364e-06, "loss": 0.5343, "step": 10070 }, { "epoch": 1.7472241498959056, "grad_norm": 0.7599621415138245, "learning_rate": 2.413427202365879e-06, "loss": 0.432, "step": 10071 }, { "epoch": 1.7473976405274114, "grad_norm": 0.8760243058204651, "learning_rate": 2.4101836448894924e-06, "loss": 0.3851, "step": 10072 }, { "epoch": 1.7475711311589173, "grad_norm": 0.8620213270187378, "learning_rate": 2.406942128731791e-06, "loss": 0.4406, "step": 10073 }, { "epoch": 1.7477446217904233, "grad_norm": 1.1500178575515747, "learning_rate": 2.4037026542689555e-06, "loss": 0.4408, "step": 10074 }, { "epoch": 1.7479181124219292, "grad_norm": 0.8120666742324829, "learning_rate": 2.4004652218769244e-06, "loss": 0.5251, "step": 10075 }, { "epoch": 1.7480916030534353, "grad_norm": 0.8861424326896667, "learning_rate": 2.3972298319314224e-06, "loss": 0.4749, "step": 10076 }, { "epoch": 1.748265093684941, "grad_norm": 0.7357455492019653, "learning_rate": 2.393996484807901e-06, "loss": 0.3649, "step": 10077 }, { "epoch": 1.748438584316447, "grad_norm": 0.6997224688529968, "learning_rate": 2.3907651808816067e-06, "loss": 0.5558, "step": 10078 }, { "epoch": 1.7486120749479528, "grad_norm": 0.9641300439834595, "learning_rate": 2.3875359205275307e-06, "loss": 0.4082, "step": 10079 }, { "epoch": 1.7487855655794586, "grad_norm": 0.9489569067955017, "learning_rate": 2.384308704120435e-06, "loss": 0.4228, "step": 10080 }, { "epoch": 1.7489590562109645, "grad_norm": 0.8642375469207764, "learning_rate": 2.381083532034836e-06, "loss": 0.4338, "step": 10081 }, { "epoch": 1.7491325468424705, "grad_norm": 0.7411103248596191, "learning_rate": 2.3778604046450313e-06, "loss": 0.3862, "step": 10082 }, { "epoch": 1.7493060374739764, "grad_norm": 0.6525177359580994, "learning_rate": 2.374639322325052e-06, "loss": 0.4488, "step": 10083 }, { "epoch": 1.7494795281054825, "grad_norm": 0.8858721256256104, "learning_rate": 2.371420285448722e-06, "loss": 0.4323, "step": 10084 }, { "epoch": 1.7496530187369883, "grad_norm": 1.2127820253372192, "learning_rate": 2.368203294389606e-06, "loss": 0.3624, "step": 10085 }, { "epoch": 1.7498265093684942, "grad_norm": 0.7921899557113647, "learning_rate": 2.364988349521049e-06, "loss": 0.4139, "step": 10086 }, { "epoch": 1.75, "grad_norm": 0.9015601873397827, "learning_rate": 2.3617754512161353e-06, "loss": 0.4285, "step": 10087 }, { "epoch": 1.7501734906315058, "grad_norm": 0.9655046463012695, "learning_rate": 2.358564599847737e-06, "loss": 0.549, "step": 10088 }, { "epoch": 1.7503469812630117, "grad_norm": 0.7301394939422607, "learning_rate": 2.3553557957884744e-06, "loss": 0.3454, "step": 10089 }, { "epoch": 1.7505204718945175, "grad_norm": 1.331050157546997, "learning_rate": 2.352149039410727e-06, "loss": 0.4889, "step": 10090 }, { "epoch": 1.7506939625260236, "grad_norm": 0.8005419373512268, "learning_rate": 2.348944331086651e-06, "loss": 0.3352, "step": 10091 }, { "epoch": 1.7508674531575295, "grad_norm": 0.8455546498298645, "learning_rate": 2.345741671188153e-06, "loss": 0.3281, "step": 10092 }, { "epoch": 1.7510409437890355, "grad_norm": 1.028125524520874, "learning_rate": 2.342541060086907e-06, "loss": 0.4072, "step": 10093 }, { "epoch": 1.7512144344205414, "grad_norm": 0.8474204540252686, "learning_rate": 2.33934249815434e-06, "loss": 0.4888, "step": 10094 }, { "epoch": 1.7513879250520472, "grad_norm": 0.8342925906181335, "learning_rate": 2.336145985761664e-06, "loss": 0.5154, "step": 10095 }, { "epoch": 1.751561415683553, "grad_norm": 1.0096440315246582, "learning_rate": 2.3329515232798207e-06, "loss": 0.4929, "step": 10096 }, { "epoch": 1.751734906315059, "grad_norm": 0.7327522039413452, "learning_rate": 2.3297591110795437e-06, "loss": 0.4319, "step": 10097 }, { "epoch": 1.7519083969465647, "grad_norm": 0.848007082939148, "learning_rate": 2.3265687495313106e-06, "loss": 0.3483, "step": 10098 }, { "epoch": 1.7520818875780708, "grad_norm": 0.9690507650375366, "learning_rate": 2.323380439005367e-06, "loss": 0.3968, "step": 10099 }, { "epoch": 1.7522553782095767, "grad_norm": 0.6661014556884766, "learning_rate": 2.3201941798717176e-06, "loss": 0.4374, "step": 10100 }, { "epoch": 1.7524288688410827, "grad_norm": 0.764918863773346, "learning_rate": 2.3170099725001393e-06, "loss": 0.4493, "step": 10101 }, { "epoch": 1.7526023594725886, "grad_norm": 0.7615021467208862, "learning_rate": 2.313827817260159e-06, "loss": 0.4489, "step": 10102 }, { "epoch": 1.7527758501040944, "grad_norm": 0.7164032459259033, "learning_rate": 2.310647714521068e-06, "loss": 0.4794, "step": 10103 }, { "epoch": 1.7529493407356003, "grad_norm": 1.0460331439971924, "learning_rate": 2.307469664651918e-06, "loss": 0.4767, "step": 10104 }, { "epoch": 1.7531228313671061, "grad_norm": 0.9769534468650818, "learning_rate": 2.304293668021538e-06, "loss": 0.4417, "step": 10105 }, { "epoch": 1.753296321998612, "grad_norm": 0.8242330551147461, "learning_rate": 2.3011197249984886e-06, "loss": 0.4003, "step": 10106 }, { "epoch": 1.7534698126301178, "grad_norm": 1.248779058456421, "learning_rate": 2.2979478359511244e-06, "loss": 0.4025, "step": 10107 }, { "epoch": 1.7536433032616239, "grad_norm": 1.0432181358337402, "learning_rate": 2.2947780012475396e-06, "loss": 0.449, "step": 10108 }, { "epoch": 1.7538167938931297, "grad_norm": 0.8337266445159912, "learning_rate": 2.291610221255598e-06, "loss": 0.4597, "step": 10109 }, { "epoch": 1.7539902845246358, "grad_norm": 0.8977551460266113, "learning_rate": 2.2884444963429188e-06, "loss": 0.4958, "step": 10110 }, { "epoch": 1.7541637751561416, "grad_norm": 0.7688846588134766, "learning_rate": 2.285280826876901e-06, "loss": 0.3867, "step": 10111 }, { "epoch": 1.7543372657876475, "grad_norm": 0.837756872177124, "learning_rate": 2.282119213224683e-06, "loss": 0.3563, "step": 10112 }, { "epoch": 1.7545107564191533, "grad_norm": 1.8376587629318237, "learning_rate": 2.2789596557531766e-06, "loss": 0.3823, "step": 10113 }, { "epoch": 1.7546842470506592, "grad_norm": 0.7061861157417297, "learning_rate": 2.2758021548290478e-06, "loss": 0.522, "step": 10114 }, { "epoch": 1.754857737682165, "grad_norm": 0.6778156757354736, "learning_rate": 2.2726467108187335e-06, "loss": 0.5293, "step": 10115 }, { "epoch": 1.755031228313671, "grad_norm": 0.6336186528205872, "learning_rate": 2.2694933240884277e-06, "loss": 0.5385, "step": 10116 }, { "epoch": 1.755204718945177, "grad_norm": 0.6999941468238831, "learning_rate": 2.26634199500408e-06, "loss": 0.3204, "step": 10117 }, { "epoch": 1.755378209576683, "grad_norm": 1.1926878690719604, "learning_rate": 2.263192723931409e-06, "loss": 0.3995, "step": 10118 }, { "epoch": 1.7555517002081888, "grad_norm": 1.460123062133789, "learning_rate": 2.2600455112358843e-06, "loss": 0.4139, "step": 10119 }, { "epoch": 1.7557251908396947, "grad_norm": 0.7850372195243835, "learning_rate": 2.2569003572827543e-06, "loss": 0.3902, "step": 10120 }, { "epoch": 1.7558986814712005, "grad_norm": 0.8340042233467102, "learning_rate": 2.2537572624370107e-06, "loss": 0.3344, "step": 10121 }, { "epoch": 1.7560721721027064, "grad_norm": 0.9595273733139038, "learning_rate": 2.250616227063418e-06, "loss": 0.3964, "step": 10122 }, { "epoch": 1.7562456627342122, "grad_norm": 1.0658237934112549, "learning_rate": 2.247477251526489e-06, "loss": 0.3994, "step": 10123 }, { "epoch": 1.7564191533657183, "grad_norm": 0.9107815027236938, "learning_rate": 2.244340336190518e-06, "loss": 0.4043, "step": 10124 }, { "epoch": 1.7565926439972241, "grad_norm": 1.0793975591659546, "learning_rate": 2.2412054814195326e-06, "loss": 0.4739, "step": 10125 }, { "epoch": 1.7567661346287302, "grad_norm": 0.716812789440155, "learning_rate": 2.2380726875773507e-06, "loss": 0.4882, "step": 10126 }, { "epoch": 1.756939625260236, "grad_norm": 0.8273600339889526, "learning_rate": 2.2349419550275275e-06, "loss": 0.5278, "step": 10127 }, { "epoch": 1.757113115891742, "grad_norm": 0.6313033103942871, "learning_rate": 2.2318132841333906e-06, "loss": 0.446, "step": 10128 }, { "epoch": 1.7572866065232478, "grad_norm": 0.9580927491188049, "learning_rate": 2.228686675258025e-06, "loss": 0.4038, "step": 10129 }, { "epoch": 1.7574600971547536, "grad_norm": 0.8508036136627197, "learning_rate": 2.2255621287642805e-06, "loss": 0.519, "step": 10130 }, { "epoch": 1.7576335877862594, "grad_norm": 0.7911813855171204, "learning_rate": 2.2224396450147623e-06, "loss": 0.4701, "step": 10131 }, { "epoch": 1.7578070784177653, "grad_norm": 0.8794583678245544, "learning_rate": 2.2193192243718385e-06, "loss": 0.4142, "step": 10132 }, { "epoch": 1.7579805690492714, "grad_norm": 0.7201571464538574, "learning_rate": 2.216200867197633e-06, "loss": 0.5037, "step": 10133 }, { "epoch": 1.7581540596807772, "grad_norm": 0.6206756234169006, "learning_rate": 2.2130845738540475e-06, "loss": 0.5522, "step": 10134 }, { "epoch": 1.7583275503122833, "grad_norm": 1.1410282850265503, "learning_rate": 2.2099703447027142e-06, "loss": 0.4435, "step": 10135 }, { "epoch": 1.7585010409437891, "grad_norm": 0.754542350769043, "learning_rate": 2.2068581801050557e-06, "loss": 0.3986, "step": 10136 }, { "epoch": 1.758674531575295, "grad_norm": 1.0550987720489502, "learning_rate": 2.203748080422239e-06, "loss": 0.3549, "step": 10137 }, { "epoch": 1.7588480222068008, "grad_norm": 1.461365818977356, "learning_rate": 2.2006400460151923e-06, "loss": 0.3439, "step": 10138 }, { "epoch": 1.7590215128383067, "grad_norm": 0.8991324305534363, "learning_rate": 2.1975340772446095e-06, "loss": 0.3806, "step": 10139 }, { "epoch": 1.7591950034698125, "grad_norm": 0.8629834055900574, "learning_rate": 2.1944301744709428e-06, "loss": 0.4394, "step": 10140 }, { "epoch": 1.7593684941013186, "grad_norm": 0.7667680978775024, "learning_rate": 2.1913283380544013e-06, "loss": 0.5836, "step": 10141 }, { "epoch": 1.7595419847328244, "grad_norm": 0.8159300088882446, "learning_rate": 2.1882285683549555e-06, "loss": 0.4668, "step": 10142 }, { "epoch": 1.7597154753643305, "grad_norm": 0.8265716433525085, "learning_rate": 2.185130865732341e-06, "loss": 0.4355, "step": 10143 }, { "epoch": 1.7598889659958363, "grad_norm": 1.1387720108032227, "learning_rate": 2.1820352305460492e-06, "loss": 0.468, "step": 10144 }, { "epoch": 1.7600624566273422, "grad_norm": 0.7151750326156616, "learning_rate": 2.1789416631553294e-06, "loss": 0.5829, "step": 10145 }, { "epoch": 1.760235947258848, "grad_norm": 0.6007384061813354, "learning_rate": 2.1758501639191908e-06, "loss": 0.5656, "step": 10146 }, { "epoch": 1.7604094378903539, "grad_norm": 0.7706556916236877, "learning_rate": 2.1727607331964197e-06, "loss": 0.4412, "step": 10147 }, { "epoch": 1.7605829285218597, "grad_norm": 0.8431768417358398, "learning_rate": 2.169673371345531e-06, "loss": 0.4158, "step": 10148 }, { "epoch": 1.7607564191533656, "grad_norm": 0.8578419089317322, "learning_rate": 2.166588078724827e-06, "loss": 0.4647, "step": 10149 }, { "epoch": 1.7609299097848716, "grad_norm": 1.0609016418457031, "learning_rate": 2.1635048556923555e-06, "loss": 0.4402, "step": 10150 }, { "epoch": 1.7611034004163775, "grad_norm": 0.7982937693595886, "learning_rate": 2.1604237026059296e-06, "loss": 0.3978, "step": 10151 }, { "epoch": 1.7612768910478835, "grad_norm": 0.7780153155326843, "learning_rate": 2.1573446198231185e-06, "loss": 0.4018, "step": 10152 }, { "epoch": 1.7614503816793894, "grad_norm": 0.8052198886871338, "learning_rate": 2.154267607701259e-06, "loss": 0.4348, "step": 10153 }, { "epoch": 1.7616238723108952, "grad_norm": 0.752699613571167, "learning_rate": 2.1511926665974324e-06, "loss": 0.4047, "step": 10154 }, { "epoch": 1.761797362942401, "grad_norm": 0.7057964205741882, "learning_rate": 2.1481197968684998e-06, "loss": 0.5571, "step": 10155 }, { "epoch": 1.761970853573907, "grad_norm": 0.8463672995567322, "learning_rate": 2.1450489988710644e-06, "loss": 0.4365, "step": 10156 }, { "epoch": 1.7621443442054128, "grad_norm": 0.8756439685821533, "learning_rate": 2.1419802729614993e-06, "loss": 0.4567, "step": 10157 }, { "epoch": 1.7623178348369188, "grad_norm": 0.7664997577667236, "learning_rate": 2.138913619495928e-06, "loss": 0.3491, "step": 10158 }, { "epoch": 1.7624913254684247, "grad_norm": 1.0745248794555664, "learning_rate": 2.1358490388302466e-06, "loss": 0.3785, "step": 10159 }, { "epoch": 1.7626648160999308, "grad_norm": 1.0532866716384888, "learning_rate": 2.1327865313201015e-06, "loss": 0.449, "step": 10160 }, { "epoch": 1.7628383067314366, "grad_norm": 1.0335370302200317, "learning_rate": 2.1297260973208987e-06, "loss": 0.4962, "step": 10161 }, { "epoch": 1.7630117973629424, "grad_norm": 0.7184655666351318, "learning_rate": 2.1266677371877996e-06, "loss": 0.4698, "step": 10162 }, { "epoch": 1.7631852879944483, "grad_norm": 0.926429271697998, "learning_rate": 2.1236114512757423e-06, "loss": 0.5233, "step": 10163 }, { "epoch": 1.7633587786259541, "grad_norm": 0.7761516571044922, "learning_rate": 2.120557239939405e-06, "loss": 0.418, "step": 10164 }, { "epoch": 1.76353226925746, "grad_norm": 0.665821373462677, "learning_rate": 2.1175051035332285e-06, "loss": 0.5332, "step": 10165 }, { "epoch": 1.7637057598889658, "grad_norm": 1.3791216611862183, "learning_rate": 2.114455042411432e-06, "loss": 0.3474, "step": 10166 }, { "epoch": 1.763879250520472, "grad_norm": 0.8793630003929138, "learning_rate": 2.111407056927959e-06, "loss": 0.4183, "step": 10167 }, { "epoch": 1.7640527411519777, "grad_norm": 0.9198608994483948, "learning_rate": 2.108361147436546e-06, "loss": 0.3718, "step": 10168 }, { "epoch": 1.7642262317834838, "grad_norm": 1.2578948736190796, "learning_rate": 2.105317314290671e-06, "loss": 0.4615, "step": 10169 }, { "epoch": 1.7643997224149897, "grad_norm": 0.6621404886245728, "learning_rate": 2.1022755578435715e-06, "loss": 0.4683, "step": 10170 }, { "epoch": 1.7645732130464955, "grad_norm": 0.7790876030921936, "learning_rate": 2.099235878448247e-06, "loss": 0.4091, "step": 10171 }, { "epoch": 1.7647467036780013, "grad_norm": 1.2456015348434448, "learning_rate": 2.0961982764574597e-06, "loss": 0.3719, "step": 10172 }, { "epoch": 1.7649201943095072, "grad_norm": 0.8161839246749878, "learning_rate": 2.093162752223723e-06, "loss": 0.4462, "step": 10173 }, { "epoch": 1.765093684941013, "grad_norm": 1.1468185186386108, "learning_rate": 2.0901293060993154e-06, "loss": 0.4938, "step": 10174 }, { "epoch": 1.765267175572519, "grad_norm": 0.8534411191940308, "learning_rate": 2.087097938436269e-06, "loss": 0.437, "step": 10175 }, { "epoch": 1.765440666204025, "grad_norm": 0.6585747599601746, "learning_rate": 2.0840686495863837e-06, "loss": 0.4927, "step": 10176 }, { "epoch": 1.765614156835531, "grad_norm": 0.6890860199928284, "learning_rate": 2.0810414399012034e-06, "loss": 0.4344, "step": 10177 }, { "epoch": 1.7657876474670369, "grad_norm": 0.8579777479171753, "learning_rate": 2.078016309732047e-06, "loss": 0.444, "step": 10178 }, { "epoch": 1.7659611380985427, "grad_norm": 0.7780200839042664, "learning_rate": 2.0749932594299804e-06, "loss": 0.4333, "step": 10179 }, { "epoch": 1.7661346287300486, "grad_norm": 1.21971595287323, "learning_rate": 2.0719722893458317e-06, "loss": 0.347, "step": 10180 }, { "epoch": 1.7663081193615544, "grad_norm": 0.831279456615448, "learning_rate": 2.0689533998301868e-06, "loss": 0.426, "step": 10181 }, { "epoch": 1.7664816099930603, "grad_norm": 1.502861738204956, "learning_rate": 2.0659365912333972e-06, "loss": 0.4578, "step": 10182 }, { "epoch": 1.7666551006245663, "grad_norm": 0.8384041786193848, "learning_rate": 2.0629218639055625e-06, "loss": 0.4526, "step": 10183 }, { "epoch": 1.7668285912560722, "grad_norm": 0.7056971192359924, "learning_rate": 2.0599092181965474e-06, "loss": 0.3987, "step": 10184 }, { "epoch": 1.7670020818875782, "grad_norm": 0.7954768538475037, "learning_rate": 2.05689865445597e-06, "loss": 0.5176, "step": 10185 }, { "epoch": 1.767175572519084, "grad_norm": 0.6894344687461853, "learning_rate": 2.0538901730332128e-06, "loss": 0.4622, "step": 10186 }, { "epoch": 1.76734906315059, "grad_norm": 0.7572733163833618, "learning_rate": 2.0508837742774125e-06, "loss": 0.3764, "step": 10187 }, { "epoch": 1.7675225537820958, "grad_norm": 0.7938310503959656, "learning_rate": 2.047879458537465e-06, "loss": 0.4937, "step": 10188 }, { "epoch": 1.7676960444136016, "grad_norm": 0.6096211671829224, "learning_rate": 2.0448772261620254e-06, "loss": 0.5094, "step": 10189 }, { "epoch": 1.7678695350451075, "grad_norm": 0.9245356321334839, "learning_rate": 2.0418770774995034e-06, "loss": 0.536, "step": 10190 }, { "epoch": 1.7680430256766133, "grad_norm": 0.82867830991745, "learning_rate": 2.038879012898074e-06, "loss": 0.482, "step": 10191 }, { "epoch": 1.7682165163081194, "grad_norm": 1.137810468673706, "learning_rate": 2.0358830327056633e-06, "loss": 0.4565, "step": 10192 }, { "epoch": 1.7683900069396252, "grad_norm": 0.9972358345985413, "learning_rate": 2.03288913726996e-06, "loss": 0.4631, "step": 10193 }, { "epoch": 1.7685634975711313, "grad_norm": 0.6278783082962036, "learning_rate": 2.0298973269384037e-06, "loss": 0.4972, "step": 10194 }, { "epoch": 1.7687369882026371, "grad_norm": 0.6001548171043396, "learning_rate": 2.02690760205821e-06, "loss": 0.559, "step": 10195 }, { "epoch": 1.768910478834143, "grad_norm": 0.8958408236503601, "learning_rate": 2.023919962976324e-06, "loss": 0.4073, "step": 10196 }, { "epoch": 1.7690839694656488, "grad_norm": 0.938954770565033, "learning_rate": 2.020934410039477e-06, "loss": 0.4994, "step": 10197 }, { "epoch": 1.7692574600971547, "grad_norm": 0.6814796924591064, "learning_rate": 2.0179509435941403e-06, "loss": 0.5527, "step": 10198 }, { "epoch": 1.7694309507286605, "grad_norm": 0.8747894167900085, "learning_rate": 2.0149695639865507e-06, "loss": 0.3686, "step": 10199 }, { "epoch": 1.7696044413601666, "grad_norm": 1.2067766189575195, "learning_rate": 2.011990271562696e-06, "loss": 0.3603, "step": 10200 }, { "epoch": 1.7697779319916724, "grad_norm": 0.8373238444328308, "learning_rate": 2.0090130666683347e-06, "loss": 0.3881, "step": 10201 }, { "epoch": 1.7699514226231785, "grad_norm": 1.303937554359436, "learning_rate": 2.006037949648971e-06, "loss": 0.5143, "step": 10202 }, { "epoch": 1.7701249132546844, "grad_norm": 0.6505475640296936, "learning_rate": 2.0030649208498685e-06, "loss": 0.5986, "step": 10203 }, { "epoch": 1.7702984038861902, "grad_norm": 1.174577236175537, "learning_rate": 2.000093980616051e-06, "loss": 0.4208, "step": 10204 }, { "epoch": 1.770471894517696, "grad_norm": 1.0686638355255127, "learning_rate": 1.9971251292923076e-06, "loss": 0.4965, "step": 10205 }, { "epoch": 1.770645385149202, "grad_norm": 0.8603082299232483, "learning_rate": 1.9941583672231624e-06, "loss": 0.4551, "step": 10206 }, { "epoch": 1.7708188757807077, "grad_norm": 0.8211946487426758, "learning_rate": 1.991193694752924e-06, "loss": 0.4965, "step": 10207 }, { "epoch": 1.7709923664122136, "grad_norm": 0.8139055967330933, "learning_rate": 1.9882311122256425e-06, "loss": 0.4799, "step": 10208 }, { "epoch": 1.7711658570437196, "grad_norm": 0.8746566772460938, "learning_rate": 1.985270619985127e-06, "loss": 0.447, "step": 10209 }, { "epoch": 1.7713393476752255, "grad_norm": 1.1020523309707642, "learning_rate": 1.9823122183749443e-06, "loss": 0.4387, "step": 10210 }, { "epoch": 1.7715128383067316, "grad_norm": 0.7173384428024292, "learning_rate": 1.979355907738427e-06, "loss": 0.5352, "step": 10211 }, { "epoch": 1.7716863289382374, "grad_norm": 0.7200915217399597, "learning_rate": 1.9764016884186545e-06, "loss": 0.4022, "step": 10212 }, { "epoch": 1.7718598195697433, "grad_norm": 0.8327056765556335, "learning_rate": 1.973449560758465e-06, "loss": 0.4324, "step": 10213 }, { "epoch": 1.772033310201249, "grad_norm": 0.7548806667327881, "learning_rate": 1.9704995251004622e-06, "loss": 0.5532, "step": 10214 }, { "epoch": 1.772206800832755, "grad_norm": 0.927376925945282, "learning_rate": 1.9675515817869974e-06, "loss": 0.4202, "step": 10215 }, { "epoch": 1.7723802914642608, "grad_norm": 1.4509769678115845, "learning_rate": 1.9646057311601853e-06, "loss": 0.4789, "step": 10216 }, { "epoch": 1.7725537820957669, "grad_norm": 0.6847125887870789, "learning_rate": 1.961661973561888e-06, "loss": 0.5554, "step": 10217 }, { "epoch": 1.7727272727272727, "grad_norm": 0.7594535946846008, "learning_rate": 1.958720309333746e-06, "loss": 0.4485, "step": 10218 }, { "epoch": 1.7729007633587788, "grad_norm": 0.7194278836250305, "learning_rate": 1.9557807388171257e-06, "loss": 0.4448, "step": 10219 }, { "epoch": 1.7730742539902846, "grad_norm": 0.6663083434104919, "learning_rate": 1.952843262353181e-06, "loss": 0.4426, "step": 10220 }, { "epoch": 1.7732477446217905, "grad_norm": 0.731451153755188, "learning_rate": 1.9499078802828044e-06, "loss": 0.478, "step": 10221 }, { "epoch": 1.7734212352532963, "grad_norm": 1.2679458856582642, "learning_rate": 1.946974592946651e-06, "loss": 0.4308, "step": 10222 }, { "epoch": 1.7735947258848022, "grad_norm": 1.0227187871932983, "learning_rate": 1.9440434006851296e-06, "loss": 0.4109, "step": 10223 }, { "epoch": 1.773768216516308, "grad_norm": 0.8104212880134583, "learning_rate": 1.9411143038384163e-06, "loss": 0.5212, "step": 10224 }, { "epoch": 1.773941707147814, "grad_norm": 0.8245218992233276, "learning_rate": 1.938187302746424e-06, "loss": 0.4449, "step": 10225 }, { "epoch": 1.77411519777932, "grad_norm": 0.6253479719161987, "learning_rate": 1.935262397748845e-06, "loss": 0.5405, "step": 10226 }, { "epoch": 1.7742886884108258, "grad_norm": 0.8679730892181396, "learning_rate": 1.932339589185115e-06, "loss": 0.46, "step": 10227 }, { "epoch": 1.7744621790423318, "grad_norm": 0.9214310646057129, "learning_rate": 1.929418877394429e-06, "loss": 0.4501, "step": 10228 }, { "epoch": 1.7746356696738377, "grad_norm": 0.6874799728393555, "learning_rate": 1.9265002627157335e-06, "loss": 0.5813, "step": 10229 }, { "epoch": 1.7748091603053435, "grad_norm": 0.7295671105384827, "learning_rate": 1.923583745487747e-06, "loss": 0.4421, "step": 10230 }, { "epoch": 1.7749826509368494, "grad_norm": 0.7274503111839294, "learning_rate": 1.920669326048932e-06, "loss": 0.4229, "step": 10231 }, { "epoch": 1.7751561415683552, "grad_norm": 0.7916246056556702, "learning_rate": 1.917757004737506e-06, "loss": 0.486, "step": 10232 }, { "epoch": 1.775329632199861, "grad_norm": 0.7616907954216003, "learning_rate": 1.914846781891444e-06, "loss": 0.3854, "step": 10233 }, { "epoch": 1.7755031228313671, "grad_norm": 0.7439457774162292, "learning_rate": 1.9119386578484934e-06, "loss": 0.4105, "step": 10234 }, { "epoch": 1.775676613462873, "grad_norm": 0.7740806341171265, "learning_rate": 1.909032632946137e-06, "loss": 0.4697, "step": 10235 }, { "epoch": 1.775850104094379, "grad_norm": 0.9047095775604248, "learning_rate": 1.906128707521624e-06, "loss": 0.4653, "step": 10236 }, { "epoch": 1.776023594725885, "grad_norm": 0.6695199608802795, "learning_rate": 1.903226881911957e-06, "loss": 0.6056, "step": 10237 }, { "epoch": 1.7761970853573907, "grad_norm": 0.5723161697387695, "learning_rate": 1.900327156453896e-06, "loss": 0.4205, "step": 10238 }, { "epoch": 1.7763705759888966, "grad_norm": 1.0215060710906982, "learning_rate": 1.8974295314839609e-06, "loss": 0.5173, "step": 10239 }, { "epoch": 1.7765440666204024, "grad_norm": 0.7813283205032349, "learning_rate": 1.894534007338422e-06, "loss": 0.4249, "step": 10240 }, { "epoch": 1.7767175572519083, "grad_norm": 0.8839203119277954, "learning_rate": 1.8916405843533092e-06, "loss": 0.4543, "step": 10241 }, { "epoch": 1.7768910478834143, "grad_norm": 0.9386798739433289, "learning_rate": 1.8887492628644022e-06, "loss": 0.3719, "step": 10242 }, { "epoch": 1.7770645385149202, "grad_norm": 0.8831916451454163, "learning_rate": 1.8858600432072527e-06, "loss": 0.4041, "step": 10243 }, { "epoch": 1.7772380291464263, "grad_norm": 0.710510790348053, "learning_rate": 1.8829729257171503e-06, "loss": 0.6177, "step": 10244 }, { "epoch": 1.777411519777932, "grad_norm": 0.6700908541679382, "learning_rate": 1.8800879107291537e-06, "loss": 0.5376, "step": 10245 }, { "epoch": 1.777585010409438, "grad_norm": 0.991139829158783, "learning_rate": 1.8772049985780616e-06, "loss": 0.3387, "step": 10246 }, { "epoch": 1.7777585010409438, "grad_norm": 0.8388208150863647, "learning_rate": 1.8743241895984554e-06, "loss": 0.424, "step": 10247 }, { "epoch": 1.7779319916724496, "grad_norm": 0.7843843698501587, "learning_rate": 1.871445484124641e-06, "loss": 0.5238, "step": 10248 }, { "epoch": 1.7781054823039555, "grad_norm": 1.037152886390686, "learning_rate": 1.8685688824907044e-06, "loss": 0.5073, "step": 10249 }, { "epoch": 1.7782789729354613, "grad_norm": 0.7034249305725098, "learning_rate": 1.8656943850304765e-06, "loss": 0.4507, "step": 10250 }, { "epoch": 1.7784524635669674, "grad_norm": 0.9017806053161621, "learning_rate": 1.8628219920775481e-06, "loss": 0.3208, "step": 10251 }, { "epoch": 1.7786259541984732, "grad_norm": 1.3028045892715454, "learning_rate": 1.8599517039652548e-06, "loss": 0.3081, "step": 10252 }, { "epoch": 1.7787994448299793, "grad_norm": 1.6372616291046143, "learning_rate": 1.8570835210267125e-06, "loss": 0.3827, "step": 10253 }, { "epoch": 1.7789729354614852, "grad_norm": 0.6452852487564087, "learning_rate": 1.8542174435947614e-06, "loss": 0.5815, "step": 10254 }, { "epoch": 1.779146426092991, "grad_norm": 0.6727705001831055, "learning_rate": 1.851353472002022e-06, "loss": 0.5663, "step": 10255 }, { "epoch": 1.7793199167244969, "grad_norm": 0.8144544363021851, "learning_rate": 1.8484916065808622e-06, "loss": 0.4896, "step": 10256 }, { "epoch": 1.7794934073560027, "grad_norm": 1.128566861152649, "learning_rate": 1.8456318476634006e-06, "loss": 0.4705, "step": 10257 }, { "epoch": 1.7796668979875085, "grad_norm": 0.8338884711265564, "learning_rate": 1.8427741955815138e-06, "loss": 0.4083, "step": 10258 }, { "epoch": 1.7798403886190146, "grad_norm": 0.813132107257843, "learning_rate": 1.839918650666841e-06, "loss": 0.4037, "step": 10259 }, { "epoch": 1.7800138792505205, "grad_norm": 0.926805853843689, "learning_rate": 1.8370652132507705e-06, "loss": 0.3605, "step": 10260 }, { "epoch": 1.7801873698820265, "grad_norm": 0.8099647760391235, "learning_rate": 1.8342138836644419e-06, "loss": 0.3469, "step": 10261 }, { "epoch": 1.7803608605135324, "grad_norm": 0.727428674697876, "learning_rate": 1.8313646622387639e-06, "loss": 0.3259, "step": 10262 }, { "epoch": 1.7805343511450382, "grad_norm": 0.8847869038581848, "learning_rate": 1.8285175493043893e-06, "loss": 0.4856, "step": 10263 }, { "epoch": 1.780707841776544, "grad_norm": 1.1018608808517456, "learning_rate": 1.8256725451917233e-06, "loss": 0.4294, "step": 10264 }, { "epoch": 1.78088133240805, "grad_norm": 0.8565403819084167, "learning_rate": 1.822829650230935e-06, "loss": 0.4897, "step": 10265 }, { "epoch": 1.7810548230395558, "grad_norm": 1.2412939071655273, "learning_rate": 1.8199888647519537e-06, "loss": 0.3682, "step": 10266 }, { "epoch": 1.7812283136710616, "grad_norm": 1.0650684833526611, "learning_rate": 1.81715018908444e-06, "loss": 0.3518, "step": 10267 }, { "epoch": 1.7814018043025677, "grad_norm": 0.8299738764762878, "learning_rate": 1.8143136235578374e-06, "loss": 0.551, "step": 10268 }, { "epoch": 1.7815752949340735, "grad_norm": 0.6508877873420715, "learning_rate": 1.811479168501329e-06, "loss": 0.5564, "step": 10269 }, { "epoch": 1.7817487855655796, "grad_norm": 0.7891836762428284, "learning_rate": 1.8086468242438582e-06, "loss": 0.4751, "step": 10270 }, { "epoch": 1.7819222761970854, "grad_norm": 1.8561694622039795, "learning_rate": 1.8058165911141179e-06, "loss": 0.4174, "step": 10271 }, { "epoch": 1.7820957668285913, "grad_norm": 0.8580641150474548, "learning_rate": 1.8029884694405631e-06, "loss": 0.5828, "step": 10272 }, { "epoch": 1.7822692574600971, "grad_norm": 0.7157613039016724, "learning_rate": 1.8001624595514022e-06, "loss": 0.4888, "step": 10273 }, { "epoch": 1.782442748091603, "grad_norm": 0.972023606300354, "learning_rate": 1.7973385617745953e-06, "loss": 0.4709, "step": 10274 }, { "epoch": 1.7826162387231088, "grad_norm": 1.1180193424224854, "learning_rate": 1.7945167764378536e-06, "loss": 0.46, "step": 10275 }, { "epoch": 1.7827897293546149, "grad_norm": 0.731166422367096, "learning_rate": 1.7916971038686614e-06, "loss": 0.6198, "step": 10276 }, { "epoch": 1.7829632199861207, "grad_norm": 0.8009506464004517, "learning_rate": 1.7888795443942308e-06, "loss": 0.5244, "step": 10277 }, { "epoch": 1.7831367106176268, "grad_norm": 0.9906429052352905, "learning_rate": 1.7860640983415533e-06, "loss": 0.3961, "step": 10278 }, { "epoch": 1.7833102012491326, "grad_norm": 0.8273224830627441, "learning_rate": 1.7832507660373589e-06, "loss": 0.3832, "step": 10279 }, { "epoch": 1.7834836918806385, "grad_norm": 1.7725870609283447, "learning_rate": 1.7804395478081416e-06, "loss": 0.5018, "step": 10280 }, { "epoch": 1.7836571825121443, "grad_norm": 0.9515945911407471, "learning_rate": 1.7776304439801384e-06, "loss": 0.4553, "step": 10281 }, { "epoch": 1.7838306731436502, "grad_norm": 0.7654293179512024, "learning_rate": 1.774823454879362e-06, "loss": 0.4698, "step": 10282 }, { "epoch": 1.784004163775156, "grad_norm": 1.058476448059082, "learning_rate": 1.7720185808315583e-06, "loss": 0.4478, "step": 10283 }, { "epoch": 1.784177654406662, "grad_norm": 0.7763571739196777, "learning_rate": 1.7692158221622379e-06, "loss": 0.3785, "step": 10284 }, { "epoch": 1.784351145038168, "grad_norm": 0.8149800300598145, "learning_rate": 1.7664151791966654e-06, "loss": 0.4167, "step": 10285 }, { "epoch": 1.7845246356696738, "grad_norm": 0.8562837839126587, "learning_rate": 1.763616652259854e-06, "loss": 0.4088, "step": 10286 }, { "epoch": 1.7846981263011799, "grad_norm": 0.7629222273826599, "learning_rate": 1.760820241676584e-06, "loss": 0.3582, "step": 10287 }, { "epoch": 1.7848716169326857, "grad_norm": 1.0314542055130005, "learning_rate": 1.758025947771378e-06, "loss": 0.4069, "step": 10288 }, { "epoch": 1.7850451075641915, "grad_norm": 0.9086339473724365, "learning_rate": 1.7552337708685163e-06, "loss": 0.4018, "step": 10289 }, { "epoch": 1.7852185981956974, "grad_norm": 0.8123794794082642, "learning_rate": 1.752443711292029e-06, "loss": 0.3992, "step": 10290 }, { "epoch": 1.7853920888272032, "grad_norm": 0.9427492618560791, "learning_rate": 1.749655769365719e-06, "loss": 0.3154, "step": 10291 }, { "epoch": 1.785565579458709, "grad_norm": 1.0322617292404175, "learning_rate": 1.7468699454131211e-06, "loss": 0.4545, "step": 10292 }, { "epoch": 1.7857390700902152, "grad_norm": 0.9409964680671692, "learning_rate": 1.7440862397575343e-06, "loss": 0.3777, "step": 10293 }, { "epoch": 1.785912560721721, "grad_norm": 0.7909843325614929, "learning_rate": 1.741304652722009e-06, "loss": 0.4912, "step": 10294 }, { "epoch": 1.786086051353227, "grad_norm": 1.1315380334854126, "learning_rate": 1.7385251846293606e-06, "loss": 0.4037, "step": 10295 }, { "epoch": 1.786259541984733, "grad_norm": 0.7563734650611877, "learning_rate": 1.7357478358021374e-06, "loss": 0.4702, "step": 10296 }, { "epoch": 1.7864330326162388, "grad_norm": 2.4120776653289795, "learning_rate": 1.732972606562664e-06, "loss": 0.4395, "step": 10297 }, { "epoch": 1.7866065232477446, "grad_norm": 0.7617950439453125, "learning_rate": 1.7301994972330028e-06, "loss": 0.4054, "step": 10298 }, { "epoch": 1.7867800138792505, "grad_norm": 0.8544481992721558, "learning_rate": 1.7274285081349807e-06, "loss": 0.3783, "step": 10299 }, { "epoch": 1.7869535045107563, "grad_norm": 0.6865641474723816, "learning_rate": 1.724659639590167e-06, "loss": 0.3918, "step": 10300 }, { "epoch": 1.7871269951422624, "grad_norm": 1.0140076875686646, "learning_rate": 1.7218928919199008e-06, "loss": 0.4379, "step": 10301 }, { "epoch": 1.7873004857737682, "grad_norm": 0.9270845055580139, "learning_rate": 1.7191282654452646e-06, "loss": 0.6001, "step": 10302 }, { "epoch": 1.7874739764052743, "grad_norm": 0.764961838722229, "learning_rate": 1.7163657604870932e-06, "loss": 0.4532, "step": 10303 }, { "epoch": 1.7876474670367801, "grad_norm": 0.953648030757904, "learning_rate": 1.7136053773659766e-06, "loss": 0.4871, "step": 10304 }, { "epoch": 1.787820957668286, "grad_norm": 0.9284045696258545, "learning_rate": 1.71084711640227e-06, "loss": 0.5942, "step": 10305 }, { "epoch": 1.7879944482997918, "grad_norm": 0.9803312420845032, "learning_rate": 1.7080909779160615e-06, "loss": 0.4761, "step": 10306 }, { "epoch": 1.7881679389312977, "grad_norm": 0.7785720825195312, "learning_rate": 1.705336962227211e-06, "loss": 0.454, "step": 10307 }, { "epoch": 1.7883414295628035, "grad_norm": 0.9398047924041748, "learning_rate": 1.7025850696553248e-06, "loss": 0.3566, "step": 10308 }, { "epoch": 1.7885149201943094, "grad_norm": 0.910733163356781, "learning_rate": 1.6998353005197565e-06, "loss": 0.4122, "step": 10309 }, { "epoch": 1.7886884108258154, "grad_norm": 0.7384318709373474, "learning_rate": 1.6970876551396309e-06, "loss": 0.424, "step": 10310 }, { "epoch": 1.7888619014573213, "grad_norm": 0.763297975063324, "learning_rate": 1.6943421338338085e-06, "loss": 0.367, "step": 10311 }, { "epoch": 1.7890353920888273, "grad_norm": 1.9891629219055176, "learning_rate": 1.6915987369209142e-06, "loss": 0.3597, "step": 10312 }, { "epoch": 1.7892088827203332, "grad_norm": 0.8588789701461792, "learning_rate": 1.6888574647193157e-06, "loss": 0.3425, "step": 10313 }, { "epoch": 1.789382373351839, "grad_norm": 0.9315842986106873, "learning_rate": 1.6861183175471495e-06, "loss": 0.4163, "step": 10314 }, { "epoch": 1.7895558639833449, "grad_norm": 0.820051908493042, "learning_rate": 1.6833812957222884e-06, "loss": 0.4178, "step": 10315 }, { "epoch": 1.7897293546148507, "grad_norm": 2.0135653018951416, "learning_rate": 1.6806463995623735e-06, "loss": 0.4259, "step": 10316 }, { "epoch": 1.7899028452463566, "grad_norm": 0.8339837193489075, "learning_rate": 1.6779136293847864e-06, "loss": 0.4941, "step": 10317 }, { "epoch": 1.7900763358778626, "grad_norm": 1.223927617073059, "learning_rate": 1.6751829855066804e-06, "loss": 0.4333, "step": 10318 }, { "epoch": 1.7902498265093685, "grad_norm": 1.108045220375061, "learning_rate": 1.6724544682449328e-06, "loss": 0.4402, "step": 10319 }, { "epoch": 1.7904233171408745, "grad_norm": 1.0140706300735474, "learning_rate": 1.669728077916206e-06, "loss": 0.3382, "step": 10320 }, { "epoch": 1.7905968077723804, "grad_norm": 0.6455281972885132, "learning_rate": 1.6670038148368916e-06, "loss": 0.4989, "step": 10321 }, { "epoch": 1.7907702984038862, "grad_norm": 0.7716452479362488, "learning_rate": 1.6642816793231499e-06, "loss": 0.5653, "step": 10322 }, { "epoch": 1.790943789035392, "grad_norm": 1.3400412797927856, "learning_rate": 1.661561671690879e-06, "loss": 0.4108, "step": 10323 }, { "epoch": 1.791117279666898, "grad_norm": 1.5090062618255615, "learning_rate": 1.6588437922557533e-06, "loss": 0.3788, "step": 10324 }, { "epoch": 1.7912907702984038, "grad_norm": 0.7390293478965759, "learning_rate": 1.6561280413331672e-06, "loss": 0.484, "step": 10325 }, { "epoch": 1.7914642609299096, "grad_norm": 0.8134437203407288, "learning_rate": 1.6534144192383038e-06, "loss": 0.5377, "step": 10326 }, { "epoch": 1.7916377515614157, "grad_norm": 0.7178210020065308, "learning_rate": 1.6507029262860718e-06, "loss": 0.491, "step": 10327 }, { "epoch": 1.7918112421929215, "grad_norm": 0.9239500761032104, "learning_rate": 1.6479935627911481e-06, "loss": 0.584, "step": 10328 }, { "epoch": 1.7919847328244276, "grad_norm": 1.1858335733413696, "learning_rate": 1.6452863290679522e-06, "loss": 0.5474, "step": 10329 }, { "epoch": 1.7921582234559335, "grad_norm": 0.9040897488594055, "learning_rate": 1.6425812254306707e-06, "loss": 0.3726, "step": 10330 }, { "epoch": 1.7923317140874393, "grad_norm": 0.6616557240486145, "learning_rate": 1.6398782521932254e-06, "loss": 0.578, "step": 10331 }, { "epoch": 1.7925052047189451, "grad_norm": 0.6143748760223389, "learning_rate": 1.637177409669304e-06, "loss": 0.5868, "step": 10332 }, { "epoch": 1.792678695350451, "grad_norm": 1.123853325843811, "learning_rate": 1.6344786981723371e-06, "loss": 0.3966, "step": 10333 }, { "epoch": 1.7928521859819568, "grad_norm": 1.0074896812438965, "learning_rate": 1.6317821180155214e-06, "loss": 0.3824, "step": 10334 }, { "epoch": 1.793025676613463, "grad_norm": 1.1881918907165527, "learning_rate": 1.6290876695117951e-06, "loss": 0.5178, "step": 10335 }, { "epoch": 1.7931991672449688, "grad_norm": 1.177901029586792, "learning_rate": 1.6263953529738464e-06, "loss": 0.3974, "step": 10336 }, { "epoch": 1.7933726578764748, "grad_norm": 0.6227571964263916, "learning_rate": 1.6237051687141336e-06, "loss": 0.5773, "step": 10337 }, { "epoch": 1.7935461485079807, "grad_norm": 0.8060899376869202, "learning_rate": 1.621017117044843e-06, "loss": 0.4395, "step": 10338 }, { "epoch": 1.7937196391394865, "grad_norm": 0.8530584573745728, "learning_rate": 1.6183311982779337e-06, "loss": 0.4142, "step": 10339 }, { "epoch": 1.7938931297709924, "grad_norm": 0.9127979278564453, "learning_rate": 1.6156474127251077e-06, "loss": 0.4025, "step": 10340 }, { "epoch": 1.7940666204024982, "grad_norm": 0.813700795173645, "learning_rate": 1.6129657606978221e-06, "loss": 0.5027, "step": 10341 }, { "epoch": 1.794240111034004, "grad_norm": 1.3757143020629883, "learning_rate": 1.6102862425072818e-06, "loss": 0.3531, "step": 10342 }, { "epoch": 1.7944136016655101, "grad_norm": 0.8702859878540039, "learning_rate": 1.6076088584644534e-06, "loss": 0.3686, "step": 10343 }, { "epoch": 1.794587092297016, "grad_norm": 0.9329940676689148, "learning_rate": 1.6049336088800505e-06, "loss": 0.4861, "step": 10344 }, { "epoch": 1.7947605829285218, "grad_norm": 1.13565194606781, "learning_rate": 1.6022604940645337e-06, "loss": 0.5552, "step": 10345 }, { "epoch": 1.7949340735600279, "grad_norm": 0.797062337398529, "learning_rate": 1.5995895143281236e-06, "loss": 0.5052, "step": 10346 }, { "epoch": 1.7951075641915337, "grad_norm": 0.7553451061248779, "learning_rate": 1.596920669980797e-06, "loss": 0.5099, "step": 10347 }, { "epoch": 1.7952810548230396, "grad_norm": 0.8680810928344727, "learning_rate": 1.5942539613322638e-06, "loss": 0.3078, "step": 10348 }, { "epoch": 1.7954545454545454, "grad_norm": 0.8695831298828125, "learning_rate": 1.5915893886920098e-06, "loss": 0.3704, "step": 10349 }, { "epoch": 1.7956280360860513, "grad_norm": 0.7685412764549255, "learning_rate": 1.5889269523692541e-06, "loss": 0.4189, "step": 10350 }, { "epoch": 1.795801526717557, "grad_norm": 0.724926233291626, "learning_rate": 1.586266652672981e-06, "loss": 0.514, "step": 10351 }, { "epoch": 1.7959750173490632, "grad_norm": 3.163278341293335, "learning_rate": 1.5836084899119165e-06, "loss": 0.4619, "step": 10352 }, { "epoch": 1.796148507980569, "grad_norm": 2.5651533603668213, "learning_rate": 1.5809524643945472e-06, "loss": 0.4409, "step": 10353 }, { "epoch": 1.796321998612075, "grad_norm": 0.7803921103477478, "learning_rate": 1.5782985764291091e-06, "loss": 0.3702, "step": 10354 }, { "epoch": 1.796495489243581, "grad_norm": 0.798792839050293, "learning_rate": 1.575646826323587e-06, "loss": 0.3646, "step": 10355 }, { "epoch": 1.7966689798750868, "grad_norm": 0.964057981967926, "learning_rate": 1.5729972143857164e-06, "loss": 0.3783, "step": 10356 }, { "epoch": 1.7968424705065926, "grad_norm": 1.277469515800476, "learning_rate": 1.5703497409229896e-06, "loss": 0.3801, "step": 10357 }, { "epoch": 1.7970159611380985, "grad_norm": 0.7194745540618896, "learning_rate": 1.567704406242654e-06, "loss": 0.5676, "step": 10358 }, { "epoch": 1.7971894517696043, "grad_norm": 0.824563205242157, "learning_rate": 1.5650612106516993e-06, "loss": 0.3987, "step": 10359 }, { "epoch": 1.7973629424011104, "grad_norm": 0.9087297916412354, "learning_rate": 1.5624201544568717e-06, "loss": 0.3857, "step": 10360 }, { "epoch": 1.7975364330326162, "grad_norm": 0.7791262269020081, "learning_rate": 1.559781237964666e-06, "loss": 0.4685, "step": 10361 }, { "epoch": 1.7977099236641223, "grad_norm": 0.7930310964584351, "learning_rate": 1.557144461481337e-06, "loss": 0.4318, "step": 10362 }, { "epoch": 1.7978834142956281, "grad_norm": 0.7366043925285339, "learning_rate": 1.5545098253128843e-06, "loss": 0.5748, "step": 10363 }, { "epoch": 1.798056904927134, "grad_norm": 0.7938430309295654, "learning_rate": 1.5518773297650613e-06, "loss": 0.4272, "step": 10364 }, { "epoch": 1.7982303955586398, "grad_norm": 1.9784938097000122, "learning_rate": 1.5492469751433658e-06, "loss": 0.3588, "step": 10365 }, { "epoch": 1.7984038861901457, "grad_norm": 0.7573646903038025, "learning_rate": 1.5466187617530647e-06, "loss": 0.4295, "step": 10366 }, { "epoch": 1.7985773768216515, "grad_norm": 0.8965981602668762, "learning_rate": 1.543992689899152e-06, "loss": 0.3536, "step": 10367 }, { "epoch": 1.7987508674531574, "grad_norm": 0.7845249772071838, "learning_rate": 1.541368759886397e-06, "loss": 0.3402, "step": 10368 }, { "epoch": 1.7989243580846634, "grad_norm": 0.685406506061554, "learning_rate": 1.5387469720193048e-06, "loss": 0.5784, "step": 10369 }, { "epoch": 1.7990978487161693, "grad_norm": 0.6819063425064087, "learning_rate": 1.5361273266021392e-06, "loss": 0.5713, "step": 10370 }, { "epoch": 1.7992713393476754, "grad_norm": 0.6778554320335388, "learning_rate": 1.5335098239389102e-06, "loss": 0.4902, "step": 10371 }, { "epoch": 1.7994448299791812, "grad_norm": 0.6078512668609619, "learning_rate": 1.5308944643333857e-06, "loss": 0.5503, "step": 10372 }, { "epoch": 1.799618320610687, "grad_norm": 0.7713558077812195, "learning_rate": 1.5282812480890784e-06, "loss": 0.4594, "step": 10373 }, { "epoch": 1.799791811242193, "grad_norm": 0.6342697143554688, "learning_rate": 1.5256701755092574e-06, "loss": 0.5511, "step": 10374 }, { "epoch": 1.7999653018736987, "grad_norm": 0.811905026435852, "learning_rate": 1.5230612468969352e-06, "loss": 0.4613, "step": 10375 }, { "epoch": 1.8001387925052046, "grad_norm": 0.6842833161354065, "learning_rate": 1.5204544625548922e-06, "loss": 0.4199, "step": 10376 }, { "epoch": 1.8003122831367107, "grad_norm": 0.8111035823822021, "learning_rate": 1.5178498227856353e-06, "loss": 0.4482, "step": 10377 }, { "epoch": 1.8004857737682165, "grad_norm": 0.774206817150116, "learning_rate": 1.5152473278914447e-06, "loss": 0.4365, "step": 10378 }, { "epoch": 1.8006592643997226, "grad_norm": 0.9334141612052917, "learning_rate": 1.5126469781743436e-06, "loss": 0.4044, "step": 10379 }, { "epoch": 1.8008327550312284, "grad_norm": 0.7614678144454956, "learning_rate": 1.5100487739360993e-06, "loss": 0.3746, "step": 10380 }, { "epoch": 1.8010062456627343, "grad_norm": 0.7316577434539795, "learning_rate": 1.5074527154782393e-06, "loss": 0.4777, "step": 10381 }, { "epoch": 1.80117973629424, "grad_norm": 0.8212964534759521, "learning_rate": 1.5048588031020405e-06, "loss": 0.3773, "step": 10382 }, { "epoch": 1.801353226925746, "grad_norm": 2.187487840652466, "learning_rate": 1.5022670371085314e-06, "loss": 0.5046, "step": 10383 }, { "epoch": 1.8015267175572518, "grad_norm": 0.6952325701713562, "learning_rate": 1.4996774177984818e-06, "loss": 0.4368, "step": 10384 }, { "epoch": 1.8017002081887576, "grad_norm": 1.030739665031433, "learning_rate": 1.4970899454724319e-06, "loss": 0.543, "step": 10385 }, { "epoch": 1.8018736988202637, "grad_norm": 0.6696087718009949, "learning_rate": 1.494504620430648e-06, "loss": 0.4968, "step": 10386 }, { "epoch": 1.8020471894517696, "grad_norm": 0.9498783349990845, "learning_rate": 1.4919214429731677e-06, "loss": 0.3719, "step": 10387 }, { "epoch": 1.8022206800832756, "grad_norm": 0.7543936371803284, "learning_rate": 1.489340413399769e-06, "loss": 0.4169, "step": 10388 }, { "epoch": 1.8023941707147815, "grad_norm": 0.7348383069038391, "learning_rate": 1.4867615320099904e-06, "loss": 0.4058, "step": 10389 }, { "epoch": 1.8025676613462873, "grad_norm": 1.013086199760437, "learning_rate": 1.484184799103101e-06, "loss": 0.399, "step": 10390 }, { "epoch": 1.8027411519777932, "grad_norm": 0.8915300369262695, "learning_rate": 1.4816102149781442e-06, "loss": 0.3633, "step": 10391 }, { "epoch": 1.802914642609299, "grad_norm": 0.7792953252792358, "learning_rate": 1.4790377799339007e-06, "loss": 0.4714, "step": 10392 }, { "epoch": 1.8030881332408049, "grad_norm": 0.6869494318962097, "learning_rate": 1.476467494268905e-06, "loss": 0.5162, "step": 10393 }, { "epoch": 1.803261623872311, "grad_norm": 0.7749457359313965, "learning_rate": 1.4738993582814343e-06, "loss": 0.5782, "step": 10394 }, { "epoch": 1.8034351145038168, "grad_norm": 0.6259393692016602, "learning_rate": 1.471333372269539e-06, "loss": 0.4888, "step": 10395 }, { "epoch": 1.8036086051353228, "grad_norm": 0.9049027562141418, "learning_rate": 1.4687695365309895e-06, "loss": 0.4958, "step": 10396 }, { "epoch": 1.8037820957668287, "grad_norm": 0.8636259436607361, "learning_rate": 1.4662078513633327e-06, "loss": 0.4354, "step": 10397 }, { "epoch": 1.8039555863983345, "grad_norm": 0.949533998966217, "learning_rate": 1.4636483170638505e-06, "loss": 0.5353, "step": 10398 }, { "epoch": 1.8041290770298404, "grad_norm": 0.8600096702575684, "learning_rate": 1.4610909339295788e-06, "loss": 0.4138, "step": 10399 }, { "epoch": 1.8043025676613462, "grad_norm": 0.8306368589401245, "learning_rate": 1.4585357022573043e-06, "loss": 0.4678, "step": 10400 }, { "epoch": 1.804476058292852, "grad_norm": 0.7927076816558838, "learning_rate": 1.455982622343568e-06, "loss": 0.4506, "step": 10401 }, { "epoch": 1.8046495489243581, "grad_norm": 0.8654670715332031, "learning_rate": 1.4534316944846595e-06, "loss": 0.4847, "step": 10402 }, { "epoch": 1.804823039555864, "grad_norm": 0.7912794947624207, "learning_rate": 1.4508829189766104e-06, "loss": 0.424, "step": 10403 }, { "epoch": 1.8049965301873698, "grad_norm": 0.7177606821060181, "learning_rate": 1.4483362961152114e-06, "loss": 0.4616, "step": 10404 }, { "epoch": 1.805170020818876, "grad_norm": 0.8227936029434204, "learning_rate": 1.4457918261960057e-06, "loss": 0.5386, "step": 10405 }, { "epoch": 1.8053435114503817, "grad_norm": 1.0631943941116333, "learning_rate": 1.4432495095142796e-06, "loss": 0.3779, "step": 10406 }, { "epoch": 1.8055170020818876, "grad_norm": 0.9247499108314514, "learning_rate": 1.4407093463650679e-06, "loss": 0.5629, "step": 10407 }, { "epoch": 1.8056904927133934, "grad_norm": 1.0188275575637817, "learning_rate": 1.438171337043164e-06, "loss": 0.42, "step": 10408 }, { "epoch": 1.8058639833448993, "grad_norm": 0.669012725353241, "learning_rate": 1.4356354818431028e-06, "loss": 0.4211, "step": 10409 }, { "epoch": 1.8060374739764051, "grad_norm": 0.7970829606056213, "learning_rate": 1.4331017810591764e-06, "loss": 0.431, "step": 10410 }, { "epoch": 1.8062109646079112, "grad_norm": 0.9136406183242798, "learning_rate": 1.4305702349854245e-06, "loss": 0.499, "step": 10411 }, { "epoch": 1.806384455239417, "grad_norm": 0.9455069303512573, "learning_rate": 1.4280408439156369e-06, "loss": 0.3217, "step": 10412 }, { "epoch": 1.806557945870923, "grad_norm": 0.6552941799163818, "learning_rate": 1.425513608143343e-06, "loss": 0.4631, "step": 10413 }, { "epoch": 1.806731436502429, "grad_norm": 0.6552343964576721, "learning_rate": 1.4229885279618461e-06, "loss": 0.4694, "step": 10414 }, { "epoch": 1.8069049271339348, "grad_norm": 0.7940164804458618, "learning_rate": 1.4204656036641717e-06, "loss": 0.3919, "step": 10415 }, { "epoch": 1.8070784177654406, "grad_norm": 0.7920050024986267, "learning_rate": 1.4179448355431168e-06, "loss": 0.4413, "step": 10416 }, { "epoch": 1.8072519083969465, "grad_norm": 0.7787114977836609, "learning_rate": 1.415426223891212e-06, "loss": 0.4563, "step": 10417 }, { "epoch": 1.8074253990284523, "grad_norm": 0.8907957673072815, "learning_rate": 1.4129097690007543e-06, "loss": 0.5712, "step": 10418 }, { "epoch": 1.8075988896599584, "grad_norm": 0.8291707634925842, "learning_rate": 1.4103954711637724e-06, "loss": 0.3877, "step": 10419 }, { "epoch": 1.8077723802914643, "grad_norm": 1.100144863128662, "learning_rate": 1.4078833306720573e-06, "loss": 0.4199, "step": 10420 }, { "epoch": 1.8079458709229703, "grad_norm": 0.8819019198417664, "learning_rate": 1.4053733478171493e-06, "loss": 0.5428, "step": 10421 }, { "epoch": 1.8081193615544762, "grad_norm": 0.6700547933578491, "learning_rate": 1.4028655228903286e-06, "loss": 0.4471, "step": 10422 }, { "epoch": 1.808292852185982, "grad_norm": 0.7875379920005798, "learning_rate": 1.4003598561826337e-06, "loss": 0.3529, "step": 10423 }, { "epoch": 1.8084663428174879, "grad_norm": 0.9104384183883667, "learning_rate": 1.3978563479848538e-06, "loss": 0.4794, "step": 10424 }, { "epoch": 1.8086398334489937, "grad_norm": 0.9371272921562195, "learning_rate": 1.3953549985875148e-06, "loss": 0.3531, "step": 10425 }, { "epoch": 1.8088133240804996, "grad_norm": 0.7614732980728149, "learning_rate": 1.3928558082809107e-06, "loss": 0.4926, "step": 10426 }, { "epoch": 1.8089868147120054, "grad_norm": 0.8160543441772461, "learning_rate": 1.390358777355072e-06, "loss": 0.4971, "step": 10427 }, { "epoch": 1.8091603053435115, "grad_norm": 1.235662579536438, "learning_rate": 1.3878639060997822e-06, "loss": 0.3992, "step": 10428 }, { "epoch": 1.8093337959750173, "grad_norm": 0.9123250842094421, "learning_rate": 1.3853711948045678e-06, "loss": 0.4918, "step": 10429 }, { "epoch": 1.8095072866065234, "grad_norm": 0.7046394944190979, "learning_rate": 1.3828806437587216e-06, "loss": 0.4102, "step": 10430 }, { "epoch": 1.8096807772380292, "grad_norm": 0.9694055914878845, "learning_rate": 1.3803922532512703e-06, "loss": 0.4529, "step": 10431 }, { "epoch": 1.809854267869535, "grad_norm": 0.8562542200088501, "learning_rate": 1.3779060235709918e-06, "loss": 0.5111, "step": 10432 }, { "epoch": 1.810027758501041, "grad_norm": 0.7286620736122131, "learning_rate": 1.3754219550064196e-06, "loss": 0.4911, "step": 10433 }, { "epoch": 1.8102012491325468, "grad_norm": 0.9972683191299438, "learning_rate": 1.3729400478458322e-06, "loss": 0.4991, "step": 10434 }, { "epoch": 1.8103747397640526, "grad_norm": 0.7474499344825745, "learning_rate": 1.3704603023772567e-06, "loss": 0.3622, "step": 10435 }, { "epoch": 1.8105482303955587, "grad_norm": 1.534613013267517, "learning_rate": 1.3679827188884675e-06, "loss": 0.4198, "step": 10436 }, { "epoch": 1.8107217210270645, "grad_norm": 0.8257385492324829, "learning_rate": 1.3655072976670014e-06, "loss": 0.5247, "step": 10437 }, { "epoch": 1.8108952116585706, "grad_norm": 0.897869884967804, "learning_rate": 1.3630340390001195e-06, "loss": 0.3745, "step": 10438 }, { "epoch": 1.8110687022900764, "grad_norm": 0.7141605019569397, "learning_rate": 1.360562943174859e-06, "loss": 0.4653, "step": 10439 }, { "epoch": 1.8112421929215823, "grad_norm": 0.8600662350654602, "learning_rate": 1.358094010477986e-06, "loss": 0.3854, "step": 10440 }, { "epoch": 1.8114156835530881, "grad_norm": 0.5740742087364197, "learning_rate": 1.355627241196027e-06, "loss": 0.5186, "step": 10441 }, { "epoch": 1.811589174184594, "grad_norm": 0.6400471925735474, "learning_rate": 1.353162635615246e-06, "loss": 0.5026, "step": 10442 }, { "epoch": 1.8117626648160998, "grad_norm": 0.8278120160102844, "learning_rate": 1.3507001940216767e-06, "loss": 0.4049, "step": 10443 }, { "epoch": 1.8119361554476057, "grad_norm": 1.3171443939208984, "learning_rate": 1.3482399167010752e-06, "loss": 0.3921, "step": 10444 }, { "epoch": 1.8121096460791117, "grad_norm": 1.3614461421966553, "learning_rate": 1.345781803938968e-06, "loss": 0.3783, "step": 10445 }, { "epoch": 1.8122831367106176, "grad_norm": 0.786858320236206, "learning_rate": 1.3433258560206165e-06, "loss": 0.5413, "step": 10446 }, { "epoch": 1.8124566273421236, "grad_norm": 0.8558787703514099, "learning_rate": 1.3408720732310432e-06, "loss": 0.3998, "step": 10447 }, { "epoch": 1.8126301179736295, "grad_norm": 0.9111509919166565, "learning_rate": 1.3384204558550028e-06, "loss": 0.4165, "step": 10448 }, { "epoch": 1.8128036086051353, "grad_norm": 1.0603716373443604, "learning_rate": 1.3359710041770147e-06, "loss": 0.3884, "step": 10449 }, { "epoch": 1.8129770992366412, "grad_norm": 3.856734037399292, "learning_rate": 1.333523718481342e-06, "loss": 0.5634, "step": 10450 }, { "epoch": 1.813150589868147, "grad_norm": 0.672587513923645, "learning_rate": 1.3310785990519913e-06, "loss": 0.5081, "step": 10451 }, { "epoch": 1.8133240804996529, "grad_norm": 0.8601288795471191, "learning_rate": 1.3286356461727202e-06, "loss": 0.5056, "step": 10452 }, { "epoch": 1.813497571131159, "grad_norm": 0.9924600720405579, "learning_rate": 1.3261948601270413e-06, "loss": 0.3995, "step": 10453 }, { "epoch": 1.8136710617626648, "grad_norm": 0.8877877593040466, "learning_rate": 1.3237562411982086e-06, "loss": 0.4657, "step": 10454 }, { "epoch": 1.8138445523941709, "grad_norm": 0.6752092242240906, "learning_rate": 1.3213197896692264e-06, "loss": 0.6122, "step": 10455 }, { "epoch": 1.8140180430256767, "grad_norm": 0.9856737852096558, "learning_rate": 1.3188855058228468e-06, "loss": 0.4596, "step": 10456 }, { "epoch": 1.8141915336571826, "grad_norm": 0.7623448967933655, "learning_rate": 1.31645338994157e-06, "loss": 0.5175, "step": 10457 }, { "epoch": 1.8143650242886884, "grad_norm": 0.8224581480026245, "learning_rate": 1.3140234423076504e-06, "loss": 0.4199, "step": 10458 }, { "epoch": 1.8145385149201942, "grad_norm": 0.7056581974029541, "learning_rate": 1.3115956632030845e-06, "loss": 0.3624, "step": 10459 }, { "epoch": 1.8147120055517, "grad_norm": 0.7192043662071228, "learning_rate": 1.3091700529096186e-06, "loss": 0.5594, "step": 10460 }, { "epoch": 1.8148854961832062, "grad_norm": 0.6109599471092224, "learning_rate": 1.3067466117087424e-06, "loss": 0.5602, "step": 10461 }, { "epoch": 1.815058986814712, "grad_norm": 0.9726971983909607, "learning_rate": 1.304325339881709e-06, "loss": 0.5156, "step": 10462 }, { "epoch": 1.8152324774462179, "grad_norm": 1.1493715047836304, "learning_rate": 1.3019062377095026e-06, "loss": 0.293, "step": 10463 }, { "epoch": 1.815405968077724, "grad_norm": 0.8043683171272278, "learning_rate": 1.2994893054728653e-06, "loss": 0.3525, "step": 10464 }, { "epoch": 1.8155794587092298, "grad_norm": 0.821807324886322, "learning_rate": 1.2970745434522835e-06, "loss": 0.4825, "step": 10465 }, { "epoch": 1.8157529493407356, "grad_norm": 4.892745494842529, "learning_rate": 1.2946619519279979e-06, "loss": 0.3548, "step": 10466 }, { "epoch": 1.8159264399722415, "grad_norm": 1.036017894744873, "learning_rate": 1.2922515311799843e-06, "loss": 0.4144, "step": 10467 }, { "epoch": 1.8160999306037473, "grad_norm": 1.1273837089538574, "learning_rate": 1.2898432814879813e-06, "loss": 0.421, "step": 10468 }, { "epoch": 1.8162734212352531, "grad_norm": 1.5924334526062012, "learning_rate": 1.2874372031314675e-06, "loss": 0.5062, "step": 10469 }, { "epoch": 1.8164469118667592, "grad_norm": 1.2282288074493408, "learning_rate": 1.2850332963896706e-06, "loss": 0.4563, "step": 10470 }, { "epoch": 1.816620402498265, "grad_norm": 0.7977563142776489, "learning_rate": 1.2826315615415652e-06, "loss": 0.476, "step": 10471 }, { "epoch": 1.8167938931297711, "grad_norm": 0.8569274544715881, "learning_rate": 1.2802319988658818e-06, "loss": 0.3538, "step": 10472 }, { "epoch": 1.816967383761277, "grad_norm": 0.6680198907852173, "learning_rate": 1.277834608641082e-06, "loss": 0.5015, "step": 10473 }, { "epoch": 1.8171408743927828, "grad_norm": 0.7478260397911072, "learning_rate": 1.2754393911453944e-06, "loss": 0.5546, "step": 10474 }, { "epoch": 1.8173143650242887, "grad_norm": 1.1330883502960205, "learning_rate": 1.273046346656781e-06, "loss": 0.3973, "step": 10475 }, { "epoch": 1.8174878556557945, "grad_norm": 0.7933983206748962, "learning_rate": 1.2706554754529665e-06, "loss": 0.3932, "step": 10476 }, { "epoch": 1.8176613462873004, "grad_norm": 0.918095052242279, "learning_rate": 1.2682667778114022e-06, "loss": 0.4001, "step": 10477 }, { "epoch": 1.8178348369188064, "grad_norm": 0.7663379907608032, "learning_rate": 1.2658802540093084e-06, "loss": 0.3914, "step": 10478 }, { "epoch": 1.8180083275503123, "grad_norm": 0.7536807060241699, "learning_rate": 1.2634959043236395e-06, "loss": 0.537, "step": 10479 }, { "epoch": 1.8181818181818183, "grad_norm": 1.1602418422698975, "learning_rate": 1.2611137290311003e-06, "loss": 0.449, "step": 10480 }, { "epoch": 1.8183553088133242, "grad_norm": 0.8546183705329895, "learning_rate": 1.2587337284081524e-06, "loss": 0.3546, "step": 10481 }, { "epoch": 1.81852879944483, "grad_norm": 1.0874425172805786, "learning_rate": 1.2563559027309925e-06, "loss": 0.4035, "step": 10482 }, { "epoch": 1.8187022900763359, "grad_norm": 0.9559531807899475, "learning_rate": 1.2539802522755707e-06, "loss": 0.4085, "step": 10483 }, { "epoch": 1.8188757807078417, "grad_norm": 0.7305978536605835, "learning_rate": 1.2516067773175822e-06, "loss": 0.5343, "step": 10484 }, { "epoch": 1.8190492713393476, "grad_norm": 0.7242780923843384, "learning_rate": 1.2492354781324778e-06, "loss": 0.35, "step": 10485 }, { "epoch": 1.8192227619708534, "grad_norm": 0.8772955536842346, "learning_rate": 1.2468663549954397e-06, "loss": 0.3341, "step": 10486 }, { "epoch": 1.8193962526023595, "grad_norm": 0.790827751159668, "learning_rate": 1.2444994081814165e-06, "loss": 0.4374, "step": 10487 }, { "epoch": 1.8195697432338653, "grad_norm": 0.7314450144767761, "learning_rate": 1.2421346379650868e-06, "loss": 0.3947, "step": 10488 }, { "epoch": 1.8197432338653714, "grad_norm": 0.8860965371131897, "learning_rate": 1.2397720446208972e-06, "loss": 0.3487, "step": 10489 }, { "epoch": 1.8199167244968772, "grad_norm": 0.8088721036911011, "learning_rate": 1.2374116284230153e-06, "loss": 0.3801, "step": 10490 }, { "epoch": 1.820090215128383, "grad_norm": 0.8200245499610901, "learning_rate": 1.2350533896453799e-06, "loss": 0.3249, "step": 10491 }, { "epoch": 1.820263705759889, "grad_norm": 1.028024673461914, "learning_rate": 1.2326973285616628e-06, "loss": 0.4276, "step": 10492 }, { "epoch": 1.8204371963913948, "grad_norm": 0.7007224559783936, "learning_rate": 1.2303434454452901e-06, "loss": 0.5033, "step": 10493 }, { "epoch": 1.8206106870229006, "grad_norm": 0.7374883890151978, "learning_rate": 1.2279917405694298e-06, "loss": 0.4574, "step": 10494 }, { "epoch": 1.8207841776544067, "grad_norm": 0.7309583425521851, "learning_rate": 1.2256422142070057e-06, "loss": 0.4271, "step": 10495 }, { "epoch": 1.8209576682859125, "grad_norm": 0.7799156904220581, "learning_rate": 1.2232948666306732e-06, "loss": 0.4905, "step": 10496 }, { "epoch": 1.8211311589174186, "grad_norm": 1.1447699069976807, "learning_rate": 1.2209496981128543e-06, "loss": 0.5212, "step": 10497 }, { "epoch": 1.8213046495489245, "grad_norm": 0.6853039264678955, "learning_rate": 1.218606708925707e-06, "loss": 0.6538, "step": 10498 }, { "epoch": 1.8214781401804303, "grad_norm": 1.4950239658355713, "learning_rate": 1.2162658993411336e-06, "loss": 0.557, "step": 10499 }, { "epoch": 1.8216516308119362, "grad_norm": 0.827681303024292, "learning_rate": 1.2139272696307857e-06, "loss": 0.5206, "step": 10500 }, { "epoch": 1.821825121443442, "grad_norm": 1.179308295249939, "learning_rate": 1.2115908200660731e-06, "loss": 0.4045, "step": 10501 }, { "epoch": 1.8219986120749478, "grad_norm": 1.6559895277023315, "learning_rate": 1.2092565509181386e-06, "loss": 0.4258, "step": 10502 }, { "epoch": 1.8221721027064537, "grad_norm": 1.2037452459335327, "learning_rate": 1.2069244624578768e-06, "loss": 0.3815, "step": 10503 }, { "epoch": 1.8223455933379598, "grad_norm": 0.9145764112472534, "learning_rate": 1.2045945549559269e-06, "loss": 0.3381, "step": 10504 }, { "epoch": 1.8225190839694656, "grad_norm": 0.7107357382774353, "learning_rate": 1.202266828682681e-06, "loss": 0.4226, "step": 10505 }, { "epoch": 1.8226925746009717, "grad_norm": 1.0881927013397217, "learning_rate": 1.1999412839082748e-06, "loss": 0.5344, "step": 10506 }, { "epoch": 1.8228660652324775, "grad_norm": 0.9871724843978882, "learning_rate": 1.1976179209025896e-06, "loss": 0.4009, "step": 10507 }, { "epoch": 1.8230395558639834, "grad_norm": 0.9611091613769531, "learning_rate": 1.1952967399352522e-06, "loss": 0.3506, "step": 10508 }, { "epoch": 1.8232130464954892, "grad_norm": 0.9632605314254761, "learning_rate": 1.1929777412756382e-06, "loss": 0.3148, "step": 10509 }, { "epoch": 1.823386537126995, "grad_norm": 1.171610713005066, "learning_rate": 1.1906609251928746e-06, "loss": 0.4294, "step": 10510 }, { "epoch": 1.823560027758501, "grad_norm": 0.9603515863418579, "learning_rate": 1.1883462919558263e-06, "loss": 0.4021, "step": 10511 }, { "epoch": 1.823733518390007, "grad_norm": 0.8278272151947021, "learning_rate": 1.186033841833112e-06, "loss": 0.4159, "step": 10512 }, { "epoch": 1.8239070090215128, "grad_norm": 0.9903392791748047, "learning_rate": 1.1837235750930898e-06, "loss": 0.4825, "step": 10513 }, { "epoch": 1.8240804996530189, "grad_norm": 0.928013801574707, "learning_rate": 1.1814154920038789e-06, "loss": 0.3651, "step": 10514 }, { "epoch": 1.8242539902845247, "grad_norm": 0.636833667755127, "learning_rate": 1.179109592833323e-06, "loss": 0.4077, "step": 10515 }, { "epoch": 1.8244274809160306, "grad_norm": 1.2930148839950562, "learning_rate": 1.17680587784903e-06, "loss": 0.5465, "step": 10516 }, { "epoch": 1.8246009715475364, "grad_norm": 0.766405463218689, "learning_rate": 1.174504347318346e-06, "loss": 0.5189, "step": 10517 }, { "epoch": 1.8247744621790423, "grad_norm": 0.7657548189163208, "learning_rate": 1.1722050015083752e-06, "loss": 0.4747, "step": 10518 }, { "epoch": 1.8249479528105481, "grad_norm": 0.7444841861724854, "learning_rate": 1.1699078406859466e-06, "loss": 0.3865, "step": 10519 }, { "epoch": 1.8251214434420542, "grad_norm": 0.8649503588676453, "learning_rate": 1.1676128651176578e-06, "loss": 0.6057, "step": 10520 }, { "epoch": 1.82529493407356, "grad_norm": 0.674697756767273, "learning_rate": 1.1653200750698402e-06, "loss": 0.4941, "step": 10521 }, { "epoch": 1.8254684247050659, "grad_norm": 0.7950411438941956, "learning_rate": 1.163029470808572e-06, "loss": 0.4839, "step": 10522 }, { "epoch": 1.825641915336572, "grad_norm": 1.2704163789749146, "learning_rate": 1.1607410525996832e-06, "loss": 0.3766, "step": 10523 }, { "epoch": 1.8258154059680778, "grad_norm": 0.9411687850952148, "learning_rate": 1.1584548207087498e-06, "loss": 0.3929, "step": 10524 }, { "epoch": 1.8259888965995836, "grad_norm": 0.7350007891654968, "learning_rate": 1.1561707754010887e-06, "loss": 0.5835, "step": 10525 }, { "epoch": 1.8261623872310895, "grad_norm": 0.7564200758934021, "learning_rate": 1.1538889169417654e-06, "loss": 0.6437, "step": 10526 }, { "epoch": 1.8263358778625953, "grad_norm": 1.0063823461532593, "learning_rate": 1.1516092455955951e-06, "loss": 0.4537, "step": 10527 }, { "epoch": 1.8265093684941012, "grad_norm": 2.187542200088501, "learning_rate": 1.1493317616271327e-06, "loss": 0.3118, "step": 10528 }, { "epoch": 1.8266828591256072, "grad_norm": 0.7954789400100708, "learning_rate": 1.1470564653006844e-06, "loss": 0.42, "step": 10529 }, { "epoch": 1.826856349757113, "grad_norm": 0.8000457286834717, "learning_rate": 1.1447833568803036e-06, "loss": 0.5223, "step": 10530 }, { "epoch": 1.8270298403886192, "grad_norm": 0.8941777944564819, "learning_rate": 1.1425124366297858e-06, "loss": 0.4643, "step": 10531 }, { "epoch": 1.827203331020125, "grad_norm": 0.8192468285560608, "learning_rate": 1.140243704812667e-06, "loss": 0.5059, "step": 10532 }, { "epoch": 1.8273768216516308, "grad_norm": 0.8780628442764282, "learning_rate": 1.1379771616922474e-06, "loss": 0.4836, "step": 10533 }, { "epoch": 1.8275503122831367, "grad_norm": 1.4004952907562256, "learning_rate": 1.1357128075315572e-06, "loss": 0.458, "step": 10534 }, { "epoch": 1.8277238029146425, "grad_norm": 0.8280038833618164, "learning_rate": 1.1334506425933745e-06, "loss": 0.4286, "step": 10535 }, { "epoch": 1.8278972935461484, "grad_norm": 1.0875632762908936, "learning_rate": 1.1311906671402274e-06, "loss": 0.381, "step": 10536 }, { "epoch": 1.8280707841776545, "grad_norm": 1.107812762260437, "learning_rate": 1.1289328814343969e-06, "loss": 0.358, "step": 10537 }, { "epoch": 1.8282442748091603, "grad_norm": 0.6956422924995422, "learning_rate": 1.126677285737887e-06, "loss": 0.5237, "step": 10538 }, { "epoch": 1.8284177654406664, "grad_norm": 0.6917555332183838, "learning_rate": 1.124423880312473e-06, "loss": 0.4739, "step": 10539 }, { "epoch": 1.8285912560721722, "grad_norm": 0.9574361443519592, "learning_rate": 1.122172665419663e-06, "loss": 0.3891, "step": 10540 }, { "epoch": 1.828764746703678, "grad_norm": 1.0273165702819824, "learning_rate": 1.1199236413207127e-06, "loss": 0.3956, "step": 10541 }, { "epoch": 1.828938237335184, "grad_norm": 0.8338664770126343, "learning_rate": 1.1176768082766177e-06, "loss": 0.4883, "step": 10542 }, { "epoch": 1.8291117279666897, "grad_norm": 0.733159601688385, "learning_rate": 1.1154321665481404e-06, "loss": 0.412, "step": 10543 }, { "epoch": 1.8292852185981956, "grad_norm": 0.7883723974227905, "learning_rate": 1.1131897163957573e-06, "loss": 0.578, "step": 10544 }, { "epoch": 1.8294587092297014, "grad_norm": 0.8465474843978882, "learning_rate": 1.1109494580797175e-06, "loss": 0.4261, "step": 10545 }, { "epoch": 1.8296321998612075, "grad_norm": 0.6504952311515808, "learning_rate": 1.1087113918600023e-06, "loss": 0.5467, "step": 10546 }, { "epoch": 1.8298056904927134, "grad_norm": 0.9745897054672241, "learning_rate": 1.1064755179963483e-06, "loss": 0.4567, "step": 10547 }, { "epoch": 1.8299791811242194, "grad_norm": 0.9075683951377869, "learning_rate": 1.1042418367482188e-06, "loss": 0.4359, "step": 10548 }, { "epoch": 1.8301526717557253, "grad_norm": 0.8048148155212402, "learning_rate": 1.1020103483748469e-06, "loss": 0.5377, "step": 10549 }, { "epoch": 1.8303261623872311, "grad_norm": 0.8447327613830566, "learning_rate": 1.0997810531351916e-06, "loss": 0.5651, "step": 10550 }, { "epoch": 1.830499653018737, "grad_norm": 0.7904649376869202, "learning_rate": 1.0975539512879707e-06, "loss": 0.398, "step": 10551 }, { "epoch": 1.8306731436502428, "grad_norm": 0.7916994094848633, "learning_rate": 1.0953290430916353e-06, "loss": 0.5654, "step": 10552 }, { "epoch": 1.8308466342817487, "grad_norm": 0.932354211807251, "learning_rate": 1.0931063288043963e-06, "loss": 0.4965, "step": 10553 }, { "epoch": 1.8310201249132547, "grad_norm": 1.4943174123764038, "learning_rate": 1.0908858086841989e-06, "loss": 0.4603, "step": 10554 }, { "epoch": 1.8311936155447606, "grad_norm": 0.7021231055259705, "learning_rate": 1.0886674829887323e-06, "loss": 0.4224, "step": 10555 }, { "epoch": 1.8313671061762666, "grad_norm": 0.7502918839454651, "learning_rate": 1.0864513519754484e-06, "loss": 0.5375, "step": 10556 }, { "epoch": 1.8315405968077725, "grad_norm": 0.7647824883460999, "learning_rate": 1.0842374159015167e-06, "loss": 0.5237, "step": 10557 }, { "epoch": 1.8317140874392783, "grad_norm": 0.8950784802436829, "learning_rate": 1.082025675023879e-06, "loss": 0.4702, "step": 10558 }, { "epoch": 1.8318875780707842, "grad_norm": 0.7453234791755676, "learning_rate": 1.0798161295992004e-06, "loss": 0.5563, "step": 10559 }, { "epoch": 1.83206106870229, "grad_norm": 0.9420710802078247, "learning_rate": 1.077608779883912e-06, "loss": 0.4962, "step": 10560 }, { "epoch": 1.8322345593337959, "grad_norm": 2.714823007583618, "learning_rate": 1.0754036261341683e-06, "loss": 0.436, "step": 10561 }, { "epoch": 1.8324080499653017, "grad_norm": 0.8746506571769714, "learning_rate": 1.0732006686058893e-06, "loss": 0.3875, "step": 10562 }, { "epoch": 1.8325815405968078, "grad_norm": 1.7194308042526245, "learning_rate": 1.070999907554726e-06, "loss": 0.4247, "step": 10563 }, { "epoch": 1.8327550312283136, "grad_norm": 1.1054048538208008, "learning_rate": 1.0688013432360811e-06, "loss": 0.368, "step": 10564 }, { "epoch": 1.8329285218598197, "grad_norm": 1.1475105285644531, "learning_rate": 1.0666049759050945e-06, "loss": 0.3995, "step": 10565 }, { "epoch": 1.8331020124913255, "grad_norm": 0.7717334628105164, "learning_rate": 1.0644108058166692e-06, "loss": 0.4082, "step": 10566 }, { "epoch": 1.8332755031228314, "grad_norm": 0.8031683564186096, "learning_rate": 1.0622188332254302e-06, "loss": 0.5031, "step": 10567 }, { "epoch": 1.8334489937543372, "grad_norm": 0.905417799949646, "learning_rate": 1.0600290583857631e-06, "loss": 0.4075, "step": 10568 }, { "epoch": 1.833622484385843, "grad_norm": 0.850563108921051, "learning_rate": 1.0578414815517936e-06, "loss": 0.4917, "step": 10569 }, { "epoch": 1.833795975017349, "grad_norm": 0.6751877665519714, "learning_rate": 1.0556561029773914e-06, "loss": 0.3687, "step": 10570 }, { "epoch": 1.833969465648855, "grad_norm": 0.6808153986930847, "learning_rate": 1.0534729229161722e-06, "loss": 0.4904, "step": 10571 }, { "epoch": 1.8341429562803608, "grad_norm": 0.90809565782547, "learning_rate": 1.0512919416214995e-06, "loss": 0.4065, "step": 10572 }, { "epoch": 1.834316446911867, "grad_norm": 0.7460927367210388, "learning_rate": 1.0491131593464755e-06, "loss": 0.4163, "step": 10573 }, { "epoch": 1.8344899375433728, "grad_norm": 0.8532546162605286, "learning_rate": 1.0469365763439532e-06, "loss": 0.4336, "step": 10574 }, { "epoch": 1.8346634281748786, "grad_norm": 1.418191909790039, "learning_rate": 1.044762192866522e-06, "loss": 0.4834, "step": 10575 }, { "epoch": 1.8348369188063844, "grad_norm": 0.8263697028160095, "learning_rate": 1.0425900091665286e-06, "loss": 0.3685, "step": 10576 }, { "epoch": 1.8350104094378903, "grad_norm": 0.9749839305877686, "learning_rate": 1.040420025496054e-06, "loss": 0.4115, "step": 10577 }, { "epoch": 1.8351839000693961, "grad_norm": 0.9161428213119507, "learning_rate": 1.0382522421069274e-06, "loss": 0.3691, "step": 10578 }, { "epoch": 1.8353573907009022, "grad_norm": 0.8972198367118835, "learning_rate": 1.0360866592507236e-06, "loss": 0.5196, "step": 10579 }, { "epoch": 1.835530881332408, "grad_norm": 0.6500658392906189, "learning_rate": 1.033923277178759e-06, "loss": 0.5386, "step": 10580 }, { "epoch": 1.835704371963914, "grad_norm": 0.6305949687957764, "learning_rate": 1.0317620961420993e-06, "loss": 0.4969, "step": 10581 }, { "epoch": 1.83587786259542, "grad_norm": 0.6953607201576233, "learning_rate": 1.029603116391551e-06, "loss": 0.5178, "step": 10582 }, { "epoch": 1.8360513532269258, "grad_norm": 0.8355174660682678, "learning_rate": 1.0274463381776646e-06, "loss": 0.4069, "step": 10583 }, { "epoch": 1.8362248438584317, "grad_norm": 1.1380466222763062, "learning_rate": 1.0252917617507374e-06, "loss": 0.4872, "step": 10584 }, { "epoch": 1.8363983344899375, "grad_norm": 0.7222457528114319, "learning_rate": 1.0231393873608164e-06, "loss": 0.4034, "step": 10585 }, { "epoch": 1.8365718251214433, "grad_norm": 0.7789379358291626, "learning_rate": 1.020989215257675e-06, "loss": 0.4271, "step": 10586 }, { "epoch": 1.8367453157529492, "grad_norm": 0.8136478662490845, "learning_rate": 1.0188412456908537e-06, "loss": 0.4878, "step": 10587 }, { "epoch": 1.8369188063844553, "grad_norm": 0.7922045588493347, "learning_rate": 1.0166954789096194e-06, "loss": 0.4371, "step": 10588 }, { "epoch": 1.837092297015961, "grad_norm": 0.9087977409362793, "learning_rate": 1.0145519151630023e-06, "loss": 0.4856, "step": 10589 }, { "epoch": 1.8372657876474672, "grad_norm": 1.0198791027069092, "learning_rate": 1.0124105546997521e-06, "loss": 0.4105, "step": 10590 }, { "epoch": 1.837439278278973, "grad_norm": 0.6983714699745178, "learning_rate": 1.0102713977683832e-06, "loss": 0.4863, "step": 10591 }, { "epoch": 1.8376127689104789, "grad_norm": 1.7546323537826538, "learning_rate": 1.008134444617146e-06, "loss": 0.3851, "step": 10592 }, { "epoch": 1.8377862595419847, "grad_norm": 1.195565104484558, "learning_rate": 1.0059996954940377e-06, "loss": 0.4205, "step": 10593 }, { "epoch": 1.8379597501734906, "grad_norm": 1.0703043937683105, "learning_rate": 1.0038671506467934e-06, "loss": 0.3286, "step": 10594 }, { "epoch": 1.8381332408049964, "grad_norm": 1.0536229610443115, "learning_rate": 1.0017368103229086e-06, "loss": 0.3837, "step": 10595 }, { "epoch": 1.8383067314365025, "grad_norm": 0.873356819152832, "learning_rate": 9.996086747695966e-07, "loss": 0.3619, "step": 10596 }, { "epoch": 1.8384802220680083, "grad_norm": 0.8494886755943298, "learning_rate": 9.97482744233842e-07, "loss": 0.4105, "step": 10597 }, { "epoch": 1.8386537126995144, "grad_norm": 0.9837471842765808, "learning_rate": 9.953590189623563e-07, "loss": 0.4213, "step": 10598 }, { "epoch": 1.8388272033310202, "grad_norm": 0.7704948782920837, "learning_rate": 9.932374992016002e-07, "loss": 0.4456, "step": 10599 }, { "epoch": 1.839000693962526, "grad_norm": 0.7444039583206177, "learning_rate": 9.911181851977792e-07, "loss": 0.5393, "step": 10600 }, { "epoch": 1.839174184594032, "grad_norm": 0.9906989932060242, "learning_rate": 9.890010771968428e-07, "loss": 0.3533, "step": 10601 }, { "epoch": 1.8393476752255378, "grad_norm": 1.106204867362976, "learning_rate": 9.868861754444858e-07, "loss": 0.322, "step": 10602 }, { "epoch": 1.8395211658570436, "grad_norm": 1.4313024282455444, "learning_rate": 9.847734801861387e-07, "loss": 0.4437, "step": 10603 }, { "epoch": 1.8396946564885495, "grad_norm": 1.0267696380615234, "learning_rate": 9.826629916669917e-07, "loss": 0.4823, "step": 10604 }, { "epoch": 1.8398681471200555, "grad_norm": 0.7928594350814819, "learning_rate": 9.805547101319601e-07, "loss": 0.4402, "step": 10605 }, { "epoch": 1.8400416377515614, "grad_norm": 0.8010715842247009, "learning_rate": 9.784486358257194e-07, "loss": 0.4567, "step": 10606 }, { "epoch": 1.8402151283830674, "grad_norm": 0.7973601222038269, "learning_rate": 9.763447689926763e-07, "loss": 0.3728, "step": 10607 }, { "epoch": 1.8403886190145733, "grad_norm": 1.1704373359680176, "learning_rate": 9.742431098769933e-07, "loss": 0.3536, "step": 10608 }, { "epoch": 1.8405621096460791, "grad_norm": 0.74921715259552, "learning_rate": 9.721436587225618e-07, "loss": 0.4862, "step": 10609 }, { "epoch": 1.840735600277585, "grad_norm": 1.003538966178894, "learning_rate": 9.700464157730338e-07, "loss": 0.3729, "step": 10610 }, { "epoch": 1.8409090909090908, "grad_norm": 0.8335089087486267, "learning_rate": 9.679513812717945e-07, "loss": 0.4648, "step": 10611 }, { "epoch": 1.8410825815405967, "grad_norm": 0.7531124949455261, "learning_rate": 9.658585554619737e-07, "loss": 0.3383, "step": 10612 }, { "epoch": 1.8412560721721027, "grad_norm": 0.68199223279953, "learning_rate": 9.637679385864417e-07, "loss": 0.4976, "step": 10613 }, { "epoch": 1.8414295628036086, "grad_norm": 1.115538239479065, "learning_rate": 9.616795308878313e-07, "loss": 0.3687, "step": 10614 }, { "epoch": 1.8416030534351147, "grad_norm": 1.10149347782135, "learning_rate": 9.59593332608486e-07, "loss": 0.3217, "step": 10615 }, { "epoch": 1.8417765440666205, "grad_norm": 0.9675643444061279, "learning_rate": 9.575093439905259e-07, "loss": 0.5491, "step": 10616 }, { "epoch": 1.8419500346981263, "grad_norm": 0.7751714587211609, "learning_rate": 9.554275652757928e-07, "loss": 0.5919, "step": 10617 }, { "epoch": 1.8421235253296322, "grad_norm": 0.9060567617416382, "learning_rate": 9.533479967058867e-07, "loss": 0.4501, "step": 10618 }, { "epoch": 1.842297015961138, "grad_norm": 0.73239666223526, "learning_rate": 9.512706385221348e-07, "loss": 0.4877, "step": 10619 }, { "epoch": 1.8424705065926439, "grad_norm": 1.1301404237747192, "learning_rate": 9.491954909656242e-07, "loss": 0.4669, "step": 10620 }, { "epoch": 1.8426439972241497, "grad_norm": 0.5790345668792725, "learning_rate": 9.471225542771734e-07, "loss": 0.4295, "step": 10621 }, { "epoch": 1.8428174878556558, "grad_norm": 0.8333771824836731, "learning_rate": 9.450518286973542e-07, "loss": 0.4653, "step": 10622 }, { "epoch": 1.8429909784871616, "grad_norm": 1.0313693284988403, "learning_rate": 9.42983314466468e-07, "loss": 0.4835, "step": 10623 }, { "epoch": 1.8431644691186677, "grad_norm": 0.8666651248931885, "learning_rate": 9.409170118245803e-07, "loss": 0.5321, "step": 10624 }, { "epoch": 1.8433379597501736, "grad_norm": 0.7337196469306946, "learning_rate": 9.388529210114794e-07, "loss": 0.6223, "step": 10625 }, { "epoch": 1.8435114503816794, "grad_norm": 0.6616241335868835, "learning_rate": 9.36791042266707e-07, "loss": 0.4851, "step": 10626 }, { "epoch": 1.8436849410131853, "grad_norm": 0.7307387590408325, "learning_rate": 9.347313758295473e-07, "loss": 0.451, "step": 10627 }, { "epoch": 1.843858431644691, "grad_norm": 0.9073920249938965, "learning_rate": 9.326739219390246e-07, "loss": 0.5051, "step": 10628 }, { "epoch": 1.844031922276197, "grad_norm": 1.2759959697723389, "learning_rate": 9.306186808339146e-07, "loss": 0.4465, "step": 10629 }, { "epoch": 1.844205412907703, "grad_norm": 0.6395655274391174, "learning_rate": 9.285656527527264e-07, "loss": 0.6111, "step": 10630 }, { "epoch": 1.8443789035392089, "grad_norm": 0.8483899831771851, "learning_rate": 9.265148379337164e-07, "loss": 0.4337, "step": 10631 }, { "epoch": 1.844552394170715, "grad_norm": 0.9316275119781494, "learning_rate": 9.244662366148826e-07, "loss": 0.399, "step": 10632 }, { "epoch": 1.8447258848022208, "grad_norm": 1.5879032611846924, "learning_rate": 9.224198490339731e-07, "loss": 0.4324, "step": 10633 }, { "epoch": 1.8448993754337266, "grad_norm": 1.099420428276062, "learning_rate": 9.203756754284665e-07, "loss": 0.4224, "step": 10634 }, { "epoch": 1.8450728660652325, "grad_norm": 0.6880114078521729, "learning_rate": 9.183337160355976e-07, "loss": 0.5272, "step": 10635 }, { "epoch": 1.8452463566967383, "grad_norm": 0.8261739015579224, "learning_rate": 9.162939710923324e-07, "loss": 0.5243, "step": 10636 }, { "epoch": 1.8454198473282442, "grad_norm": 0.9456450343132019, "learning_rate": 9.142564408353949e-07, "loss": 0.2906, "step": 10637 }, { "epoch": 1.8455933379597502, "grad_norm": 0.7066915035247803, "learning_rate": 9.122211255012292e-07, "loss": 0.4951, "step": 10638 }, { "epoch": 1.845766828591256, "grad_norm": 0.825546145439148, "learning_rate": 9.101880253260487e-07, "loss": 0.3624, "step": 10639 }, { "epoch": 1.845940319222762, "grad_norm": 0.9259288311004639, "learning_rate": 9.081571405457912e-07, "loss": 0.3451, "step": 10640 }, { "epoch": 1.846113809854268, "grad_norm": 0.8269305229187012, "learning_rate": 9.061284713961416e-07, "loss": 0.3447, "step": 10641 }, { "epoch": 1.8462873004857738, "grad_norm": 0.779420018196106, "learning_rate": 9.041020181125315e-07, "loss": 0.4613, "step": 10642 }, { "epoch": 1.8464607911172797, "grad_norm": 0.8219154477119446, "learning_rate": 9.020777809301396e-07, "loss": 0.4539, "step": 10643 }, { "epoch": 1.8466342817487855, "grad_norm": 0.8316527605056763, "learning_rate": 9.000557600838666e-07, "loss": 0.494, "step": 10644 }, { "epoch": 1.8468077723802914, "grad_norm": 0.8427203297615051, "learning_rate": 8.980359558083828e-07, "loss": 0.3285, "step": 10645 }, { "epoch": 1.8469812630117972, "grad_norm": 0.9683478474617004, "learning_rate": 8.960183683380807e-07, "loss": 0.3295, "step": 10646 }, { "epoch": 1.8471547536433033, "grad_norm": 0.7695950865745544, "learning_rate": 8.940029979071152e-07, "loss": 0.4235, "step": 10647 }, { "epoch": 1.8473282442748091, "grad_norm": 1.1187565326690674, "learning_rate": 8.919898447493569e-07, "loss": 0.4458, "step": 10648 }, { "epoch": 1.8475017349063152, "grad_norm": 0.9226999878883362, "learning_rate": 8.899789090984457e-07, "loss": 0.4841, "step": 10649 }, { "epoch": 1.847675225537821, "grad_norm": 0.8049643635749817, "learning_rate": 8.879701911877503e-07, "loss": 0.4939, "step": 10650 }, { "epoch": 1.8478487161693269, "grad_norm": 0.8780126571655273, "learning_rate": 8.859636912503822e-07, "loss": 0.3636, "step": 10651 }, { "epoch": 1.8480222068008327, "grad_norm": 1.5426934957504272, "learning_rate": 8.839594095191995e-07, "loss": 0.3875, "step": 10652 }, { "epoch": 1.8481956974323386, "grad_norm": 0.9760283827781677, "learning_rate": 8.81957346226805e-07, "loss": 0.379, "step": 10653 }, { "epoch": 1.8483691880638444, "grad_norm": 1.1850718259811401, "learning_rate": 8.799575016055373e-07, "loss": 0.4696, "step": 10654 }, { "epoch": 1.8485426786953505, "grad_norm": 0.9418070316314697, "learning_rate": 8.779598758874774e-07, "loss": 0.429, "step": 10655 }, { "epoch": 1.8487161693268563, "grad_norm": 0.9057166576385498, "learning_rate": 8.7596446930446e-07, "loss": 0.4333, "step": 10656 }, { "epoch": 1.8488896599583624, "grad_norm": 1.0457319021224976, "learning_rate": 8.739712820880441e-07, "loss": 0.4059, "step": 10657 }, { "epoch": 1.8490631505898683, "grad_norm": 1.0274351835250854, "learning_rate": 8.719803144695516e-07, "loss": 0.3586, "step": 10658 }, { "epoch": 1.849236641221374, "grad_norm": 0.9383352398872375, "learning_rate": 8.699915666800285e-07, "loss": 0.4126, "step": 10659 }, { "epoch": 1.84941013185288, "grad_norm": 1.4329230785369873, "learning_rate": 8.680050389502814e-07, "loss": 0.4045, "step": 10660 }, { "epoch": 1.8495836224843858, "grad_norm": 1.739837408065796, "learning_rate": 8.660207315108371e-07, "loss": 0.3916, "step": 10661 }, { "epoch": 1.8497571131158916, "grad_norm": 1.2653837203979492, "learning_rate": 8.640386445919847e-07, "loss": 0.313, "step": 10662 }, { "epoch": 1.8499306037473975, "grad_norm": 1.0398705005645752, "learning_rate": 8.620587784237444e-07, "loss": 0.4147, "step": 10663 }, { "epoch": 1.8501040943789036, "grad_norm": 1.1222190856933594, "learning_rate": 8.600811332358861e-07, "loss": 0.4729, "step": 10664 }, { "epoch": 1.8502775850104094, "grad_norm": 0.9458642601966858, "learning_rate": 8.581057092579081e-07, "loss": 0.531, "step": 10665 }, { "epoch": 1.8504510756419155, "grad_norm": 1.0276833772659302, "learning_rate": 8.561325067190762e-07, "loss": 0.373, "step": 10666 }, { "epoch": 1.8506245662734213, "grad_norm": 1.9846408367156982, "learning_rate": 8.541615258483671e-07, "loss": 0.3614, "step": 10667 }, { "epoch": 1.8507980569049272, "grad_norm": 1.0455187559127808, "learning_rate": 8.521927668745244e-07, "loss": 0.4457, "step": 10668 }, { "epoch": 1.850971547536433, "grad_norm": 0.6620354652404785, "learning_rate": 8.502262300260234e-07, "loss": 0.5068, "step": 10669 }, { "epoch": 1.8511450381679388, "grad_norm": 0.9360253810882568, "learning_rate": 8.482619155310812e-07, "loss": 0.3726, "step": 10670 }, { "epoch": 1.8513185287994447, "grad_norm": 1.065224528312683, "learning_rate": 8.462998236176578e-07, "loss": 0.3748, "step": 10671 }, { "epoch": 1.8514920194309508, "grad_norm": 0.7790229916572571, "learning_rate": 8.443399545134623e-07, "loss": 0.5011, "step": 10672 }, { "epoch": 1.8516655100624566, "grad_norm": 7.807602405548096, "learning_rate": 8.423823084459349e-07, "loss": 0.4644, "step": 10673 }, { "epoch": 1.8518390006939627, "grad_norm": 0.8364564180374146, "learning_rate": 8.404268856422626e-07, "loss": 0.4195, "step": 10674 }, { "epoch": 1.8520124913254685, "grad_norm": 0.9146015048027039, "learning_rate": 8.384736863293729e-07, "loss": 0.3894, "step": 10675 }, { "epoch": 1.8521859819569744, "grad_norm": 1.106425404548645, "learning_rate": 8.365227107339447e-07, "loss": 0.4592, "step": 10676 }, { "epoch": 1.8523594725884802, "grad_norm": 0.6851017475128174, "learning_rate": 8.345739590823832e-07, "loss": 0.5092, "step": 10677 }, { "epoch": 1.852532963219986, "grad_norm": 1.0080454349517822, "learning_rate": 8.326274316008475e-07, "loss": 0.3457, "step": 10678 }, { "epoch": 1.852706453851492, "grad_norm": 1.062127947807312, "learning_rate": 8.306831285152328e-07, "loss": 0.5371, "step": 10679 }, { "epoch": 1.8528799444829978, "grad_norm": 1.255875587463379, "learning_rate": 8.287410500511739e-07, "loss": 0.4425, "step": 10680 }, { "epoch": 1.8530534351145038, "grad_norm": 0.6575207710266113, "learning_rate": 8.268011964340595e-07, "loss": 0.4111, "step": 10681 }, { "epoch": 1.8532269257460097, "grad_norm": 0.9395532011985779, "learning_rate": 8.248635678890049e-07, "loss": 0.3969, "step": 10682 }, { "epoch": 1.8534004163775157, "grad_norm": 1.6537333726882935, "learning_rate": 8.229281646408793e-07, "loss": 0.5719, "step": 10683 }, { "epoch": 1.8535739070090216, "grad_norm": 0.6713128089904785, "learning_rate": 8.209949869142808e-07, "loss": 0.6195, "step": 10684 }, { "epoch": 1.8537473976405274, "grad_norm": 0.8929294347763062, "learning_rate": 8.190640349335699e-07, "loss": 0.4669, "step": 10685 }, { "epoch": 1.8539208882720333, "grad_norm": 0.7345629334449768, "learning_rate": 8.171353089228206e-07, "loss": 0.5186, "step": 10686 }, { "epoch": 1.8540943789035391, "grad_norm": 0.961094856262207, "learning_rate": 8.152088091058741e-07, "loss": 0.39, "step": 10687 }, { "epoch": 1.854267869535045, "grad_norm": 1.1825238466262817, "learning_rate": 8.132845357062979e-07, "loss": 0.4435, "step": 10688 }, { "epoch": 1.854441360166551, "grad_norm": 0.9082005620002747, "learning_rate": 8.113624889474136e-07, "loss": 0.3444, "step": 10689 }, { "epoch": 1.8546148507980569, "grad_norm": 0.8415492177009583, "learning_rate": 8.094426690522672e-07, "loss": 0.3608, "step": 10690 }, { "epoch": 1.854788341429563, "grad_norm": 0.5826301574707031, "learning_rate": 8.075250762436626e-07, "loss": 0.4982, "step": 10691 }, { "epoch": 1.8549618320610688, "grad_norm": 0.8230922818183899, "learning_rate": 8.056097107441352e-07, "loss": 0.4164, "step": 10692 }, { "epoch": 1.8551353226925746, "grad_norm": 1.342071771621704, "learning_rate": 8.036965727759693e-07, "loss": 0.3766, "step": 10693 }, { "epoch": 1.8553088133240805, "grad_norm": 0.7745541334152222, "learning_rate": 8.017856625611809e-07, "loss": 0.353, "step": 10694 }, { "epoch": 1.8554823039555863, "grad_norm": 0.8241654634475708, "learning_rate": 7.998769803215389e-07, "loss": 0.3567, "step": 10695 }, { "epoch": 1.8556557945870922, "grad_norm": 1.0320768356323242, "learning_rate": 7.979705262785442e-07, "loss": 0.3619, "step": 10696 }, { "epoch": 1.8558292852185982, "grad_norm": 0.924490749835968, "learning_rate": 7.960663006534464e-07, "loss": 0.4421, "step": 10697 }, { "epoch": 1.856002775850104, "grad_norm": 0.9098055362701416, "learning_rate": 7.941643036672309e-07, "loss": 0.5029, "step": 10698 }, { "epoch": 1.85617626648161, "grad_norm": 0.7238054871559143, "learning_rate": 7.922645355406256e-07, "loss": 0.4734, "step": 10699 }, { "epoch": 1.856349757113116, "grad_norm": 1.7676187753677368, "learning_rate": 7.903669964941052e-07, "loss": 0.4181, "step": 10700 }, { "epoch": 1.8565232477446219, "grad_norm": 1.4307605028152466, "learning_rate": 7.884716867478782e-07, "loss": 0.4248, "step": 10701 }, { "epoch": 1.8566967383761277, "grad_norm": 0.7783779501914978, "learning_rate": 7.865786065218973e-07, "loss": 0.5836, "step": 10702 }, { "epoch": 1.8568702290076335, "grad_norm": 0.778427004814148, "learning_rate": 7.846877560358535e-07, "loss": 0.3923, "step": 10703 }, { "epoch": 1.8570437196391394, "grad_norm": 1.9426189661026, "learning_rate": 7.827991355091891e-07, "loss": 0.4073, "step": 10704 }, { "epoch": 1.8572172102706452, "grad_norm": 1.0605652332305908, "learning_rate": 7.809127451610776e-07, "loss": 0.4138, "step": 10705 }, { "epoch": 1.8573907009021513, "grad_norm": 1.0573015213012695, "learning_rate": 7.790285852104373e-07, "loss": 0.3669, "step": 10706 }, { "epoch": 1.8575641915336571, "grad_norm": 0.973831057548523, "learning_rate": 7.771466558759244e-07, "loss": 0.4637, "step": 10707 }, { "epoch": 1.8577376821651632, "grad_norm": 0.8816852569580078, "learning_rate": 7.752669573759464e-07, "loss": 0.417, "step": 10708 }, { "epoch": 1.857911172796669, "grad_norm": 1.2028803825378418, "learning_rate": 7.733894899286332e-07, "loss": 0.417, "step": 10709 }, { "epoch": 1.858084663428175, "grad_norm": 0.7927412986755371, "learning_rate": 7.715142537518771e-07, "loss": 0.4883, "step": 10710 }, { "epoch": 1.8582581540596808, "grad_norm": 1.1162534952163696, "learning_rate": 7.696412490632954e-07, "loss": 0.4596, "step": 10711 }, { "epoch": 1.8584316446911866, "grad_norm": 0.8915563821792603, "learning_rate": 7.677704760802562e-07, "loss": 0.4584, "step": 10712 }, { "epoch": 1.8586051353226924, "grad_norm": 0.8360008001327515, "learning_rate": 7.659019350198593e-07, "loss": 0.5146, "step": 10713 }, { "epoch": 1.8587786259541985, "grad_norm": 0.9344514012336731, "learning_rate": 7.640356260989601e-07, "loss": 0.3469, "step": 10714 }, { "epoch": 1.8589521165857044, "grad_norm": 0.9535170793533325, "learning_rate": 7.621715495341364e-07, "loss": 0.42, "step": 10715 }, { "epoch": 1.8591256072172104, "grad_norm": 0.7365037798881531, "learning_rate": 7.603097055417242e-07, "loss": 0.5088, "step": 10716 }, { "epoch": 1.8592990978487163, "grad_norm": 0.8689652681350708, "learning_rate": 7.58450094337786e-07, "loss": 0.5477, "step": 10717 }, { "epoch": 1.8594725884802221, "grad_norm": 0.8216843605041504, "learning_rate": 7.565927161381403e-07, "loss": 0.3713, "step": 10718 }, { "epoch": 1.859646079111728, "grad_norm": 0.8870757818222046, "learning_rate": 7.547375711583282e-07, "loss": 0.3389, "step": 10719 }, { "epoch": 1.8598195697432338, "grad_norm": 1.0497157573699951, "learning_rate": 7.528846596136485e-07, "loss": 0.4905, "step": 10720 }, { "epoch": 1.8599930603747397, "grad_norm": 0.6659301519393921, "learning_rate": 7.510339817191314e-07, "loss": 0.5076, "step": 10721 }, { "epoch": 1.8601665510062455, "grad_norm": 0.7435609698295593, "learning_rate": 7.491855376895519e-07, "loss": 0.4567, "step": 10722 }, { "epoch": 1.8603400416377516, "grad_norm": 0.8212759494781494, "learning_rate": 7.473393277394181e-07, "loss": 0.4438, "step": 10723 }, { "epoch": 1.8605135322692574, "grad_norm": 0.7654137015342712, "learning_rate": 7.454953520829899e-07, "loss": 0.563, "step": 10724 }, { "epoch": 1.8606870229007635, "grad_norm": 1.0974982976913452, "learning_rate": 7.436536109342651e-07, "loss": 0.4906, "step": 10725 }, { "epoch": 1.8608605135322693, "grad_norm": 0.7998368144035339, "learning_rate": 7.418141045069727e-07, "loss": 0.363, "step": 10726 }, { "epoch": 1.8610340041637752, "grad_norm": 0.9827405214309692, "learning_rate": 7.399768330145995e-07, "loss": 0.3574, "step": 10727 }, { "epoch": 1.861207494795281, "grad_norm": 0.8307923078536987, "learning_rate": 7.381417966703508e-07, "loss": 0.5055, "step": 10728 }, { "epoch": 1.8613809854267869, "grad_norm": 1.0587093830108643, "learning_rate": 7.363089956871961e-07, "loss": 0.4659, "step": 10729 }, { "epoch": 1.8615544760582927, "grad_norm": 0.9478718042373657, "learning_rate": 7.344784302778274e-07, "loss": 0.3449, "step": 10730 }, { "epoch": 1.8617279666897988, "grad_norm": 0.7864168286323547, "learning_rate": 7.32650100654686e-07, "loss": 0.4905, "step": 10731 }, { "epoch": 1.8619014573213046, "grad_norm": 0.7862717509269714, "learning_rate": 7.308240070299489e-07, "loss": 0.4059, "step": 10732 }, { "epoch": 1.8620749479528107, "grad_norm": 0.7209656834602356, "learning_rate": 7.290001496155418e-07, "loss": 0.5417, "step": 10733 }, { "epoch": 1.8622484385843165, "grad_norm": 1.1701611280441284, "learning_rate": 7.271785286231204e-07, "loss": 0.4844, "step": 10734 }, { "epoch": 1.8624219292158224, "grad_norm": 0.6251929998397827, "learning_rate": 7.253591442640906e-07, "loss": 0.5845, "step": 10735 }, { "epoch": 1.8625954198473282, "grad_norm": 0.725080132484436, "learning_rate": 7.235419967495883e-07, "loss": 0.5399, "step": 10736 }, { "epoch": 1.862768910478834, "grad_norm": 0.704907238483429, "learning_rate": 7.217270862905023e-07, "loss": 0.463, "step": 10737 }, { "epoch": 1.86294240111034, "grad_norm": 0.8071426153182983, "learning_rate": 7.199144130974489e-07, "loss": 0.4295, "step": 10738 }, { "epoch": 1.8631158917418458, "grad_norm": 2.144774913787842, "learning_rate": 7.181039773807952e-07, "loss": 0.3875, "step": 10739 }, { "epoch": 1.8632893823733518, "grad_norm": 0.7036840319633484, "learning_rate": 7.16295779350642e-07, "loss": 0.4941, "step": 10740 }, { "epoch": 1.8634628730048577, "grad_norm": 0.6882051229476929, "learning_rate": 7.144898192168348e-07, "loss": 0.4265, "step": 10741 }, { "epoch": 1.8636363636363638, "grad_norm": 0.798976719379425, "learning_rate": 7.126860971889527e-07, "loss": 0.3959, "step": 10742 }, { "epoch": 1.8638098542678696, "grad_norm": 0.7891309261322021, "learning_rate": 7.108846134763258e-07, "loss": 0.494, "step": 10743 }, { "epoch": 1.8639833448993754, "grad_norm": 0.6543323397636414, "learning_rate": 7.090853682880161e-07, "loss": 0.4839, "step": 10744 }, { "epoch": 1.8641568355308813, "grad_norm": 1.4919569492340088, "learning_rate": 7.072883618328275e-07, "loss": 0.3905, "step": 10745 }, { "epoch": 1.8643303261623871, "grad_norm": 0.8552188277244568, "learning_rate": 7.054935943193042e-07, "loss": 0.3601, "step": 10746 }, { "epoch": 1.864503816793893, "grad_norm": 1.8037493228912354, "learning_rate": 7.037010659557309e-07, "loss": 0.3279, "step": 10747 }, { "epoch": 1.864677307425399, "grad_norm": 0.9860910773277283, "learning_rate": 7.019107769501366e-07, "loss": 0.3587, "step": 10748 }, { "epoch": 1.864850798056905, "grad_norm": 0.6967023611068726, "learning_rate": 7.001227275102818e-07, "loss": 0.3929, "step": 10749 }, { "epoch": 1.865024288688411, "grad_norm": 1.349442481994629, "learning_rate": 6.983369178436739e-07, "loss": 0.4524, "step": 10750 }, { "epoch": 1.8651977793199168, "grad_norm": 0.9055932760238647, "learning_rate": 6.96553348157556e-07, "loss": 0.363, "step": 10751 }, { "epoch": 1.8653712699514227, "grad_norm": 1.137967586517334, "learning_rate": 6.947720186589158e-07, "loss": 0.4176, "step": 10752 }, { "epoch": 1.8655447605829285, "grad_norm": 0.9633527994155884, "learning_rate": 6.92992929554479e-07, "loss": 0.5557, "step": 10753 }, { "epoch": 1.8657182512144344, "grad_norm": 0.6777358055114746, "learning_rate": 6.912160810507096e-07, "loss": 0.4574, "step": 10754 }, { "epoch": 1.8658917418459402, "grad_norm": 2.1395766735076904, "learning_rate": 6.894414733538113e-07, "loss": 0.4868, "step": 10755 }, { "epoch": 1.8660652324774463, "grad_norm": 0.7489055395126343, "learning_rate": 6.876691066697349e-07, "loss": 0.4636, "step": 10756 }, { "epoch": 1.8662387231089521, "grad_norm": 1.3545432090759277, "learning_rate": 6.858989812041583e-07, "loss": 0.4578, "step": 10757 }, { "epoch": 1.8664122137404582, "grad_norm": 1.128914475440979, "learning_rate": 6.841310971625103e-07, "loss": 0.3976, "step": 10758 }, { "epoch": 1.866585704371964, "grad_norm": 1.8644378185272217, "learning_rate": 6.823654547499581e-07, "loss": 0.3995, "step": 10759 }, { "epoch": 1.8667591950034699, "grad_norm": 0.9472402334213257, "learning_rate": 6.806020541714042e-07, "loss": 0.4569, "step": 10760 }, { "epoch": 1.8669326856349757, "grad_norm": 0.7936835885047913, "learning_rate": 6.788408956314918e-07, "loss": 0.4657, "step": 10761 }, { "epoch": 1.8671061762664816, "grad_norm": 0.8923001885414124, "learning_rate": 6.770819793346084e-07, "loss": 0.4745, "step": 10762 }, { "epoch": 1.8672796668979874, "grad_norm": 0.9739224910736084, "learning_rate": 6.753253054848774e-07, "loss": 0.397, "step": 10763 }, { "epoch": 1.8674531575294933, "grad_norm": 0.834381639957428, "learning_rate": 6.735708742861624e-07, "loss": 0.4258, "step": 10764 }, { "epoch": 1.8676266481609993, "grad_norm": 0.8976614475250244, "learning_rate": 6.71818685942065e-07, "loss": 0.4316, "step": 10765 }, { "epoch": 1.8678001387925052, "grad_norm": 0.8833864331245422, "learning_rate": 6.700687406559359e-07, "loss": 0.4022, "step": 10766 }, { "epoch": 1.8679736294240112, "grad_norm": 0.7446100115776062, "learning_rate": 6.683210386308481e-07, "loss": 0.4232, "step": 10767 }, { "epoch": 1.868147120055517, "grad_norm": 1.0145565271377563, "learning_rate": 6.665755800696305e-07, "loss": 0.4338, "step": 10768 }, { "epoch": 1.868320610687023, "grad_norm": 0.8664763569831848, "learning_rate": 6.648323651748457e-07, "loss": 0.3817, "step": 10769 }, { "epoch": 1.8684941013185288, "grad_norm": 0.8469116687774658, "learning_rate": 6.63091394148796e-07, "loss": 0.4214, "step": 10770 }, { "epoch": 1.8686675919500346, "grad_norm": 0.8851863741874695, "learning_rate": 6.6135266719352e-07, "loss": 0.5551, "step": 10771 }, { "epoch": 1.8688410825815405, "grad_norm": 0.9367919564247131, "learning_rate": 6.596161845108006e-07, "loss": 0.511, "step": 10772 }, { "epoch": 1.8690145732130465, "grad_norm": 0.6908042430877686, "learning_rate": 6.578819463021612e-07, "loss": 0.5587, "step": 10773 }, { "epoch": 1.8691880638445524, "grad_norm": 0.7423384785652161, "learning_rate": 6.561499527688586e-07, "loss": 0.5619, "step": 10774 }, { "epoch": 1.8693615544760585, "grad_norm": 0.6981567740440369, "learning_rate": 6.544202041118941e-07, "loss": 0.5239, "step": 10775 }, { "epoch": 1.8695350451075643, "grad_norm": 0.6232300400733948, "learning_rate": 6.526927005320072e-07, "loss": 0.5656, "step": 10776 }, { "epoch": 1.8697085357390701, "grad_norm": 0.9474436044692993, "learning_rate": 6.509674422296775e-07, "loss": 0.4579, "step": 10777 }, { "epoch": 1.869882026370576, "grad_norm": 0.9173471331596375, "learning_rate": 6.492444294051204e-07, "loss": 0.4402, "step": 10778 }, { "epoch": 1.8700555170020818, "grad_norm": 0.9363657832145691, "learning_rate": 6.47523662258298e-07, "loss": 0.3816, "step": 10779 }, { "epoch": 1.8702290076335877, "grad_norm": 0.9286483526229858, "learning_rate": 6.458051409889021e-07, "loss": 0.3766, "step": 10780 }, { "epoch": 1.8704024982650935, "grad_norm": 0.7287443280220032, "learning_rate": 6.440888657963729e-07, "loss": 0.3782, "step": 10781 }, { "epoch": 1.8705759888965996, "grad_norm": 0.7181859612464905, "learning_rate": 6.423748368798843e-07, "loss": 0.4519, "step": 10782 }, { "epoch": 1.8707494795281054, "grad_norm": 0.7785375714302063, "learning_rate": 6.40663054438353e-07, "loss": 0.3562, "step": 10783 }, { "epoch": 1.8709229701596115, "grad_norm": 0.8017449975013733, "learning_rate": 6.38953518670431e-07, "loss": 0.4777, "step": 10784 }, { "epoch": 1.8710964607911174, "grad_norm": 1.0029230117797852, "learning_rate": 6.372462297745152e-07, "loss": 0.5099, "step": 10785 }, { "epoch": 1.8712699514226232, "grad_norm": 0.9567710161209106, "learning_rate": 6.355411879487339e-07, "loss": 0.3909, "step": 10786 }, { "epoch": 1.871443442054129, "grad_norm": 0.7558448910713196, "learning_rate": 6.338383933909642e-07, "loss": 0.3772, "step": 10787 }, { "epoch": 1.871616932685635, "grad_norm": 1.0863302946090698, "learning_rate": 6.321378462988148e-07, "loss": 0.3905, "step": 10788 }, { "epoch": 1.8717904233171407, "grad_norm": 0.874183177947998, "learning_rate": 6.304395468696345e-07, "loss": 0.5513, "step": 10789 }, { "epoch": 1.8719639139486468, "grad_norm": 0.8077466487884521, "learning_rate": 6.287434953005145e-07, "loss": 0.4374, "step": 10790 }, { "epoch": 1.8721374045801527, "grad_norm": 0.8777197003364563, "learning_rate": 6.270496917882863e-07, "loss": 0.4331, "step": 10791 }, { "epoch": 1.8723108952116587, "grad_norm": 0.9433843493461609, "learning_rate": 6.253581365295148e-07, "loss": 0.4448, "step": 10792 }, { "epoch": 1.8724843858431646, "grad_norm": 0.7966040968894958, "learning_rate": 6.236688297205074e-07, "loss": 0.4144, "step": 10793 }, { "epoch": 1.8726578764746704, "grad_norm": 0.6521954536437988, "learning_rate": 6.219817715573073e-07, "loss": 0.4707, "step": 10794 }, { "epoch": 1.8728313671061763, "grad_norm": 0.8022371530532837, "learning_rate": 6.202969622357069e-07, "loss": 0.5208, "step": 10795 }, { "epoch": 1.873004857737682, "grad_norm": 0.8339186310768127, "learning_rate": 6.18614401951223e-07, "loss": 0.5664, "step": 10796 }, { "epoch": 1.873178348369188, "grad_norm": 0.6526886820793152, "learning_rate": 6.16934090899124e-07, "loss": 0.5718, "step": 10797 }, { "epoch": 1.8733518390006938, "grad_norm": 0.6890487670898438, "learning_rate": 6.152560292744091e-07, "loss": 0.5109, "step": 10798 }, { "epoch": 1.8735253296321999, "grad_norm": 3.252506971359253, "learning_rate": 6.135802172718164e-07, "loss": 0.4457, "step": 10799 }, { "epoch": 1.8736988202637057, "grad_norm": 1.3863199949264526, "learning_rate": 6.119066550858321e-07, "loss": 0.337, "step": 10800 }, { "epoch": 1.8738723108952118, "grad_norm": 0.8325015902519226, "learning_rate": 6.102353429106722e-07, "loss": 0.5292, "step": 10801 }, { "epoch": 1.8740458015267176, "grad_norm": 0.6851802468299866, "learning_rate": 6.085662809402926e-07, "loss": 0.4647, "step": 10802 }, { "epoch": 1.8742192921582235, "grad_norm": 0.8349834680557251, "learning_rate": 6.068994693683916e-07, "loss": 0.4414, "step": 10803 }, { "epoch": 1.8743927827897293, "grad_norm": 0.7992815375328064, "learning_rate": 6.052349083884057e-07, "loss": 0.5262, "step": 10804 }, { "epoch": 1.8745662734212352, "grad_norm": 0.7497966885566711, "learning_rate": 6.035725981935092e-07, "loss": 0.4694, "step": 10805 }, { "epoch": 1.874739764052741, "grad_norm": 1.340006709098816, "learning_rate": 6.019125389766123e-07, "loss": 0.5042, "step": 10806 }, { "epoch": 1.874913254684247, "grad_norm": 0.6926025152206421, "learning_rate": 6.002547309303674e-07, "loss": 0.4989, "step": 10807 }, { "epoch": 1.875086745315753, "grad_norm": 0.8088293671607971, "learning_rate": 5.985991742471698e-07, "loss": 0.3966, "step": 10808 }, { "epoch": 1.875260235947259, "grad_norm": 0.703403651714325, "learning_rate": 5.969458691191432e-07, "loss": 0.4469, "step": 10809 }, { "epoch": 1.8754337265787648, "grad_norm": 1.162053108215332, "learning_rate": 5.952948157381566e-07, "loss": 0.5541, "step": 10810 }, { "epoch": 1.8756072172102707, "grad_norm": 0.9857968091964722, "learning_rate": 5.936460142958189e-07, "loss": 0.4306, "step": 10811 }, { "epoch": 1.8757807078417765, "grad_norm": 0.7314509153366089, "learning_rate": 5.919994649834748e-07, "loss": 0.5096, "step": 10812 }, { "epoch": 1.8759541984732824, "grad_norm": 1.1454625129699707, "learning_rate": 5.903551679922049e-07, "loss": 0.3865, "step": 10813 }, { "epoch": 1.8761276891047882, "grad_norm": 0.7919958233833313, "learning_rate": 5.887131235128385e-07, "loss": 0.3586, "step": 10814 }, { "epoch": 1.8763011797362943, "grad_norm": 0.9079128503799438, "learning_rate": 5.870733317359278e-07, "loss": 0.4118, "step": 10815 }, { "epoch": 1.8764746703678001, "grad_norm": 0.908237099647522, "learning_rate": 5.854357928517806e-07, "loss": 0.364, "step": 10816 }, { "epoch": 1.8766481609993062, "grad_norm": 0.8470625877380371, "learning_rate": 5.838005070504293e-07, "loss": 0.3947, "step": 10817 }, { "epoch": 1.876821651630812, "grad_norm": 0.6253703236579895, "learning_rate": 5.821674745216599e-07, "loss": 0.449, "step": 10818 }, { "epoch": 1.876995142262318, "grad_norm": 0.6411759853363037, "learning_rate": 5.80536695454974e-07, "loss": 0.4144, "step": 10819 }, { "epoch": 1.8771686328938237, "grad_norm": 0.745097815990448, "learning_rate": 5.789081700396381e-07, "loss": 0.462, "step": 10820 }, { "epoch": 1.8773421235253296, "grad_norm": 0.760269284248352, "learning_rate": 5.772818984646367e-07, "loss": 0.4585, "step": 10821 }, { "epoch": 1.8775156141568354, "grad_norm": 1.3237879276275635, "learning_rate": 5.756578809187008e-07, "loss": 0.5519, "step": 10822 }, { "epoch": 1.8776891047883413, "grad_norm": 0.8203462958335876, "learning_rate": 5.740361175903042e-07, "loss": 0.3639, "step": 10823 }, { "epoch": 1.8778625954198473, "grad_norm": 1.1125695705413818, "learning_rate": 5.724166086676542e-07, "loss": 0.3593, "step": 10824 }, { "epoch": 1.8780360860513532, "grad_norm": 0.8598060011863708, "learning_rate": 5.707993543386914e-07, "loss": 0.3606, "step": 10825 }, { "epoch": 1.8782095766828593, "grad_norm": 0.7895324230194092, "learning_rate": 5.691843547911013e-07, "loss": 0.4056, "step": 10826 }, { "epoch": 1.878383067314365, "grad_norm": 0.8858323097229004, "learning_rate": 5.675716102123141e-07, "loss": 0.5073, "step": 10827 }, { "epoch": 1.878556557945871, "grad_norm": 0.6717455387115479, "learning_rate": 5.6596112078948e-07, "loss": 0.4431, "step": 10828 }, { "epoch": 1.8787300485773768, "grad_norm": 0.706802248954773, "learning_rate": 5.643528867095049e-07, "loss": 0.4803, "step": 10829 }, { "epoch": 1.8789035392088826, "grad_norm": 0.7239381670951843, "learning_rate": 5.627469081590242e-07, "loss": 0.4407, "step": 10830 }, { "epoch": 1.8790770298403885, "grad_norm": 0.9374978542327881, "learning_rate": 5.611431853244132e-07, "loss": 0.3917, "step": 10831 }, { "epoch": 1.8792505204718946, "grad_norm": 0.6650722026824951, "learning_rate": 5.595417183917851e-07, "loss": 0.6089, "step": 10832 }, { "epoch": 1.8794240111034004, "grad_norm": 0.8580365777015686, "learning_rate": 5.579425075469936e-07, "loss": 0.4304, "step": 10833 }, { "epoch": 1.8795975017349065, "grad_norm": 0.6936100721359253, "learning_rate": 5.563455529756301e-07, "loss": 0.493, "step": 10834 }, { "epoch": 1.8797709923664123, "grad_norm": 0.7324767112731934, "learning_rate": 5.547508548630221e-07, "loss": 0.3958, "step": 10835 }, { "epoch": 1.8799444829979182, "grad_norm": 0.7372058629989624, "learning_rate": 5.531584133942325e-07, "loss": 0.3337, "step": 10836 }, { "epoch": 1.880117973629424, "grad_norm": 0.79083251953125, "learning_rate": 5.515682287540736e-07, "loss": 0.4375, "step": 10837 }, { "epoch": 1.8802914642609299, "grad_norm": 0.9389005899429321, "learning_rate": 5.499803011270776e-07, "loss": 0.3457, "step": 10838 }, { "epoch": 1.8804649548924357, "grad_norm": 2.623318910598755, "learning_rate": 5.483946306975374e-07, "loss": 0.3108, "step": 10839 }, { "epoch": 1.8806384455239415, "grad_norm": 1.211706280708313, "learning_rate": 5.468112176494633e-07, "loss": 0.4312, "step": 10840 }, { "epoch": 1.8808119361554476, "grad_norm": 0.8657251596450806, "learning_rate": 5.452300621666151e-07, "loss": 0.4696, "step": 10841 }, { "epoch": 1.8809854267869535, "grad_norm": 0.7004914879798889, "learning_rate": 5.43651164432486e-07, "loss": 0.4318, "step": 10842 }, { "epoch": 1.8811589174184595, "grad_norm": 1.0329854488372803, "learning_rate": 5.420745246303116e-07, "loss": 0.5156, "step": 10843 }, { "epoch": 1.8813324080499654, "grad_norm": 0.9545468091964722, "learning_rate": 5.405001429430634e-07, "loss": 0.3884, "step": 10844 }, { "epoch": 1.8815058986814712, "grad_norm": 1.3794701099395752, "learning_rate": 5.389280195534463e-07, "loss": 0.515, "step": 10845 }, { "epoch": 1.881679389312977, "grad_norm": 0.7027184963226318, "learning_rate": 5.373581546439077e-07, "loss": 0.5974, "step": 10846 }, { "epoch": 1.881852879944483, "grad_norm": 0.8707473874092102, "learning_rate": 5.357905483966375e-07, "loss": 0.4139, "step": 10847 }, { "epoch": 1.8820263705759888, "grad_norm": 0.8357599973678589, "learning_rate": 5.342252009935522e-07, "loss": 0.392, "step": 10848 }, { "epoch": 1.8821998612074948, "grad_norm": 1.2941255569458008, "learning_rate": 5.326621126163157e-07, "loss": 0.4586, "step": 10849 }, { "epoch": 1.8823733518390007, "grad_norm": 0.8082398176193237, "learning_rate": 5.311012834463247e-07, "loss": 0.4844, "step": 10850 }, { "epoch": 1.8825468424705067, "grad_norm": 1.1110472679138184, "learning_rate": 5.295427136647124e-07, "loss": 0.4257, "step": 10851 }, { "epoch": 1.8827203331020126, "grad_norm": 0.774032711982727, "learning_rate": 5.279864034523586e-07, "loss": 0.4403, "step": 10852 }, { "epoch": 1.8828938237335184, "grad_norm": 0.9818394184112549, "learning_rate": 5.26432352989874e-07, "loss": 0.4296, "step": 10853 }, { "epoch": 1.8830673143650243, "grad_norm": 0.7783525586128235, "learning_rate": 5.248805624576037e-07, "loss": 0.4307, "step": 10854 }, { "epoch": 1.8832408049965301, "grad_norm": 0.9560015201568604, "learning_rate": 5.233310320356366e-07, "loss": 0.5295, "step": 10855 }, { "epoch": 1.883414295628036, "grad_norm": 0.689906656742096, "learning_rate": 5.217837619038002e-07, "loss": 0.4695, "step": 10856 }, { "epoch": 1.8835877862595418, "grad_norm": 0.826659083366394, "learning_rate": 5.20238752241653e-07, "loss": 0.4233, "step": 10857 }, { "epoch": 1.8837612768910479, "grad_norm": 0.8049799799919128, "learning_rate": 5.186960032284983e-07, "loss": 0.4156, "step": 10858 }, { "epoch": 1.8839347675225537, "grad_norm": 0.9805461168289185, "learning_rate": 5.171555150433705e-07, "loss": 0.3573, "step": 10859 }, { "epoch": 1.8841082581540598, "grad_norm": 0.6907187700271606, "learning_rate": 5.156172878650489e-07, "loss": 0.4215, "step": 10860 }, { "epoch": 1.8842817487855656, "grad_norm": 0.7992402911186218, "learning_rate": 5.140813218720442e-07, "loss": 0.3917, "step": 10861 }, { "epoch": 1.8844552394170715, "grad_norm": 1.0171290636062622, "learning_rate": 5.125476172426092e-07, "loss": 0.4378, "step": 10862 }, { "epoch": 1.8846287300485773, "grad_norm": 0.6632357835769653, "learning_rate": 5.110161741547281e-07, "loss": 0.5093, "step": 10863 }, { "epoch": 1.8848022206800832, "grad_norm": 1.0032912492752075, "learning_rate": 5.094869927861323e-07, "loss": 0.4222, "step": 10864 }, { "epoch": 1.884975711311589, "grad_norm": 0.8233975172042847, "learning_rate": 5.079600733142775e-07, "loss": 0.5587, "step": 10865 }, { "epoch": 1.885149201943095, "grad_norm": 0.8972030878067017, "learning_rate": 5.064354159163754e-07, "loss": 0.5225, "step": 10866 }, { "epoch": 1.885322692574601, "grad_norm": 1.1515147686004639, "learning_rate": 5.049130207693509e-07, "loss": 0.3785, "step": 10867 }, { "epoch": 1.885496183206107, "grad_norm": 0.9869181513786316, "learning_rate": 5.033928880498917e-07, "loss": 0.4395, "step": 10868 }, { "epoch": 1.8856696738376129, "grad_norm": 0.8651929497718811, "learning_rate": 5.018750179344034e-07, "loss": 0.4138, "step": 10869 }, { "epoch": 1.8858431644691187, "grad_norm": 0.750634491443634, "learning_rate": 5.003594105990384e-07, "loss": 0.3713, "step": 10870 }, { "epoch": 1.8860166551006246, "grad_norm": 0.8486379981040955, "learning_rate": 4.98846066219687e-07, "loss": 0.4789, "step": 10871 }, { "epoch": 1.8861901457321304, "grad_norm": 0.8471236228942871, "learning_rate": 4.973349849719733e-07, "loss": 0.4246, "step": 10872 }, { "epoch": 1.8863636363636362, "grad_norm": 0.8552502393722534, "learning_rate": 4.958261670312591e-07, "loss": 0.4172, "step": 10873 }, { "epoch": 1.8865371269951423, "grad_norm": 0.675193727016449, "learning_rate": 4.943196125726446e-07, "loss": 0.4707, "step": 10874 }, { "epoch": 1.8867106176266482, "grad_norm": 0.7469701170921326, "learning_rate": 4.928153217709674e-07, "loss": 0.3881, "step": 10875 }, { "epoch": 1.8868841082581542, "grad_norm": 0.9664652943611145, "learning_rate": 4.913132948008037e-07, "loss": 0.3854, "step": 10876 }, { "epoch": 1.88705759888966, "grad_norm": 0.8605236411094666, "learning_rate": 4.89813531836465e-07, "loss": 0.3905, "step": 10877 }, { "epoch": 1.887231089521166, "grad_norm": 0.7831318974494934, "learning_rate": 4.883160330519965e-07, "loss": 0.4537, "step": 10878 }, { "epoch": 1.8874045801526718, "grad_norm": 0.8109844326972961, "learning_rate": 4.868207986211926e-07, "loss": 0.3942, "step": 10879 }, { "epoch": 1.8875780707841776, "grad_norm": 0.7134442925453186, "learning_rate": 4.853278287175677e-07, "loss": 0.4084, "step": 10880 }, { "epoch": 1.8877515614156835, "grad_norm": 0.8275427222251892, "learning_rate": 4.838371235143902e-07, "loss": 0.3734, "step": 10881 }, { "epoch": 1.8879250520471893, "grad_norm": 0.6749014854431152, "learning_rate": 4.823486831846547e-07, "loss": 0.4794, "step": 10882 }, { "epoch": 1.8880985426786954, "grad_norm": 0.7942231297492981, "learning_rate": 4.808625079010987e-07, "loss": 0.509, "step": 10883 }, { "epoch": 1.8882720333102012, "grad_norm": 0.7638766169548035, "learning_rate": 4.793785978361887e-07, "loss": 0.5704, "step": 10884 }, { "epoch": 1.8884455239417073, "grad_norm": 0.7045732140541077, "learning_rate": 4.778969531621447e-07, "loss": 0.5597, "step": 10885 }, { "epoch": 1.8886190145732131, "grad_norm": 0.8598750829696655, "learning_rate": 4.764175740509025e-07, "loss": 0.4932, "step": 10886 }, { "epoch": 1.888792505204719, "grad_norm": 0.7648863792419434, "learning_rate": 4.749404606741514e-07, "loss": 0.5145, "step": 10887 }, { "epoch": 1.8889659958362248, "grad_norm": 1.0697370767593384, "learning_rate": 4.7346561320330997e-07, "loss": 0.4833, "step": 10888 }, { "epoch": 1.8891394864677307, "grad_norm": 1.54634690284729, "learning_rate": 4.7199303180953894e-07, "loss": 0.3649, "step": 10889 }, { "epoch": 1.8893129770992365, "grad_norm": 0.7695502042770386, "learning_rate": 4.7052271666373053e-07, "loss": 0.4333, "step": 10890 }, { "epoch": 1.8894864677307426, "grad_norm": 1.0159564018249512, "learning_rate": 4.6905466793651713e-07, "loss": 0.3546, "step": 10891 }, { "epoch": 1.8896599583622484, "grad_norm": 0.9595746397972107, "learning_rate": 4.675888857982669e-07, "loss": 0.3325, "step": 10892 }, { "epoch": 1.8898334489937545, "grad_norm": 0.86198890209198, "learning_rate": 4.6612537041908823e-07, "loss": 0.3857, "step": 10893 }, { "epoch": 1.8900069396252603, "grad_norm": 0.6959157586097717, "learning_rate": 4.646641219688186e-07, "loss": 0.4095, "step": 10894 }, { "epoch": 1.8901804302567662, "grad_norm": 1.132491111755371, "learning_rate": 4.6320514061704236e-07, "loss": 0.5416, "step": 10895 }, { "epoch": 1.890353920888272, "grad_norm": 1.1667431592941284, "learning_rate": 4.617484265330752e-07, "loss": 0.3961, "step": 10896 }, { "epoch": 1.8905274115197779, "grad_norm": 0.7082688808441162, "learning_rate": 4.602939798859685e-07, "loss": 0.483, "step": 10897 }, { "epoch": 1.8907009021512837, "grad_norm": 0.8543420433998108, "learning_rate": 4.588418008445161e-07, "loss": 0.4236, "step": 10898 }, { "epoch": 1.8908743927827896, "grad_norm": 0.6347614526748657, "learning_rate": 4.573918895772389e-07, "loss": 0.4264, "step": 10899 }, { "epoch": 1.8910478834142956, "grad_norm": 0.9070220589637756, "learning_rate": 4.5594424625240887e-07, "loss": 0.3589, "step": 10900 }, { "epoch": 1.8912213740458015, "grad_norm": 3.528459310531616, "learning_rate": 4.544988710380205e-07, "loss": 0.3573, "step": 10901 }, { "epoch": 1.8913948646773076, "grad_norm": 1.0848753452301025, "learning_rate": 4.5305576410181293e-07, "loss": 0.3687, "step": 10902 }, { "epoch": 1.8915683553088134, "grad_norm": 1.0708909034729004, "learning_rate": 4.51614925611259e-07, "loss": 0.4875, "step": 10903 }, { "epoch": 1.8917418459403192, "grad_norm": 0.761223554611206, "learning_rate": 4.5017635573357366e-07, "loss": 0.5276, "step": 10904 }, { "epoch": 1.891915336571825, "grad_norm": 0.9501824378967285, "learning_rate": 4.4874005463570126e-07, "loss": 0.4119, "step": 10905 }, { "epoch": 1.892088827203331, "grad_norm": 0.8690057992935181, "learning_rate": 4.4730602248432843e-07, "loss": 0.3707, "step": 10906 }, { "epoch": 1.8922623178348368, "grad_norm": 0.9825139045715332, "learning_rate": 4.4587425944587317e-07, "loss": 0.5173, "step": 10907 }, { "epoch": 1.8924358084663429, "grad_norm": 0.7432997226715088, "learning_rate": 4.444447656864981e-07, "loss": 0.4092, "step": 10908 }, { "epoch": 1.8926092990978487, "grad_norm": 0.7270981073379517, "learning_rate": 4.4301754137209277e-07, "loss": 0.5334, "step": 10909 }, { "epoch": 1.8927827897293548, "grad_norm": 1.37782883644104, "learning_rate": 4.4159258666828907e-07, "loss": 0.4818, "step": 10910 }, { "epoch": 1.8929562803608606, "grad_norm": 0.8546935319900513, "learning_rate": 4.401699017404593e-07, "loss": 0.5096, "step": 10911 }, { "epoch": 1.8931297709923665, "grad_norm": 1.0219379663467407, "learning_rate": 4.3874948675370233e-07, "loss": 0.3737, "step": 10912 }, { "epoch": 1.8933032616238723, "grad_norm": 0.9887694716453552, "learning_rate": 4.373313418728575e-07, "loss": 0.3583, "step": 10913 }, { "epoch": 1.8934767522553781, "grad_norm": 1.070763349533081, "learning_rate": 4.3591546726250877e-07, "loss": 0.574, "step": 10914 }, { "epoch": 1.893650242886884, "grad_norm": 0.7176077961921692, "learning_rate": 4.3450186308696685e-07, "loss": 0.5289, "step": 10915 }, { "epoch": 1.89382373351839, "grad_norm": 1.0414750576019287, "learning_rate": 4.3309052951028275e-07, "loss": 0.3427, "step": 10916 }, { "epoch": 1.893997224149896, "grad_norm": 0.8068662881851196, "learning_rate": 4.31681466696241e-07, "loss": 0.3973, "step": 10917 }, { "epoch": 1.8941707147814018, "grad_norm": 0.8175247311592102, "learning_rate": 4.302746748083664e-07, "loss": 0.5437, "step": 10918 }, { "epoch": 1.8943442054129078, "grad_norm": 0.9541627764701843, "learning_rate": 4.2887015400991937e-07, "loss": 0.3667, "step": 10919 }, { "epoch": 1.8945176960444137, "grad_norm": 0.7084746360778809, "learning_rate": 4.2746790446389853e-07, "loss": 0.5047, "step": 10920 }, { "epoch": 1.8946911866759195, "grad_norm": 0.8773970603942871, "learning_rate": 4.260679263330314e-07, "loss": 0.4802, "step": 10921 }, { "epoch": 1.8948646773074254, "grad_norm": 0.8791499733924866, "learning_rate": 4.2467021977978806e-07, "loss": 0.3822, "step": 10922 }, { "epoch": 1.8950381679389312, "grad_norm": 0.791195273399353, "learning_rate": 4.2327478496637877e-07, "loss": 0.3689, "step": 10923 }, { "epoch": 1.895211658570437, "grad_norm": 0.9519442915916443, "learning_rate": 4.218816220547406e-07, "loss": 0.5925, "step": 10924 }, { "epoch": 1.8953851492019431, "grad_norm": 0.6581597924232483, "learning_rate": 4.2049073120655315e-07, "loss": 0.4708, "step": 10925 }, { "epoch": 1.895558639833449, "grad_norm": 0.8855475783348083, "learning_rate": 4.1910211258322954e-07, "loss": 0.4625, "step": 10926 }, { "epoch": 1.895732130464955, "grad_norm": 0.7376269102096558, "learning_rate": 4.1771576634592524e-07, "loss": 0.381, "step": 10927 }, { "epoch": 1.8959056210964609, "grad_norm": 1.098069190979004, "learning_rate": 4.1633169265552274e-07, "loss": 0.4945, "step": 10928 }, { "epoch": 1.8960791117279667, "grad_norm": 0.8522683382034302, "learning_rate": 4.149498916726469e-07, "loss": 0.4251, "step": 10929 }, { "epoch": 1.8962526023594726, "grad_norm": 0.7908493280410767, "learning_rate": 4.13570363557656e-07, "loss": 0.3824, "step": 10930 }, { "epoch": 1.8964260929909784, "grad_norm": 0.9189895987510681, "learning_rate": 4.1219310847064876e-07, "loss": 0.3522, "step": 10931 }, { "epoch": 1.8965995836224843, "grad_norm": 0.8488624095916748, "learning_rate": 4.108181265714528e-07, "loss": 0.4468, "step": 10932 }, { "epoch": 1.8967730742539903, "grad_norm": 1.0427064895629883, "learning_rate": 4.0944541801964275e-07, "loss": 0.3856, "step": 10933 }, { "epoch": 1.8969465648854962, "grad_norm": 0.8867191672325134, "learning_rate": 4.0807498297451786e-07, "loss": 0.3552, "step": 10934 }, { "epoch": 1.8971200555170022, "grad_norm": 0.7627947330474854, "learning_rate": 4.06706821595122e-07, "loss": 0.43, "step": 10935 }, { "epoch": 1.897293546148508, "grad_norm": 0.803941011428833, "learning_rate": 4.053409340402259e-07, "loss": 0.5505, "step": 10936 }, { "epoch": 1.897467036780014, "grad_norm": 1.2127147912979126, "learning_rate": 4.039773204683517e-07, "loss": 0.4297, "step": 10937 }, { "epoch": 1.8976405274115198, "grad_norm": 0.8328952789306641, "learning_rate": 4.026159810377417e-07, "loss": 0.401, "step": 10938 }, { "epoch": 1.8978140180430256, "grad_norm": 0.720675528049469, "learning_rate": 4.012569159063806e-07, "loss": 0.4507, "step": 10939 }, { "epoch": 1.8979875086745315, "grad_norm": 0.7106125354766846, "learning_rate": 3.999001252319934e-07, "loss": 0.4246, "step": 10940 }, { "epoch": 1.8981609993060373, "grad_norm": 0.8404808640480042, "learning_rate": 3.9854560917203635e-07, "loss": 0.4801, "step": 10941 }, { "epoch": 1.8983344899375434, "grad_norm": 0.8275275230407715, "learning_rate": 3.971933678836992e-07, "loss": 0.3975, "step": 10942 }, { "epoch": 1.8985079805690492, "grad_norm": 1.1109490394592285, "learning_rate": 3.958434015239143e-07, "loss": 0.3765, "step": 10943 }, { "epoch": 1.8986814712005553, "grad_norm": 0.8118730783462524, "learning_rate": 3.944957102493474e-07, "loss": 0.3887, "step": 10944 }, { "epoch": 1.8988549618320612, "grad_norm": 1.2894953489303589, "learning_rate": 3.931502942163956e-07, "loss": 0.5012, "step": 10945 }, { "epoch": 1.899028452463567, "grad_norm": 0.985551655292511, "learning_rate": 3.918071535812007e-07, "loss": 0.5458, "step": 10946 }, { "epoch": 1.8992019430950728, "grad_norm": 0.6166860461235046, "learning_rate": 3.904662884996335e-07, "loss": 0.5833, "step": 10947 }, { "epoch": 1.8993754337265787, "grad_norm": 0.9930065870285034, "learning_rate": 3.8912769912730297e-07, "loss": 0.3824, "step": 10948 }, { "epoch": 1.8995489243580845, "grad_norm": 0.7751320004463196, "learning_rate": 3.8779138561955145e-07, "loss": 0.3983, "step": 10949 }, { "epoch": 1.8997224149895906, "grad_norm": 0.8659856915473938, "learning_rate": 3.864573481314682e-07, "loss": 0.3284, "step": 10950 }, { "epoch": 1.8998959056210964, "grad_norm": 0.9467388391494751, "learning_rate": 3.8512558681785826e-07, "loss": 0.5446, "step": 10951 }, { "epoch": 1.9000693962526025, "grad_norm": 0.8677664399147034, "learning_rate": 3.837961018332825e-07, "loss": 0.4783, "step": 10952 }, { "epoch": 1.9002428868841084, "grad_norm": 1.2335377931594849, "learning_rate": 3.824688933320264e-07, "loss": 0.4369, "step": 10953 }, { "epoch": 1.9004163775156142, "grad_norm": 0.9301946759223938, "learning_rate": 3.811439614681156e-07, "loss": 0.356, "step": 10954 }, { "epoch": 1.90058986814712, "grad_norm": 0.9937767386436462, "learning_rate": 3.798213063953049e-07, "loss": 0.3503, "step": 10955 }, { "epoch": 1.900763358778626, "grad_norm": 0.7710212469100952, "learning_rate": 3.7850092826709817e-07, "loss": 0.5227, "step": 10956 }, { "epoch": 1.9009368494101317, "grad_norm": 0.8241571187973022, "learning_rate": 3.771828272367195e-07, "loss": 0.4879, "step": 10957 }, { "epoch": 1.9011103400416376, "grad_norm": 0.8966371417045593, "learning_rate": 3.758670034571399e-07, "loss": 0.4624, "step": 10958 }, { "epoch": 1.9012838306731437, "grad_norm": 0.925626277923584, "learning_rate": 3.745534570810616e-07, "loss": 0.4044, "step": 10959 }, { "epoch": 1.9014573213046495, "grad_norm": 0.7923619747161865, "learning_rate": 3.7324218826092053e-07, "loss": 0.5719, "step": 10960 }, { "epoch": 1.9016308119361556, "grad_norm": 0.9957678914070129, "learning_rate": 3.7193319714889487e-07, "loss": 0.4647, "step": 10961 }, { "epoch": 1.9018043025676614, "grad_norm": 0.9195384383201599, "learning_rate": 3.7062648389689204e-07, "loss": 0.5049, "step": 10962 }, { "epoch": 1.9019777931991673, "grad_norm": 0.9187933802604675, "learning_rate": 3.6932204865655963e-07, "loss": 0.6035, "step": 10963 }, { "epoch": 1.9021512838306731, "grad_norm": 1.0127263069152832, "learning_rate": 3.680198915792765e-07, "loss": 0.3387, "step": 10964 }, { "epoch": 1.902324774462179, "grad_norm": 0.7908935546875, "learning_rate": 3.6672001281616186e-07, "loss": 0.3312, "step": 10965 }, { "epoch": 1.9024982650936848, "grad_norm": 0.7882046699523926, "learning_rate": 3.654224125180661e-07, "loss": 0.3206, "step": 10966 }, { "epoch": 1.9026717557251909, "grad_norm": 1.0293998718261719, "learning_rate": 3.6412709083557984e-07, "loss": 0.4872, "step": 10967 }, { "epoch": 1.9028452463566967, "grad_norm": 1.3950729370117188, "learning_rate": 3.628340479190229e-07, "loss": 0.4183, "step": 10968 }, { "epoch": 1.9030187369882028, "grad_norm": 0.7494522929191589, "learning_rate": 3.6154328391845963e-07, "loss": 0.4952, "step": 10969 }, { "epoch": 1.9031922276197086, "grad_norm": 0.7665684223175049, "learning_rate": 3.602547989836769e-07, "loss": 0.4425, "step": 10970 }, { "epoch": 1.9033657182512145, "grad_norm": 0.7976112365722656, "learning_rate": 3.5896859326421284e-07, "loss": 0.5042, "step": 10971 }, { "epoch": 1.9035392088827203, "grad_norm": 0.8296693563461304, "learning_rate": 3.5768466690933036e-07, "loss": 0.3983, "step": 10972 }, { "epoch": 1.9037126995142262, "grad_norm": 0.8233347535133362, "learning_rate": 3.564030200680302e-07, "loss": 0.4572, "step": 10973 }, { "epoch": 1.903886190145732, "grad_norm": 1.0415624380111694, "learning_rate": 3.551236528890445e-07, "loss": 0.4337, "step": 10974 }, { "epoch": 1.904059680777238, "grad_norm": 0.7998337745666504, "learning_rate": 3.538465655208545e-07, "loss": 0.369, "step": 10975 }, { "epoch": 1.904233171408744, "grad_norm": 0.6770361065864563, "learning_rate": 3.5257175811166166e-07, "loss": 0.5106, "step": 10976 }, { "epoch": 1.9044066620402498, "grad_norm": 0.9979817271232605, "learning_rate": 3.5129923080940985e-07, "loss": 0.3523, "step": 10977 }, { "epoch": 1.9045801526717558, "grad_norm": 1.1292340755462646, "learning_rate": 3.500289837617765e-07, "loss": 0.4637, "step": 10978 }, { "epoch": 1.9047536433032617, "grad_norm": 0.9591286182403564, "learning_rate": 3.4876101711617924e-07, "loss": 0.3842, "step": 10979 }, { "epoch": 1.9049271339347675, "grad_norm": 0.8513092398643494, "learning_rate": 3.474953310197604e-07, "loss": 0.3371, "step": 10980 }, { "epoch": 1.9051006245662734, "grad_norm": 0.6872183680534363, "learning_rate": 3.462319256194113e-07, "loss": 0.5271, "step": 10981 }, { "epoch": 1.9052741151977792, "grad_norm": 0.7488598227500916, "learning_rate": 3.4497080106174806e-07, "loss": 0.4376, "step": 10982 }, { "epoch": 1.905447605829285, "grad_norm": 0.8067551255226135, "learning_rate": 3.437119574931247e-07, "loss": 0.4055, "step": 10983 }, { "epoch": 1.9056210964607911, "grad_norm": 0.7539083957672119, "learning_rate": 3.424553950596332e-07, "loss": 0.473, "step": 10984 }, { "epoch": 1.905794587092297, "grad_norm": 1.280732274055481, "learning_rate": 3.412011139070992e-07, "loss": 0.486, "step": 10985 }, { "epoch": 1.905968077723803, "grad_norm": 0.9366377592086792, "learning_rate": 3.3994911418108176e-07, "loss": 0.315, "step": 10986 }, { "epoch": 1.906141568355309, "grad_norm": 0.8942596316337585, "learning_rate": 3.3869939602687806e-07, "loss": 0.412, "step": 10987 }, { "epoch": 1.9063150589868147, "grad_norm": 0.8256127834320068, "learning_rate": 3.374519595895209e-07, "loss": 0.4327, "step": 10988 }, { "epoch": 1.9064885496183206, "grad_norm": 1.0570706129074097, "learning_rate": 3.362068050137768e-07, "loss": 0.5269, "step": 10989 }, { "epoch": 1.9066620402498264, "grad_norm": 1.0103695392608643, "learning_rate": 3.3496393244414114e-07, "loss": 0.4835, "step": 10990 }, { "epoch": 1.9068355308813323, "grad_norm": 0.7182628512382507, "learning_rate": 3.3372334202485867e-07, "loss": 0.5101, "step": 10991 }, { "epoch": 1.9070090215128384, "grad_norm": 0.8422587513923645, "learning_rate": 3.324850338998964e-07, "loss": 0.4139, "step": 10992 }, { "epoch": 1.9071825121443442, "grad_norm": 0.8667024970054626, "learning_rate": 3.312490082129638e-07, "loss": 0.4093, "step": 10993 }, { "epoch": 1.9073560027758503, "grad_norm": 0.7709800004959106, "learning_rate": 3.300152651075039e-07, "loss": 0.5382, "step": 10994 }, { "epoch": 1.9075294934073561, "grad_norm": 0.799622654914856, "learning_rate": 3.2878380472669116e-07, "loss": 0.4028, "step": 10995 }, { "epoch": 1.907702984038862, "grad_norm": 0.8478907942771912, "learning_rate": 3.2755462721344e-07, "loss": 0.4365, "step": 10996 }, { "epoch": 1.9078764746703678, "grad_norm": 1.0206315517425537, "learning_rate": 3.2632773271039644e-07, "loss": 0.3174, "step": 10997 }, { "epoch": 1.9080499653018737, "grad_norm": 0.9754865765571594, "learning_rate": 3.251031213599465e-07, "loss": 0.4686, "step": 10998 }, { "epoch": 1.9082234559333795, "grad_norm": 0.9375216960906982, "learning_rate": 3.2388079330420095e-07, "loss": 0.4592, "step": 10999 }, { "epoch": 1.9083969465648853, "grad_norm": 0.9150240421295166, "learning_rate": 3.2266074868501976e-07, "loss": 0.4347, "step": 11000 }, { "epoch": 1.9085704371963914, "grad_norm": 0.8404751420021057, "learning_rate": 3.2144298764398505e-07, "loss": 0.3708, "step": 11001 }, { "epoch": 1.9087439278278973, "grad_norm": 0.8384784460067749, "learning_rate": 3.2022751032242396e-07, "loss": 0.4595, "step": 11002 }, { "epoch": 1.9089174184594033, "grad_norm": 0.7938383221626282, "learning_rate": 3.190143168613902e-07, "loss": 0.4695, "step": 11003 }, { "epoch": 1.9090909090909092, "grad_norm": 0.985153079032898, "learning_rate": 3.178034074016778e-07, "loss": 0.465, "step": 11004 }, { "epoch": 1.909264399722415, "grad_norm": 0.9028046131134033, "learning_rate": 3.1659478208381665e-07, "loss": 0.3541, "step": 11005 }, { "epoch": 1.9094378903539209, "grad_norm": 0.8445605635643005, "learning_rate": 3.1538844104806343e-07, "loss": 0.535, "step": 11006 }, { "epoch": 1.9096113809854267, "grad_norm": 0.921342134475708, "learning_rate": 3.141843844344195e-07, "loss": 0.433, "step": 11007 }, { "epoch": 1.9097848716169326, "grad_norm": 0.746217668056488, "learning_rate": 3.1298261238261964e-07, "loss": 0.5485, "step": 11008 }, { "epoch": 1.9099583622484386, "grad_norm": 1.069266676902771, "learning_rate": 3.1178312503212347e-07, "loss": 0.4598, "step": 11009 }, { "epoch": 1.9101318528799445, "grad_norm": 0.6470270156860352, "learning_rate": 3.105859225221397e-07, "loss": 0.5859, "step": 11010 }, { "epoch": 1.9103053435114505, "grad_norm": 0.9771518707275391, "learning_rate": 3.0939100499160155e-07, "loss": 0.458, "step": 11011 }, { "epoch": 1.9104788341429564, "grad_norm": 0.9310368299484253, "learning_rate": 3.0819837257918037e-07, "loss": 0.392, "step": 11012 }, { "epoch": 1.9106523247744622, "grad_norm": 0.9798033833503723, "learning_rate": 3.0700802542328325e-07, "loss": 0.4277, "step": 11013 }, { "epoch": 1.910825815405968, "grad_norm": 0.7718014121055603, "learning_rate": 3.058199636620529e-07, "loss": 0.4647, "step": 11014 }, { "epoch": 1.910999306037474, "grad_norm": 0.5992761254310608, "learning_rate": 3.046341874333636e-07, "loss": 0.4934, "step": 11015 }, { "epoch": 1.9111727966689798, "grad_norm": 0.8946365118026733, "learning_rate": 3.034506968748274e-07, "loss": 0.413, "step": 11016 }, { "epoch": 1.9113462873004856, "grad_norm": 0.7185291051864624, "learning_rate": 3.0226949212378786e-07, "loss": 0.5156, "step": 11017 }, { "epoch": 1.9115197779319917, "grad_norm": 0.7932426929473877, "learning_rate": 3.010905733173264e-07, "loss": 0.4785, "step": 11018 }, { "epoch": 1.9116932685634975, "grad_norm": 1.4811848402023315, "learning_rate": 2.9991394059225797e-07, "loss": 0.4618, "step": 11019 }, { "epoch": 1.9118667591950036, "grad_norm": 0.8622974753379822, "learning_rate": 2.987395940851312e-07, "loss": 0.3826, "step": 11020 }, { "epoch": 1.9120402498265094, "grad_norm": 0.939264714717865, "learning_rate": 2.975675339322326e-07, "loss": 0.3724, "step": 11021 }, { "epoch": 1.9122137404580153, "grad_norm": 0.7853683829307556, "learning_rate": 2.9639776026957777e-07, "loss": 0.4312, "step": 11022 }, { "epoch": 1.9123872310895211, "grad_norm": 1.148753046989441, "learning_rate": 2.9523027323292264e-07, "loss": 0.4413, "step": 11023 }, { "epoch": 1.912560721721027, "grad_norm": 0.7600244283676147, "learning_rate": 2.9406507295775657e-07, "loss": 0.472, "step": 11024 }, { "epoch": 1.9127342123525328, "grad_norm": 0.8786101937294006, "learning_rate": 2.9290215957929804e-07, "loss": 0.3672, "step": 11025 }, { "epoch": 1.912907702984039, "grad_norm": 0.6643133163452148, "learning_rate": 2.91741533232508e-07, "loss": 0.4315, "step": 11026 }, { "epoch": 1.9130811936155447, "grad_norm": 1.05028235912323, "learning_rate": 2.905831940520809e-07, "loss": 0.4463, "step": 11027 }, { "epoch": 1.9132546842470508, "grad_norm": 0.9317830801010132, "learning_rate": 2.894271421724359e-07, "loss": 0.3679, "step": 11028 }, { "epoch": 1.9134281748785567, "grad_norm": 1.1145577430725098, "learning_rate": 2.882733777277391e-07, "loss": 0.4349, "step": 11029 }, { "epoch": 1.9136016655100625, "grad_norm": 0.8806248307228088, "learning_rate": 2.871219008518877e-07, "loss": 0.47, "step": 11030 }, { "epoch": 1.9137751561415683, "grad_norm": 0.7149972915649414, "learning_rate": 2.859727116785083e-07, "loss": 0.5299, "step": 11031 }, { "epoch": 1.9139486467730742, "grad_norm": 0.6683125495910645, "learning_rate": 2.8482581034096733e-07, "loss": 0.5088, "step": 11032 }, { "epoch": 1.91412213740458, "grad_norm": 1.0784515142440796, "learning_rate": 2.8368119697236297e-07, "loss": 0.4847, "step": 11033 }, { "epoch": 1.914295628036086, "grad_norm": 1.723268747329712, "learning_rate": 2.825388717055311e-07, "loss": 0.3745, "step": 11034 }, { "epoch": 1.914469118667592, "grad_norm": 0.8399494290351868, "learning_rate": 2.8139883467303896e-07, "loss": 0.4497, "step": 11035 }, { "epoch": 1.9146426092990978, "grad_norm": 0.7521690726280212, "learning_rate": 2.8026108600718746e-07, "loss": 0.5481, "step": 11036 }, { "epoch": 1.9148160999306039, "grad_norm": 2.2653141021728516, "learning_rate": 2.7912562584001766e-07, "loss": 0.3365, "step": 11037 }, { "epoch": 1.9149895905621097, "grad_norm": 0.9587934017181396, "learning_rate": 2.7799245430329526e-07, "loss": 0.4149, "step": 11038 }, { "epoch": 1.9151630811936156, "grad_norm": 0.7059314846992493, "learning_rate": 2.768615715285283e-07, "loss": 0.4384, "step": 11039 }, { "epoch": 1.9153365718251214, "grad_norm": 0.7510185837745667, "learning_rate": 2.7573297764696085e-07, "loss": 0.4666, "step": 11040 }, { "epoch": 1.9155100624566272, "grad_norm": 1.0781872272491455, "learning_rate": 2.7460667278956355e-07, "loss": 0.4735, "step": 11041 }, { "epoch": 1.915683553088133, "grad_norm": 1.0659220218658447, "learning_rate": 2.7348265708704745e-07, "loss": 0.4026, "step": 11042 }, { "epoch": 1.9158570437196392, "grad_norm": 0.7296978831291199, "learning_rate": 2.723609306698527e-07, "loss": 0.5192, "step": 11043 }, { "epoch": 1.916030534351145, "grad_norm": 1.140160083770752, "learning_rate": 2.7124149366816177e-07, "loss": 0.4902, "step": 11044 }, { "epoch": 1.916204024982651, "grad_norm": 0.7639879584312439, "learning_rate": 2.701243462118819e-07, "loss": 0.4689, "step": 11045 }, { "epoch": 1.916377515614157, "grad_norm": 0.7788089513778687, "learning_rate": 2.690094884306649e-07, "loss": 0.447, "step": 11046 }, { "epoch": 1.9165510062456628, "grad_norm": 0.6959072947502136, "learning_rate": 2.678969204538828e-07, "loss": 0.5604, "step": 11047 }, { "epoch": 1.9167244968771686, "grad_norm": 0.7567552924156189, "learning_rate": 2.667866424106591e-07, "loss": 0.4889, "step": 11048 }, { "epoch": 1.9168979875086745, "grad_norm": 0.9657365679740906, "learning_rate": 2.656786544298373e-07, "loss": 0.4136, "step": 11049 }, { "epoch": 1.9170714781401803, "grad_norm": 0.9236763715744019, "learning_rate": 2.6457295664000573e-07, "loss": 0.427, "step": 11050 }, { "epoch": 1.9172449687716864, "grad_norm": 0.8116747140884399, "learning_rate": 2.634695491694772e-07, "loss": 0.3677, "step": 11051 }, { "epoch": 1.9174184594031922, "grad_norm": 0.860243558883667, "learning_rate": 2.623684321463049e-07, "loss": 0.3685, "step": 11052 }, { "epoch": 1.9175919500346983, "grad_norm": 0.7028695344924927, "learning_rate": 2.6126960569827554e-07, "loss": 0.5715, "step": 11053 }, { "epoch": 1.9177654406662041, "grad_norm": 0.8180177807807922, "learning_rate": 2.6017306995290926e-07, "loss": 0.4062, "step": 11054 }, { "epoch": 1.91793893129771, "grad_norm": 1.0677820444107056, "learning_rate": 2.5907882503745764e-07, "loss": 0.4395, "step": 11055 }, { "epoch": 1.9181124219292158, "grad_norm": 1.0341719388961792, "learning_rate": 2.579868710789124e-07, "loss": 0.513, "step": 11056 }, { "epoch": 1.9182859125607217, "grad_norm": 0.8403821587562561, "learning_rate": 2.5689720820399445e-07, "loss": 0.374, "step": 11057 }, { "epoch": 1.9184594031922275, "grad_norm": 0.813058614730835, "learning_rate": 2.5580983653916035e-07, "loss": 0.4329, "step": 11058 }, { "epoch": 1.9186328938237334, "grad_norm": 0.6556667685508728, "learning_rate": 2.5472475621060255e-07, "loss": 0.5389, "step": 11059 }, { "epoch": 1.9188063844552394, "grad_norm": 1.0568166971206665, "learning_rate": 2.5364196734424475e-07, "loss": 0.4509, "step": 11060 }, { "epoch": 1.9189798750867453, "grad_norm": 0.6956100463867188, "learning_rate": 2.5256147006574195e-07, "loss": 0.5348, "step": 11061 }, { "epoch": 1.9191533657182513, "grad_norm": 0.6176671981811523, "learning_rate": 2.514832645004939e-07, "loss": 0.5112, "step": 11062 }, { "epoch": 1.9193268563497572, "grad_norm": 1.0088751316070557, "learning_rate": 2.504073507736249e-07, "loss": 0.5662, "step": 11063 }, { "epoch": 1.919500346981263, "grad_norm": 0.7418835759162903, "learning_rate": 2.493337290099973e-07, "loss": 0.6219, "step": 11064 }, { "epoch": 1.9196738376127689, "grad_norm": 0.7149689793586731, "learning_rate": 2.482623993342004e-07, "loss": 0.4348, "step": 11065 }, { "epoch": 1.9198473282442747, "grad_norm": 0.9855237007141113, "learning_rate": 2.471933618705702e-07, "loss": 0.3461, "step": 11066 }, { "epoch": 1.9200208188757806, "grad_norm": 0.8616670370101929, "learning_rate": 2.4612661674316527e-07, "loss": 0.421, "step": 11067 }, { "epoch": 1.9201943095072866, "grad_norm": 0.8050004839897156, "learning_rate": 2.4506216407578665e-07, "loss": 0.3878, "step": 11068 }, { "epoch": 1.9203678001387925, "grad_norm": 0.8894696831703186, "learning_rate": 2.440000039919621e-07, "loss": 0.3459, "step": 11069 }, { "epoch": 1.9205412907702986, "grad_norm": 1.3463835716247559, "learning_rate": 2.429401366149553e-07, "loss": 0.2943, "step": 11070 }, { "epoch": 1.9207147814018044, "grad_norm": 0.8622847199440002, "learning_rate": 2.4188256206776785e-07, "loss": 0.4121, "step": 11071 }, { "epoch": 1.9208882720333103, "grad_norm": 0.9659159779548645, "learning_rate": 2.4082728047313487e-07, "loss": 0.5378, "step": 11072 }, { "epoch": 1.921061762664816, "grad_norm": 0.9911413788795471, "learning_rate": 2.397742919535162e-07, "loss": 0.3511, "step": 11073 }, { "epoch": 1.921235253296322, "grad_norm": 0.736795961856842, "learning_rate": 2.3872359663111856e-07, "loss": 0.4901, "step": 11074 }, { "epoch": 1.9214087439278278, "grad_norm": 0.8180474042892456, "learning_rate": 2.3767519462787326e-07, "loss": 0.5552, "step": 11075 }, { "epoch": 1.9215822345593336, "grad_norm": 1.6108638048171997, "learning_rate": 2.3662908606544964e-07, "loss": 0.4609, "step": 11076 }, { "epoch": 1.9217557251908397, "grad_norm": 0.8285250663757324, "learning_rate": 2.355852710652484e-07, "loss": 0.5109, "step": 11077 }, { "epoch": 1.9219292158223455, "grad_norm": 3.328786611557007, "learning_rate": 2.34543749748406e-07, "loss": 0.3688, "step": 11078 }, { "epoch": 1.9221027064538516, "grad_norm": 0.9429089426994324, "learning_rate": 2.3350452223579678e-07, "loss": 0.3843, "step": 11079 }, { "epoch": 1.9222761970853575, "grad_norm": 0.771192193031311, "learning_rate": 2.3246758864801544e-07, "loss": 0.3145, "step": 11080 }, { "epoch": 1.9224496877168633, "grad_norm": 0.9310546517372131, "learning_rate": 2.3143294910540794e-07, "loss": 0.4742, "step": 11081 }, { "epoch": 1.9226231783483692, "grad_norm": 0.8659986853599548, "learning_rate": 2.304006037280404e-07, "loss": 0.4091, "step": 11082 }, { "epoch": 1.922796668979875, "grad_norm": 0.8585099577903748, "learning_rate": 2.2937055263571928e-07, "loss": 0.5405, "step": 11083 }, { "epoch": 1.9229701596113808, "grad_norm": 0.7499017119407654, "learning_rate": 2.2834279594798002e-07, "loss": 0.3819, "step": 11084 }, { "epoch": 1.923143650242887, "grad_norm": 0.8291475772857666, "learning_rate": 2.2731733378410058e-07, "loss": 0.463, "step": 11085 }, { "epoch": 1.9233171408743928, "grad_norm": 0.5632716417312622, "learning_rate": 2.2629416626308353e-07, "loss": 0.5469, "step": 11086 }, { "epoch": 1.9234906315058988, "grad_norm": 0.7836756110191345, "learning_rate": 2.2527329350367166e-07, "loss": 0.4356, "step": 11087 }, { "epoch": 1.9236641221374047, "grad_norm": 0.9071162939071655, "learning_rate": 2.2425471562433466e-07, "loss": 0.4471, "step": 11088 }, { "epoch": 1.9238376127689105, "grad_norm": 1.5258830785751343, "learning_rate": 2.2323843274327793e-07, "loss": 0.4359, "step": 11089 }, { "epoch": 1.9240111034004164, "grad_norm": 0.8846318125724792, "learning_rate": 2.222244449784494e-07, "loss": 0.351, "step": 11090 }, { "epoch": 1.9241845940319222, "grad_norm": 0.9541126489639282, "learning_rate": 2.2121275244751939e-07, "loss": 0.4517, "step": 11091 }, { "epoch": 1.924358084663428, "grad_norm": 0.6848516464233398, "learning_rate": 2.2020335526789616e-07, "loss": 0.5078, "step": 11092 }, { "epoch": 1.9245315752949341, "grad_norm": 0.7995764017105103, "learning_rate": 2.1919625355671936e-07, "loss": 0.4564, "step": 11093 }, { "epoch": 1.92470506592644, "grad_norm": 1.459402084350586, "learning_rate": 2.1819144743086883e-07, "loss": 0.3871, "step": 11094 }, { "epoch": 1.9248785565579458, "grad_norm": 1.27466881275177, "learning_rate": 2.1718893700695132e-07, "loss": 0.452, "step": 11095 }, { "epoch": 1.9250520471894519, "grad_norm": 0.7997653484344482, "learning_rate": 2.1618872240130928e-07, "loss": 0.3805, "step": 11096 }, { "epoch": 1.9252255378209577, "grad_norm": 1.0554280281066895, "learning_rate": 2.1519080373001655e-07, "loss": 0.3438, "step": 11097 }, { "epoch": 1.9253990284524636, "grad_norm": 0.6617966890335083, "learning_rate": 2.1419518110888938e-07, "loss": 0.5869, "step": 11098 }, { "epoch": 1.9255725190839694, "grad_norm": 0.6871652007102966, "learning_rate": 2.13201854653462e-07, "loss": 0.4417, "step": 11099 }, { "epoch": 1.9257460097154753, "grad_norm": 0.6930230259895325, "learning_rate": 2.1221082447901774e-07, "loss": 0.5223, "step": 11100 }, { "epoch": 1.9259195003469811, "grad_norm": 0.887649416923523, "learning_rate": 2.1122209070056466e-07, "loss": 0.3975, "step": 11101 }, { "epoch": 1.9260929909784872, "grad_norm": 0.9196067452430725, "learning_rate": 2.1023565343284425e-07, "loss": 0.3744, "step": 11102 }, { "epoch": 1.926266481609993, "grad_norm": 1.1521224975585938, "learning_rate": 2.0925151279033828e-07, "loss": 0.4348, "step": 11103 }, { "epoch": 1.926439972241499, "grad_norm": 0.8633833527565002, "learning_rate": 2.082696688872554e-07, "loss": 0.5502, "step": 11104 }, { "epoch": 1.926613462873005, "grad_norm": 1.1526999473571777, "learning_rate": 2.0729012183753783e-07, "loss": 0.3765, "step": 11105 }, { "epoch": 1.9267869535045108, "grad_norm": 0.8256582021713257, "learning_rate": 2.063128717548657e-07, "loss": 0.4686, "step": 11106 }, { "epoch": 1.9269604441360166, "grad_norm": 0.8180612921714783, "learning_rate": 2.0533791875264608e-07, "loss": 0.3721, "step": 11107 }, { "epoch": 1.9271339347675225, "grad_norm": 0.7702810764312744, "learning_rate": 2.043652629440307e-07, "loss": 0.5107, "step": 11108 }, { "epoch": 1.9273074253990283, "grad_norm": 1.0001927614212036, "learning_rate": 2.0339490444188925e-07, "loss": 0.5558, "step": 11109 }, { "epoch": 1.9274809160305344, "grad_norm": 1.0472002029418945, "learning_rate": 2.0242684335884056e-07, "loss": 0.3996, "step": 11110 }, { "epoch": 1.9276544066620402, "grad_norm": 0.9137578010559082, "learning_rate": 2.0146107980722362e-07, "loss": 0.4105, "step": 11111 }, { "epoch": 1.9278278972935463, "grad_norm": 1.6406512260437012, "learning_rate": 2.0049761389911772e-07, "loss": 0.384, "step": 11112 }, { "epoch": 1.9280013879250522, "grad_norm": 0.7500235438346863, "learning_rate": 1.9953644574633335e-07, "loss": 0.4729, "step": 11113 }, { "epoch": 1.928174878556558, "grad_norm": 0.95838862657547, "learning_rate": 1.9857757546041912e-07, "loss": 0.3784, "step": 11114 }, { "epoch": 1.9283483691880638, "grad_norm": 1.3327245712280273, "learning_rate": 1.9762100315265043e-07, "loss": 0.5729, "step": 11115 }, { "epoch": 1.9285218598195697, "grad_norm": 0.7426334619522095, "learning_rate": 1.9666672893403627e-07, "loss": 0.4587, "step": 11116 }, { "epoch": 1.9286953504510755, "grad_norm": 1.5321580171585083, "learning_rate": 1.9571475291532805e-07, "loss": 0.4171, "step": 11117 }, { "epoch": 1.9288688410825814, "grad_norm": 0.6488795280456543, "learning_rate": 1.9476507520699518e-07, "loss": 0.4987, "step": 11118 }, { "epoch": 1.9290423317140875, "grad_norm": 0.7648763656616211, "learning_rate": 1.9381769591925614e-07, "loss": 0.5037, "step": 11119 }, { "epoch": 1.9292158223455933, "grad_norm": 0.8581758737564087, "learning_rate": 1.928726151620497e-07, "loss": 0.4137, "step": 11120 }, { "epoch": 1.9293893129770994, "grad_norm": 0.6748529076576233, "learning_rate": 1.9192983304505697e-07, "loss": 0.4775, "step": 11121 }, { "epoch": 1.9295628036086052, "grad_norm": 0.9109181761741638, "learning_rate": 1.9098934967768823e-07, "loss": 0.5974, "step": 11122 }, { "epoch": 1.929736294240111, "grad_norm": 0.6578526496887207, "learning_rate": 1.9005116516908729e-07, "loss": 0.5822, "step": 11123 }, { "epoch": 1.929909784871617, "grad_norm": 0.7644575834274292, "learning_rate": 1.891152796281337e-07, "loss": 0.4944, "step": 11124 }, { "epoch": 1.9300832755031228, "grad_norm": 0.693666398525238, "learning_rate": 1.8818169316343393e-07, "loss": 0.5334, "step": 11125 }, { "epoch": 1.9302567661346286, "grad_norm": 0.6019750833511353, "learning_rate": 1.8725040588333466e-07, "loss": 0.5242, "step": 11126 }, { "epoch": 1.9304302567661347, "grad_norm": 0.8340058326721191, "learning_rate": 1.8632141789591384e-07, "loss": 0.3843, "step": 11127 }, { "epoch": 1.9306037473976405, "grad_norm": 0.7593865990638733, "learning_rate": 1.853947293089764e-07, "loss": 0.4756, "step": 11128 }, { "epoch": 1.9307772380291466, "grad_norm": 1.217259407043457, "learning_rate": 1.8447034023007183e-07, "loss": 0.5166, "step": 11129 }, { "epoch": 1.9309507286606524, "grad_norm": 0.8378265500068665, "learning_rate": 1.8354825076647432e-07, "loss": 0.5759, "step": 11130 }, { "epoch": 1.9311242192921583, "grad_norm": 0.7213740944862366, "learning_rate": 1.826284610251916e-07, "loss": 0.5657, "step": 11131 }, { "epoch": 1.9312977099236641, "grad_norm": 0.7749907374382019, "learning_rate": 1.817109711129672e-07, "loss": 0.4541, "step": 11132 }, { "epoch": 1.93147120055517, "grad_norm": 0.7630119323730469, "learning_rate": 1.8079578113627815e-07, "loss": 0.4769, "step": 11133 }, { "epoch": 1.9316446911866758, "grad_norm": 1.7729418277740479, "learning_rate": 1.7988289120133507e-07, "loss": 0.3316, "step": 11134 }, { "epoch": 1.9318181818181817, "grad_norm": 0.7646145820617676, "learning_rate": 1.789723014140754e-07, "loss": 0.5846, "step": 11135 }, { "epoch": 1.9319916724496877, "grad_norm": 0.9360604882240295, "learning_rate": 1.7806401188017463e-07, "loss": 0.5652, "step": 11136 }, { "epoch": 1.9321651630811936, "grad_norm": 0.8564902544021606, "learning_rate": 1.77158022705044e-07, "loss": 0.3467, "step": 11137 }, { "epoch": 1.9323386537126996, "grad_norm": 1.0362969636917114, "learning_rate": 1.7625433399382386e-07, "loss": 0.4261, "step": 11138 }, { "epoch": 1.9325121443442055, "grad_norm": 0.7522450089454651, "learning_rate": 1.7535294585138808e-07, "loss": 0.5601, "step": 11139 }, { "epoch": 1.9326856349757113, "grad_norm": 0.6849558353424072, "learning_rate": 1.7445385838234185e-07, "loss": 0.4843, "step": 11140 }, { "epoch": 1.9328591256072172, "grad_norm": 0.7014449238777161, "learning_rate": 1.735570716910262e-07, "loss": 0.5789, "step": 11141 }, { "epoch": 1.933032616238723, "grad_norm": 0.7415981292724609, "learning_rate": 1.7266258588151562e-07, "loss": 0.4229, "step": 11142 }, { "epoch": 1.9332061068702289, "grad_norm": 0.8598527908325195, "learning_rate": 1.71770401057616e-07, "loss": 0.4771, "step": 11143 }, { "epoch": 1.933379597501735, "grad_norm": 0.6886983513832092, "learning_rate": 1.7088051732286448e-07, "loss": 0.5484, "step": 11144 }, { "epoch": 1.9335530881332408, "grad_norm": 1.1314243078231812, "learning_rate": 1.6999293478053404e-07, "loss": 0.4929, "step": 11145 }, { "epoch": 1.9337265787647469, "grad_norm": 0.7523395419120789, "learning_rate": 1.6910765353363334e-07, "loss": 0.4861, "step": 11146 }, { "epoch": 1.9339000693962527, "grad_norm": 0.8565650582313538, "learning_rate": 1.682246736848936e-07, "loss": 0.5773, "step": 11147 }, { "epoch": 1.9340735600277585, "grad_norm": 0.7338639497756958, "learning_rate": 1.6734399533679057e-07, "loss": 0.4894, "step": 11148 }, { "epoch": 1.9342470506592644, "grad_norm": 1.2362143993377686, "learning_rate": 1.6646561859152476e-07, "loss": 0.4126, "step": 11149 }, { "epoch": 1.9344205412907702, "grad_norm": 0.9865709543228149, "learning_rate": 1.6558954355103686e-07, "loss": 0.3748, "step": 11150 }, { "epoch": 1.934594031922276, "grad_norm": 0.7517378330230713, "learning_rate": 1.6471577031699214e-07, "loss": 0.4597, "step": 11151 }, { "epoch": 1.9347675225537821, "grad_norm": 0.8060457706451416, "learning_rate": 1.6384429899079624e-07, "loss": 0.3928, "step": 11152 }, { "epoch": 1.934941013185288, "grad_norm": 0.8719451427459717, "learning_rate": 1.6297512967358374e-07, "loss": 0.348, "step": 11153 }, { "epoch": 1.9351145038167938, "grad_norm": 1.1927706003189087, "learning_rate": 1.6210826246622068e-07, "loss": 0.364, "step": 11154 }, { "epoch": 1.9352879944483, "grad_norm": 1.2058417797088623, "learning_rate": 1.6124369746931102e-07, "loss": 0.5424, "step": 11155 }, { "epoch": 1.9354614850798058, "grad_norm": 0.6051502227783203, "learning_rate": 1.603814347831856e-07, "loss": 0.4691, "step": 11156 }, { "epoch": 1.9356349757113116, "grad_norm": 0.7322439551353455, "learning_rate": 1.595214745079132e-07, "loss": 0.5056, "step": 11157 }, { "epoch": 1.9358084663428174, "grad_norm": 0.6667351722717285, "learning_rate": 1.586638167432919e-07, "loss": 0.5811, "step": 11158 }, { "epoch": 1.9359819569743233, "grad_norm": 0.7149832248687744, "learning_rate": 1.5780846158885533e-07, "loss": 0.5526, "step": 11159 }, { "epoch": 1.9361554476058291, "grad_norm": 1.4899497032165527, "learning_rate": 1.5695540914386632e-07, "loss": 0.5562, "step": 11160 }, { "epoch": 1.9363289382373352, "grad_norm": 0.8246138691902161, "learning_rate": 1.5610465950732569e-07, "loss": 0.5623, "step": 11161 }, { "epoch": 1.936502428868841, "grad_norm": 2.793271541595459, "learning_rate": 1.552562127779611e-07, "loss": 0.4957, "step": 11162 }, { "epoch": 1.9366759195003471, "grad_norm": 0.9781505465507507, "learning_rate": 1.5441006905423605e-07, "loss": 0.4288, "step": 11163 }, { "epoch": 1.936849410131853, "grad_norm": 0.9941627383232117, "learning_rate": 1.5356622843434533e-07, "loss": 0.5642, "step": 11164 }, { "epoch": 1.9370229007633588, "grad_norm": 0.7078642845153809, "learning_rate": 1.527246910162239e-07, "loss": 0.4048, "step": 11165 }, { "epoch": 1.9371963913948647, "grad_norm": 0.6741459965705872, "learning_rate": 1.51885456897527e-07, "loss": 0.4824, "step": 11166 }, { "epoch": 1.9373698820263705, "grad_norm": 0.8584177494049072, "learning_rate": 1.5104852617565004e-07, "loss": 0.4513, "step": 11167 }, { "epoch": 1.9375433726578764, "grad_norm": 0.5705842971801758, "learning_rate": 1.5021389894771753e-07, "loss": 0.5319, "step": 11168 }, { "epoch": 1.9377168632893824, "grad_norm": 0.7550567388534546, "learning_rate": 1.493815753105965e-07, "loss": 0.4417, "step": 11169 }, { "epoch": 1.9378903539208883, "grad_norm": 0.9085505604743958, "learning_rate": 1.4855155536087184e-07, "loss": 0.3988, "step": 11170 }, { "epoch": 1.9380638445523943, "grad_norm": 2.0384068489074707, "learning_rate": 1.47723839194871e-07, "loss": 0.3557, "step": 11171 }, { "epoch": 1.9382373351839002, "grad_norm": 0.9320572018623352, "learning_rate": 1.4689842690865042e-07, "loss": 0.3676, "step": 11172 }, { "epoch": 1.938410825815406, "grad_norm": 0.8271452188491821, "learning_rate": 1.4607531859800238e-07, "loss": 0.4604, "step": 11173 }, { "epoch": 1.9385843164469119, "grad_norm": 0.8859689235687256, "learning_rate": 1.4525451435844608e-07, "loss": 0.3803, "step": 11174 }, { "epoch": 1.9387578070784177, "grad_norm": 0.7108444571495056, "learning_rate": 1.444360142852408e-07, "loss": 0.3311, "step": 11175 }, { "epoch": 1.9389312977099236, "grad_norm": 1.1863503456115723, "learning_rate": 1.436198184733706e-07, "loss": 0.401, "step": 11176 }, { "epoch": 1.9391047883414294, "grad_norm": 1.4052577018737793, "learning_rate": 1.428059270175597e-07, "loss": 0.3954, "step": 11177 }, { "epoch": 1.9392782789729355, "grad_norm": 0.7990099191665649, "learning_rate": 1.4199434001225697e-07, "loss": 0.4471, "step": 11178 }, { "epoch": 1.9394517696044413, "grad_norm": 0.9306536316871643, "learning_rate": 1.411850575516538e-07, "loss": 0.4528, "step": 11179 }, { "epoch": 1.9396252602359474, "grad_norm": 0.9495396614074707, "learning_rate": 1.4037807972966167e-07, "loss": 0.5072, "step": 11180 }, { "epoch": 1.9397987508674532, "grad_norm": 1.8335951566696167, "learning_rate": 1.3957340663993458e-07, "loss": 0.583, "step": 11181 }, { "epoch": 1.939972241498959, "grad_norm": 0.869001567363739, "learning_rate": 1.387710383758556e-07, "loss": 0.4868, "step": 11182 }, { "epoch": 1.940145732130465, "grad_norm": 0.9316543340682983, "learning_rate": 1.3797097503054136e-07, "loss": 0.3813, "step": 11183 }, { "epoch": 1.9403192227619708, "grad_norm": 1.5570003986358643, "learning_rate": 1.3717321669683981e-07, "loss": 0.4044, "step": 11184 }, { "epoch": 1.9404927133934766, "grad_norm": 0.6643952131271362, "learning_rate": 1.3637776346733022e-07, "loss": 0.3754, "step": 11185 }, { "epoch": 1.9406662040249827, "grad_norm": 1.1451090574264526, "learning_rate": 1.3558461543432767e-07, "loss": 0.3491, "step": 11186 }, { "epoch": 1.9408396946564885, "grad_norm": 0.9554349780082703, "learning_rate": 1.3479377268987626e-07, "loss": 0.5867, "step": 11187 }, { "epoch": 1.9410131852879946, "grad_norm": 0.7808243036270142, "learning_rate": 1.3400523532575592e-07, "loss": 0.3839, "step": 11188 }, { "epoch": 1.9411866759195004, "grad_norm": 0.8965674638748169, "learning_rate": 1.332190034334757e-07, "loss": 0.3585, "step": 11189 }, { "epoch": 1.9413601665510063, "grad_norm": 0.901248574256897, "learning_rate": 1.324350771042804e-07, "loss": 0.3152, "step": 11190 }, { "epoch": 1.9415336571825121, "grad_norm": 7.298800468444824, "learning_rate": 1.3165345642914385e-07, "loss": 0.3949, "step": 11191 }, { "epoch": 1.941707147814018, "grad_norm": 1.6301578283309937, "learning_rate": 1.3087414149877574e-07, "loss": 0.4851, "step": 11192 }, { "epoch": 1.9418806384455238, "grad_norm": 1.0139745473861694, "learning_rate": 1.3009713240361488e-07, "loss": 0.5591, "step": 11193 }, { "epoch": 1.9420541290770297, "grad_norm": 1.1971397399902344, "learning_rate": 1.2932242923383575e-07, "loss": 0.4487, "step": 11194 }, { "epoch": 1.9422276197085357, "grad_norm": 1.0054748058319092, "learning_rate": 1.2855003207934203e-07, "loss": 0.3332, "step": 11195 }, { "epoch": 1.9424011103400416, "grad_norm": 0.9131165146827698, "learning_rate": 1.277799410297731e-07, "loss": 0.4636, "step": 11196 }, { "epoch": 1.9425746009715477, "grad_norm": 1.2870208024978638, "learning_rate": 1.2701215617449526e-07, "loss": 0.5453, "step": 11197 }, { "epoch": 1.9427480916030535, "grad_norm": 0.7163132429122925, "learning_rate": 1.26246677602615e-07, "loss": 0.4519, "step": 11198 }, { "epoch": 1.9429215822345594, "grad_norm": 0.9892769455909729, "learning_rate": 1.2548350540296573e-07, "loss": 0.4975, "step": 11199 }, { "epoch": 1.9430950728660652, "grad_norm": 1.3368390798568726, "learning_rate": 1.2472263966411214e-07, "loss": 0.3402, "step": 11200 }, { "epoch": 1.943268563497571, "grad_norm": 1.0240529775619507, "learning_rate": 1.2396408047435694e-07, "loss": 0.3857, "step": 11201 }, { "epoch": 1.943442054129077, "grad_norm": 0.7207836508750916, "learning_rate": 1.2320782792173192e-07, "loss": 0.4889, "step": 11202 }, { "epoch": 1.943615544760583, "grad_norm": 1.571078896522522, "learning_rate": 1.224538820939958e-07, "loss": 0.4353, "step": 11203 }, { "epoch": 1.9437890353920888, "grad_norm": 0.9303411841392517, "learning_rate": 1.2170224307865185e-07, "loss": 0.4258, "step": 11204 }, { "epoch": 1.9439625260235949, "grad_norm": 1.0531816482543945, "learning_rate": 1.2095291096292373e-07, "loss": 0.3763, "step": 11205 }, { "epoch": 1.9441360166551007, "grad_norm": 0.9104992747306824, "learning_rate": 1.2020588583377513e-07, "loss": 0.4343, "step": 11206 }, { "epoch": 1.9443095072866066, "grad_norm": 0.9288526773452759, "learning_rate": 1.1946116777789673e-07, "loss": 0.4338, "step": 11207 }, { "epoch": 1.9444829979181124, "grad_norm": 0.7803632020950317, "learning_rate": 1.187187568817172e-07, "loss": 0.5033, "step": 11208 }, { "epoch": 1.9446564885496183, "grad_norm": 0.7046752572059631, "learning_rate": 1.179786532313898e-07, "loss": 0.5062, "step": 11209 }, { "epoch": 1.944829979181124, "grad_norm": 0.913530707359314, "learning_rate": 1.1724085691280806e-07, "loss": 0.4622, "step": 11210 }, { "epoch": 1.9450034698126302, "grad_norm": 1.06773042678833, "learning_rate": 1.165053680115924e-07, "loss": 0.4644, "step": 11211 }, { "epoch": 1.945176960444136, "grad_norm": 0.8160414099693298, "learning_rate": 1.1577218661309896e-07, "loss": 0.4015, "step": 11212 }, { "epoch": 1.9453504510756419, "grad_norm": 0.8438186645507812, "learning_rate": 1.1504131280241083e-07, "loss": 0.4371, "step": 11213 }, { "epoch": 1.945523941707148, "grad_norm": 0.822489857673645, "learning_rate": 1.1431274666435121e-07, "loss": 0.5104, "step": 11214 }, { "epoch": 1.9456974323386538, "grad_norm": 0.7588160634040833, "learning_rate": 1.1358648828346808e-07, "loss": 0.4696, "step": 11215 }, { "epoch": 1.9458709229701596, "grad_norm": 0.8216853737831116, "learning_rate": 1.1286253774404288e-07, "loss": 0.4314, "step": 11216 }, { "epoch": 1.9460444136016655, "grad_norm": 1.0485059022903442, "learning_rate": 1.121408951300973e-07, "loss": 0.3275, "step": 11217 }, { "epoch": 1.9462179042331713, "grad_norm": 0.6730453968048096, "learning_rate": 1.11421560525371e-07, "loss": 0.432, "step": 11218 }, { "epoch": 1.9463913948646772, "grad_norm": 1.148043155670166, "learning_rate": 1.1070453401335058e-07, "loss": 0.3652, "step": 11219 }, { "epoch": 1.9465648854961832, "grad_norm": 1.0786750316619873, "learning_rate": 1.0998981567724276e-07, "loss": 0.4781, "step": 11220 }, { "epoch": 1.946738376127689, "grad_norm": 1.0060077905654907, "learning_rate": 1.0927740559999455e-07, "loss": 0.4407, "step": 11221 }, { "epoch": 1.9469118667591951, "grad_norm": 0.7473181486129761, "learning_rate": 1.0856730386427983e-07, "loss": 0.4492, "step": 11222 }, { "epoch": 1.947085357390701, "grad_norm": 1.0398026704788208, "learning_rate": 1.078595105525082e-07, "loss": 0.4271, "step": 11223 }, { "epoch": 1.9472588480222068, "grad_norm": 0.624709963798523, "learning_rate": 1.0715402574681843e-07, "loss": 0.5225, "step": 11224 }, { "epoch": 1.9474323386537127, "grad_norm": 0.8374506235122681, "learning_rate": 1.0645084952908502e-07, "loss": 0.3796, "step": 11225 }, { "epoch": 1.9476058292852185, "grad_norm": 1.1964772939682007, "learning_rate": 1.0574998198090935e-07, "loss": 0.334, "step": 11226 }, { "epoch": 1.9477793199167244, "grad_norm": 0.7303481101989746, "learning_rate": 1.0505142318363082e-07, "loss": 0.5492, "step": 11227 }, { "epoch": 1.9479528105482304, "grad_norm": 1.1537619829177856, "learning_rate": 1.0435517321831568e-07, "loss": 0.4274, "step": 11228 }, { "epoch": 1.9481263011797363, "grad_norm": 0.7701857686042786, "learning_rate": 1.0366123216576817e-07, "loss": 0.3414, "step": 11229 }, { "epoch": 1.9482997918112424, "grad_norm": 1.5516411066055298, "learning_rate": 1.0296960010651725e-07, "loss": 0.3502, "step": 11230 }, { "epoch": 1.9484732824427482, "grad_norm": 0.9262827634811401, "learning_rate": 1.0228027712082755e-07, "loss": 0.3613, "step": 11231 }, { "epoch": 1.948646773074254, "grad_norm": 1.0155408382415771, "learning_rate": 1.0159326328869734e-07, "loss": 0.5461, "step": 11232 }, { "epoch": 1.94882026370576, "grad_norm": 0.8864052295684814, "learning_rate": 1.0090855868985616e-07, "loss": 0.4274, "step": 11233 }, { "epoch": 1.9489937543372657, "grad_norm": 0.7725752592086792, "learning_rate": 1.0022616340376489e-07, "loss": 0.5043, "step": 11234 }, { "epoch": 1.9491672449687716, "grad_norm": 0.7712228298187256, "learning_rate": 9.954607750961353e-08, "loss": 0.5599, "step": 11235 }, { "epoch": 1.9493407356002774, "grad_norm": 1.3105132579803467, "learning_rate": 9.886830108632784e-08, "loss": 0.5045, "step": 11236 }, { "epoch": 1.9495142262317835, "grad_norm": 0.7010358572006226, "learning_rate": 9.819283421256709e-08, "loss": 0.4967, "step": 11237 }, { "epoch": 1.9496877168632893, "grad_norm": 0.8237800002098083, "learning_rate": 9.751967696671749e-08, "loss": 0.4004, "step": 11238 }, { "epoch": 1.9498612074947954, "grad_norm": 1.1764156818389893, "learning_rate": 9.684882942690099e-08, "loss": 0.5132, "step": 11239 }, { "epoch": 1.9500346981263013, "grad_norm": 0.6724979877471924, "learning_rate": 9.618029167096865e-08, "loss": 0.5677, "step": 11240 }, { "epoch": 1.950208188757807, "grad_norm": 0.788466215133667, "learning_rate": 9.551406377650507e-08, "loss": 0.3838, "step": 11241 }, { "epoch": 1.950381679389313, "grad_norm": 0.6369549036026001, "learning_rate": 9.485014582083063e-08, "loss": 0.5577, "step": 11242 }, { "epoch": 1.9505551700208188, "grad_norm": 0.7695828080177307, "learning_rate": 9.418853788098814e-08, "loss": 0.541, "step": 11243 }, { "epoch": 1.9507286606523246, "grad_norm": 0.9789599180221558, "learning_rate": 9.352924003376285e-08, "loss": 0.3652, "step": 11244 }, { "epoch": 1.9509021512838307, "grad_norm": 0.75556480884552, "learning_rate": 9.287225235566244e-08, "loss": 0.5649, "step": 11245 }, { "epoch": 1.9510756419153366, "grad_norm": 0.7612143158912659, "learning_rate": 9.221757492293704e-08, "loss": 0.6423, "step": 11246 }, { "epoch": 1.9512491325468426, "grad_norm": 0.9684509634971619, "learning_rate": 9.156520781155698e-08, "loss": 0.4524, "step": 11247 }, { "epoch": 1.9514226231783485, "grad_norm": 0.9864277243614197, "learning_rate": 9.091515109723281e-08, "loss": 0.3969, "step": 11248 }, { "epoch": 1.9515961138098543, "grad_norm": 0.8819375038146973, "learning_rate": 9.026740485540197e-08, "loss": 0.5066, "step": 11249 }, { "epoch": 1.9517696044413602, "grad_norm": 0.841587483882904, "learning_rate": 8.96219691612421e-08, "loss": 0.4746, "step": 11250 }, { "epoch": 1.951943095072866, "grad_norm": 0.8239153027534485, "learning_rate": 8.897884408964885e-08, "loss": 0.3904, "step": 11251 }, { "epoch": 1.9521165857043719, "grad_norm": 0.8045653700828552, "learning_rate": 8.833802971526472e-08, "loss": 0.4621, "step": 11252 }, { "epoch": 1.9522900763358777, "grad_norm": 0.9592332243919373, "learning_rate": 8.769952611245248e-08, "loss": 0.4333, "step": 11253 }, { "epoch": 1.9524635669673838, "grad_norm": 0.8656499981880188, "learning_rate": 8.706333335531503e-08, "loss": 0.571, "step": 11254 }, { "epoch": 1.9526370575988896, "grad_norm": 1.0844917297363281, "learning_rate": 8.642945151767779e-08, "loss": 0.4554, "step": 11255 }, { "epoch": 1.9528105482303957, "grad_norm": 0.8129265904426575, "learning_rate": 8.579788067310858e-08, "loss": 0.4396, "step": 11256 }, { "epoch": 1.9529840388619015, "grad_norm": 1.2656227350234985, "learning_rate": 8.516862089489986e-08, "loss": 0.4579, "step": 11257 }, { "epoch": 1.9531575294934074, "grad_norm": 0.8240922093391418, "learning_rate": 8.454167225607768e-08, "loss": 0.4358, "step": 11258 }, { "epoch": 1.9533310201249132, "grad_norm": 1.1505444049835205, "learning_rate": 8.391703482939939e-08, "loss": 0.4935, "step": 11259 }, { "epoch": 1.953504510756419, "grad_norm": 0.8059971928596497, "learning_rate": 8.32947086873559e-08, "loss": 0.5243, "step": 11260 }, { "epoch": 1.953678001387925, "grad_norm": 0.8615079522132874, "learning_rate": 8.267469390217164e-08, "loss": 0.3658, "step": 11261 }, { "epoch": 1.953851492019431, "grad_norm": 1.0338557958602905, "learning_rate": 8.205699054579575e-08, "loss": 0.386, "step": 11262 }, { "epoch": 1.9540249826509368, "grad_norm": 0.7419182658195496, "learning_rate": 8.14415986899153e-08, "loss": 0.4982, "step": 11263 }, { "epoch": 1.954198473282443, "grad_norm": 0.9103652834892273, "learning_rate": 8.082851840594652e-08, "loss": 0.5511, "step": 11264 }, { "epoch": 1.9543719639139487, "grad_norm": 0.8418128490447998, "learning_rate": 8.021774976503915e-08, "loss": 0.4381, "step": 11265 }, { "epoch": 1.9545454545454546, "grad_norm": 0.7194525599479675, "learning_rate": 7.960929283807429e-08, "loss": 0.5033, "step": 11266 }, { "epoch": 1.9547189451769604, "grad_norm": 0.9392405152320862, "learning_rate": 7.900314769566208e-08, "loss": 0.5702, "step": 11267 }, { "epoch": 1.9548924358084663, "grad_norm": 0.6881068348884583, "learning_rate": 7.83993144081463e-08, "loss": 0.5391, "step": 11268 }, { "epoch": 1.9550659264399721, "grad_norm": 0.7253534197807312, "learning_rate": 7.77977930456042e-08, "loss": 0.4403, "step": 11269 }, { "epoch": 1.9552394170714782, "grad_norm": 0.9656354188919067, "learning_rate": 7.719858367784216e-08, "loss": 0.5089, "step": 11270 }, { "epoch": 1.955412907702984, "grad_norm": 0.573002278804779, "learning_rate": 7.66016863743979e-08, "loss": 0.5603, "step": 11271 }, { "epoch": 1.9555863983344899, "grad_norm": 0.6548886299133301, "learning_rate": 7.600710120454491e-08, "loss": 0.5315, "step": 11272 }, { "epoch": 1.955759888965996, "grad_norm": 3.2776379585266113, "learning_rate": 7.541482823728352e-08, "loss": 0.4942, "step": 11273 }, { "epoch": 1.9559333795975018, "grad_norm": 1.1634551286697388, "learning_rate": 7.482486754134765e-08, "loss": 0.4874, "step": 11274 }, { "epoch": 1.9561068702290076, "grad_norm": 0.9876819849014282, "learning_rate": 7.423721918520477e-08, "loss": 0.5181, "step": 11275 }, { "epoch": 1.9562803608605135, "grad_norm": 0.7193216681480408, "learning_rate": 7.365188323704919e-08, "loss": 0.4337, "step": 11276 }, { "epoch": 1.9564538514920193, "grad_norm": 0.8667590618133545, "learning_rate": 7.306885976481104e-08, "loss": 0.4097, "step": 11277 }, { "epoch": 1.9566273421235252, "grad_norm": 0.9627739787101746, "learning_rate": 7.248814883615174e-08, "loss": 0.5001, "step": 11278 }, { "epoch": 1.9568008327550312, "grad_norm": 0.7978907823562622, "learning_rate": 7.190975051846406e-08, "loss": 0.4882, "step": 11279 }, { "epoch": 1.956974323386537, "grad_norm": 0.685856282711029, "learning_rate": 7.133366487886762e-08, "loss": 0.6051, "step": 11280 }, { "epoch": 1.9571478140180432, "grad_norm": 0.6509477496147156, "learning_rate": 7.07598919842245e-08, "loss": 0.6182, "step": 11281 }, { "epoch": 1.957321304649549, "grad_norm": 0.8358609676361084, "learning_rate": 7.018843190111479e-08, "loss": 0.3975, "step": 11282 }, { "epoch": 1.9574947952810549, "grad_norm": 1.0334913730621338, "learning_rate": 6.961928469586321e-08, "loss": 0.5237, "step": 11283 }, { "epoch": 1.9576682859125607, "grad_norm": 0.5888184309005737, "learning_rate": 6.90524504345147e-08, "loss": 0.4191, "step": 11284 }, { "epoch": 1.9578417765440665, "grad_norm": 0.9598404169082642, "learning_rate": 6.848792918285663e-08, "loss": 0.4172, "step": 11285 }, { "epoch": 1.9580152671755724, "grad_norm": 1.0853744745254517, "learning_rate": 6.792572100639661e-08, "loss": 0.344, "step": 11286 }, { "epoch": 1.9581887578070785, "grad_norm": 0.9685314893722534, "learning_rate": 6.736582597038243e-08, "loss": 0.4727, "step": 11287 }, { "epoch": 1.9583622484385843, "grad_norm": 0.7487372159957886, "learning_rate": 6.680824413979103e-08, "loss": 0.4193, "step": 11288 }, { "epoch": 1.9585357390700904, "grad_norm": 0.9230625629425049, "learning_rate": 6.625297557932842e-08, "loss": 0.3921, "step": 11289 }, { "epoch": 1.9587092297015962, "grad_norm": 0.8867302536964417, "learning_rate": 6.570002035343636e-08, "loss": 0.3885, "step": 11290 }, { "epoch": 1.958882720333102, "grad_norm": 1.5723206996917725, "learning_rate": 6.514937852628578e-08, "loss": 0.3677, "step": 11291 }, { "epoch": 1.959056210964608, "grad_norm": 2.336076498031616, "learning_rate": 6.460105016177887e-08, "loss": 0.439, "step": 11292 }, { "epoch": 1.9592297015961138, "grad_norm": 0.865355908870697, "learning_rate": 6.405503532354695e-08, "loss": 0.402, "step": 11293 }, { "epoch": 1.9594031922276196, "grad_norm": 1.979523777961731, "learning_rate": 6.351133407495936e-08, "loss": 0.5083, "step": 11294 }, { "epoch": 1.9595766828591255, "grad_norm": 0.8357073068618774, "learning_rate": 6.296994647911448e-08, "loss": 0.4095, "step": 11295 }, { "epoch": 1.9597501734906315, "grad_norm": 0.9894707202911377, "learning_rate": 6.24308725988354e-08, "loss": 0.4575, "step": 11296 }, { "epoch": 1.9599236641221374, "grad_norm": 0.8860669136047363, "learning_rate": 6.189411249668542e-08, "loss": 0.3809, "step": 11297 }, { "epoch": 1.9600971547536434, "grad_norm": 0.9638963937759399, "learning_rate": 6.135966623495915e-08, "loss": 0.3666, "step": 11298 }, { "epoch": 1.9602706453851493, "grad_norm": 1.0723718404769897, "learning_rate": 6.082753387567364e-08, "loss": 0.496, "step": 11299 }, { "epoch": 1.9604441360166551, "grad_norm": 1.1697533130645752, "learning_rate": 6.029771548058838e-08, "loss": 0.4438, "step": 11300 }, { "epoch": 1.960617626648161, "grad_norm": 0.9623770117759705, "learning_rate": 5.977021111118752e-08, "loss": 0.348, "step": 11301 }, { "epoch": 1.9607911172796668, "grad_norm": 1.0842669010162354, "learning_rate": 5.924502082868655e-08, "loss": 0.3619, "step": 11302 }, { "epoch": 1.9609646079111727, "grad_norm": 1.1347887516021729, "learning_rate": 5.872214469403892e-08, "loss": 0.4243, "step": 11303 }, { "epoch": 1.9611380985426787, "grad_norm": 0.8801885843276978, "learning_rate": 5.820158276792054e-08, "loss": 0.4603, "step": 11304 }, { "epoch": 1.9613115891741846, "grad_norm": 0.6171122193336487, "learning_rate": 5.768333511074753e-08, "loss": 0.5493, "step": 11305 }, { "epoch": 1.9614850798056906, "grad_norm": 0.8176255822181702, "learning_rate": 5.716740178266067e-08, "loss": 0.4808, "step": 11306 }, { "epoch": 1.9616585704371965, "grad_norm": 0.7006524205207825, "learning_rate": 5.665378284353207e-08, "loss": 0.5588, "step": 11307 }, { "epoch": 1.9618320610687023, "grad_norm": 0.9156227707862854, "learning_rate": 5.614247835297404e-08, "loss": 0.5604, "step": 11308 }, { "epoch": 1.9620055517002082, "grad_norm": 0.8872699737548828, "learning_rate": 5.563348837031912e-08, "loss": 0.4641, "step": 11309 }, { "epoch": 1.962179042331714, "grad_norm": 0.9895948171615601, "learning_rate": 5.512681295463784e-08, "loss": 0.449, "step": 11310 }, { "epoch": 1.9623525329632199, "grad_norm": 1.2851054668426514, "learning_rate": 5.462245216472983e-08, "loss": 0.4772, "step": 11311 }, { "epoch": 1.9625260235947257, "grad_norm": 0.9638526439666748, "learning_rate": 5.4120406059128274e-08, "loss": 0.4779, "step": 11312 }, { "epoch": 1.9626995142262318, "grad_norm": 0.9218172430992126, "learning_rate": 5.362067469609322e-08, "loss": 0.4899, "step": 11313 }, { "epoch": 1.9628730048577376, "grad_norm": 0.8112190365791321, "learning_rate": 5.312325813362274e-08, "loss": 0.4637, "step": 11314 }, { "epoch": 1.9630464954892437, "grad_norm": 0.9659833908081055, "learning_rate": 5.262815642943953e-08, "loss": 0.4358, "step": 11315 }, { "epoch": 1.9632199861207495, "grad_norm": 0.9530385732650757, "learning_rate": 5.21353696410043e-08, "loss": 0.5176, "step": 11316 }, { "epoch": 1.9633934767522554, "grad_norm": 0.8164271712303162, "learning_rate": 5.1644897825502416e-08, "loss": 0.4286, "step": 11317 }, { "epoch": 1.9635669673837612, "grad_norm": 0.9133117198944092, "learning_rate": 5.11567410398528e-08, "loss": 0.525, "step": 11318 }, { "epoch": 1.963740458015267, "grad_norm": 0.8649409413337708, "learning_rate": 5.0670899340710124e-08, "loss": 0.4792, "step": 11319 }, { "epoch": 1.963913948646773, "grad_norm": 0.8076494932174683, "learning_rate": 5.0187372784453734e-08, "loss": 0.4442, "step": 11320 }, { "epoch": 1.964087439278279, "grad_norm": 1.2337809801101685, "learning_rate": 4.970616142720097e-08, "loss": 0.4258, "step": 11321 }, { "epoch": 1.9642609299097848, "grad_norm": 0.6409690380096436, "learning_rate": 4.922726532479383e-08, "loss": 0.5856, "step": 11322 }, { "epoch": 1.964434420541291, "grad_norm": 0.9598265886306763, "learning_rate": 4.8750684532810064e-08, "loss": 0.4957, "step": 11323 }, { "epoch": 1.9646079111727968, "grad_norm": 1.5102465152740479, "learning_rate": 4.827641910655656e-08, "loss": 0.4661, "step": 11324 }, { "epoch": 1.9647814018043026, "grad_norm": 0.7760123610496521, "learning_rate": 4.7804469101073725e-08, "loss": 0.4912, "step": 11325 }, { "epoch": 1.9649548924358085, "grad_norm": 0.7980136871337891, "learning_rate": 4.7334834571128866e-08, "loss": 0.4211, "step": 11326 }, { "epoch": 1.9651283830673143, "grad_norm": 0.8376246094703674, "learning_rate": 4.6867515571229485e-08, "loss": 0.3892, "step": 11327 }, { "epoch": 1.9653018736988201, "grad_norm": 0.8894677758216858, "learning_rate": 4.640251215560332e-08, "loss": 0.384, "step": 11328 }, { "epoch": 1.9654753643303262, "grad_norm": 0.8758121728897095, "learning_rate": 4.593982437821609e-08, "loss": 0.3887, "step": 11329 }, { "epoch": 1.965648854961832, "grad_norm": 0.7407442927360535, "learning_rate": 4.547945229276263e-08, "loss": 0.4206, "step": 11330 }, { "epoch": 1.965822345593338, "grad_norm": 0.7268120646476746, "learning_rate": 4.5021395952671297e-08, "loss": 0.5168, "step": 11331 }, { "epoch": 1.965995836224844, "grad_norm": 0.9915668368339539, "learning_rate": 4.456565541109958e-08, "loss": 0.4204, "step": 11332 }, { "epoch": 1.9661693268563498, "grad_norm": 1.1841089725494385, "learning_rate": 4.411223072093629e-08, "loss": 0.4003, "step": 11333 }, { "epoch": 1.9663428174878557, "grad_norm": 0.8532261848449707, "learning_rate": 4.366112193480154e-08, "loss": 0.4045, "step": 11334 }, { "epoch": 1.9665163081193615, "grad_norm": 0.8033369183540344, "learning_rate": 4.32123291050468e-08, "loss": 0.5156, "step": 11335 }, { "epoch": 1.9666897987508674, "grad_norm": 0.7514310479164124, "learning_rate": 4.276585228375485e-08, "loss": 0.4547, "step": 11336 }, { "epoch": 1.9668632893823732, "grad_norm": 0.7093128561973572, "learning_rate": 4.2321691522742013e-08, "loss": 0.543, "step": 11337 }, { "epoch": 1.9670367800138793, "grad_norm": 0.8851273059844971, "learning_rate": 4.187984687355151e-08, "loss": 0.4689, "step": 11338 }, { "epoch": 1.9672102706453851, "grad_norm": 1.343516230583191, "learning_rate": 4.14403183874601e-08, "loss": 0.4718, "step": 11339 }, { "epoch": 1.9673837612768912, "grad_norm": 0.8390950560569763, "learning_rate": 4.100310611547809e-08, "loss": 0.5067, "step": 11340 }, { "epoch": 1.967557251908397, "grad_norm": 0.7784774303436279, "learning_rate": 4.056821010834045e-08, "loss": 0.3678, "step": 11341 }, { "epoch": 1.9677307425399029, "grad_norm": 0.8302329778671265, "learning_rate": 4.013563041652013e-08, "loss": 0.5221, "step": 11342 }, { "epoch": 1.9679042331714087, "grad_norm": 1.199031114578247, "learning_rate": 3.9705367090216995e-08, "loss": 0.3467, "step": 11343 }, { "epoch": 1.9680777238029146, "grad_norm": 0.9485912919044495, "learning_rate": 3.927742017936664e-08, "loss": 0.3676, "step": 11344 }, { "epoch": 1.9682512144344204, "grad_norm": 0.9357582926750183, "learning_rate": 3.885178973362713e-08, "loss": 0.454, "step": 11345 }, { "epoch": 1.9684247050659265, "grad_norm": 0.74022376537323, "learning_rate": 3.8428475802398944e-08, "loss": 0.5356, "step": 11346 }, { "epoch": 1.9685981956974323, "grad_norm": 0.6848518252372742, "learning_rate": 3.800747843480501e-08, "loss": 0.5144, "step": 11347 }, { "epoch": 1.9687716863289384, "grad_norm": 0.937227189540863, "learning_rate": 3.7588797679706245e-08, "loss": 0.399, "step": 11348 }, { "epoch": 1.9689451769604442, "grad_norm": 0.6459150314331055, "learning_rate": 3.717243358568379e-08, "loss": 0.5599, "step": 11349 }, { "epoch": 1.96911866759195, "grad_norm": 0.8304569125175476, "learning_rate": 3.6758386201065645e-08, "loss": 0.45, "step": 11350 }, { "epoch": 1.969292158223456, "grad_norm": 1.2463147640228271, "learning_rate": 3.6346655573897823e-08, "loss": 0.3424, "step": 11351 }, { "epoch": 1.9694656488549618, "grad_norm": 0.9651402831077576, "learning_rate": 3.5937241751962115e-08, "loss": 0.532, "step": 11352 }, { "epoch": 1.9696391394864676, "grad_norm": 0.8306807279586792, "learning_rate": 3.5530144782771616e-08, "loss": 0.4624, "step": 11353 }, { "epoch": 1.9698126301179735, "grad_norm": 1.2534830570220947, "learning_rate": 3.5125364713572976e-08, "loss": 0.353, "step": 11354 }, { "epoch": 1.9699861207494795, "grad_norm": 1.3988927602767944, "learning_rate": 3.472290159133751e-08, "loss": 0.4821, "step": 11355 }, { "epoch": 1.9701596113809854, "grad_norm": 0.7488888502120972, "learning_rate": 3.4322755462774525e-08, "loss": 0.604, "step": 11356 }, { "epoch": 1.9703331020124915, "grad_norm": 1.0150195360183716, "learning_rate": 3.392492637432021e-08, "loss": 0.4247, "step": 11357 }, { "epoch": 1.9705065926439973, "grad_norm": 0.9276250600814819, "learning_rate": 3.3529414372142074e-08, "loss": 0.3502, "step": 11358 }, { "epoch": 1.9706800832755031, "grad_norm": 1.1141239404678345, "learning_rate": 3.3136219502143406e-08, "loss": 0.3543, "step": 11359 }, { "epoch": 1.970853573907009, "grad_norm": 0.6158331632614136, "learning_rate": 3.2745341809949923e-08, "loss": 0.5022, "step": 11360 }, { "epoch": 1.9710270645385148, "grad_norm": 0.9471350312232971, "learning_rate": 3.235678134092757e-08, "loss": 0.3702, "step": 11361 }, { "epoch": 1.9712005551700207, "grad_norm": 0.8030177354812622, "learning_rate": 3.1970538140166927e-08, "loss": 0.601, "step": 11362 }, { "epoch": 1.9713740458015268, "grad_norm": 0.9062731266021729, "learning_rate": 3.158661225249437e-08, "loss": 0.4815, "step": 11363 }, { "epoch": 1.9715475364330326, "grad_norm": 0.655491054058075, "learning_rate": 3.1205003722460935e-08, "loss": 0.481, "step": 11364 }, { "epoch": 1.9717210270645387, "grad_norm": 0.81975257396698, "learning_rate": 3.082571259435563e-08, "loss": 0.4901, "step": 11365 }, { "epoch": 1.9718945176960445, "grad_norm": 0.8341805338859558, "learning_rate": 3.0448738912196574e-08, "loss": 0.344, "step": 11366 }, { "epoch": 1.9720680083275504, "grad_norm": 1.1064727306365967, "learning_rate": 3.007408271972878e-08, "loss": 0.3383, "step": 11367 }, { "epoch": 1.9722414989590562, "grad_norm": 1.14534592628479, "learning_rate": 2.9701744060435246e-08, "loss": 0.3644, "step": 11368 }, { "epoch": 1.972414989590562, "grad_norm": 1.072205901145935, "learning_rate": 2.9331722977523625e-08, "loss": 0.3726, "step": 11369 }, { "epoch": 1.972588480222068, "grad_norm": 0.8797577619552612, "learning_rate": 2.8964019513935126e-08, "loss": 0.4996, "step": 11370 }, { "epoch": 1.9727619708535737, "grad_norm": 1.1882641315460205, "learning_rate": 2.8598633712342283e-08, "loss": 0.4749, "step": 11371 }, { "epoch": 1.9729354614850798, "grad_norm": 0.8657835721969604, "learning_rate": 2.8235565615151172e-08, "loss": 0.4869, "step": 11372 }, { "epoch": 1.9731089521165857, "grad_norm": 1.03721284866333, "learning_rate": 2.787481526449476e-08, "loss": 0.3392, "step": 11373 }, { "epoch": 1.9732824427480917, "grad_norm": 0.7597808241844177, "learning_rate": 2.7516382702235112e-08, "loss": 0.4977, "step": 11374 }, { "epoch": 1.9734559333795976, "grad_norm": 0.8189690113067627, "learning_rate": 2.7160267969974508e-08, "loss": 0.3325, "step": 11375 }, { "epoch": 1.9736294240111034, "grad_norm": 0.7845630645751953, "learning_rate": 2.6806471109037668e-08, "loss": 0.4585, "step": 11376 }, { "epoch": 1.9738029146426093, "grad_norm": 0.7243348956108093, "learning_rate": 2.645499216048286e-08, "loss": 0.4583, "step": 11377 }, { "epoch": 1.973976405274115, "grad_norm": 1.1351598501205444, "learning_rate": 2.6105831165099683e-08, "loss": 0.3558, "step": 11378 }, { "epoch": 1.974149895905621, "grad_norm": 1.3864905834197998, "learning_rate": 2.575898816340905e-08, "loss": 0.3636, "step": 11379 }, { "epoch": 1.974323386537127, "grad_norm": 0.6895972490310669, "learning_rate": 2.541446319566321e-08, "loss": 0.4867, "step": 11380 }, { "epoch": 1.9744968771686329, "grad_norm": 0.7314673066139221, "learning_rate": 2.5072256301843513e-08, "loss": 0.4059, "step": 11381 }, { "epoch": 1.974670367800139, "grad_norm": 1.2502267360687256, "learning_rate": 2.473236752166264e-08, "loss": 0.3456, "step": 11382 }, { "epoch": 1.9748438584316448, "grad_norm": 0.7512307167053223, "learning_rate": 2.4394796894566807e-08, "loss": 0.4225, "step": 11383 }, { "epoch": 1.9750173490631506, "grad_norm": 0.8592346906661987, "learning_rate": 2.4059544459731356e-08, "loss": 0.4131, "step": 11384 }, { "epoch": 1.9751908396946565, "grad_norm": 1.1116094589233398, "learning_rate": 2.3726610256062933e-08, "loss": 0.4628, "step": 11385 }, { "epoch": 1.9753643303261623, "grad_norm": 1.1277997493743896, "learning_rate": 2.3395994322199522e-08, "loss": 0.429, "step": 11386 }, { "epoch": 1.9755378209576682, "grad_norm": 0.8579531908035278, "learning_rate": 2.3067696696505992e-08, "loss": 0.5604, "step": 11387 }, { "epoch": 1.9757113115891742, "grad_norm": 1.0007001161575317, "learning_rate": 2.2741717417085196e-08, "loss": 0.4213, "step": 11388 }, { "epoch": 1.97588480222068, "grad_norm": 0.8642428517341614, "learning_rate": 2.2418056521764653e-08, "loss": 0.4944, "step": 11389 }, { "epoch": 1.976058292852186, "grad_norm": 1.2023932933807373, "learning_rate": 2.2096714048109867e-08, "loss": 0.5009, "step": 11390 }, { "epoch": 1.976231783483692, "grad_norm": 0.8968191146850586, "learning_rate": 2.1777690033408795e-08, "loss": 0.4225, "step": 11391 }, { "epoch": 1.9764052741151978, "grad_norm": 0.76925128698349, "learning_rate": 2.1460984514685145e-08, "loss": 0.5323, "step": 11392 }, { "epoch": 1.9765787647467037, "grad_norm": 0.8122766613960266, "learning_rate": 2.114659752869619e-08, "loss": 0.3684, "step": 11393 }, { "epoch": 1.9767522553782095, "grad_norm": 0.7954847812652588, "learning_rate": 2.083452911192163e-08, "loss": 0.6046, "step": 11394 }, { "epoch": 1.9769257460097154, "grad_norm": 0.7282482981681824, "learning_rate": 2.0524779300581386e-08, "loss": 0.4869, "step": 11395 }, { "epoch": 1.9770992366412212, "grad_norm": 0.833308219909668, "learning_rate": 2.021734813062226e-08, "loss": 0.5249, "step": 11396 }, { "epoch": 1.9772727272727273, "grad_norm": 0.7510433793067932, "learning_rate": 1.9912235637720156e-08, "loss": 0.5702, "step": 11397 }, { "epoch": 1.9774462179042331, "grad_norm": 0.7454752326011658, "learning_rate": 1.960944185728675e-08, "loss": 0.5729, "step": 11398 }, { "epoch": 1.9776197085357392, "grad_norm": 0.7768095135688782, "learning_rate": 1.9308966824456154e-08, "loss": 0.4779, "step": 11399 }, { "epoch": 1.977793199167245, "grad_norm": 0.8578220009803772, "learning_rate": 1.9010810574102702e-08, "loss": 0.4636, "step": 11400 }, { "epoch": 1.977966689798751, "grad_norm": 0.8072434067726135, "learning_rate": 1.8714973140827596e-08, "loss": 0.4553, "step": 11401 }, { "epoch": 1.9781401804302567, "grad_norm": 0.8005666732788086, "learning_rate": 1.842145455896338e-08, "loss": 0.3922, "step": 11402 }, { "epoch": 1.9783136710617626, "grad_norm": 0.8736897706985474, "learning_rate": 1.8130254862571693e-08, "loss": 0.4479, "step": 11403 }, { "epoch": 1.9784871616932684, "grad_norm": 1.0670888423919678, "learning_rate": 1.7841374085447728e-08, "loss": 0.3694, "step": 11404 }, { "epoch": 1.9786606523247745, "grad_norm": 0.860889196395874, "learning_rate": 1.7554812261117994e-08, "loss": 0.3245, "step": 11405 }, { "epoch": 1.9788341429562804, "grad_norm": 0.9610465168952942, "learning_rate": 1.727056942283367e-08, "loss": 0.4326, "step": 11406 }, { "epoch": 1.9790076335877864, "grad_norm": 0.8125248551368713, "learning_rate": 1.6988645603586153e-08, "loss": 0.5042, "step": 11407 }, { "epoch": 1.9791811242192923, "grad_norm": 0.7917909026145935, "learning_rate": 1.6709040836089262e-08, "loss": 0.5035, "step": 11408 }, { "epoch": 1.979354614850798, "grad_norm": 0.9587294459342957, "learning_rate": 1.6431755152794827e-08, "loss": 0.3547, "step": 11409 }, { "epoch": 1.979528105482304, "grad_norm": 1.0159237384796143, "learning_rate": 1.6156788585879325e-08, "loss": 0.5345, "step": 11410 }, { "epoch": 1.9797015961138098, "grad_norm": 0.8712301850318909, "learning_rate": 1.5884141167255007e-08, "loss": 0.4755, "step": 11411 }, { "epoch": 1.9798750867453156, "grad_norm": 0.683390736579895, "learning_rate": 1.5613812928563233e-08, "loss": 0.4174, "step": 11412 }, { "epoch": 1.9800485773768215, "grad_norm": 0.6609227061271667, "learning_rate": 1.534580390117446e-08, "loss": 0.5688, "step": 11413 }, { "epoch": 1.9802220680083276, "grad_norm": 0.8728233575820923, "learning_rate": 1.508011411619048e-08, "loss": 0.4512, "step": 11414 }, { "epoch": 1.9803955586398334, "grad_norm": 1.1786134243011475, "learning_rate": 1.4816743604448846e-08, "loss": 0.4167, "step": 11415 }, { "epoch": 1.9805690492713395, "grad_norm": 0.8179705739021301, "learning_rate": 1.4555692396509557e-08, "loss": 0.4727, "step": 11416 }, { "epoch": 1.9807425399028453, "grad_norm": 0.765632688999176, "learning_rate": 1.42969605226706e-08, "loss": 0.5857, "step": 11417 }, { "epoch": 1.9809160305343512, "grad_norm": 0.9371681809425354, "learning_rate": 1.4040548012956844e-08, "loss": 0.4243, "step": 11418 }, { "epoch": 1.981089521165857, "grad_norm": 0.8259038329124451, "learning_rate": 1.378645489712449e-08, "loss": 0.3293, "step": 11419 }, { "epoch": 1.9812630117973629, "grad_norm": 1.594635009765625, "learning_rate": 1.3534681204665502e-08, "loss": 0.5088, "step": 11420 }, { "epoch": 1.9814365024288687, "grad_norm": 0.6444791555404663, "learning_rate": 1.3285226964794284e-08, "loss": 0.5004, "step": 11421 }, { "epoch": 1.9816099930603748, "grad_norm": 1.0436303615570068, "learning_rate": 1.3038092206461017e-08, "loss": 0.3192, "step": 11422 }, { "epoch": 1.9817834836918806, "grad_norm": 0.865827739238739, "learning_rate": 1.2793276958347201e-08, "loss": 0.4277, "step": 11423 }, { "epoch": 1.9819569743233867, "grad_norm": 1.2612078189849854, "learning_rate": 1.2550781248863442e-08, "loss": 0.3894, "step": 11424 }, { "epoch": 1.9821304649548925, "grad_norm": 0.7467960715293884, "learning_rate": 1.2310605106149454e-08, "loss": 0.4304, "step": 11425 }, { "epoch": 1.9823039555863984, "grad_norm": 1.9402074813842773, "learning_rate": 1.2072748558082936e-08, "loss": 0.4438, "step": 11426 }, { "epoch": 1.9824774462179042, "grad_norm": 0.782226026058197, "learning_rate": 1.1837211632264034e-08, "loss": 0.363, "step": 11427 }, { "epoch": 1.98265093684941, "grad_norm": 0.7636884450912476, "learning_rate": 1.1603994356026437e-08, "loss": 0.433, "step": 11428 }, { "epoch": 1.982824427480916, "grad_norm": 0.9060726165771484, "learning_rate": 1.1373096756437385e-08, "loss": 0.3583, "step": 11429 }, { "epoch": 1.9829979181124218, "grad_norm": 1.1688307523727417, "learning_rate": 1.1144518860290998e-08, "loss": 0.3875, "step": 11430 }, { "epoch": 1.9831714087439278, "grad_norm": 1.0081478357315063, "learning_rate": 1.0918260694114947e-08, "loss": 0.3859, "step": 11431 }, { "epoch": 1.9833448993754337, "grad_norm": 0.7923986315727234, "learning_rate": 1.0694322284166003e-08, "loss": 0.4458, "step": 11432 }, { "epoch": 1.9835183900069397, "grad_norm": 1.339667797088623, "learning_rate": 1.047270365643449e-08, "loss": 0.4237, "step": 11433 }, { "epoch": 1.9836918806384456, "grad_norm": 0.7551453709602356, "learning_rate": 1.0253404836637615e-08, "loss": 0.4138, "step": 11434 }, { "epoch": 1.9838653712699514, "grad_norm": 0.7832826375961304, "learning_rate": 1.0036425850226129e-08, "loss": 0.5472, "step": 11435 }, { "epoch": 1.9840388619014573, "grad_norm": 0.7386212944984436, "learning_rate": 9.821766722379888e-09, "loss": 0.5157, "step": 11436 }, { "epoch": 1.9842123525329631, "grad_norm": 1.4132112264633179, "learning_rate": 9.609427478010081e-09, "loss": 0.4332, "step": 11437 }, { "epoch": 1.984385843164469, "grad_norm": 0.6954006552696228, "learning_rate": 9.399408141761434e-09, "loss": 0.4255, "step": 11438 }, { "epoch": 1.984559333795975, "grad_norm": 0.8655229210853577, "learning_rate": 9.191708738003346e-09, "loss": 0.4097, "step": 11439 }, { "epoch": 1.984732824427481, "grad_norm": 1.0122206211090088, "learning_rate": 8.9863292908432e-09, "loss": 0.3265, "step": 11440 }, { "epoch": 1.984906315058987, "grad_norm": 0.6448938250541687, "learning_rate": 8.78326982411304e-09, "loss": 0.5307, "step": 11441 }, { "epoch": 1.9850798056904928, "grad_norm": 1.0658587217330933, "learning_rate": 8.58253036137846e-09, "loss": 0.4203, "step": 11442 }, { "epoch": 1.9852532963219987, "grad_norm": 0.8777505159378052, "learning_rate": 8.384110925936384e-09, "loss": 0.3995, "step": 11443 }, { "epoch": 1.9854267869535045, "grad_norm": 0.9446192979812622, "learning_rate": 8.188011540812834e-09, "loss": 0.4147, "step": 11444 }, { "epoch": 1.9856002775850103, "grad_norm": 0.7639160752296448, "learning_rate": 7.994232228765164e-09, "loss": 0.4045, "step": 11445 }, { "epoch": 1.9857737682165162, "grad_norm": 0.81900554895401, "learning_rate": 7.80277301228205e-09, "loss": 0.4487, "step": 11446 }, { "epoch": 1.9859472588480223, "grad_norm": 0.8122382760047913, "learning_rate": 7.613633913583495e-09, "loss": 0.5093, "step": 11447 }, { "epoch": 1.986120749479528, "grad_norm": 0.8263910412788391, "learning_rate": 7.426814954618611e-09, "loss": 0.5033, "step": 11448 }, { "epoch": 1.9862942401110342, "grad_norm": 0.7352614998817444, "learning_rate": 7.24231615706561e-09, "loss": 0.5255, "step": 11449 }, { "epoch": 1.98646773074254, "grad_norm": 0.7796475887298584, "learning_rate": 7.060137542340695e-09, "loss": 0.4988, "step": 11450 }, { "epoch": 1.9866412213740459, "grad_norm": 0.816645085811615, "learning_rate": 6.8802791315825125e-09, "loss": 0.5663, "step": 11451 }, { "epoch": 1.9868147120055517, "grad_norm": 3.847252607345581, "learning_rate": 6.702740945663256e-09, "loss": 0.6187, "step": 11452 }, { "epoch": 1.9869882026370576, "grad_norm": 0.8500548601150513, "learning_rate": 6.527523005188663e-09, "loss": 0.5137, "step": 11453 }, { "epoch": 1.9871616932685634, "grad_norm": 0.9805374145507812, "learning_rate": 6.35462533049358e-09, "loss": 0.4279, "step": 11454 }, { "epoch": 1.9873351839000692, "grad_norm": 0.7661542892456055, "learning_rate": 6.184047941639737e-09, "loss": 0.5093, "step": 11455 }, { "epoch": 1.9875086745315753, "grad_norm": 0.9448789954185486, "learning_rate": 6.0157908584246305e-09, "loss": 0.3073, "step": 11456 }, { "epoch": 1.9876821651630812, "grad_norm": 1.0407168865203857, "learning_rate": 5.8498541003748634e-09, "loss": 0.4161, "step": 11457 }, { "epoch": 1.9878556557945872, "grad_norm": 0.884189784526825, "learning_rate": 5.686237686746143e-09, "loss": 0.4435, "step": 11458 }, { "epoch": 1.988029146426093, "grad_norm": 1.0521981716156006, "learning_rate": 5.5249416365299455e-09, "loss": 0.3564, "step": 11459 }, { "epoch": 1.988202637057599, "grad_norm": 1.0453505516052246, "learning_rate": 5.36596596844019e-09, "loss": 0.3783, "step": 11460 }, { "epoch": 1.9883761276891048, "grad_norm": 0.8650076389312744, "learning_rate": 5.209310700931003e-09, "loss": 0.4122, "step": 11461 }, { "epoch": 1.9885496183206106, "grad_norm": 1.120578408241272, "learning_rate": 5.054975852176736e-09, "loss": 0.3675, "step": 11462 }, { "epoch": 1.9887231089521165, "grad_norm": 0.8298072218894958, "learning_rate": 4.9029614400941675e-09, "loss": 0.5121, "step": 11463 }, { "epoch": 1.9888965995836225, "grad_norm": 0.7727584838867188, "learning_rate": 4.7532674823203e-09, "loss": 0.4401, "step": 11464 }, { "epoch": 1.9890700902151284, "grad_norm": 0.7380238175392151, "learning_rate": 4.605893996227906e-09, "loss": 0.3819, "step": 11465 }, { "epoch": 1.9892435808466344, "grad_norm": 0.8323608040809631, "learning_rate": 4.4608409989232995e-09, "loss": 0.4757, "step": 11466 }, { "epoch": 1.9894170714781403, "grad_norm": 0.8822579979896545, "learning_rate": 4.318108507235241e-09, "loss": 0.5186, "step": 11467 }, { "epoch": 1.9895905621096461, "grad_norm": 0.8925347924232483, "learning_rate": 4.17769653773048e-09, "loss": 0.3621, "step": 11468 }, { "epoch": 1.989764052741152, "grad_norm": 1.0496636629104614, "learning_rate": 4.039605106704869e-09, "loss": 0.4544, "step": 11469 }, { "epoch": 1.9899375433726578, "grad_norm": 0.8532253503799438, "learning_rate": 3.903834230183368e-09, "loss": 0.3895, "step": 11470 }, { "epoch": 1.9901110340041637, "grad_norm": 0.8864930868148804, "learning_rate": 3.77038392392004e-09, "loss": 0.4286, "step": 11471 }, { "epoch": 1.9902845246356695, "grad_norm": 0.8147814273834229, "learning_rate": 3.639254203406939e-09, "loss": 0.3987, "step": 11472 }, { "epoch": 1.9904580152671756, "grad_norm": 0.9215081930160522, "learning_rate": 3.5104450838563396e-09, "loss": 0.4346, "step": 11473 }, { "epoch": 1.9906315058986814, "grad_norm": 0.771024763584137, "learning_rate": 3.383956580218506e-09, "loss": 0.4662, "step": 11474 }, { "epoch": 1.9908049965301875, "grad_norm": 0.9461677074432373, "learning_rate": 3.2597887071750266e-09, "loss": 0.4031, "step": 11475 }, { "epoch": 1.9909784871616933, "grad_norm": 0.8339713215827942, "learning_rate": 3.1379414791343766e-09, "loss": 0.428, "step": 11476 }, { "epoch": 1.9911519777931992, "grad_norm": 1.1781772375106812, "learning_rate": 3.0184149102341354e-09, "loss": 0.4286, "step": 11477 }, { "epoch": 1.991325468424705, "grad_norm": 0.877213180065155, "learning_rate": 2.9012090143498704e-09, "loss": 0.4603, "step": 11478 }, { "epoch": 1.9914989590562109, "grad_norm": 0.7394884824752808, "learning_rate": 2.786323805081814e-09, "loss": 0.6095, "step": 11479 }, { "epoch": 1.9916724496877167, "grad_norm": 1.008466362953186, "learning_rate": 2.6737592957615243e-09, "loss": 0.5907, "step": 11480 }, { "epoch": 1.9918459403192228, "grad_norm": 0.7919140458106995, "learning_rate": 2.563515499451885e-09, "loss": 0.3691, "step": 11481 }, { "epoch": 1.9920194309507286, "grad_norm": 0.7679170966148376, "learning_rate": 2.4555924289493272e-09, "loss": 0.5347, "step": 11482 }, { "epoch": 1.9921929215822347, "grad_norm": 0.7232471704483032, "learning_rate": 2.349990096777166e-09, "loss": 0.489, "step": 11483 }, { "epoch": 1.9923664122137406, "grad_norm": 3.296504497528076, "learning_rate": 2.2467085151900436e-09, "loss": 0.4176, "step": 11484 }, { "epoch": 1.9925399028452464, "grad_norm": 1.5242438316345215, "learning_rate": 2.145747696173928e-09, "loss": 0.3864, "step": 11485 }, { "epoch": 1.9927133934767522, "grad_norm": 0.8411022424697876, "learning_rate": 2.047107651446112e-09, "loss": 0.386, "step": 11486 }, { "epoch": 1.992886884108258, "grad_norm": 0.8437713980674744, "learning_rate": 1.950788392455216e-09, "loss": 0.4785, "step": 11487 }, { "epoch": 1.993060374739764, "grad_norm": 0.7413724064826965, "learning_rate": 1.8567899303767457e-09, "loss": 0.5667, "step": 11488 }, { "epoch": 1.9932338653712698, "grad_norm": 0.7066528797149658, "learning_rate": 1.7651122761197515e-09, "loss": 0.4302, "step": 11489 }, { "epoch": 1.9934073560027759, "grad_norm": 0.7304266691207886, "learning_rate": 1.6757554403223907e-09, "loss": 0.4046, "step": 11490 }, { "epoch": 1.9935808466342817, "grad_norm": 0.9667731523513794, "learning_rate": 1.5887194333585876e-09, "loss": 0.4823, "step": 11491 }, { "epoch": 1.9937543372657878, "grad_norm": 1.013989806175232, "learning_rate": 1.5040042653269304e-09, "loss": 0.4267, "step": 11492 }, { "epoch": 1.9939278278972936, "grad_norm": 0.7241480946540833, "learning_rate": 1.421609946057334e-09, "loss": 0.4301, "step": 11493 }, { "epoch": 1.9941013185287995, "grad_norm": 0.9055463075637817, "learning_rate": 1.3415364851132595e-09, "loss": 0.5103, "step": 11494 }, { "epoch": 1.9942748091603053, "grad_norm": 1.3344109058380127, "learning_rate": 1.2637838917872735e-09, "loss": 0.5143, "step": 11495 }, { "epoch": 1.9944482997918112, "grad_norm": 0.8314899802207947, "learning_rate": 1.188352175103269e-09, "loss": 0.4003, "step": 11496 }, { "epoch": 1.994621790423317, "grad_norm": 0.9117047786712646, "learning_rate": 1.1152413438120235e-09, "loss": 0.3704, "step": 11497 }, { "epoch": 1.994795281054823, "grad_norm": 1.4010546207427979, "learning_rate": 1.0444514064023025e-09, "loss": 0.4529, "step": 11498 }, { "epoch": 1.994968771686329, "grad_norm": 0.9583075642585754, "learning_rate": 9.759823710853155e-10, "loss": 0.3901, "step": 11499 }, { "epoch": 1.995142262317835, "grad_norm": 0.8378259539604187, "learning_rate": 9.098342458102593e-10, "loss": 0.3929, "step": 11500 }, { "epoch": 1.9953157529493408, "grad_norm": 0.7189657688140869, "learning_rate": 8.460070382532159e-10, "loss": 0.4873, "step": 11501 }, { "epoch": 1.9954892435808467, "grad_norm": 0.7251210808753967, "learning_rate": 7.845007558193729e-10, "loss": 0.3544, "step": 11502 }, { "epoch": 1.9956627342123525, "grad_norm": 0.9614545702934265, "learning_rate": 7.253154056474643e-10, "loss": 0.401, "step": 11503 }, { "epoch": 1.9958362248438584, "grad_norm": 0.6946287155151367, "learning_rate": 6.684509946075501e-10, "loss": 0.4194, "step": 11504 }, { "epoch": 1.9960097154753642, "grad_norm": 0.8468782305717468, "learning_rate": 6.139075292987962e-10, "loss": 0.4404, "step": 11505 }, { "epoch": 1.9961832061068703, "grad_norm": 0.777607262134552, "learning_rate": 5.616850160494736e-10, "loss": 0.5247, "step": 11506 }, { "epoch": 1.9963566967383761, "grad_norm": 0.7831170558929443, "learning_rate": 5.117834609191796e-10, "loss": 0.4486, "step": 11507 }, { "epoch": 1.9965301873698822, "grad_norm": 0.990483283996582, "learning_rate": 4.642028697010581e-10, "loss": 0.4615, "step": 11508 }, { "epoch": 1.996703678001388, "grad_norm": 0.735356330871582, "learning_rate": 4.189432479173583e-10, "loss": 0.4634, "step": 11509 }, { "epoch": 1.9968771686328939, "grad_norm": 0.8335587978363037, "learning_rate": 3.760046008172147e-10, "loss": 0.4609, "step": 11510 }, { "epoch": 1.9970506592643997, "grad_norm": 8.307694435119629, "learning_rate": 3.353869333877491e-10, "loss": 0.4355, "step": 11511 }, { "epoch": 1.9972241498959056, "grad_norm": 0.9904146194458008, "learning_rate": 2.97090250340748e-10, "loss": 0.4745, "step": 11512 }, { "epoch": 1.9973976405274114, "grad_norm": 1.9969080686569214, "learning_rate": 2.611145561215445e-10, "loss": 0.4222, "step": 11513 }, { "epoch": 1.9975711311589173, "grad_norm": 0.8702035546302795, "learning_rate": 2.2745985490235656e-10, "loss": 0.3834, "step": 11514 }, { "epoch": 1.9977446217904233, "grad_norm": 1.0072624683380127, "learning_rate": 1.9612615059116935e-10, "loss": 0.3547, "step": 11515 }, { "epoch": 1.9979181124219292, "grad_norm": 0.9296289086341858, "learning_rate": 1.6711344682507346e-10, "loss": 0.3597, "step": 11516 }, { "epoch": 1.9980916030534353, "grad_norm": 1.0401052236557007, "learning_rate": 1.4042174696804467e-10, "loss": 0.3784, "step": 11517 }, { "epoch": 1.998265093684941, "grad_norm": 0.9357911348342896, "learning_rate": 1.160510541220461e-10, "loss": 0.4485, "step": 11518 }, { "epoch": 1.998438584316447, "grad_norm": 0.9865544438362122, "learning_rate": 9.400137111148511e-11, "loss": 0.4338, "step": 11519 }, { "epoch": 1.9986120749479528, "grad_norm": 1.2822304964065552, "learning_rate": 7.427270049653601e-11, "loss": 0.411, "step": 11520 }, { "epoch": 1.9987855655794586, "grad_norm": 0.9479796886444092, "learning_rate": 5.6865044566478676e-11, "loss": 0.3289, "step": 11521 }, { "epoch": 1.9989590562109645, "grad_norm": 0.7394636273384094, "learning_rate": 4.1778405341919016e-11, "loss": 0.4054, "step": 11522 }, { "epoch": 1.9991325468424705, "grad_norm": 0.8287288546562195, "learning_rate": 2.9012784574788954e-11, "loss": 0.5789, "step": 11523 }, { "epoch": 1.9993060374739764, "grad_norm": 0.9048954248428345, "learning_rate": 1.8568183743905566e-11, "loss": 0.4268, "step": 11524 }, { "epoch": 1.9994795281054825, "grad_norm": 0.8199928998947144, "learning_rate": 1.0444604063852837e-11, "loss": 0.432, "step": 11525 }, { "epoch": 1.9996530187369883, "grad_norm": 0.8431583046913147, "learning_rate": 4.6420464738794465e-12, "loss": 0.499, "step": 11526 }, { "epoch": 1.9998265093684942, "grad_norm": 0.8587999939918518, "learning_rate": 1.160511651221441e-12, "loss": 0.5066, "step": 11527 }, { "epoch": 2.0, "grad_norm": 0.8854231834411621, "learning_rate": 0.0, "loss": 0.3571, "step": 11528 }, { "epoch": 2.0, "step": 11528, "total_flos": 0.0, "train_loss": 0.0, "train_runtime": 1.8833, "train_samples_per_second": 195885.996, "train_steps_per_second": 6121.238 } ], "logging_steps": 1.0, "max_steps": 11528, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }