{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.995635229929852, "eval_steps": 500, "global_step": 3204, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.123711340206186e-07, "loss": 3.2515, "step": 2 }, { "epoch": 0.0, "learning_rate": 8.247422680412372e-07, "loss": 3.2363, "step": 4 }, { "epoch": 0.01, "learning_rate": 1.2371134020618557e-06, "loss": 3.2793, "step": 6 }, { "epoch": 0.01, "learning_rate": 1.6494845360824744e-06, "loss": 3.2382, "step": 8 }, { "epoch": 0.01, "learning_rate": 2.061855670103093e-06, "loss": 3.273, "step": 10 }, { "epoch": 0.01, "learning_rate": 2.4742268041237115e-06, "loss": 3.2146, "step": 12 }, { "epoch": 0.02, "learning_rate": 2.8865979381443297e-06, "loss": 3.1379, "step": 14 }, { "epoch": 0.02, "learning_rate": 3.298969072164949e-06, "loss": 3.1523, "step": 16 }, { "epoch": 0.02, "learning_rate": 3.7113402061855674e-06, "loss": 3.0647, "step": 18 }, { "epoch": 0.02, "learning_rate": 4.123711340206186e-06, "loss": 3.067, "step": 20 }, { "epoch": 0.03, "learning_rate": 4.536082474226804e-06, "loss": 3.1078, "step": 22 }, { "epoch": 0.03, "learning_rate": 4.948453608247423e-06, "loss": 2.9914, "step": 24 }, { "epoch": 0.03, "learning_rate": 5.360824742268042e-06, "loss": 2.9851, "step": 26 }, { "epoch": 0.03, "learning_rate": 5.7731958762886594e-06, "loss": 2.911, "step": 28 }, { "epoch": 0.04, "learning_rate": 6.185567010309279e-06, "loss": 3.0351, "step": 30 }, { "epoch": 0.04, "learning_rate": 6.597938144329898e-06, "loss": 3.0463, "step": 32 }, { "epoch": 0.04, "learning_rate": 7.010309278350515e-06, "loss": 2.9003, "step": 34 }, { "epoch": 0.04, "learning_rate": 7.422680412371135e-06, "loss": 2.9228, "step": 36 }, { "epoch": 0.05, "learning_rate": 7.835051546391754e-06, "loss": 2.9414, "step": 38 }, { "epoch": 0.05, "learning_rate": 8.247422680412371e-06, "loss": 2.8951, "step": 40 }, { "epoch": 0.05, "learning_rate": 8.65979381443299e-06, "loss": 2.9161, "step": 42 }, { "epoch": 0.05, "learning_rate": 9.072164948453609e-06, "loss": 2.9445, "step": 44 }, { "epoch": 0.06, "learning_rate": 9.484536082474226e-06, "loss": 2.8693, "step": 46 }, { "epoch": 0.06, "learning_rate": 9.896907216494846e-06, "loss": 2.8698, "step": 48 }, { "epoch": 0.06, "learning_rate": 1.0309278350515464e-05, "loss": 2.8505, "step": 50 }, { "epoch": 0.06, "learning_rate": 1.0721649484536083e-05, "loss": 2.8884, "step": 52 }, { "epoch": 0.07, "learning_rate": 1.1134020618556703e-05, "loss": 2.8652, "step": 54 }, { "epoch": 0.07, "learning_rate": 1.1546391752577319e-05, "loss": 2.8146, "step": 56 }, { "epoch": 0.07, "learning_rate": 1.1958762886597938e-05, "loss": 2.805, "step": 58 }, { "epoch": 0.07, "learning_rate": 1.2371134020618558e-05, "loss": 2.855, "step": 60 }, { "epoch": 0.08, "learning_rate": 1.2783505154639176e-05, "loss": 2.79, "step": 62 }, { "epoch": 0.08, "learning_rate": 1.3195876288659795e-05, "loss": 2.8766, "step": 64 }, { "epoch": 0.08, "learning_rate": 1.3608247422680415e-05, "loss": 2.7995, "step": 66 }, { "epoch": 0.08, "learning_rate": 1.402061855670103e-05, "loss": 2.7542, "step": 68 }, { "epoch": 0.09, "learning_rate": 1.443298969072165e-05, "loss": 2.7528, "step": 70 }, { "epoch": 0.09, "learning_rate": 1.484536082474227e-05, "loss": 2.7848, "step": 72 }, { "epoch": 0.09, "learning_rate": 1.5257731958762888e-05, "loss": 2.7701, "step": 74 }, { "epoch": 0.09, "learning_rate": 1.5670103092783507e-05, "loss": 2.7374, "step": 76 }, { "epoch": 0.1, "learning_rate": 1.6082474226804127e-05, "loss": 2.6963, "step": 78 }, { "epoch": 0.1, "learning_rate": 1.6494845360824743e-05, "loss": 2.6799, "step": 80 }, { "epoch": 0.1, "learning_rate": 1.6907216494845362e-05, "loss": 2.7273, "step": 82 }, { "epoch": 0.1, "learning_rate": 1.731958762886598e-05, "loss": 2.6891, "step": 84 }, { "epoch": 0.11, "learning_rate": 1.7731958762886598e-05, "loss": 2.6861, "step": 86 }, { "epoch": 0.11, "learning_rate": 1.8144329896907217e-05, "loss": 2.7364, "step": 88 }, { "epoch": 0.11, "learning_rate": 1.8556701030927837e-05, "loss": 2.5863, "step": 90 }, { "epoch": 0.11, "learning_rate": 1.8969072164948453e-05, "loss": 2.6845, "step": 92 }, { "epoch": 0.12, "learning_rate": 1.9381443298969072e-05, "loss": 2.7043, "step": 94 }, { "epoch": 0.12, "learning_rate": 1.9793814432989692e-05, "loss": 2.631, "step": 96 }, { "epoch": 0.12, "learning_rate": 1.9999994888042835e-05, "loss": 2.6999, "step": 98 }, { "epoch": 0.12, "learning_rate": 1.999995399241686e-05, "loss": 2.688, "step": 100 }, { "epoch": 0.13, "learning_rate": 1.999987220133215e-05, "loss": 2.5822, "step": 102 }, { "epoch": 0.13, "learning_rate": 1.99997495151232e-05, "loss": 2.6453, "step": 104 }, { "epoch": 0.13, "learning_rate": 1.999958593429174e-05, "loss": 2.59, "step": 106 }, { "epoch": 0.13, "learning_rate": 1.999938145950675e-05, "loss": 2.5822, "step": 108 }, { "epoch": 0.14, "learning_rate": 1.9999136091604433e-05, "loss": 2.5116, "step": 110 }, { "epoch": 0.14, "learning_rate": 1.999884983158825e-05, "loss": 2.5744, "step": 112 }, { "epoch": 0.14, "learning_rate": 1.9998522680628868e-05, "loss": 2.5379, "step": 114 }, { "epoch": 0.14, "learning_rate": 1.9998154640064196e-05, "loss": 2.5487, "step": 116 }, { "epoch": 0.15, "learning_rate": 1.9997745711399364e-05, "loss": 2.6037, "step": 118 }, { "epoch": 0.15, "learning_rate": 1.9997295896306706e-05, "loss": 2.6105, "step": 120 }, { "epoch": 0.15, "learning_rate": 1.999680519662577e-05, "loss": 2.5882, "step": 122 }, { "epoch": 0.15, "learning_rate": 1.999627361436331e-05, "loss": 2.5904, "step": 124 }, { "epoch": 0.16, "learning_rate": 1.9995701151693265e-05, "loss": 2.5657, "step": 126 }, { "epoch": 0.16, "learning_rate": 1.999508781095675e-05, "loss": 2.603, "step": 128 }, { "epoch": 0.16, "learning_rate": 1.999443359466207e-05, "loss": 2.543, "step": 130 }, { "epoch": 0.16, "learning_rate": 1.9993738505484684e-05, "loss": 2.6112, "step": 132 }, { "epoch": 0.17, "learning_rate": 1.9993002546267203e-05, "loss": 2.5608, "step": 134 }, { "epoch": 0.17, "learning_rate": 1.9992225720019377e-05, "loss": 2.528, "step": 136 }, { "epoch": 0.17, "learning_rate": 1.9991408029918086e-05, "loss": 2.5347, "step": 138 }, { "epoch": 0.17, "learning_rate": 1.9990549479307334e-05, "loss": 2.4668, "step": 140 }, { "epoch": 0.18, "learning_rate": 1.9989650071698214e-05, "loss": 2.5576, "step": 142 }, { "epoch": 0.18, "learning_rate": 1.9988709810768914e-05, "loss": 2.4433, "step": 144 }, { "epoch": 0.18, "learning_rate": 1.998772870036469e-05, "loss": 2.5095, "step": 146 }, { "epoch": 0.18, "learning_rate": 1.9986706744497857e-05, "loss": 2.5265, "step": 148 }, { "epoch": 0.19, "learning_rate": 1.998564394734777e-05, "loss": 2.3981, "step": 150 }, { "epoch": 0.19, "learning_rate": 1.9984540313260808e-05, "loss": 2.4508, "step": 152 }, { "epoch": 0.19, "learning_rate": 1.998339584675035e-05, "loss": 2.4684, "step": 154 }, { "epoch": 0.19, "learning_rate": 1.998221055249677e-05, "loss": 2.4673, "step": 156 }, { "epoch": 0.2, "learning_rate": 1.99809844353474e-05, "loss": 2.46, "step": 158 }, { "epoch": 0.2, "learning_rate": 1.9979717500316532e-05, "loss": 2.4691, "step": 160 }, { "epoch": 0.2, "learning_rate": 1.997840975258538e-05, "loss": 2.4876, "step": 162 }, { "epoch": 0.2, "learning_rate": 1.9977061197502055e-05, "loss": 2.3855, "step": 164 }, { "epoch": 0.21, "learning_rate": 1.997567184058156e-05, "loss": 2.4862, "step": 166 }, { "epoch": 0.21, "learning_rate": 1.9974241687505772e-05, "loss": 2.3955, "step": 168 }, { "epoch": 0.21, "learning_rate": 1.997277074412338e-05, "loss": 2.4416, "step": 170 }, { "epoch": 0.21, "learning_rate": 1.9971259016449913e-05, "loss": 2.4385, "step": 172 }, { "epoch": 0.22, "learning_rate": 1.9969706510667676e-05, "loss": 2.4228, "step": 174 }, { "epoch": 0.22, "learning_rate": 1.996811323312574e-05, "loss": 2.4402, "step": 176 }, { "epoch": 0.22, "learning_rate": 1.9966479190339913e-05, "loss": 2.3951, "step": 178 }, { "epoch": 0.22, "learning_rate": 1.9964804388992725e-05, "loss": 2.4134, "step": 180 }, { "epoch": 0.23, "learning_rate": 1.9963088835933386e-05, "loss": 2.3631, "step": 182 }, { "epoch": 0.23, "learning_rate": 1.9961332538177754e-05, "loss": 2.42, "step": 184 }, { "epoch": 0.23, "learning_rate": 1.9959535502908326e-05, "loss": 2.4121, "step": 186 }, { "epoch": 0.23, "learning_rate": 1.9957697737474198e-05, "loss": 2.3994, "step": 188 }, { "epoch": 0.24, "learning_rate": 1.995581924939102e-05, "loss": 2.391, "step": 190 }, { "epoch": 0.24, "learning_rate": 1.9953900046341005e-05, "loss": 2.4389, "step": 192 }, { "epoch": 0.24, "learning_rate": 1.9951940136172846e-05, "loss": 2.3884, "step": 194 }, { "epoch": 0.24, "learning_rate": 1.9949939526901724e-05, "loss": 2.4058, "step": 196 }, { "epoch": 0.25, "learning_rate": 1.9947898226709267e-05, "loss": 2.3603, "step": 198 }, { "epoch": 0.25, "learning_rate": 1.9945816243943495e-05, "loss": 2.3278, "step": 200 }, { "epoch": 0.25, "learning_rate": 1.9943693587118818e-05, "loss": 2.4248, "step": 202 }, { "epoch": 0.25, "learning_rate": 1.994153026491598e-05, "loss": 2.3969, "step": 204 }, { "epoch": 0.26, "learning_rate": 1.9939326286182016e-05, "loss": 2.327, "step": 206 }, { "epoch": 0.26, "learning_rate": 1.9937081659930255e-05, "loss": 2.4033, "step": 208 }, { "epoch": 0.26, "learning_rate": 1.9934796395340228e-05, "loss": 2.392, "step": 210 }, { "epoch": 0.26, "learning_rate": 1.993247050175768e-05, "loss": 2.391, "step": 212 }, { "epoch": 0.27, "learning_rate": 1.99301039886945e-05, "loss": 2.3343, "step": 214 }, { "epoch": 0.27, "learning_rate": 1.9927696865828698e-05, "loss": 2.3227, "step": 216 }, { "epoch": 0.27, "learning_rate": 1.9925249143004353e-05, "loss": 2.3913, "step": 218 }, { "epoch": 0.27, "learning_rate": 1.9922760830231597e-05, "loss": 2.3677, "step": 220 }, { "epoch": 0.28, "learning_rate": 1.9920231937686538e-05, "loss": 2.3598, "step": 222 }, { "epoch": 0.28, "learning_rate": 1.9917662475711247e-05, "loss": 2.3573, "step": 224 }, { "epoch": 0.28, "learning_rate": 1.9915052454813705e-05, "loss": 2.4229, "step": 226 }, { "epoch": 0.28, "learning_rate": 1.9912401885667765e-05, "loss": 2.2836, "step": 228 }, { "epoch": 0.29, "learning_rate": 1.9909710779113093e-05, "loss": 2.2907, "step": 230 }, { "epoch": 0.29, "learning_rate": 1.990697914615515e-05, "loss": 2.3445, "step": 232 }, { "epoch": 0.29, "learning_rate": 1.9904206997965123e-05, "loss": 2.3142, "step": 234 }, { "epoch": 0.29, "learning_rate": 1.9901394345879893e-05, "loss": 2.2854, "step": 236 }, { "epoch": 0.3, "learning_rate": 1.989854120140198e-05, "loss": 2.3004, "step": 238 }, { "epoch": 0.3, "learning_rate": 1.9895647576199507e-05, "loss": 2.272, "step": 240 }, { "epoch": 0.3, "learning_rate": 1.9892713482106135e-05, "loss": 2.329, "step": 242 }, { "epoch": 0.3, "learning_rate": 1.988973893112104e-05, "loss": 2.3237, "step": 244 }, { "epoch": 0.31, "learning_rate": 1.9886723935408835e-05, "loss": 2.3882, "step": 246 }, { "epoch": 0.31, "learning_rate": 1.9883668507299538e-05, "loss": 2.3263, "step": 248 }, { "epoch": 0.31, "learning_rate": 1.988057265928853e-05, "loss": 2.3502, "step": 250 }, { "epoch": 0.31, "learning_rate": 1.9877436404036466e-05, "loss": 2.2923, "step": 252 }, { "epoch": 0.32, "learning_rate": 1.987425975436928e-05, "loss": 2.3222, "step": 254 }, { "epoch": 0.32, "learning_rate": 1.9871042723278078e-05, "loss": 2.319, "step": 256 }, { "epoch": 0.32, "learning_rate": 1.9867785323919116e-05, "loss": 2.2974, "step": 258 }, { "epoch": 0.32, "learning_rate": 1.9864487569613747e-05, "loss": 2.331, "step": 260 }, { "epoch": 0.33, "learning_rate": 1.9861149473848343e-05, "loss": 2.2826, "step": 262 }, { "epoch": 0.33, "learning_rate": 1.9857771050274267e-05, "loss": 2.3282, "step": 264 }, { "epoch": 0.33, "learning_rate": 1.98543523127078e-05, "loss": 2.2752, "step": 266 }, { "epoch": 0.33, "learning_rate": 1.9850893275130085e-05, "loss": 2.2815, "step": 268 }, { "epoch": 0.34, "learning_rate": 1.9847393951687087e-05, "loss": 2.1985, "step": 270 }, { "epoch": 0.34, "learning_rate": 1.984385435668951e-05, "loss": 2.2914, "step": 272 }, { "epoch": 0.34, "learning_rate": 1.9840274504612763e-05, "loss": 2.244, "step": 274 }, { "epoch": 0.34, "learning_rate": 1.983665441009688e-05, "loss": 2.2515, "step": 276 }, { "epoch": 0.35, "learning_rate": 1.983299408794647e-05, "loss": 2.2607, "step": 278 }, { "epoch": 0.35, "learning_rate": 1.9829293553130656e-05, "loss": 2.226, "step": 280 }, { "epoch": 0.35, "learning_rate": 1.9825552820783018e-05, "loss": 2.2799, "step": 282 }, { "epoch": 0.35, "learning_rate": 1.982177190620152e-05, "loss": 2.2573, "step": 284 }, { "epoch": 0.36, "learning_rate": 1.981795082484846e-05, "loss": 2.276, "step": 286 }, { "epoch": 0.36, "learning_rate": 1.9814089592350395e-05, "loss": 2.2828, "step": 288 }, { "epoch": 0.36, "learning_rate": 1.9810188224498086e-05, "loss": 2.2221, "step": 290 }, { "epoch": 0.36, "learning_rate": 1.980624673724643e-05, "loss": 2.3025, "step": 292 }, { "epoch": 0.37, "learning_rate": 1.9802265146714393e-05, "loss": 2.1947, "step": 294 }, { "epoch": 0.37, "learning_rate": 1.9798243469184947e-05, "loss": 2.3123, "step": 296 }, { "epoch": 0.37, "learning_rate": 1.9794181721105002e-05, "loss": 2.3382, "step": 298 }, { "epoch": 0.37, "learning_rate": 1.979007991908534e-05, "loss": 2.2656, "step": 300 }, { "epoch": 0.38, "learning_rate": 1.9785938079900547e-05, "loss": 2.2834, "step": 302 }, { "epoch": 0.38, "learning_rate": 1.9781756220488938e-05, "loss": 2.249, "step": 304 }, { "epoch": 0.38, "learning_rate": 1.9777534357952503e-05, "loss": 2.2662, "step": 306 }, { "epoch": 0.38, "learning_rate": 1.977327250955682e-05, "loss": 2.1953, "step": 308 }, { "epoch": 0.39, "learning_rate": 1.976897069273099e-05, "loss": 2.2089, "step": 310 }, { "epoch": 0.39, "learning_rate": 1.9764628925067576e-05, "loss": 2.2576, "step": 312 }, { "epoch": 0.39, "learning_rate": 1.976024722432252e-05, "loss": 2.1983, "step": 314 }, { "epoch": 0.39, "learning_rate": 1.9755825608415065e-05, "loss": 2.2231, "step": 316 }, { "epoch": 0.4, "learning_rate": 1.9751364095427694e-05, "loss": 2.2898, "step": 318 }, { "epoch": 0.4, "learning_rate": 1.974686270360606e-05, "loss": 2.2599, "step": 320 }, { "epoch": 0.4, "learning_rate": 1.9742321451358887e-05, "loss": 2.2066, "step": 322 }, { "epoch": 0.4, "learning_rate": 1.973774035725793e-05, "loss": 2.2213, "step": 324 }, { "epoch": 0.41, "learning_rate": 1.9733119440037863e-05, "loss": 2.2995, "step": 326 }, { "epoch": 0.41, "learning_rate": 1.9728458718596228e-05, "loss": 2.2125, "step": 328 }, { "epoch": 0.41, "learning_rate": 1.9723758211993344e-05, "loss": 2.2308, "step": 330 }, { "epoch": 0.41, "learning_rate": 1.971901793945224e-05, "loss": 2.2471, "step": 332 }, { "epoch": 0.42, "learning_rate": 1.9714237920358566e-05, "loss": 2.2093, "step": 334 }, { "epoch": 0.42, "learning_rate": 1.9709418174260523e-05, "loss": 2.2243, "step": 336 }, { "epoch": 0.42, "learning_rate": 1.9704558720868768e-05, "loss": 2.2659, "step": 338 }, { "epoch": 0.42, "learning_rate": 1.9699659580056353e-05, "loss": 2.2232, "step": 340 }, { "epoch": 0.43, "learning_rate": 1.9694720771858632e-05, "loss": 2.2668, "step": 342 }, { "epoch": 0.43, "learning_rate": 1.968974231647318e-05, "loss": 2.2309, "step": 344 }, { "epoch": 0.43, "learning_rate": 1.9684724234259715e-05, "loss": 2.1846, "step": 346 }, { "epoch": 0.43, "learning_rate": 1.9679666545740002e-05, "loss": 2.3046, "step": 348 }, { "epoch": 0.44, "learning_rate": 1.9674569271597792e-05, "loss": 2.233, "step": 350 }, { "epoch": 0.44, "learning_rate": 1.9669432432678713e-05, "loss": 2.2801, "step": 352 }, { "epoch": 0.44, "learning_rate": 1.96642560499902e-05, "loss": 2.1939, "step": 354 }, { "epoch": 0.44, "learning_rate": 1.9659040144701412e-05, "loss": 2.2114, "step": 356 }, { "epoch": 0.45, "learning_rate": 1.9653784738143122e-05, "loss": 2.1578, "step": 358 }, { "epoch": 0.45, "learning_rate": 1.9648489851807662e-05, "loss": 2.2855, "step": 360 }, { "epoch": 0.45, "learning_rate": 1.9643155507348807e-05, "loss": 2.1823, "step": 362 }, { "epoch": 0.45, "learning_rate": 1.9637781726581706e-05, "loss": 2.1782, "step": 364 }, { "epoch": 0.46, "learning_rate": 1.963236853148278e-05, "loss": 2.1873, "step": 366 }, { "epoch": 0.46, "learning_rate": 1.9626915944189647e-05, "loss": 2.1374, "step": 368 }, { "epoch": 0.46, "learning_rate": 1.9621423987001013e-05, "loss": 2.2583, "step": 370 }, { "epoch": 0.46, "learning_rate": 1.961589268237659e-05, "loss": 2.1622, "step": 372 }, { "epoch": 0.47, "learning_rate": 1.961032205293701e-05, "loss": 2.2058, "step": 374 }, { "epoch": 0.47, "learning_rate": 1.9604712121463713e-05, "loss": 2.1792, "step": 376 }, { "epoch": 0.47, "learning_rate": 1.959906291089889e-05, "loss": 2.2287, "step": 378 }, { "epoch": 0.47, "learning_rate": 1.959337444434534e-05, "loss": 2.1758, "step": 380 }, { "epoch": 0.48, "learning_rate": 1.9587646745066424e-05, "loss": 2.2457, "step": 382 }, { "epoch": 0.48, "learning_rate": 1.9581879836485936e-05, "loss": 2.2054, "step": 384 }, { "epoch": 0.48, "learning_rate": 1.9576073742188022e-05, "loss": 2.2095, "step": 386 }, { "epoch": 0.48, "learning_rate": 1.957022848591708e-05, "loss": 2.167, "step": 388 }, { "epoch": 0.49, "learning_rate": 1.9564344091577664e-05, "loss": 2.1353, "step": 390 }, { "epoch": 0.49, "learning_rate": 1.9558420583234382e-05, "loss": 2.2185, "step": 392 }, { "epoch": 0.49, "learning_rate": 1.955245798511181e-05, "loss": 2.1465, "step": 394 }, { "epoch": 0.49, "learning_rate": 1.9546456321594374e-05, "loss": 2.1234, "step": 396 }, { "epoch": 0.5, "learning_rate": 1.954041561722627e-05, "loss": 2.2211, "step": 398 }, { "epoch": 0.5, "learning_rate": 1.9534335896711344e-05, "loss": 2.1423, "step": 400 }, { "epoch": 0.5, "learning_rate": 1.952821718491301e-05, "loss": 2.1656, "step": 402 }, { "epoch": 0.5, "learning_rate": 1.9522059506854133e-05, "loss": 2.1568, "step": 404 }, { "epoch": 0.51, "learning_rate": 1.9515862887716943e-05, "loss": 2.1858, "step": 406 }, { "epoch": 0.51, "learning_rate": 1.95096273528429e-05, "loss": 2.1204, "step": 408 }, { "epoch": 0.51, "learning_rate": 1.9503352927732645e-05, "loss": 2.1687, "step": 410 }, { "epoch": 0.51, "learning_rate": 1.9497039638045833e-05, "loss": 2.1762, "step": 412 }, { "epoch": 0.52, "learning_rate": 1.9490687509601073e-05, "loss": 2.1915, "step": 414 }, { "epoch": 0.52, "learning_rate": 1.948429656837581e-05, "loss": 2.1705, "step": 416 }, { "epoch": 0.52, "learning_rate": 1.9477866840506205e-05, "loss": 2.1434, "step": 418 }, { "epoch": 0.52, "learning_rate": 1.947139835228705e-05, "loss": 2.1287, "step": 420 }, { "epoch": 0.53, "learning_rate": 1.9464891130171647e-05, "loss": 2.0862, "step": 422 }, { "epoch": 0.53, "learning_rate": 1.9458345200771697e-05, "loss": 2.2316, "step": 424 }, { "epoch": 0.53, "learning_rate": 1.9451760590857207e-05, "loss": 2.1802, "step": 426 }, { "epoch": 0.53, "learning_rate": 1.944513732735636e-05, "loss": 2.1693, "step": 428 }, { "epoch": 0.54, "learning_rate": 1.943847543735543e-05, "loss": 2.1848, "step": 430 }, { "epoch": 0.54, "learning_rate": 1.9431774948098633e-05, "loss": 2.2342, "step": 432 }, { "epoch": 0.54, "learning_rate": 1.942503588698806e-05, "loss": 2.1495, "step": 434 }, { "epoch": 0.54, "learning_rate": 1.9418258281583545e-05, "loss": 2.1224, "step": 436 }, { "epoch": 0.55, "learning_rate": 1.9411442159602532e-05, "loss": 2.1866, "step": 438 }, { "epoch": 0.55, "learning_rate": 1.940458754892e-05, "loss": 2.1382, "step": 440 }, { "epoch": 0.55, "learning_rate": 1.939769447756832e-05, "loss": 2.1603, "step": 442 }, { "epoch": 0.55, "learning_rate": 1.939076297373715e-05, "loss": 2.1242, "step": 444 }, { "epoch": 0.56, "learning_rate": 1.9383793065773335e-05, "loss": 2.1245, "step": 446 }, { "epoch": 0.56, "learning_rate": 1.9376784782180747e-05, "loss": 2.1903, "step": 448 }, { "epoch": 0.56, "learning_rate": 1.9369738151620228e-05, "loss": 2.118, "step": 450 }, { "epoch": 0.56, "learning_rate": 1.936265320290943e-05, "loss": 2.1582, "step": 452 }, { "epoch": 0.57, "learning_rate": 1.9355529965022703e-05, "loss": 2.1664, "step": 454 }, { "epoch": 0.57, "learning_rate": 1.9348368467090988e-05, "loss": 2.1747, "step": 456 }, { "epoch": 0.57, "learning_rate": 1.9341168738401696e-05, "loss": 2.1592, "step": 458 }, { "epoch": 0.57, "learning_rate": 1.933393080839859e-05, "loss": 2.1831, "step": 460 }, { "epoch": 0.58, "learning_rate": 1.932665470668164e-05, "loss": 2.133, "step": 462 }, { "epoch": 0.58, "learning_rate": 1.931934046300695e-05, "loss": 2.1431, "step": 464 }, { "epoch": 0.58, "learning_rate": 1.9311988107286584e-05, "loss": 2.131, "step": 466 }, { "epoch": 0.58, "learning_rate": 1.9304597669588472e-05, "loss": 2.1011, "step": 468 }, { "epoch": 0.59, "learning_rate": 1.92971691801363e-05, "loss": 2.1485, "step": 470 }, { "epoch": 0.59, "learning_rate": 1.928970266930934e-05, "loss": 2.1388, "step": 472 }, { "epoch": 0.59, "learning_rate": 1.928219816764238e-05, "loss": 2.1475, "step": 474 }, { "epoch": 0.59, "learning_rate": 1.9274655705825566e-05, "loss": 2.1372, "step": 476 }, { "epoch": 0.6, "learning_rate": 1.9267075314704282e-05, "loss": 2.1498, "step": 478 }, { "epoch": 0.6, "learning_rate": 1.925945702527903e-05, "loss": 2.1308, "step": 480 }, { "epoch": 0.6, "learning_rate": 1.9251800868705292e-05, "loss": 2.122, "step": 482 }, { "epoch": 0.6, "learning_rate": 1.924410687629342e-05, "loss": 2.0804, "step": 484 }, { "epoch": 0.61, "learning_rate": 1.9236375079508492e-05, "loss": 2.0633, "step": 486 }, { "epoch": 0.61, "learning_rate": 1.922860550997019e-05, "loss": 2.1071, "step": 488 }, { "epoch": 0.61, "learning_rate": 1.9220798199452676e-05, "loss": 2.0868, "step": 490 }, { "epoch": 0.61, "learning_rate": 1.9212953179884443e-05, "loss": 2.1227, "step": 492 }, { "epoch": 0.62, "learning_rate": 1.9205070483348216e-05, "loss": 2.0897, "step": 494 }, { "epoch": 0.62, "learning_rate": 1.9197150142080784e-05, "loss": 2.1388, "step": 496 }, { "epoch": 0.62, "learning_rate": 1.9189192188472902e-05, "loss": 2.1197, "step": 498 }, { "epoch": 0.62, "learning_rate": 1.9181196655069126e-05, "loss": 2.1265, "step": 500 }, { "epoch": 0.63, "learning_rate": 1.917316357456772e-05, "loss": 2.1214, "step": 502 }, { "epoch": 0.63, "learning_rate": 1.9165092979820478e-05, "loss": 2.1106, "step": 504 }, { "epoch": 0.63, "learning_rate": 1.915698490383262e-05, "loss": 2.0805, "step": 506 }, { "epoch": 0.63, "learning_rate": 1.914883937976265e-05, "loss": 2.0937, "step": 508 }, { "epoch": 0.64, "learning_rate": 1.9140656440922216e-05, "loss": 2.071, "step": 510 }, { "epoch": 0.64, "learning_rate": 1.9132436120775967e-05, "loss": 2.109, "step": 512 }, { "epoch": 0.64, "learning_rate": 1.9124178452941445e-05, "loss": 2.1233, "step": 514 }, { "epoch": 0.64, "learning_rate": 1.9115883471188916e-05, "loss": 2.0982, "step": 516 }, { "epoch": 0.65, "learning_rate": 1.9107551209441238e-05, "loss": 2.1426, "step": 518 }, { "epoch": 0.65, "learning_rate": 1.9099181701773735e-05, "loss": 2.1051, "step": 520 }, { "epoch": 0.65, "learning_rate": 1.9090774982414056e-05, "loss": 2.1288, "step": 522 }, { "epoch": 0.65, "learning_rate": 1.9082331085742018e-05, "loss": 2.089, "step": 524 }, { "epoch": 0.66, "learning_rate": 1.9073850046289484e-05, "loss": 2.061, "step": 526 }, { "epoch": 0.66, "learning_rate": 1.9065331898740216e-05, "loss": 2.1479, "step": 528 }, { "epoch": 0.66, "learning_rate": 1.9056776677929726e-05, "loss": 2.0863, "step": 530 }, { "epoch": 0.66, "learning_rate": 1.9048184418845146e-05, "loss": 2.1404, "step": 532 }, { "epoch": 0.67, "learning_rate": 1.9039555156625072e-05, "loss": 2.0718, "step": 534 }, { "epoch": 0.67, "learning_rate": 1.9030888926559436e-05, "loss": 2.1136, "step": 536 }, { "epoch": 0.67, "learning_rate": 1.902218576408934e-05, "loss": 2.0681, "step": 538 }, { "epoch": 0.67, "learning_rate": 1.9013445704806933e-05, "loss": 2.123, "step": 540 }, { "epoch": 0.68, "learning_rate": 1.900466878445525e-05, "loss": 2.0872, "step": 542 }, { "epoch": 0.68, "learning_rate": 1.8995855038928078e-05, "loss": 2.1093, "step": 544 }, { "epoch": 0.68, "learning_rate": 1.89870045042698e-05, "loss": 2.052, "step": 546 }, { "epoch": 0.68, "learning_rate": 1.8978117216675246e-05, "loss": 2.1257, "step": 548 }, { "epoch": 0.69, "learning_rate": 1.8969193212489557e-05, "loss": 2.0837, "step": 550 }, { "epoch": 0.69, "learning_rate": 1.896023252820802e-05, "loss": 2.097, "step": 552 }, { "epoch": 0.69, "learning_rate": 1.8951235200475942e-05, "loss": 2.0979, "step": 554 }, { "epoch": 0.69, "learning_rate": 1.894220126608847e-05, "loss": 2.1158, "step": 556 }, { "epoch": 0.7, "learning_rate": 1.8933130761990465e-05, "loss": 2.1114, "step": 558 }, { "epoch": 0.7, "learning_rate": 1.8924023725276345e-05, "loss": 2.119, "step": 560 }, { "epoch": 0.7, "learning_rate": 1.8914880193189912e-05, "loss": 2.0998, "step": 562 }, { "epoch": 0.7, "learning_rate": 1.8905700203124248e-05, "loss": 2.0889, "step": 564 }, { "epoch": 0.71, "learning_rate": 1.8896483792621504e-05, "loss": 2.1034, "step": 566 }, { "epoch": 0.71, "learning_rate": 1.8887230999372795e-05, "loss": 2.0658, "step": 568 }, { "epoch": 0.71, "learning_rate": 1.8877941861218018e-05, "loss": 2.1129, "step": 570 }, { "epoch": 0.71, "learning_rate": 1.8868616416145696e-05, "loss": 2.1012, "step": 572 }, { "epoch": 0.72, "learning_rate": 1.8859254702292847e-05, "loss": 2.0656, "step": 574 }, { "epoch": 0.72, "learning_rate": 1.8849856757944804e-05, "loss": 2.1631, "step": 576 }, { "epoch": 0.72, "learning_rate": 1.8840422621535067e-05, "loss": 2.0432, "step": 578 }, { "epoch": 0.72, "learning_rate": 1.8830952331645144e-05, "loss": 2.0827, "step": 580 }, { "epoch": 0.73, "learning_rate": 1.8821445927004406e-05, "loss": 2.0975, "step": 582 }, { "epoch": 0.73, "learning_rate": 1.8811903446489905e-05, "loss": 2.0677, "step": 584 }, { "epoch": 0.73, "learning_rate": 1.8802324929126232e-05, "loss": 2.0849, "step": 586 }, { "epoch": 0.73, "learning_rate": 1.8792710414085356e-05, "loss": 2.1223, "step": 588 }, { "epoch": 0.74, "learning_rate": 1.8783059940686454e-05, "loss": 2.0426, "step": 590 }, { "epoch": 0.74, "learning_rate": 1.8773373548395762e-05, "loss": 2.0512, "step": 592 }, { "epoch": 0.74, "learning_rate": 1.8763651276826417e-05, "loss": 2.0999, "step": 594 }, { "epoch": 0.74, "learning_rate": 1.8753893165738267e-05, "loss": 2.0729, "step": 596 }, { "epoch": 0.75, "learning_rate": 1.8744099255037737e-05, "loss": 2.078, "step": 598 }, { "epoch": 0.75, "learning_rate": 1.873426958477767e-05, "loss": 2.0587, "step": 600 }, { "epoch": 0.75, "learning_rate": 1.8724404195157127e-05, "loss": 2.0845, "step": 602 }, { "epoch": 0.75, "learning_rate": 1.871450312652126e-05, "loss": 2.0577, "step": 604 }, { "epoch": 0.76, "learning_rate": 1.8704566419361137e-05, "loss": 2.0777, "step": 606 }, { "epoch": 0.76, "learning_rate": 1.8694594114313553e-05, "loss": 2.085, "step": 608 }, { "epoch": 0.76, "learning_rate": 1.8684586252160904e-05, "loss": 2.1124, "step": 610 }, { "epoch": 0.76, "learning_rate": 1.8674542873830986e-05, "loss": 2.0271, "step": 612 }, { "epoch": 0.77, "learning_rate": 1.8664464020396844e-05, "loss": 2.0369, "step": 614 }, { "epoch": 0.77, "learning_rate": 1.86543497330766e-05, "loss": 2.0503, "step": 616 }, { "epoch": 0.77, "learning_rate": 1.864420005323329e-05, "loss": 2.0603, "step": 618 }, { "epoch": 0.77, "learning_rate": 1.8634015022374683e-05, "loss": 2.0858, "step": 620 }, { "epoch": 0.78, "learning_rate": 1.8623794682153122e-05, "loss": 2.0202, "step": 622 }, { "epoch": 0.78, "learning_rate": 1.8613539074365353e-05, "loss": 2.0622, "step": 624 }, { "epoch": 0.78, "learning_rate": 1.8603248240952342e-05, "loss": 1.9967, "step": 626 }, { "epoch": 0.78, "learning_rate": 1.859292222399912e-05, "loss": 2.0614, "step": 628 }, { "epoch": 0.79, "learning_rate": 1.8582561065734602e-05, "loss": 2.0637, "step": 630 }, { "epoch": 0.79, "learning_rate": 1.8572164808531417e-05, "loss": 2.0702, "step": 632 }, { "epoch": 0.79, "learning_rate": 1.8561733494905728e-05, "loss": 2.1353, "step": 634 }, { "epoch": 0.79, "learning_rate": 1.855126716751707e-05, "loss": 2.0729, "step": 636 }, { "epoch": 0.8, "learning_rate": 1.854076586916816e-05, "loss": 2.0428, "step": 638 }, { "epoch": 0.8, "learning_rate": 1.8530229642804742e-05, "loss": 1.9822, "step": 640 }, { "epoch": 0.8, "learning_rate": 1.8519658531515397e-05, "loss": 2.0314, "step": 642 }, { "epoch": 0.8, "learning_rate": 1.850905257853136e-05, "loss": 2.0784, "step": 644 }, { "epoch": 0.81, "learning_rate": 1.849841182722637e-05, "loss": 2.0722, "step": 646 }, { "epoch": 0.81, "learning_rate": 1.8487736321116466e-05, "loss": 2.0681, "step": 648 }, { "epoch": 0.81, "learning_rate": 1.8477026103859823e-05, "loss": 2.0748, "step": 650 }, { "epoch": 0.81, "learning_rate": 1.846628121925656e-05, "loss": 2.0481, "step": 652 }, { "epoch": 0.82, "learning_rate": 1.845550171124858e-05, "loss": 2.0585, "step": 654 }, { "epoch": 0.82, "learning_rate": 1.8444687623919388e-05, "loss": 2.0695, "step": 656 }, { "epoch": 0.82, "learning_rate": 1.843383900149388e-05, "loss": 2.068, "step": 658 }, { "epoch": 0.82, "learning_rate": 1.8422955888338207e-05, "loss": 2.0855, "step": 660 }, { "epoch": 0.83, "learning_rate": 1.841203832895956e-05, "loss": 1.9948, "step": 662 }, { "epoch": 0.83, "learning_rate": 1.840108636800601e-05, "loss": 2.0412, "step": 664 }, { "epoch": 0.83, "learning_rate": 1.8390100050266305e-05, "loss": 2.0012, "step": 666 }, { "epoch": 0.83, "learning_rate": 1.8379079420669702e-05, "loss": 2.0683, "step": 668 }, { "epoch": 0.84, "learning_rate": 1.8368024524285784e-05, "loss": 2.0818, "step": 670 }, { "epoch": 0.84, "learning_rate": 1.835693540632426e-05, "loss": 2.0211, "step": 672 }, { "epoch": 0.84, "learning_rate": 1.8345812112134795e-05, "loss": 2.0427, "step": 674 }, { "epoch": 0.84, "learning_rate": 1.833465468720682e-05, "loss": 2.0726, "step": 676 }, { "epoch": 0.85, "learning_rate": 1.832346317716935e-05, "loss": 2.015, "step": 678 }, { "epoch": 0.85, "learning_rate": 1.8312237627790783e-05, "loss": 2.0292, "step": 680 }, { "epoch": 0.85, "learning_rate": 1.8300978084978736e-05, "loss": 2.0428, "step": 682 }, { "epoch": 0.85, "learning_rate": 1.8289684594779835e-05, "loss": 1.9837, "step": 684 }, { "epoch": 0.86, "learning_rate": 1.8278357203379536e-05, "loss": 2.0786, "step": 686 }, { "epoch": 0.86, "learning_rate": 1.8266995957101944e-05, "loss": 2.0286, "step": 688 }, { "epoch": 0.86, "learning_rate": 1.825560090240961e-05, "loss": 2.0611, "step": 690 }, { "epoch": 0.86, "learning_rate": 1.824417208590334e-05, "loss": 2.0859, "step": 692 }, { "epoch": 0.87, "learning_rate": 1.8232709554322027e-05, "loss": 2.0911, "step": 694 }, { "epoch": 0.87, "learning_rate": 1.822121335454243e-05, "loss": 2.0973, "step": 696 }, { "epoch": 0.87, "learning_rate": 1.8209683533579006e-05, "loss": 2.0489, "step": 698 }, { "epoch": 0.87, "learning_rate": 1.81981201385837e-05, "loss": 2.0586, "step": 700 }, { "epoch": 0.88, "learning_rate": 1.8186523216845763e-05, "loss": 2.0523, "step": 702 }, { "epoch": 0.88, "learning_rate": 1.8174892815791563e-05, "loss": 2.07, "step": 704 }, { "epoch": 0.88, "learning_rate": 1.816322898298437e-05, "loss": 2.0633, "step": 706 }, { "epoch": 0.88, "learning_rate": 1.8151531766124186e-05, "loss": 2.0214, "step": 708 }, { "epoch": 0.89, "learning_rate": 1.8139801213047538e-05, "loss": 2.0275, "step": 710 }, { "epoch": 0.89, "learning_rate": 1.812803737172728e-05, "loss": 2.0443, "step": 712 }, { "epoch": 0.89, "learning_rate": 1.81162402902724e-05, "loss": 2.0277, "step": 714 }, { "epoch": 0.89, "learning_rate": 1.8104410016927828e-05, "loss": 2.0317, "step": 716 }, { "epoch": 0.9, "learning_rate": 1.8092546600074236e-05, "loss": 2.0425, "step": 718 }, { "epoch": 0.9, "learning_rate": 1.8080650088227824e-05, "loss": 2.0706, "step": 720 }, { "epoch": 0.9, "learning_rate": 1.8068720530040157e-05, "loss": 2.0114, "step": 722 }, { "epoch": 0.9, "learning_rate": 1.805675797429793e-05, "loss": 1.9992, "step": 724 }, { "epoch": 0.91, "learning_rate": 1.804476246992279e-05, "loss": 2.0038, "step": 726 }, { "epoch": 0.91, "learning_rate": 1.8032734065971125e-05, "loss": 2.0611, "step": 728 }, { "epoch": 0.91, "learning_rate": 1.8020672811633874e-05, "loss": 2.0218, "step": 730 }, { "epoch": 0.91, "learning_rate": 1.800857875623632e-05, "loss": 2.0177, "step": 732 }, { "epoch": 0.92, "learning_rate": 1.799645194923788e-05, "loss": 2.0428, "step": 734 }, { "epoch": 0.92, "learning_rate": 1.7984292440231915e-05, "loss": 2.0229, "step": 736 }, { "epoch": 0.92, "learning_rate": 1.7972100278945527e-05, "loss": 2.0203, "step": 738 }, { "epoch": 0.92, "learning_rate": 1.795987551523935e-05, "loss": 1.9437, "step": 740 }, { "epoch": 0.93, "learning_rate": 1.794761819910734e-05, "loss": 1.9968, "step": 742 }, { "epoch": 0.93, "learning_rate": 1.7935328380676587e-05, "loss": 1.9886, "step": 744 }, { "epoch": 0.93, "learning_rate": 1.79230061102071e-05, "loss": 2.0842, "step": 746 }, { "epoch": 0.93, "learning_rate": 1.79106514380916e-05, "loss": 2.0212, "step": 748 }, { "epoch": 0.94, "learning_rate": 1.7898264414855314e-05, "loss": 2.0567, "step": 750 }, { "epoch": 0.94, "learning_rate": 1.7885845091155786e-05, "loss": 2.0346, "step": 752 }, { "epoch": 0.94, "learning_rate": 1.787339351778263e-05, "loss": 1.9744, "step": 754 }, { "epoch": 0.94, "learning_rate": 1.786090974565737e-05, "loss": 2.0173, "step": 756 }, { "epoch": 0.95, "learning_rate": 1.784839382583319e-05, "loss": 2.0722, "step": 758 }, { "epoch": 0.95, "learning_rate": 1.783584580949477e-05, "loss": 1.9932, "step": 760 }, { "epoch": 0.95, "learning_rate": 1.782326574795802e-05, "loss": 2.0283, "step": 762 }, { "epoch": 0.95, "learning_rate": 1.781065369266992e-05, "loss": 2.0102, "step": 764 }, { "epoch": 0.96, "learning_rate": 1.7798009695208288e-05, "loss": 1.9523, "step": 766 }, { "epoch": 0.96, "learning_rate": 1.7785333807281567e-05, "loss": 2.0264, "step": 768 }, { "epoch": 0.96, "learning_rate": 1.7772626080728624e-05, "loss": 1.9718, "step": 770 }, { "epoch": 0.96, "learning_rate": 1.775988656751852e-05, "loss": 2.0733, "step": 772 }, { "epoch": 0.97, "learning_rate": 1.774711531975033e-05, "loss": 2.0576, "step": 774 }, { "epoch": 0.97, "learning_rate": 1.7734312389652893e-05, "loss": 2.0249, "step": 776 }, { "epoch": 0.97, "learning_rate": 1.7721477829584617e-05, "loss": 1.9813, "step": 778 }, { "epoch": 0.97, "learning_rate": 1.7708611692033265e-05, "loss": 1.9984, "step": 780 }, { "epoch": 0.98, "learning_rate": 1.769571402961575e-05, "loss": 2.0084, "step": 782 }, { "epoch": 0.98, "learning_rate": 1.768278489507788e-05, "loss": 1.9982, "step": 784 }, { "epoch": 0.98, "learning_rate": 1.7669824341294203e-05, "loss": 1.9854, "step": 786 }, { "epoch": 0.98, "learning_rate": 1.765683242126773e-05, "loss": 2.0392, "step": 788 }, { "epoch": 0.99, "learning_rate": 1.7643809188129765e-05, "loss": 2.0015, "step": 790 }, { "epoch": 0.99, "learning_rate": 1.763075469513966e-05, "loss": 2.0244, "step": 792 }, { "epoch": 0.99, "learning_rate": 1.761766899568461e-05, "loss": 2.0581, "step": 794 }, { "epoch": 0.99, "learning_rate": 1.7604552143279424e-05, "loss": 1.9923, "step": 796 }, { "epoch": 1.0, "learning_rate": 1.759140419156633e-05, "loss": 1.911, "step": 798 }, { "epoch": 1.0, "learning_rate": 1.7578225194314717e-05, "loss": 2.0443, "step": 800 }, { "epoch": 1.0, "learning_rate": 1.7565015205420946e-05, "loss": 1.9745, "step": 802 }, { "epoch": 1.0, "learning_rate": 1.7551774278908128e-05, "loss": 1.9708, "step": 804 }, { "epoch": 1.01, "learning_rate": 1.7538502468925887e-05, "loss": 2.009, "step": 806 }, { "epoch": 1.01, "learning_rate": 1.7525199829750145e-05, "loss": 1.9965, "step": 808 }, { "epoch": 1.01, "learning_rate": 1.7511866415782908e-05, "loss": 2.0283, "step": 810 }, { "epoch": 1.01, "learning_rate": 1.749850228155203e-05, "loss": 2.0294, "step": 812 }, { "epoch": 1.02, "learning_rate": 1.7485107481711014e-05, "loss": 2.0089, "step": 814 }, { "epoch": 1.02, "learning_rate": 1.747168207103875e-05, "loss": 2.0058, "step": 816 }, { "epoch": 1.02, "learning_rate": 1.7458226104439324e-05, "loss": 2.0208, "step": 818 }, { "epoch": 1.02, "learning_rate": 1.7444739636941786e-05, "loss": 1.945, "step": 820 }, { "epoch": 1.03, "learning_rate": 1.7431222723699916e-05, "loss": 2.0033, "step": 822 }, { "epoch": 1.03, "learning_rate": 1.7417675419992003e-05, "loss": 1.9301, "step": 824 }, { "epoch": 1.03, "learning_rate": 1.7404097781220625e-05, "loss": 1.9678, "step": 826 }, { "epoch": 1.03, "learning_rate": 1.739048986291241e-05, "loss": 2.0263, "step": 828 }, { "epoch": 1.04, "learning_rate": 1.7376851720717826e-05, "loss": 1.9615, "step": 830 }, { "epoch": 1.04, "learning_rate": 1.7363183410410933e-05, "loss": 2.0027, "step": 832 }, { "epoch": 1.04, "learning_rate": 1.734948498788917e-05, "loss": 1.9357, "step": 834 }, { "epoch": 1.04, "learning_rate": 1.7335756509173128e-05, "loss": 1.9003, "step": 836 }, { "epoch": 1.05, "learning_rate": 1.7321998030406303e-05, "loss": 1.9607, "step": 838 }, { "epoch": 1.05, "learning_rate": 1.730820960785488e-05, "loss": 1.9667, "step": 840 }, { "epoch": 1.05, "learning_rate": 1.729439129790752e-05, "loss": 1.9807, "step": 842 }, { "epoch": 1.05, "learning_rate": 1.728054315707508e-05, "loss": 1.8775, "step": 844 }, { "epoch": 1.06, "learning_rate": 1.726666524199043e-05, "loss": 1.9992, "step": 846 }, { "epoch": 1.06, "learning_rate": 1.7252757609408216e-05, "loss": 2.0187, "step": 848 }, { "epoch": 1.06, "learning_rate": 1.7238820316204582e-05, "loss": 1.9714, "step": 850 }, { "epoch": 1.06, "learning_rate": 1.7224853419377e-05, "loss": 1.9563, "step": 852 }, { "epoch": 1.07, "learning_rate": 1.7210856976043995e-05, "loss": 1.9302, "step": 854 }, { "epoch": 1.07, "learning_rate": 1.719683104344493e-05, "loss": 1.9393, "step": 856 }, { "epoch": 1.07, "learning_rate": 1.718277567893976e-05, "loss": 1.9751, "step": 858 }, { "epoch": 1.07, "learning_rate": 1.7168690940008813e-05, "loss": 1.9714, "step": 860 }, { "epoch": 1.07, "learning_rate": 1.7154576884252535e-05, "loss": 1.9784, "step": 862 }, { "epoch": 1.08, "learning_rate": 1.7140433569391275e-05, "loss": 1.9815, "step": 864 }, { "epoch": 1.08, "learning_rate": 1.7126261053265025e-05, "loss": 1.9715, "step": 866 }, { "epoch": 1.08, "learning_rate": 1.7112059393833217e-05, "loss": 2.0431, "step": 868 }, { "epoch": 1.08, "learning_rate": 1.709782864917445e-05, "loss": 1.9463, "step": 870 }, { "epoch": 1.09, "learning_rate": 1.7083568877486278e-05, "loss": 1.964, "step": 872 }, { "epoch": 1.09, "learning_rate": 1.7069280137084955e-05, "loss": 1.9445, "step": 874 }, { "epoch": 1.09, "learning_rate": 1.7054962486405212e-05, "loss": 1.9913, "step": 876 }, { "epoch": 1.09, "learning_rate": 1.704061598400001e-05, "loss": 1.923, "step": 878 }, { "epoch": 1.1, "learning_rate": 1.7026240688540295e-05, "loss": 1.9334, "step": 880 }, { "epoch": 1.1, "learning_rate": 1.7011836658814766e-05, "loss": 1.944, "step": 882 }, { "epoch": 1.1, "learning_rate": 1.699740395372964e-05, "loss": 1.9318, "step": 884 }, { "epoch": 1.1, "learning_rate": 1.6982942632308396e-05, "loss": 1.9421, "step": 886 }, { "epoch": 1.11, "learning_rate": 1.6968452753691543e-05, "loss": 1.9397, "step": 888 }, { "epoch": 1.11, "learning_rate": 1.6953934377136375e-05, "loss": 1.9486, "step": 890 }, { "epoch": 1.11, "learning_rate": 1.6939387562016735e-05, "loss": 2.0136, "step": 892 }, { "epoch": 1.11, "learning_rate": 1.6924812367822764e-05, "loss": 1.9234, "step": 894 }, { "epoch": 1.12, "learning_rate": 1.691020885416066e-05, "loss": 1.901, "step": 896 }, { "epoch": 1.12, "learning_rate": 1.689557708075244e-05, "loss": 1.9517, "step": 898 }, { "epoch": 1.12, "learning_rate": 1.688091710743568e-05, "loss": 1.9282, "step": 900 }, { "epoch": 1.12, "learning_rate": 1.68662289941633e-05, "loss": 2.0253, "step": 902 }, { "epoch": 1.13, "learning_rate": 1.6851512801003282e-05, "loss": 1.9939, "step": 904 }, { "epoch": 1.13, "learning_rate": 1.6836768588138452e-05, "loss": 1.9548, "step": 906 }, { "epoch": 1.13, "learning_rate": 1.6821996415866223e-05, "loss": 1.9861, "step": 908 }, { "epoch": 1.13, "learning_rate": 1.6807196344598346e-05, "loss": 1.9772, "step": 910 }, { "epoch": 1.14, "learning_rate": 1.6792368434860672e-05, "loss": 1.958, "step": 912 }, { "epoch": 1.14, "learning_rate": 1.67775127472929e-05, "loss": 1.9588, "step": 914 }, { "epoch": 1.14, "learning_rate": 1.676262934264832e-05, "loss": 1.9887, "step": 916 }, { "epoch": 1.14, "learning_rate": 1.6747718281793582e-05, "loss": 1.9106, "step": 918 }, { "epoch": 1.15, "learning_rate": 1.673277962570843e-05, "loss": 1.914, "step": 920 }, { "epoch": 1.15, "learning_rate": 1.6717813435485473e-05, "loss": 1.9406, "step": 922 }, { "epoch": 1.15, "learning_rate": 1.6702819772329904e-05, "loss": 1.9185, "step": 924 }, { "epoch": 1.15, "learning_rate": 1.668779869755928e-05, "loss": 1.9934, "step": 926 }, { "epoch": 1.16, "learning_rate": 1.6672750272603267e-05, "loss": 1.9559, "step": 928 }, { "epoch": 1.16, "learning_rate": 1.665767455900336e-05, "loss": 2.0021, "step": 930 }, { "epoch": 1.16, "learning_rate": 1.6642571618412673e-05, "loss": 1.9273, "step": 932 }, { "epoch": 1.16, "learning_rate": 1.6627441512595654e-05, "loss": 1.9716, "step": 934 }, { "epoch": 1.17, "learning_rate": 1.6612284303427852e-05, "loss": 1.9463, "step": 936 }, { "epoch": 1.17, "learning_rate": 1.6597100052895653e-05, "loss": 1.8938, "step": 938 }, { "epoch": 1.17, "learning_rate": 1.658188882309604e-05, "loss": 1.9758, "step": 940 }, { "epoch": 1.17, "learning_rate": 1.6566650676236307e-05, "loss": 1.9275, "step": 942 }, { "epoch": 1.18, "learning_rate": 1.655138567463385e-05, "loss": 1.9751, "step": 944 }, { "epoch": 1.18, "learning_rate": 1.6536093880715876e-05, "loss": 1.9754, "step": 946 }, { "epoch": 1.18, "learning_rate": 1.6520775357019174e-05, "loss": 1.968, "step": 948 }, { "epoch": 1.18, "learning_rate": 1.6505430166189828e-05, "loss": 1.9585, "step": 950 }, { "epoch": 1.19, "learning_rate": 1.6490058370982994e-05, "loss": 1.9287, "step": 952 }, { "epoch": 1.19, "learning_rate": 1.6474660034262622e-05, "loss": 1.9728, "step": 954 }, { "epoch": 1.19, "learning_rate": 1.6459235219001204e-05, "loss": 1.978, "step": 956 }, { "epoch": 1.19, "learning_rate": 1.6443783988279523e-05, "loss": 1.9451, "step": 958 }, { "epoch": 1.2, "learning_rate": 1.6428306405286383e-05, "loss": 1.9673, "step": 960 }, { "epoch": 1.2, "learning_rate": 1.6412802533318363e-05, "loss": 1.9358, "step": 962 }, { "epoch": 1.2, "learning_rate": 1.639727243577955e-05, "loss": 1.9624, "step": 964 }, { "epoch": 1.2, "learning_rate": 1.6381716176181288e-05, "loss": 1.9, "step": 966 }, { "epoch": 1.21, "learning_rate": 1.6366133818141893e-05, "loss": 1.9617, "step": 968 }, { "epoch": 1.21, "learning_rate": 1.6350525425386438e-05, "loss": 1.9358, "step": 970 }, { "epoch": 1.21, "learning_rate": 1.6334891061746453e-05, "loss": 1.9565, "step": 972 }, { "epoch": 1.21, "learning_rate": 1.6319230791159676e-05, "loss": 1.9245, "step": 974 }, { "epoch": 1.22, "learning_rate": 1.63035446776698e-05, "loss": 1.939, "step": 976 }, { "epoch": 1.22, "learning_rate": 1.6287832785426196e-05, "loss": 1.9429, "step": 978 }, { "epoch": 1.22, "learning_rate": 1.627209517868367e-05, "loss": 1.9298, "step": 980 }, { "epoch": 1.22, "learning_rate": 1.625633192180218e-05, "loss": 1.9545, "step": 982 }, { "epoch": 1.23, "learning_rate": 1.6240543079246586e-05, "loss": 1.9513, "step": 984 }, { "epoch": 1.23, "learning_rate": 1.6224728715586374e-05, "loss": 1.9926, "step": 986 }, { "epoch": 1.23, "learning_rate": 1.620888889549542e-05, "loss": 1.9601, "step": 988 }, { "epoch": 1.23, "learning_rate": 1.6193023683751682e-05, "loss": 1.9468, "step": 990 }, { "epoch": 1.24, "learning_rate": 1.617713314523697e-05, "loss": 1.9289, "step": 992 }, { "epoch": 1.24, "learning_rate": 1.616121734493668e-05, "loss": 1.9203, "step": 994 }, { "epoch": 1.24, "learning_rate": 1.6145276347939495e-05, "loss": 1.9468, "step": 996 }, { "epoch": 1.24, "learning_rate": 1.612931021943716e-05, "loss": 1.9861, "step": 998 }, { "epoch": 1.25, "learning_rate": 1.6113319024724186e-05, "loss": 1.9802, "step": 1000 }, { "epoch": 1.25, "learning_rate": 1.60973028291976e-05, "loss": 1.9265, "step": 1002 }, { "epoch": 1.25, "learning_rate": 1.6081261698356674e-05, "loss": 1.9757, "step": 1004 }, { "epoch": 1.25, "learning_rate": 1.6065195697802645e-05, "loss": 1.9665, "step": 1006 }, { "epoch": 1.26, "learning_rate": 1.604910489323846e-05, "loss": 1.9503, "step": 1008 }, { "epoch": 1.26, "learning_rate": 1.603298935046851e-05, "loss": 1.9481, "step": 1010 }, { "epoch": 1.26, "learning_rate": 1.601684913539835e-05, "loss": 1.9511, "step": 1012 }, { "epoch": 1.26, "learning_rate": 1.6000684314034426e-05, "loss": 1.9628, "step": 1014 }, { "epoch": 1.27, "learning_rate": 1.598449495248383e-05, "loss": 1.8868, "step": 1016 }, { "epoch": 1.27, "learning_rate": 1.5968281116954e-05, "loss": 1.9337, "step": 1018 }, { "epoch": 1.27, "learning_rate": 1.5952042873752463e-05, "loss": 1.9516, "step": 1020 }, { "epoch": 1.27, "learning_rate": 1.5935780289286566e-05, "loss": 1.967, "step": 1022 }, { "epoch": 1.28, "learning_rate": 1.59194934300632e-05, "loss": 1.9356, "step": 1024 }, { "epoch": 1.28, "learning_rate": 1.590318236268853e-05, "loss": 1.9748, "step": 1026 }, { "epoch": 1.28, "learning_rate": 1.5886847153867723e-05, "loss": 1.9188, "step": 1028 }, { "epoch": 1.28, "learning_rate": 1.587048787040467e-05, "loss": 1.9968, "step": 1030 }, { "epoch": 1.29, "learning_rate": 1.585410457920172e-05, "loss": 1.9988, "step": 1032 }, { "epoch": 1.29, "learning_rate": 1.5837697347259403e-05, "loss": 1.8772, "step": 1034 }, { "epoch": 1.29, "learning_rate": 1.582126624167615e-05, "loss": 1.9178, "step": 1036 }, { "epoch": 1.29, "learning_rate": 1.5804811329648037e-05, "loss": 1.9256, "step": 1038 }, { "epoch": 1.3, "learning_rate": 1.5788332678468488e-05, "loss": 1.8819, "step": 1040 }, { "epoch": 1.3, "learning_rate": 1.5780084471752673e-05, "loss": 1.9045, "step": 1042 }, { "epoch": 1.3, "learning_rate": 1.576357033823344e-05, "loss": 1.9067, "step": 1044 }, { "epoch": 1.3, "learning_rate": 1.5747032634220474e-05, "loss": 1.9169, "step": 1046 }, { "epoch": 1.31, "learning_rate": 1.5730471427345783e-05, "loss": 1.9107, "step": 1048 }, { "epoch": 1.31, "learning_rate": 1.5713886785337497e-05, "loss": 1.9408, "step": 1050 }, { "epoch": 1.31, "learning_rate": 1.5697278776019578e-05, "loss": 1.8719, "step": 1052 }, { "epoch": 1.31, "learning_rate": 1.568064746731156e-05, "loss": 1.8867, "step": 1054 }, { "epoch": 1.32, "learning_rate": 1.5663992927228254e-05, "loss": 1.8921, "step": 1056 }, { "epoch": 1.32, "learning_rate": 1.5647315223879474e-05, "loss": 1.8821, "step": 1058 }, { "epoch": 1.32, "learning_rate": 1.5630614425469776e-05, "loss": 2.0148, "step": 1060 }, { "epoch": 1.32, "learning_rate": 1.5613890600298147e-05, "loss": 1.9553, "step": 1062 }, { "epoch": 1.33, "learning_rate": 1.5597143816757758e-05, "loss": 1.8958, "step": 1064 }, { "epoch": 1.33, "learning_rate": 1.558037414333566e-05, "loss": 1.9025, "step": 1066 }, { "epoch": 1.33, "learning_rate": 1.5563581648612517e-05, "loss": 1.9068, "step": 1068 }, { "epoch": 1.33, "learning_rate": 1.5546766401262328e-05, "loss": 1.8626, "step": 1070 }, { "epoch": 1.34, "learning_rate": 1.5529928470052123e-05, "loss": 1.9218, "step": 1072 }, { "epoch": 1.34, "learning_rate": 1.5513067923841724e-05, "loss": 1.892, "step": 1074 }, { "epoch": 1.34, "learning_rate": 1.549618483158342e-05, "loss": 1.9211, "step": 1076 }, { "epoch": 1.34, "learning_rate": 1.5479279262321708e-05, "loss": 1.9199, "step": 1078 }, { "epoch": 1.35, "learning_rate": 1.5462351285193004e-05, "loss": 1.911, "step": 1080 }, { "epoch": 1.35, "learning_rate": 1.5445400969425372e-05, "loss": 1.9338, "step": 1082 }, { "epoch": 1.35, "learning_rate": 1.5428428384338224e-05, "loss": 1.9059, "step": 1084 }, { "epoch": 1.35, "learning_rate": 1.5411433599342038e-05, "loss": 1.9341, "step": 1086 }, { "epoch": 1.36, "learning_rate": 1.5394416683938095e-05, "loss": 1.9271, "step": 1088 }, { "epoch": 1.36, "learning_rate": 1.537737770771817e-05, "loss": 1.9101, "step": 1090 }, { "epoch": 1.36, "learning_rate": 1.5360316740364248e-05, "loss": 1.8771, "step": 1092 }, { "epoch": 1.36, "learning_rate": 1.5343233851648273e-05, "loss": 1.924, "step": 1094 }, { "epoch": 1.37, "learning_rate": 1.5326129111431814e-05, "loss": 1.9237, "step": 1096 }, { "epoch": 1.37, "learning_rate": 1.530900258966582e-05, "loss": 1.9499, "step": 1098 }, { "epoch": 1.37, "learning_rate": 1.5291854356390304e-05, "loss": 1.8753, "step": 1100 }, { "epoch": 1.37, "learning_rate": 1.5274684481734076e-05, "loss": 1.8888, "step": 1102 }, { "epoch": 1.38, "learning_rate": 1.525749303591445e-05, "loss": 1.9085, "step": 1104 }, { "epoch": 1.38, "learning_rate": 1.5240280089236955e-05, "loss": 1.9355, "step": 1106 }, { "epoch": 1.38, "learning_rate": 1.5223045712095052e-05, "loss": 1.9223, "step": 1108 }, { "epoch": 1.38, "learning_rate": 1.5205789974969836e-05, "loss": 1.9465, "step": 1110 }, { "epoch": 1.39, "learning_rate": 1.5188512948429765e-05, "loss": 1.9348, "step": 1112 }, { "epoch": 1.39, "learning_rate": 1.5171214703130359e-05, "loss": 1.9093, "step": 1114 }, { "epoch": 1.39, "learning_rate": 1.5153895309813903e-05, "loss": 1.8418, "step": 1116 }, { "epoch": 1.39, "learning_rate": 1.5136554839309188e-05, "loss": 1.8449, "step": 1118 }, { "epoch": 1.4, "learning_rate": 1.5119193362531177e-05, "loss": 1.9036, "step": 1120 }, { "epoch": 1.4, "learning_rate": 1.510181095048076e-05, "loss": 1.8714, "step": 1122 }, { "epoch": 1.4, "learning_rate": 1.5084407674244435e-05, "loss": 1.9025, "step": 1124 }, { "epoch": 1.4, "learning_rate": 1.5066983604994021e-05, "loss": 1.8818, "step": 1126 }, { "epoch": 1.41, "learning_rate": 1.5049538813986385e-05, "loss": 1.887, "step": 1128 }, { "epoch": 1.41, "learning_rate": 1.5032073372563118e-05, "loss": 1.9412, "step": 1130 }, { "epoch": 1.41, "learning_rate": 1.501458735215028e-05, "loss": 1.9445, "step": 1132 }, { "epoch": 1.41, "learning_rate": 1.4997080824258084e-05, "loss": 1.9015, "step": 1134 }, { "epoch": 1.42, "learning_rate": 1.49795538604806e-05, "loss": 1.928, "step": 1136 }, { "epoch": 1.42, "learning_rate": 1.496200653249549e-05, "loss": 1.9313, "step": 1138 }, { "epoch": 1.42, "learning_rate": 1.494443891206368e-05, "loss": 1.9288, "step": 1140 }, { "epoch": 1.42, "learning_rate": 1.4926851071029087e-05, "loss": 1.8663, "step": 1142 }, { "epoch": 1.43, "learning_rate": 1.4909243081318335e-05, "loss": 1.9197, "step": 1144 }, { "epoch": 1.43, "learning_rate": 1.4891615014940429e-05, "loss": 1.8568, "step": 1146 }, { "epoch": 1.43, "learning_rate": 1.487396694398649e-05, "loss": 1.8997, "step": 1148 }, { "epoch": 1.43, "learning_rate": 1.4856298940629446e-05, "loss": 1.9245, "step": 1150 }, { "epoch": 1.44, "learning_rate": 1.483861107712374e-05, "loss": 1.8979, "step": 1152 }, { "epoch": 1.44, "learning_rate": 1.4820903425805032e-05, "loss": 1.904, "step": 1154 }, { "epoch": 1.44, "learning_rate": 1.4803176059089905e-05, "loss": 1.9025, "step": 1156 }, { "epoch": 1.44, "learning_rate": 1.4785429049475579e-05, "loss": 1.8939, "step": 1158 }, { "epoch": 1.45, "learning_rate": 1.4767662469539592e-05, "loss": 1.8951, "step": 1160 }, { "epoch": 1.45, "learning_rate": 1.4749876391939526e-05, "loss": 1.8756, "step": 1162 }, { "epoch": 1.45, "learning_rate": 1.4732070889412693e-05, "loss": 1.8907, "step": 1164 }, { "epoch": 1.45, "learning_rate": 1.471424603477585e-05, "loss": 1.9582, "step": 1166 }, { "epoch": 1.46, "learning_rate": 1.469640190092489e-05, "loss": 1.9328, "step": 1168 }, { "epoch": 1.46, "learning_rate": 1.4678538560834552e-05, "loss": 1.8998, "step": 1170 }, { "epoch": 1.46, "learning_rate": 1.4660656087558128e-05, "loss": 1.9282, "step": 1172 }, { "epoch": 1.46, "learning_rate": 1.4642754554227141e-05, "loss": 1.8567, "step": 1174 }, { "epoch": 1.47, "learning_rate": 1.4624834034051072e-05, "loss": 1.8914, "step": 1176 }, { "epoch": 1.47, "learning_rate": 1.4606894600317047e-05, "loss": 1.9352, "step": 1178 }, { "epoch": 1.47, "learning_rate": 1.4588936326389544e-05, "loss": 1.8902, "step": 1180 }, { "epoch": 1.47, "learning_rate": 1.4570959285710088e-05, "loss": 1.8921, "step": 1182 }, { "epoch": 1.48, "learning_rate": 1.4552963551796942e-05, "loss": 1.9311, "step": 1184 }, { "epoch": 1.48, "learning_rate": 1.4534949198244828e-05, "loss": 1.9264, "step": 1186 }, { "epoch": 1.48, "learning_rate": 1.4516916298724607e-05, "loss": 1.9239, "step": 1188 }, { "epoch": 1.48, "learning_rate": 1.4498864926982996e-05, "loss": 1.8674, "step": 1190 }, { "epoch": 1.49, "learning_rate": 1.4480795156842238e-05, "loss": 1.8841, "step": 1192 }, { "epoch": 1.49, "learning_rate": 1.4462707062199834e-05, "loss": 1.8944, "step": 1194 }, { "epoch": 1.49, "learning_rate": 1.4444600717028214e-05, "loss": 1.8848, "step": 1196 }, { "epoch": 1.49, "learning_rate": 1.4426476195374449e-05, "loss": 1.9242, "step": 1198 }, { "epoch": 1.5, "learning_rate": 1.4408333571359943e-05, "loss": 1.8779, "step": 1200 }, { "epoch": 1.5, "learning_rate": 1.4390172919180127e-05, "loss": 1.8901, "step": 1202 }, { "epoch": 1.5, "learning_rate": 1.4371994313104165e-05, "loss": 1.8814, "step": 1204 }, { "epoch": 1.5, "learning_rate": 1.4353797827474643e-05, "loss": 1.9198, "step": 1206 }, { "epoch": 1.51, "learning_rate": 1.4335583536707267e-05, "loss": 1.8793, "step": 1208 }, { "epoch": 1.51, "learning_rate": 1.4317351515290558e-05, "loss": 1.926, "step": 1210 }, { "epoch": 1.51, "learning_rate": 1.4299101837785542e-05, "loss": 1.8822, "step": 1212 }, { "epoch": 1.51, "learning_rate": 1.428083457882546e-05, "loss": 1.8991, "step": 1214 }, { "epoch": 1.52, "learning_rate": 1.426254981311545e-05, "loss": 1.9236, "step": 1216 }, { "epoch": 1.52, "learning_rate": 1.424424761543224e-05, "loss": 1.8481, "step": 1218 }, { "epoch": 1.52, "learning_rate": 1.4225928060623858e-05, "loss": 1.8402, "step": 1220 }, { "epoch": 1.52, "learning_rate": 1.4207591223609298e-05, "loss": 1.8852, "step": 1222 }, { "epoch": 1.53, "learning_rate": 1.4189237179378252e-05, "loss": 1.8733, "step": 1224 }, { "epoch": 1.53, "learning_rate": 1.4170866002990764e-05, "loss": 1.9203, "step": 1226 }, { "epoch": 1.53, "learning_rate": 1.415247776957695e-05, "loss": 1.8417, "step": 1228 }, { "epoch": 1.53, "learning_rate": 1.4134072554336685e-05, "loss": 1.8471, "step": 1230 }, { "epoch": 1.54, "learning_rate": 1.4115650432539281e-05, "loss": 1.9266, "step": 1232 }, { "epoch": 1.54, "learning_rate": 1.4097211479523198e-05, "loss": 1.8342, "step": 1234 }, { "epoch": 1.54, "learning_rate": 1.407875577069573e-05, "loss": 1.8791, "step": 1236 }, { "epoch": 1.54, "learning_rate": 1.4060283381532686e-05, "loss": 1.8806, "step": 1238 }, { "epoch": 1.55, "learning_rate": 1.4041794387578103e-05, "loss": 1.9552, "step": 1240 }, { "epoch": 1.55, "learning_rate": 1.4023288864443915e-05, "loss": 1.8943, "step": 1242 }, { "epoch": 1.55, "learning_rate": 1.4004766887809658e-05, "loss": 1.923, "step": 1244 }, { "epoch": 1.55, "learning_rate": 1.3986228533422151e-05, "loss": 1.831, "step": 1246 }, { "epoch": 1.56, "learning_rate": 1.3967673877095196e-05, "loss": 1.891, "step": 1248 }, { "epoch": 1.56, "learning_rate": 1.3949102994709256e-05, "loss": 1.8571, "step": 1250 }, { "epoch": 1.56, "learning_rate": 1.393051596221116e-05, "loss": 1.8803, "step": 1252 }, { "epoch": 1.56, "learning_rate": 1.3911912855613776e-05, "loss": 1.9329, "step": 1254 }, { "epoch": 1.57, "learning_rate": 1.3893293750995715e-05, "loss": 1.8711, "step": 1256 }, { "epoch": 1.57, "learning_rate": 1.3874658724501013e-05, "loss": 1.8458, "step": 1258 }, { "epoch": 1.57, "learning_rate": 1.3856007852338809e-05, "loss": 1.8371, "step": 1260 }, { "epoch": 1.57, "learning_rate": 1.3837341210783052e-05, "loss": 1.8411, "step": 1262 }, { "epoch": 1.58, "learning_rate": 1.3818658876172189e-05, "loss": 1.8549, "step": 1264 }, { "epoch": 1.58, "learning_rate": 1.3799960924908823e-05, "loss": 1.8738, "step": 1266 }, { "epoch": 1.58, "learning_rate": 1.3781247433459447e-05, "loss": 1.8871, "step": 1268 }, { "epoch": 1.58, "learning_rate": 1.3762518478354086e-05, "loss": 1.8488, "step": 1270 }, { "epoch": 1.59, "learning_rate": 1.374377413618602e-05, "loss": 1.8801, "step": 1272 }, { "epoch": 1.59, "learning_rate": 1.3725014483611443e-05, "loss": 1.8704, "step": 1274 }, { "epoch": 1.59, "learning_rate": 1.3706239597349172e-05, "loss": 1.8813, "step": 1276 }, { "epoch": 1.59, "learning_rate": 1.368744955418032e-05, "loss": 1.8745, "step": 1278 }, { "epoch": 1.6, "learning_rate": 1.3668644430947977e-05, "loss": 1.8702, "step": 1280 }, { "epoch": 1.6, "learning_rate": 1.3649824304556918e-05, "loss": 1.9195, "step": 1282 }, { "epoch": 1.6, "learning_rate": 1.3630989251973263e-05, "loss": 1.8924, "step": 1284 }, { "epoch": 1.6, "learning_rate": 1.3612139350224181e-05, "loss": 1.8678, "step": 1286 }, { "epoch": 1.61, "learning_rate": 1.3593274676397563e-05, "loss": 1.871, "step": 1288 }, { "epoch": 1.61, "learning_rate": 1.3574395307641712e-05, "loss": 1.9188, "step": 1290 }, { "epoch": 1.61, "learning_rate": 1.3555501321165033e-05, "loss": 1.9325, "step": 1292 }, { "epoch": 1.61, "learning_rate": 1.3536592794235696e-05, "loss": 1.8945, "step": 1294 }, { "epoch": 1.62, "learning_rate": 1.3517669804181357e-05, "loss": 1.9346, "step": 1296 }, { "epoch": 1.62, "learning_rate": 1.34987324283888e-05, "loss": 1.8379, "step": 1298 }, { "epoch": 1.62, "learning_rate": 1.3479780744303647e-05, "loss": 1.8548, "step": 1300 }, { "epoch": 1.62, "learning_rate": 1.3460814829430042e-05, "loss": 1.876, "step": 1302 }, { "epoch": 1.63, "learning_rate": 1.3441834761330315e-05, "loss": 1.8611, "step": 1304 }, { "epoch": 1.63, "learning_rate": 1.3422840617624691e-05, "loss": 1.901, "step": 1306 }, { "epoch": 1.63, "learning_rate": 1.3403832475990938e-05, "loss": 1.8878, "step": 1308 }, { "epoch": 1.63, "learning_rate": 1.3384810414164088e-05, "loss": 1.8961, "step": 1310 }, { "epoch": 1.64, "learning_rate": 1.3365774509936097e-05, "loss": 1.848, "step": 1312 }, { "epoch": 1.64, "learning_rate": 1.3346724841155516e-05, "loss": 1.9287, "step": 1314 }, { "epoch": 1.64, "learning_rate": 1.3327661485727204e-05, "loss": 1.8832, "step": 1316 }, { "epoch": 1.64, "learning_rate": 1.3308584521611984e-05, "loss": 1.8757, "step": 1318 }, { "epoch": 1.65, "learning_rate": 1.3289494026826337e-05, "loss": 1.9066, "step": 1320 }, { "epoch": 1.65, "learning_rate": 1.3270390079442082e-05, "loss": 1.8535, "step": 1322 }, { "epoch": 1.65, "learning_rate": 1.3251272757586034e-05, "loss": 1.9422, "step": 1324 }, { "epoch": 1.65, "learning_rate": 1.3232142139439729e-05, "loss": 1.905, "step": 1326 }, { "epoch": 1.66, "learning_rate": 1.3212998303239059e-05, "loss": 1.9303, "step": 1328 }, { "epoch": 1.66, "learning_rate": 1.319384132727399e-05, "loss": 1.8454, "step": 1330 }, { "epoch": 1.66, "learning_rate": 1.3174671289888205e-05, "loss": 1.8755, "step": 1332 }, { "epoch": 1.66, "learning_rate": 1.3155488269478816e-05, "loss": 1.9152, "step": 1334 }, { "epoch": 1.67, "learning_rate": 1.3136292344496026e-05, "loss": 1.8474, "step": 1336 }, { "epoch": 1.67, "learning_rate": 1.3117083593442815e-05, "loss": 1.8779, "step": 1338 }, { "epoch": 1.67, "learning_rate": 1.3097862094874607e-05, "loss": 1.8939, "step": 1340 }, { "epoch": 1.67, "learning_rate": 1.3078627927398968e-05, "loss": 1.8684, "step": 1342 }, { "epoch": 1.68, "learning_rate": 1.3059381169675267e-05, "loss": 1.8827, "step": 1344 }, { "epoch": 1.68, "learning_rate": 1.3040121900414371e-05, "loss": 1.8655, "step": 1346 }, { "epoch": 1.68, "learning_rate": 1.3020850198378299e-05, "loss": 1.8309, "step": 1348 }, { "epoch": 1.68, "learning_rate": 1.300156614237993e-05, "loss": 1.9122, "step": 1350 }, { "epoch": 1.69, "learning_rate": 1.298226981128265e-05, "loss": 1.9131, "step": 1352 }, { "epoch": 1.69, "learning_rate": 1.2962961284000067e-05, "loss": 1.9138, "step": 1354 }, { "epoch": 1.69, "learning_rate": 1.2943640639495639e-05, "loss": 1.8432, "step": 1356 }, { "epoch": 1.69, "learning_rate": 1.2924307956782398e-05, "loss": 1.9196, "step": 1358 }, { "epoch": 1.7, "learning_rate": 1.29049633149226e-05, "loss": 1.9307, "step": 1360 }, { "epoch": 1.7, "learning_rate": 1.2885606793027408e-05, "loss": 1.8495, "step": 1362 }, { "epoch": 1.7, "learning_rate": 1.2866238470256571e-05, "loss": 1.8663, "step": 1364 }, { "epoch": 1.7, "learning_rate": 1.2846858425818097e-05, "loss": 1.8547, "step": 1366 }, { "epoch": 1.71, "learning_rate": 1.2827466738967932e-05, "loss": 1.9008, "step": 1368 }, { "epoch": 1.71, "learning_rate": 1.280806348900964e-05, "loss": 1.8566, "step": 1370 }, { "epoch": 1.71, "learning_rate": 1.2788648755294056e-05, "loss": 1.8615, "step": 1372 }, { "epoch": 1.71, "learning_rate": 1.2769222617218995e-05, "loss": 1.8961, "step": 1374 }, { "epoch": 1.72, "learning_rate": 1.2749785154228904e-05, "loss": 1.888, "step": 1376 }, { "epoch": 1.72, "learning_rate": 1.2730336445814549e-05, "loss": 1.894, "step": 1378 }, { "epoch": 1.72, "learning_rate": 1.2710876571512674e-05, "loss": 1.8882, "step": 1380 }, { "epoch": 1.72, "learning_rate": 1.2691405610905698e-05, "loss": 1.8688, "step": 1382 }, { "epoch": 1.73, "learning_rate": 1.2671923643621376e-05, "loss": 1.8765, "step": 1384 }, { "epoch": 1.73, "learning_rate": 1.2652430749332472e-05, "loss": 1.868, "step": 1386 }, { "epoch": 1.73, "learning_rate": 1.2632927007756438e-05, "loss": 1.8706, "step": 1388 }, { "epoch": 1.73, "learning_rate": 1.2613412498655082e-05, "loss": 1.8333, "step": 1390 }, { "epoch": 1.74, "learning_rate": 1.2593887301834257e-05, "loss": 1.9108, "step": 1392 }, { "epoch": 1.74, "learning_rate": 1.2574351497143522e-05, "loss": 1.8606, "step": 1394 }, { "epoch": 1.74, "learning_rate": 1.2554805164475805e-05, "loss": 1.8601, "step": 1396 }, { "epoch": 1.74, "learning_rate": 1.2535248383767102e-05, "loss": 1.8281, "step": 1398 }, { "epoch": 1.75, "learning_rate": 1.2515681234996133e-05, "loss": 1.8664, "step": 1400 }, { "epoch": 1.75, "learning_rate": 1.2496103798184019e-05, "loss": 1.93, "step": 1402 }, { "epoch": 1.75, "learning_rate": 1.2476516153393954e-05, "loss": 1.8555, "step": 1404 }, { "epoch": 1.75, "learning_rate": 1.2456918380730878e-05, "loss": 1.8202, "step": 1406 }, { "epoch": 1.76, "learning_rate": 1.243731056034115e-05, "loss": 1.8831, "step": 1408 }, { "epoch": 1.76, "learning_rate": 1.2417692772412222e-05, "loss": 1.8999, "step": 1410 }, { "epoch": 1.76, "learning_rate": 1.2398065097172302e-05, "loss": 1.8463, "step": 1412 }, { "epoch": 1.76, "learning_rate": 1.2378427614890041e-05, "loss": 1.9005, "step": 1414 }, { "epoch": 1.77, "learning_rate": 1.2358780405874193e-05, "loss": 1.8984, "step": 1416 }, { "epoch": 1.77, "learning_rate": 1.233912355047329e-05, "loss": 1.8693, "step": 1418 }, { "epoch": 1.77, "learning_rate": 1.2319457129075314e-05, "loss": 1.8947, "step": 1420 }, { "epoch": 1.77, "learning_rate": 1.229978122210737e-05, "loss": 1.8737, "step": 1422 }, { "epoch": 1.78, "learning_rate": 1.2280095910035343e-05, "loss": 1.7621, "step": 1424 }, { "epoch": 1.78, "learning_rate": 1.22604012733636e-05, "loss": 1.8208, "step": 1426 }, { "epoch": 1.78, "learning_rate": 1.2240697392634631e-05, "loss": 1.8368, "step": 1428 }, { "epoch": 1.78, "learning_rate": 1.2220984348428719e-05, "loss": 1.8651, "step": 1430 }, { "epoch": 1.79, "learning_rate": 1.2201262221363652e-05, "loss": 1.8629, "step": 1432 }, { "epoch": 1.79, "learning_rate": 1.218153109209433e-05, "loss": 1.8932, "step": 1434 }, { "epoch": 1.79, "learning_rate": 1.216179104131249e-05, "loss": 1.8631, "step": 1436 }, { "epoch": 1.79, "learning_rate": 1.2142042149746343e-05, "loss": 1.8515, "step": 1438 }, { "epoch": 1.8, "learning_rate": 1.2122284498160256e-05, "loss": 1.7967, "step": 1440 }, { "epoch": 1.8, "learning_rate": 1.2102518167354433e-05, "loss": 1.8777, "step": 1442 }, { "epoch": 1.8, "learning_rate": 1.2082743238164553e-05, "loss": 1.8218, "step": 1444 }, { "epoch": 1.8, "learning_rate": 1.2062959791461473e-05, "loss": 1.8485, "step": 1446 }, { "epoch": 1.81, "learning_rate": 1.2043167908150874e-05, "loss": 1.8748, "step": 1448 }, { "epoch": 1.81, "learning_rate": 1.2023367669172947e-05, "loss": 1.859, "step": 1450 }, { "epoch": 1.81, "learning_rate": 1.2003559155502052e-05, "loss": 1.9179, "step": 1452 }, { "epoch": 1.81, "learning_rate": 1.1983742448146377e-05, "loss": 1.9111, "step": 1454 }, { "epoch": 1.82, "learning_rate": 1.1963917628147644e-05, "loss": 1.8902, "step": 1456 }, { "epoch": 1.82, "learning_rate": 1.1944084776580722e-05, "loss": 1.8747, "step": 1458 }, { "epoch": 1.82, "learning_rate": 1.1924243974553349e-05, "loss": 1.9109, "step": 1460 }, { "epoch": 1.82, "learning_rate": 1.1904395303205764e-05, "loss": 1.806, "step": 1462 }, { "epoch": 1.83, "learning_rate": 1.1884538843710396e-05, "loss": 1.8412, "step": 1464 }, { "epoch": 1.83, "learning_rate": 1.1864674677271521e-05, "loss": 1.79, "step": 1466 }, { "epoch": 1.83, "learning_rate": 1.1844802885124928e-05, "loss": 1.8406, "step": 1468 }, { "epoch": 1.83, "learning_rate": 1.1824923548537602e-05, "loss": 1.8018, "step": 1470 }, { "epoch": 1.84, "learning_rate": 1.1805036748807371e-05, "loss": 1.799, "step": 1472 }, { "epoch": 1.84, "learning_rate": 1.1785142567262591e-05, "loss": 1.8686, "step": 1474 }, { "epoch": 1.84, "learning_rate": 1.1765241085261802e-05, "loss": 1.8628, "step": 1476 }, { "epoch": 1.84, "learning_rate": 1.1745332384193408e-05, "loss": 1.8055, "step": 1478 }, { "epoch": 1.85, "learning_rate": 1.1725416545475328e-05, "loss": 1.8962, "step": 1480 }, { "epoch": 1.85, "learning_rate": 1.1705493650554667e-05, "loss": 1.8155, "step": 1482 }, { "epoch": 1.85, "learning_rate": 1.16855637809074e-05, "loss": 1.8189, "step": 1484 }, { "epoch": 1.85, "learning_rate": 1.1665627018038013e-05, "loss": 1.8312, "step": 1486 }, { "epoch": 1.86, "learning_rate": 1.164568344347919e-05, "loss": 1.8848, "step": 1488 }, { "epoch": 1.86, "learning_rate": 1.1625733138791468e-05, "loss": 1.8952, "step": 1490 }, { "epoch": 1.86, "learning_rate": 1.1605776185562909e-05, "loss": 1.8726, "step": 1492 }, { "epoch": 1.86, "learning_rate": 1.1585812665408764e-05, "loss": 1.8264, "step": 1494 }, { "epoch": 1.87, "learning_rate": 1.156584265997114e-05, "loss": 1.8375, "step": 1496 }, { "epoch": 1.87, "learning_rate": 1.1545866250918667e-05, "loss": 1.8172, "step": 1498 }, { "epoch": 1.87, "learning_rate": 1.1525883519946163e-05, "loss": 1.8732, "step": 1500 }, { "epoch": 1.87, "learning_rate": 1.1505894548774294e-05, "loss": 1.8296, "step": 1502 }, { "epoch": 1.88, "learning_rate": 1.148589941914926e-05, "loss": 1.9029, "step": 1504 }, { "epoch": 1.88, "learning_rate": 1.1465898212842426e-05, "loss": 1.8767, "step": 1506 }, { "epoch": 1.88, "learning_rate": 1.1445891011650025e-05, "loss": 1.8337, "step": 1508 }, { "epoch": 1.88, "learning_rate": 1.1425877897392799e-05, "loss": 1.8418, "step": 1510 }, { "epoch": 1.89, "learning_rate": 1.1405858951915676e-05, "loss": 1.8653, "step": 1512 }, { "epoch": 1.89, "learning_rate": 1.1385834257087427e-05, "loss": 1.822, "step": 1514 }, { "epoch": 1.89, "learning_rate": 1.1365803894800334e-05, "loss": 1.8383, "step": 1516 }, { "epoch": 1.89, "learning_rate": 1.1345767946969866e-05, "loss": 1.8838, "step": 1518 }, { "epoch": 1.9, "learning_rate": 1.132572649553432e-05, "loss": 1.817, "step": 1520 }, { "epoch": 1.9, "learning_rate": 1.1305679622454511e-05, "loss": 1.8163, "step": 1522 }, { "epoch": 1.9, "learning_rate": 1.1285627409713424e-05, "loss": 1.8337, "step": 1524 }, { "epoch": 1.9, "learning_rate": 1.1265569939315882e-05, "loss": 1.8499, "step": 1526 }, { "epoch": 1.91, "learning_rate": 1.1245507293288204e-05, "loss": 1.8354, "step": 1528 }, { "epoch": 1.91, "learning_rate": 1.1225439553677881e-05, "loss": 1.7944, "step": 1530 }, { "epoch": 1.91, "learning_rate": 1.1205366802553231e-05, "loss": 1.8707, "step": 1532 }, { "epoch": 1.91, "learning_rate": 1.1185289122003071e-05, "loss": 1.839, "step": 1534 }, { "epoch": 1.92, "learning_rate": 1.1165206594136371e-05, "loss": 1.8798, "step": 1536 }, { "epoch": 1.92, "learning_rate": 1.114511930108193e-05, "loss": 1.8741, "step": 1538 }, { "epoch": 1.92, "learning_rate": 1.1125027324988029e-05, "loss": 1.8993, "step": 1540 }, { "epoch": 1.92, "learning_rate": 1.1104930748022109e-05, "loss": 1.8224, "step": 1542 }, { "epoch": 1.93, "learning_rate": 1.1084829652370417e-05, "loss": 1.8658, "step": 1544 }, { "epoch": 1.93, "learning_rate": 1.1064724120237687e-05, "loss": 1.8583, "step": 1546 }, { "epoch": 1.93, "learning_rate": 1.104461423384679e-05, "loss": 1.8516, "step": 1548 }, { "epoch": 1.93, "learning_rate": 1.1024500075438414e-05, "loss": 1.893, "step": 1550 }, { "epoch": 1.94, "learning_rate": 1.1004381727270704e-05, "loss": 1.8721, "step": 1552 }, { "epoch": 1.94, "learning_rate": 1.0984259271618947e-05, "loss": 1.8654, "step": 1554 }, { "epoch": 1.94, "learning_rate": 1.0964132790775231e-05, "loss": 1.9032, "step": 1556 }, { "epoch": 1.94, "learning_rate": 1.0944002367048097e-05, "loss": 1.8805, "step": 1558 }, { "epoch": 1.95, "learning_rate": 1.0923868082762217e-05, "loss": 1.8381, "step": 1560 }, { "epoch": 1.95, "learning_rate": 1.0903730020258052e-05, "loss": 1.8339, "step": 1562 }, { "epoch": 1.95, "learning_rate": 1.0883588261891507e-05, "loss": 1.8639, "step": 1564 }, { "epoch": 1.95, "learning_rate": 1.0863442890033608e-05, "loss": 1.7967, "step": 1566 }, { "epoch": 1.96, "learning_rate": 1.0843293987070154e-05, "loss": 1.8535, "step": 1568 }, { "epoch": 1.96, "learning_rate": 1.0823141635401388e-05, "loss": 1.8319, "step": 1570 }, { "epoch": 1.96, "learning_rate": 1.0802985917441657e-05, "loss": 1.8606, "step": 1572 }, { "epoch": 1.96, "learning_rate": 1.0782826915619074e-05, "loss": 1.8677, "step": 1574 }, { "epoch": 1.97, "learning_rate": 1.0762664712375179e-05, "loss": 1.8294, "step": 1576 }, { "epoch": 1.97, "learning_rate": 1.0742499390164609e-05, "loss": 1.9059, "step": 1578 }, { "epoch": 1.97, "learning_rate": 1.0722331031454749e-05, "loss": 1.8348, "step": 1580 }, { "epoch": 1.97, "learning_rate": 1.0702159718725413e-05, "loss": 1.8599, "step": 1582 }, { "epoch": 1.98, "learning_rate": 1.0681985534468484e-05, "loss": 1.814, "step": 1584 }, { "epoch": 1.98, "learning_rate": 1.0661808561187597e-05, "loss": 1.9452, "step": 1586 }, { "epoch": 1.98, "learning_rate": 1.0641628881397785e-05, "loss": 1.7966, "step": 1588 }, { "epoch": 1.98, "learning_rate": 1.0621446577625154e-05, "loss": 1.9034, "step": 1590 }, { "epoch": 1.99, "learning_rate": 1.060126173240655e-05, "loss": 1.831, "step": 1592 }, { "epoch": 1.99, "learning_rate": 1.0581074428289193e-05, "loss": 1.8329, "step": 1594 }, { "epoch": 1.99, "learning_rate": 1.0560884747830375e-05, "loss": 1.8224, "step": 1596 }, { "epoch": 1.99, "learning_rate": 1.0540692773597097e-05, "loss": 1.8928, "step": 1598 }, { "epoch": 2.0, "learning_rate": 1.0520498588165746e-05, "loss": 1.8735, "step": 1600 }, { "epoch": 2.0, "learning_rate": 1.0500302274121748e-05, "loss": 1.8609, "step": 1602 }, { "epoch": 2.0, "learning_rate": 1.0480103914059235e-05, "loss": 1.812, "step": 1604 }, { "epoch": 2.0, "learning_rate": 1.0459903590580706e-05, "loss": 1.8607, "step": 1606 }, { "epoch": 2.01, "learning_rate": 1.0439701386296696e-05, "loss": 1.8484, "step": 1608 }, { "epoch": 2.01, "learning_rate": 1.0419497383825425e-05, "loss": 1.8574, "step": 1610 }, { "epoch": 2.01, "learning_rate": 1.0399291665792463e-05, "loss": 1.8783, "step": 1612 }, { "epoch": 2.01, "learning_rate": 1.0379084314830405e-05, "loss": 1.8025, "step": 1614 }, { "epoch": 2.02, "learning_rate": 1.0358875413578524e-05, "loss": 1.8968, "step": 1616 }, { "epoch": 2.02, "learning_rate": 1.0338665044682418e-05, "loss": 1.8557, "step": 1618 }, { "epoch": 2.02, "learning_rate": 1.0318453290793706e-05, "loss": 1.8275, "step": 1620 }, { "epoch": 2.02, "learning_rate": 1.0298240234569661e-05, "loss": 1.7935, "step": 1622 }, { "epoch": 2.03, "learning_rate": 1.0278025958672886e-05, "loss": 1.7812, "step": 1624 }, { "epoch": 2.03, "learning_rate": 1.0257810545770966e-05, "loss": 1.7503, "step": 1626 }, { "epoch": 2.03, "learning_rate": 1.0237594078536141e-05, "loss": 1.7857, "step": 1628 }, { "epoch": 2.03, "learning_rate": 1.0217376639644964e-05, "loss": 1.8114, "step": 1630 }, { "epoch": 2.04, "learning_rate": 1.0197158311777957e-05, "loss": 1.7967, "step": 1632 }, { "epoch": 2.04, "learning_rate": 1.0176939177619283e-05, "loss": 1.7743, "step": 1634 }, { "epoch": 2.04, "learning_rate": 1.015671931985639e-05, "loss": 1.8089, "step": 1636 }, { "epoch": 2.04, "learning_rate": 1.0136498821179704e-05, "loss": 1.821, "step": 1638 }, { "epoch": 2.05, "learning_rate": 1.011627776428226e-05, "loss": 1.8334, "step": 1640 }, { "epoch": 2.05, "learning_rate": 1.0096056231859375e-05, "loss": 1.8699, "step": 1642 }, { "epoch": 2.05, "learning_rate": 1.007583430660832e-05, "loss": 1.7575, "step": 1644 }, { "epoch": 2.05, "learning_rate": 1.0055612071227958e-05, "loss": 1.8264, "step": 1646 }, { "epoch": 2.06, "learning_rate": 1.0035389608418435e-05, "loss": 1.8277, "step": 1648 }, { "epoch": 2.06, "learning_rate": 1.001516700088082e-05, "loss": 1.7808, "step": 1650 }, { "epoch": 2.06, "learning_rate": 9.994944331316771e-06, "loss": 1.8691, "step": 1652 }, { "epoch": 2.06, "learning_rate": 9.974721682428208e-06, "loss": 1.7907, "step": 1654 }, { "epoch": 2.07, "learning_rate": 9.95449913691696e-06, "loss": 1.8094, "step": 1656 }, { "epoch": 2.07, "learning_rate": 9.934276777484436e-06, "loss": 1.8144, "step": 1658 }, { "epoch": 2.07, "learning_rate": 9.914054686831281e-06, "loss": 1.8336, "step": 1660 }, { "epoch": 2.07, "learning_rate": 9.893832947657042e-06, "loss": 1.7941, "step": 1662 }, { "epoch": 2.08, "learning_rate": 9.873611642659833e-06, "loss": 1.802, "step": 1664 }, { "epoch": 2.08, "learning_rate": 9.853390854535988e-06, "loss": 1.8163, "step": 1666 }, { "epoch": 2.08, "learning_rate": 9.833170665979725e-06, "loss": 1.8184, "step": 1668 }, { "epoch": 2.08, "learning_rate": 9.812951159682817e-06, "loss": 1.8055, "step": 1670 }, { "epoch": 2.09, "learning_rate": 9.792732418334243e-06, "loss": 1.8084, "step": 1672 }, { "epoch": 2.09, "learning_rate": 9.772514524619846e-06, "loss": 1.8102, "step": 1674 }, { "epoch": 2.09, "learning_rate": 9.752297561222023e-06, "loss": 1.8164, "step": 1676 }, { "epoch": 2.09, "learning_rate": 9.732081610819346e-06, "loss": 1.8212, "step": 1678 }, { "epoch": 2.1, "learning_rate": 9.711866756086252e-06, "loss": 1.7861, "step": 1680 }, { "epoch": 2.1, "learning_rate": 9.691653079692694e-06, "loss": 1.8109, "step": 1682 }, { "epoch": 2.1, "learning_rate": 9.671440664303813e-06, "loss": 1.8495, "step": 1684 }, { "epoch": 2.1, "learning_rate": 9.651229592579596e-06, "loss": 1.8491, "step": 1686 }, { "epoch": 2.11, "learning_rate": 9.631019947174514e-06, "loss": 1.852, "step": 1688 }, { "epoch": 2.11, "learning_rate": 9.610811810737231e-06, "loss": 1.8011, "step": 1690 }, { "epoch": 2.11, "learning_rate": 9.590605265910225e-06, "loss": 1.7655, "step": 1692 }, { "epoch": 2.11, "learning_rate": 9.570400395329466e-06, "loss": 1.8085, "step": 1694 }, { "epoch": 2.12, "learning_rate": 9.550197281624078e-06, "loss": 1.8162, "step": 1696 }, { "epoch": 2.12, "learning_rate": 9.529996007416007e-06, "loss": 1.7833, "step": 1698 }, { "epoch": 2.12, "learning_rate": 9.509796655319665e-06, "loss": 1.7894, "step": 1700 }, { "epoch": 2.12, "learning_rate": 9.489599307941608e-06, "loss": 1.8012, "step": 1702 }, { "epoch": 2.13, "learning_rate": 9.469404047880205e-06, "loss": 1.7753, "step": 1704 }, { "epoch": 2.13, "learning_rate": 9.44921095772527e-06, "loss": 1.7456, "step": 1706 }, { "epoch": 2.13, "learning_rate": 9.429020120057747e-06, "loss": 1.815, "step": 1708 }, { "epoch": 2.13, "learning_rate": 9.408831617449385e-06, "loss": 1.8138, "step": 1710 }, { "epoch": 2.13, "learning_rate": 9.388645532462366e-06, "loss": 1.8053, "step": 1712 }, { "epoch": 2.14, "learning_rate": 9.368461947648986e-06, "loss": 1.7459, "step": 1714 }, { "epoch": 2.14, "learning_rate": 9.348280945551324e-06, "loss": 1.813, "step": 1716 }, { "epoch": 2.14, "learning_rate": 9.328102608700895e-06, "loss": 1.788, "step": 1718 }, { "epoch": 2.14, "learning_rate": 9.307927019618313e-06, "loss": 1.7766, "step": 1720 }, { "epoch": 2.15, "learning_rate": 9.287754260812949e-06, "loss": 1.7857, "step": 1722 }, { "epoch": 2.15, "learning_rate": 9.267584414782614e-06, "loss": 1.7624, "step": 1724 }, { "epoch": 2.15, "learning_rate": 9.247417564013192e-06, "loss": 1.7824, "step": 1726 }, { "epoch": 2.15, "learning_rate": 9.227253790978326e-06, "loss": 1.8102, "step": 1728 }, { "epoch": 2.16, "learning_rate": 9.207093178139067e-06, "loss": 1.7498, "step": 1730 }, { "epoch": 2.16, "learning_rate": 9.186935807943546e-06, "loss": 1.7743, "step": 1732 }, { "epoch": 2.16, "learning_rate": 9.16678176282664e-06, "loss": 1.7482, "step": 1734 }, { "epoch": 2.16, "learning_rate": 9.146631125209608e-06, "loss": 1.7707, "step": 1736 }, { "epoch": 2.17, "learning_rate": 9.126483977499797e-06, "loss": 1.8034, "step": 1738 }, { "epoch": 2.17, "learning_rate": 9.106340402090266e-06, "loss": 1.8073, "step": 1740 }, { "epoch": 2.17, "learning_rate": 9.08620048135947e-06, "loss": 1.8022, "step": 1742 }, { "epoch": 2.17, "learning_rate": 9.066064297670914e-06, "loss": 1.7745, "step": 1744 }, { "epoch": 2.18, "learning_rate": 9.04593193337283e-06, "loss": 1.8555, "step": 1746 }, { "epoch": 2.18, "learning_rate": 9.025803470797823e-06, "loss": 1.8339, "step": 1748 }, { "epoch": 2.18, "learning_rate": 9.005678992262535e-06, "loss": 1.7979, "step": 1750 }, { "epoch": 2.18, "learning_rate": 8.985558580067337e-06, "loss": 1.7941, "step": 1752 }, { "epoch": 2.19, "learning_rate": 8.965442316495945e-06, "loss": 1.8046, "step": 1754 }, { "epoch": 2.19, "learning_rate": 8.94533028381512e-06, "loss": 1.7796, "step": 1756 }, { "epoch": 2.19, "learning_rate": 8.92522256427433e-06, "loss": 1.7516, "step": 1758 }, { "epoch": 2.19, "learning_rate": 8.905119240105386e-06, "loss": 1.833, "step": 1760 }, { "epoch": 2.2, "learning_rate": 8.885020393522136e-06, "loss": 1.8348, "step": 1762 }, { "epoch": 2.2, "learning_rate": 8.86492610672011e-06, "loss": 1.8329, "step": 1764 }, { "epoch": 2.2, "learning_rate": 8.8448364618762e-06, "loss": 1.8099, "step": 1766 }, { "epoch": 2.2, "learning_rate": 8.824751541148305e-06, "loss": 1.8097, "step": 1768 }, { "epoch": 2.21, "learning_rate": 8.804671426675003e-06, "loss": 1.7472, "step": 1770 }, { "epoch": 2.21, "learning_rate": 8.78459620057523e-06, "loss": 1.8142, "step": 1772 }, { "epoch": 2.21, "learning_rate": 8.764525944947915e-06, "loss": 1.8346, "step": 1774 }, { "epoch": 2.21, "learning_rate": 8.74446074187167e-06, "loss": 1.7788, "step": 1776 }, { "epoch": 2.22, "learning_rate": 8.724400673404438e-06, "loss": 1.8045, "step": 1778 }, { "epoch": 2.22, "learning_rate": 8.704345821583169e-06, "loss": 1.775, "step": 1780 }, { "epoch": 2.22, "learning_rate": 8.684296268423477e-06, "loss": 1.7687, "step": 1782 }, { "epoch": 2.22, "learning_rate": 8.664252095919303e-06, "loss": 1.7898, "step": 1784 }, { "epoch": 2.23, "learning_rate": 8.644213386042594e-06, "loss": 1.8123, "step": 1786 }, { "epoch": 2.23, "learning_rate": 8.624180220742945e-06, "loss": 1.8162, "step": 1788 }, { "epoch": 2.23, "learning_rate": 8.60415268194728e-06, "loss": 1.8055, "step": 1790 }, { "epoch": 2.23, "learning_rate": 8.58413085155952e-06, "loss": 1.8186, "step": 1792 }, { "epoch": 2.24, "learning_rate": 8.564114811460233e-06, "loss": 1.8004, "step": 1794 }, { "epoch": 2.24, "learning_rate": 8.544104643506308e-06, "loss": 1.793, "step": 1796 }, { "epoch": 2.24, "learning_rate": 8.524100429530621e-06, "loss": 1.823, "step": 1798 }, { "epoch": 2.24, "learning_rate": 8.504102251341704e-06, "loss": 1.8422, "step": 1800 }, { "epoch": 2.25, "learning_rate": 8.484110190723396e-06, "loss": 1.8116, "step": 1802 }, { "epoch": 2.25, "learning_rate": 8.464124329434522e-06, "loss": 1.7723, "step": 1804 }, { "epoch": 2.25, "learning_rate": 8.444144749208558e-06, "loss": 1.8069, "step": 1806 }, { "epoch": 2.25, "learning_rate": 8.424171531753288e-06, "loss": 1.8267, "step": 1808 }, { "epoch": 2.26, "learning_rate": 8.404204758750475e-06, "loss": 1.7934, "step": 1810 }, { "epoch": 2.26, "learning_rate": 8.38424451185553e-06, "loss": 1.721, "step": 1812 }, { "epoch": 2.26, "learning_rate": 8.364290872697175e-06, "loss": 1.7947, "step": 1814 }, { "epoch": 2.26, "learning_rate": 8.344343922877107e-06, "loss": 1.8914, "step": 1816 }, { "epoch": 2.27, "learning_rate": 8.324403743969666e-06, "loss": 1.8247, "step": 1818 }, { "epoch": 2.27, "learning_rate": 8.30447041752151e-06, "loss": 1.7762, "step": 1820 }, { "epoch": 2.27, "learning_rate": 8.28454402505126e-06, "loss": 1.7987, "step": 1822 }, { "epoch": 2.27, "learning_rate": 8.264624648049188e-06, "loss": 1.7829, "step": 1824 }, { "epoch": 2.28, "learning_rate": 8.244712367976878e-06, "loss": 1.8121, "step": 1826 }, { "epoch": 2.28, "learning_rate": 8.224807266266883e-06, "loss": 1.7393, "step": 1828 }, { "epoch": 2.28, "learning_rate": 8.20490942432241e-06, "loss": 1.7823, "step": 1830 }, { "epoch": 2.28, "learning_rate": 8.185018923516963e-06, "loss": 1.7875, "step": 1832 }, { "epoch": 2.29, "learning_rate": 8.16513584519404e-06, "loss": 1.7663, "step": 1834 }, { "epoch": 2.29, "learning_rate": 8.145260270666775e-06, "loss": 1.7447, "step": 1836 }, { "epoch": 2.29, "learning_rate": 8.125392281217605e-06, "loss": 1.807, "step": 1838 }, { "epoch": 2.29, "learning_rate": 8.105531958097973e-06, "loss": 1.8077, "step": 1840 }, { "epoch": 2.3, "learning_rate": 8.085679382527945e-06, "loss": 1.8029, "step": 1842 }, { "epoch": 2.3, "learning_rate": 8.06583463569592e-06, "loss": 1.7999, "step": 1844 }, { "epoch": 2.3, "learning_rate": 8.045997798758263e-06, "loss": 1.8458, "step": 1846 }, { "epoch": 2.3, "learning_rate": 8.026168952839014e-06, "loss": 1.7925, "step": 1848 }, { "epoch": 2.31, "learning_rate": 8.006348179029517e-06, "loss": 1.7963, "step": 1850 }, { "epoch": 2.31, "learning_rate": 7.986535558388103e-06, "loss": 1.8324, "step": 1852 }, { "epoch": 2.31, "learning_rate": 7.966731171939776e-06, "loss": 1.8655, "step": 1854 }, { "epoch": 2.31, "learning_rate": 7.946935100675848e-06, "loss": 1.7805, "step": 1856 }, { "epoch": 2.32, "learning_rate": 7.927147425553635e-06, "loss": 1.7398, "step": 1858 }, { "epoch": 2.32, "learning_rate": 7.907368227496111e-06, "loss": 1.853, "step": 1860 }, { "epoch": 2.32, "learning_rate": 7.887597587391591e-06, "loss": 1.8018, "step": 1862 }, { "epoch": 2.32, "learning_rate": 7.86783558609339e-06, "loss": 1.836, "step": 1864 }, { "epoch": 2.33, "learning_rate": 7.848082304419478e-06, "loss": 1.7692, "step": 1866 }, { "epoch": 2.33, "learning_rate": 7.82833782315219e-06, "loss": 1.8082, "step": 1868 }, { "epoch": 2.33, "learning_rate": 7.808602223037855e-06, "loss": 1.8154, "step": 1870 }, { "epoch": 2.33, "learning_rate": 7.788875584786484e-06, "loss": 1.816, "step": 1872 }, { "epoch": 2.34, "learning_rate": 7.769157989071447e-06, "loss": 1.8358, "step": 1874 }, { "epoch": 2.34, "learning_rate": 7.74944951652912e-06, "loss": 1.843, "step": 1876 }, { "epoch": 2.34, "learning_rate": 7.729750247758582e-06, "loss": 1.7787, "step": 1878 }, { "epoch": 2.34, "learning_rate": 7.710060263321259e-06, "loss": 1.8051, "step": 1880 }, { "epoch": 2.35, "learning_rate": 7.690379643740628e-06, "loss": 1.8016, "step": 1882 }, { "epoch": 2.35, "learning_rate": 7.670708469501848e-06, "loss": 1.8164, "step": 1884 }, { "epoch": 2.35, "learning_rate": 7.651046821051454e-06, "loss": 1.808, "step": 1886 }, { "epoch": 2.35, "learning_rate": 7.631394778797042e-06, "loss": 1.7887, "step": 1888 }, { "epoch": 2.36, "learning_rate": 7.6117524231068985e-06, "loss": 1.7793, "step": 1890 }, { "epoch": 2.36, "learning_rate": 7.5921198343097145e-06, "loss": 1.8733, "step": 1892 }, { "epoch": 2.36, "learning_rate": 7.5724970926942265e-06, "loss": 1.7316, "step": 1894 }, { "epoch": 2.36, "learning_rate": 7.552884278508913e-06, "loss": 1.7447, "step": 1896 }, { "epoch": 2.37, "learning_rate": 7.533281471961642e-06, "loss": 1.7504, "step": 1898 }, { "epoch": 2.37, "learning_rate": 7.523483846606048e-06, "loss": 1.8538, "step": 1900 }, { "epoch": 2.37, "learning_rate": 7.5038962018159845e-06, "loss": 1.8364, "step": 1902 }, { "epoch": 2.37, "learning_rate": 7.484318765003867e-06, "loss": 1.8108, "step": 1904 }, { "epoch": 2.38, "learning_rate": 7.464751616232902e-06, "loss": 1.7535, "step": 1906 }, { "epoch": 2.38, "learning_rate": 7.445194835524198e-06, "loss": 1.7906, "step": 1908 }, { "epoch": 2.38, "learning_rate": 7.425648502856483e-06, "loss": 1.7392, "step": 1910 }, { "epoch": 2.38, "learning_rate": 7.406112698165742e-06, "loss": 1.7712, "step": 1912 }, { "epoch": 2.39, "learning_rate": 7.3865875013449195e-06, "loss": 1.8206, "step": 1914 }, { "epoch": 2.39, "learning_rate": 7.367072992243569e-06, "loss": 1.793, "step": 1916 }, { "epoch": 2.39, "learning_rate": 7.34756925066753e-06, "loss": 1.7805, "step": 1918 }, { "epoch": 2.39, "learning_rate": 7.328076356378626e-06, "loss": 1.7996, "step": 1920 }, { "epoch": 2.4, "learning_rate": 7.308594389094306e-06, "loss": 1.784, "step": 1922 }, { "epoch": 2.4, "learning_rate": 7.28912342848733e-06, "loss": 1.8146, "step": 1924 }, { "epoch": 2.4, "learning_rate": 7.269663554185455e-06, "loss": 1.7956, "step": 1926 }, { "epoch": 2.4, "learning_rate": 7.2502148457711e-06, "loss": 1.8095, "step": 1928 }, { "epoch": 2.41, "learning_rate": 7.230777382781012e-06, "loss": 1.7846, "step": 1930 }, { "epoch": 2.41, "learning_rate": 7.211351244705947e-06, "loss": 1.7649, "step": 1932 }, { "epoch": 2.41, "learning_rate": 7.191936510990365e-06, "loss": 1.7949, "step": 1934 }, { "epoch": 2.41, "learning_rate": 7.172533261032069e-06, "loss": 1.817, "step": 1936 }, { "epoch": 2.42, "learning_rate": 7.153141574181903e-06, "loss": 1.7536, "step": 1938 }, { "epoch": 2.42, "learning_rate": 7.133761529743432e-06, "loss": 1.8015, "step": 1940 }, { "epoch": 2.42, "learning_rate": 7.1143932069725956e-06, "loss": 1.7432, "step": 1942 }, { "epoch": 2.42, "learning_rate": 7.095036685077404e-06, "loss": 1.8241, "step": 1944 }, { "epoch": 2.43, "learning_rate": 7.0756920432176035e-06, "loss": 1.769, "step": 1946 }, { "epoch": 2.43, "learning_rate": 7.056359360504363e-06, "loss": 1.7899, "step": 1948 }, { "epoch": 2.43, "learning_rate": 7.037038715999939e-06, "loss": 1.7825, "step": 1950 }, { "epoch": 2.43, "learning_rate": 7.017730188717348e-06, "loss": 1.7613, "step": 1952 }, { "epoch": 2.44, "learning_rate": 6.998433857620075e-06, "loss": 1.8186, "step": 1954 }, { "epoch": 2.44, "learning_rate": 6.979149801621703e-06, "loss": 1.8042, "step": 1956 }, { "epoch": 2.44, "learning_rate": 6.959878099585634e-06, "loss": 1.8133, "step": 1958 }, { "epoch": 2.44, "learning_rate": 6.940618830324732e-06, "loss": 1.7957, "step": 1960 }, { "epoch": 2.45, "learning_rate": 6.921372072601035e-06, "loss": 1.8154, "step": 1962 }, { "epoch": 2.45, "learning_rate": 6.902137905125397e-06, "loss": 1.8523, "step": 1964 }, { "epoch": 2.45, "learning_rate": 6.882916406557188e-06, "loss": 1.8621, "step": 1966 }, { "epoch": 2.45, "learning_rate": 6.863707655503975e-06, "loss": 1.8588, "step": 1968 }, { "epoch": 2.46, "learning_rate": 6.844511730521186e-06, "loss": 1.7631, "step": 1970 }, { "epoch": 2.46, "learning_rate": 6.825328710111801e-06, "loss": 1.8152, "step": 1972 }, { "epoch": 2.46, "learning_rate": 6.806158672726013e-06, "loss": 1.8447, "step": 1974 }, { "epoch": 2.46, "learning_rate": 6.787001696760942e-06, "loss": 1.7771, "step": 1976 }, { "epoch": 2.47, "learning_rate": 6.767857860560276e-06, "loss": 1.8188, "step": 1978 }, { "epoch": 2.47, "learning_rate": 6.748727242413966e-06, "loss": 1.8101, "step": 1980 }, { "epoch": 2.47, "learning_rate": 6.729609920557922e-06, "loss": 1.782, "step": 1982 }, { "epoch": 2.47, "learning_rate": 6.7105059731736645e-06, "loss": 1.748, "step": 1984 }, { "epoch": 2.48, "learning_rate": 6.691415478388016e-06, "loss": 1.7672, "step": 1986 }, { "epoch": 2.48, "learning_rate": 6.672338514272801e-06, "loss": 1.8024, "step": 1988 }, { "epoch": 2.48, "learning_rate": 6.653275158844488e-06, "loss": 1.7669, "step": 1990 }, { "epoch": 2.48, "learning_rate": 6.634225490063909e-06, "loss": 1.8115, "step": 1992 }, { "epoch": 2.49, "learning_rate": 6.615189585835912e-06, "loss": 1.785, "step": 1994 }, { "epoch": 2.49, "learning_rate": 6.596167524009064e-06, "loss": 1.7757, "step": 1996 }, { "epoch": 2.49, "learning_rate": 6.577159382375316e-06, "loss": 1.8101, "step": 1998 }, { "epoch": 2.49, "learning_rate": 6.558165238669685e-06, "loss": 1.754, "step": 2000 }, { "epoch": 2.5, "learning_rate": 6.539185170569962e-06, "loss": 1.7901, "step": 2002 }, { "epoch": 2.5, "learning_rate": 6.520219255696356e-06, "loss": 1.7702, "step": 2004 }, { "epoch": 2.5, "learning_rate": 6.501267571611207e-06, "loss": 1.7951, "step": 2006 }, { "epoch": 2.5, "learning_rate": 6.482330195818646e-06, "loss": 1.8218, "step": 2008 }, { "epoch": 2.51, "learning_rate": 6.4634072057643045e-06, "loss": 1.7576, "step": 2010 }, { "epoch": 2.51, "learning_rate": 6.444498678834974e-06, "loss": 1.7987, "step": 2012 }, { "epoch": 2.51, "learning_rate": 6.4256046923582895e-06, "loss": 1.7833, "step": 2014 }, { "epoch": 2.51, "learning_rate": 6.406725323602441e-06, "loss": 1.7606, "step": 2016 }, { "epoch": 2.52, "learning_rate": 6.387860649775822e-06, "loss": 1.7676, "step": 2018 }, { "epoch": 2.52, "learning_rate": 6.369010748026739e-06, "loss": 1.7803, "step": 2020 }, { "epoch": 2.52, "learning_rate": 6.350175695443085e-06, "loss": 1.7635, "step": 2022 }, { "epoch": 2.52, "learning_rate": 6.331355569052027e-06, "loss": 1.807, "step": 2024 }, { "epoch": 2.53, "learning_rate": 6.312550445819687e-06, "loss": 1.7618, "step": 2026 }, { "epoch": 2.53, "learning_rate": 6.2937604026508295e-06, "loss": 1.8215, "step": 2028 }, { "epoch": 2.53, "learning_rate": 6.27498551638856e-06, "loss": 1.7923, "step": 2030 }, { "epoch": 2.53, "learning_rate": 6.256225863813985e-06, "loss": 1.7972, "step": 2032 }, { "epoch": 2.54, "learning_rate": 6.237481521645915e-06, "loss": 1.801, "step": 2034 }, { "epoch": 2.54, "learning_rate": 6.218752566540555e-06, "loss": 1.7799, "step": 2036 }, { "epoch": 2.54, "learning_rate": 6.2000390750911775e-06, "loss": 1.8538, "step": 2038 }, { "epoch": 2.54, "learning_rate": 6.181341123827816e-06, "loss": 1.7324, "step": 2040 }, { "epoch": 2.55, "learning_rate": 6.162658789216946e-06, "loss": 1.7931, "step": 2042 }, { "epoch": 2.55, "learning_rate": 6.143992147661195e-06, "loss": 1.7487, "step": 2044 }, { "epoch": 2.55, "learning_rate": 6.1253412754989926e-06, "loss": 1.7697, "step": 2046 }, { "epoch": 2.55, "learning_rate": 6.106706249004284e-06, "loss": 1.7764, "step": 2048 }, { "epoch": 2.56, "learning_rate": 6.088087144386225e-06, "loss": 1.7744, "step": 2050 }, { "epoch": 2.56, "learning_rate": 6.069484037788844e-06, "loss": 1.7781, "step": 2052 }, { "epoch": 2.56, "learning_rate": 6.050897005290749e-06, "loss": 1.8226, "step": 2054 }, { "epoch": 2.56, "learning_rate": 6.032326122904808e-06, "loss": 1.8304, "step": 2056 }, { "epoch": 2.57, "learning_rate": 6.013771466577851e-06, "loss": 1.7915, "step": 2058 }, { "epoch": 2.57, "learning_rate": 5.9952331121903466e-06, "loss": 1.813, "step": 2060 }, { "epoch": 2.57, "learning_rate": 5.976711135556086e-06, "loss": 1.8069, "step": 2062 }, { "epoch": 2.57, "learning_rate": 5.9582056124219e-06, "loss": 1.8052, "step": 2064 }, { "epoch": 2.58, "learning_rate": 5.939716618467317e-06, "loss": 1.8314, "step": 2066 }, { "epoch": 2.58, "learning_rate": 5.921244229304275e-06, "loss": 1.7603, "step": 2068 }, { "epoch": 2.58, "learning_rate": 5.9027885204768045e-06, "loss": 1.7968, "step": 2070 }, { "epoch": 2.58, "learning_rate": 5.884349567460723e-06, "loss": 1.7951, "step": 2072 }, { "epoch": 2.59, "learning_rate": 5.8659274456633195e-06, "loss": 1.7493, "step": 2074 }, { "epoch": 2.59, "learning_rate": 5.8475222304230505e-06, "loss": 1.8222, "step": 2076 }, { "epoch": 2.59, "learning_rate": 5.829133997009238e-06, "loss": 1.8119, "step": 2078 }, { "epoch": 2.59, "learning_rate": 5.8107628206217516e-06, "loss": 1.7982, "step": 2080 }, { "epoch": 2.6, "learning_rate": 5.792408776390701e-06, "loss": 1.7449, "step": 2082 }, { "epoch": 2.6, "learning_rate": 5.774071939376146e-06, "loss": 1.8028, "step": 2084 }, { "epoch": 2.6, "learning_rate": 5.755752384567762e-06, "loss": 1.7828, "step": 2086 }, { "epoch": 2.6, "learning_rate": 5.737450186884555e-06, "loss": 1.7213, "step": 2088 }, { "epoch": 2.61, "learning_rate": 5.7191654211745405e-06, "loss": 1.7802, "step": 2090 }, { "epoch": 2.61, "learning_rate": 5.700898162214461e-06, "loss": 1.8378, "step": 2092 }, { "epoch": 2.61, "learning_rate": 5.682648484709447e-06, "loss": 1.8132, "step": 2094 }, { "epoch": 2.61, "learning_rate": 5.664416463292734e-06, "loss": 1.7297, "step": 2096 }, { "epoch": 2.62, "learning_rate": 5.646202172525359e-06, "loss": 1.7344, "step": 2098 }, { "epoch": 2.62, "learning_rate": 5.62800568689584e-06, "loss": 1.8234, "step": 2100 }, { "epoch": 2.62, "learning_rate": 5.609827080819876e-06, "loss": 1.8401, "step": 2102 }, { "epoch": 2.62, "learning_rate": 5.591666428640062e-06, "loss": 1.8273, "step": 2104 }, { "epoch": 2.63, "learning_rate": 5.573523804625551e-06, "loss": 1.7594, "step": 2106 }, { "epoch": 2.63, "learning_rate": 5.555399282971787e-06, "loss": 1.7938, "step": 2108 }, { "epoch": 2.63, "learning_rate": 5.537292937800165e-06, "loss": 1.7991, "step": 2110 }, { "epoch": 2.63, "learning_rate": 5.519204843157762e-06, "loss": 1.8119, "step": 2112 }, { "epoch": 2.64, "learning_rate": 5.501135073017008e-06, "loss": 1.8301, "step": 2114 }, { "epoch": 2.64, "learning_rate": 5.483083701275391e-06, "loss": 1.7656, "step": 2116 }, { "epoch": 2.64, "learning_rate": 5.465050801755174e-06, "loss": 1.8065, "step": 2118 }, { "epoch": 2.64, "learning_rate": 5.447036448203062e-06, "loss": 1.8037, "step": 2120 }, { "epoch": 2.65, "learning_rate": 5.4290407142899175e-06, "loss": 1.7335, "step": 2122 }, { "epoch": 2.65, "learning_rate": 5.4110636736104545e-06, "loss": 1.7776, "step": 2124 }, { "epoch": 2.65, "learning_rate": 5.393105399682954e-06, "loss": 1.7953, "step": 2126 }, { "epoch": 2.65, "learning_rate": 5.3751659659489334e-06, "loss": 1.7409, "step": 2128 }, { "epoch": 2.66, "learning_rate": 5.357245445772863e-06, "loss": 1.7813, "step": 2130 }, { "epoch": 2.66, "learning_rate": 5.339343912441877e-06, "loss": 1.7938, "step": 2132 }, { "epoch": 2.66, "learning_rate": 5.321461439165452e-06, "loss": 1.8118, "step": 2134 }, { "epoch": 2.66, "learning_rate": 5.3035980990751135e-06, "loss": 1.7505, "step": 2136 }, { "epoch": 2.67, "learning_rate": 5.285753965224154e-06, "loss": 1.7165, "step": 2138 }, { "epoch": 2.67, "learning_rate": 5.267929110587308e-06, "loss": 1.7894, "step": 2140 }, { "epoch": 2.67, "learning_rate": 5.250123608060476e-06, "loss": 1.8054, "step": 2142 }, { "epoch": 2.67, "learning_rate": 5.2323375304604076e-06, "loss": 1.7548, "step": 2144 }, { "epoch": 2.68, "learning_rate": 5.2145709505244225e-06, "loss": 1.7672, "step": 2146 }, { "epoch": 2.68, "learning_rate": 5.196823940910096e-06, "loss": 1.7785, "step": 2148 }, { "epoch": 2.68, "learning_rate": 5.17909657419497e-06, "loss": 1.8292, "step": 2150 }, { "epoch": 2.68, "learning_rate": 5.161388922876263e-06, "loss": 1.7949, "step": 2152 }, { "epoch": 2.69, "learning_rate": 5.143701059370556e-06, "loss": 1.7908, "step": 2154 }, { "epoch": 2.69, "learning_rate": 5.126033056013513e-06, "loss": 1.7925, "step": 2156 }, { "epoch": 2.69, "learning_rate": 5.108384985059572e-06, "loss": 1.742, "step": 2158 }, { "epoch": 2.69, "learning_rate": 5.090756918681669e-06, "loss": 1.7872, "step": 2160 }, { "epoch": 2.7, "learning_rate": 5.073148928970917e-06, "loss": 1.7558, "step": 2162 }, { "epoch": 2.7, "learning_rate": 5.055561087936325e-06, "loss": 1.7815, "step": 2164 }, { "epoch": 2.7, "learning_rate": 5.037993467504515e-06, "loss": 1.8125, "step": 2166 }, { "epoch": 2.7, "learning_rate": 5.020446139519404e-06, "loss": 1.8043, "step": 2168 }, { "epoch": 2.71, "learning_rate": 5.0029191757419185e-06, "loss": 1.7691, "step": 2170 }, { "epoch": 2.71, "learning_rate": 4.985412647849721e-06, "loss": 1.7858, "step": 2172 }, { "epoch": 2.71, "learning_rate": 4.967926627436882e-06, "loss": 1.7839, "step": 2174 }, { "epoch": 2.71, "learning_rate": 4.9504611860136185e-06, "loss": 1.8227, "step": 2176 }, { "epoch": 2.72, "learning_rate": 4.933016395005979e-06, "loss": 1.7703, "step": 2178 }, { "epoch": 2.72, "learning_rate": 4.915592325755569e-06, "loss": 1.8115, "step": 2180 }, { "epoch": 2.72, "learning_rate": 4.898189049519243e-06, "loss": 1.782, "step": 2182 }, { "epoch": 2.72, "learning_rate": 4.880806637468828e-06, "loss": 1.7804, "step": 2184 }, { "epoch": 2.73, "learning_rate": 4.863445160690815e-06, "loss": 1.8148, "step": 2186 }, { "epoch": 2.73, "learning_rate": 4.846104690186097e-06, "loss": 1.7726, "step": 2188 }, { "epoch": 2.73, "learning_rate": 4.828785296869646e-06, "loss": 1.7901, "step": 2190 }, { "epoch": 2.73, "learning_rate": 4.811487051570235e-06, "loss": 1.765, "step": 2192 }, { "epoch": 2.74, "learning_rate": 4.794210025030167e-06, "loss": 1.8182, "step": 2194 }, { "epoch": 2.74, "learning_rate": 4.776954287904955e-06, "loss": 1.8019, "step": 2196 }, { "epoch": 2.74, "learning_rate": 4.759719910763049e-06, "loss": 1.7577, "step": 2198 }, { "epoch": 2.74, "learning_rate": 4.742506964085555e-06, "loss": 1.7665, "step": 2200 }, { "epoch": 2.75, "learning_rate": 4.725315518265926e-06, "loss": 1.7769, "step": 2202 }, { "epoch": 2.75, "learning_rate": 4.7081456436097e-06, "loss": 1.7785, "step": 2204 }, { "epoch": 2.75, "learning_rate": 4.69099741033418e-06, "loss": 1.7803, "step": 2206 }, { "epoch": 2.75, "learning_rate": 4.673870888568185e-06, "loss": 1.781, "step": 2208 }, { "epoch": 2.76, "learning_rate": 4.656766148351729e-06, "loss": 1.7455, "step": 2210 }, { "epoch": 2.76, "learning_rate": 4.63968325963575e-06, "loss": 1.7833, "step": 2212 }, { "epoch": 2.76, "learning_rate": 4.6226222922818345e-06, "loss": 1.7523, "step": 2214 }, { "epoch": 2.76, "learning_rate": 4.6055833160619076e-06, "loss": 1.7976, "step": 2216 }, { "epoch": 2.77, "learning_rate": 4.588566400657965e-06, "loss": 1.749, "step": 2218 }, { "epoch": 2.77, "learning_rate": 4.57157161566178e-06, "loss": 1.77, "step": 2220 }, { "epoch": 2.77, "learning_rate": 4.55459903057463e-06, "loss": 1.8299, "step": 2222 }, { "epoch": 2.77, "learning_rate": 4.5376487148069995e-06, "loss": 1.7764, "step": 2224 }, { "epoch": 2.78, "learning_rate": 4.5207207376782954e-06, "loss": 1.7518, "step": 2226 }, { "epoch": 2.78, "learning_rate": 4.503815168416584e-06, "loss": 1.7597, "step": 2228 }, { "epoch": 2.78, "learning_rate": 4.486932076158279e-06, "loss": 1.8031, "step": 2230 }, { "epoch": 2.78, "learning_rate": 4.470071529947877e-06, "loss": 1.7573, "step": 2232 }, { "epoch": 2.79, "learning_rate": 4.453233598737678e-06, "loss": 1.7651, "step": 2234 }, { "epoch": 2.79, "learning_rate": 4.436418351387483e-06, "loss": 1.7526, "step": 2236 }, { "epoch": 2.79, "learning_rate": 4.419625856664342e-06, "loss": 1.7442, "step": 2238 }, { "epoch": 2.79, "learning_rate": 4.402856183242241e-06, "loss": 1.7393, "step": 2240 }, { "epoch": 2.8, "learning_rate": 4.386109399701853e-06, "loss": 1.7378, "step": 2242 }, { "epoch": 2.8, "learning_rate": 4.369385574530227e-06, "loss": 1.8041, "step": 2244 }, { "epoch": 2.8, "learning_rate": 4.352684776120525e-06, "loss": 1.7883, "step": 2246 }, { "epoch": 2.8, "learning_rate": 4.336007072771749e-06, "loss": 1.7932, "step": 2248 }, { "epoch": 2.81, "learning_rate": 4.319352532688444e-06, "loss": 1.771, "step": 2250 }, { "epoch": 2.81, "learning_rate": 4.302721223980426e-06, "loss": 1.7801, "step": 2252 }, { "epoch": 2.81, "learning_rate": 4.286113214662507e-06, "loss": 1.7642, "step": 2254 }, { "epoch": 2.81, "learning_rate": 4.269528572654221e-06, "loss": 1.7942, "step": 2256 }, { "epoch": 2.82, "learning_rate": 4.252967365779532e-06, "loss": 1.806, "step": 2258 }, { "epoch": 2.82, "learning_rate": 4.236429661766562e-06, "loss": 1.7692, "step": 2260 }, { "epoch": 2.82, "learning_rate": 4.219915528247331e-06, "loss": 1.7478, "step": 2262 }, { "epoch": 2.82, "learning_rate": 4.203425032757449e-06, "loss": 1.7524, "step": 2264 }, { "epoch": 2.83, "learning_rate": 4.186958242735861e-06, "loss": 1.8303, "step": 2266 }, { "epoch": 2.83, "learning_rate": 4.1705152255245774e-06, "loss": 1.7769, "step": 2268 }, { "epoch": 2.83, "learning_rate": 4.15409604836838e-06, "loss": 1.7617, "step": 2270 }, { "epoch": 2.83, "learning_rate": 4.137700778414555e-06, "loss": 1.8643, "step": 2272 }, { "epoch": 2.84, "learning_rate": 4.121329482712615e-06, "loss": 1.7643, "step": 2274 }, { "epoch": 2.84, "learning_rate": 4.104982228214039e-06, "loss": 1.7848, "step": 2276 }, { "epoch": 2.84, "learning_rate": 4.0886590817719795e-06, "loss": 1.8095, "step": 2278 }, { "epoch": 2.84, "learning_rate": 4.072360110140996e-06, "loss": 1.8052, "step": 2280 }, { "epoch": 2.85, "learning_rate": 4.056085379976794e-06, "loss": 1.7737, "step": 2282 }, { "epoch": 2.85, "learning_rate": 4.039834957835933e-06, "loss": 1.8049, "step": 2284 }, { "epoch": 2.85, "learning_rate": 4.023608910175564e-06, "loss": 1.8058, "step": 2286 }, { "epoch": 2.85, "learning_rate": 4.007407303353156e-06, "loss": 1.7713, "step": 2288 }, { "epoch": 2.86, "learning_rate": 3.991230203626234e-06, "loss": 1.7632, "step": 2290 }, { "epoch": 2.86, "learning_rate": 3.97507767715209e-06, "loss": 1.7341, "step": 2292 }, { "epoch": 2.86, "learning_rate": 3.9589497899875265e-06, "loss": 1.8128, "step": 2294 }, { "epoch": 2.86, "learning_rate": 3.942846608088583e-06, "loss": 1.7962, "step": 2296 }, { "epoch": 2.87, "learning_rate": 3.926768197310259e-06, "loss": 1.7576, "step": 2298 }, { "epoch": 2.87, "learning_rate": 3.910714623406263e-06, "loss": 1.7766, "step": 2300 }, { "epoch": 2.87, "learning_rate": 3.894685952028716e-06, "loss": 1.843, "step": 2302 }, { "epoch": 2.87, "learning_rate": 3.8786822487279145e-06, "loss": 1.8207, "step": 2304 }, { "epoch": 2.88, "learning_rate": 3.862703578952034e-06, "loss": 1.7656, "step": 2306 }, { "epoch": 2.88, "learning_rate": 3.846750008046875e-06, "loss": 1.729, "step": 2308 }, { "epoch": 2.88, "learning_rate": 3.830821601255603e-06, "loss": 1.7607, "step": 2310 }, { "epoch": 2.88, "learning_rate": 3.814918423718467e-06, "loss": 1.7865, "step": 2312 }, { "epoch": 2.89, "learning_rate": 3.799040540472536e-06, "loss": 1.8241, "step": 2314 }, { "epoch": 2.89, "learning_rate": 3.7831880164514467e-06, "loss": 1.7874, "step": 2316 }, { "epoch": 2.89, "learning_rate": 3.7673609164851197e-06, "loss": 1.8109, "step": 2318 }, { "epoch": 2.89, "learning_rate": 3.7515593052995027e-06, "loss": 1.8103, "step": 2320 }, { "epoch": 2.9, "learning_rate": 3.735783247516305e-06, "loss": 1.7593, "step": 2322 }, { "epoch": 2.9, "learning_rate": 3.72003280765274e-06, "loss": 1.7942, "step": 2324 }, { "epoch": 2.9, "learning_rate": 3.704308050121248e-06, "loss": 1.77, "step": 2326 }, { "epoch": 2.9, "learning_rate": 3.6886090392292397e-06, "loss": 1.7753, "step": 2328 }, { "epoch": 2.91, "learning_rate": 3.672935839178842e-06, "loss": 1.7744, "step": 2330 }, { "epoch": 2.91, "learning_rate": 3.6572885140666125e-06, "loss": 1.7915, "step": 2332 }, { "epoch": 2.91, "learning_rate": 3.6416671278833072e-06, "loss": 1.7754, "step": 2334 }, { "epoch": 2.91, "learning_rate": 3.6260717445135886e-06, "loss": 1.7285, "step": 2336 }, { "epoch": 2.92, "learning_rate": 3.6105024277357925e-06, "loss": 1.7752, "step": 2338 }, { "epoch": 2.92, "learning_rate": 3.5949592412216437e-06, "loss": 1.7785, "step": 2340 }, { "epoch": 2.92, "learning_rate": 3.5794422485360058e-06, "loss": 1.7382, "step": 2342 }, { "epoch": 2.92, "learning_rate": 3.5639515131366297e-06, "loss": 1.7592, "step": 2344 }, { "epoch": 2.93, "learning_rate": 3.5484870983738774e-06, "loss": 1.7759, "step": 2346 }, { "epoch": 2.93, "learning_rate": 3.5330490674904737e-06, "loss": 1.7923, "step": 2348 }, { "epoch": 2.93, "learning_rate": 3.517637483621241e-06, "loss": 1.7296, "step": 2350 }, { "epoch": 2.93, "learning_rate": 3.5022524097928546e-06, "loss": 1.7618, "step": 2352 }, { "epoch": 2.94, "learning_rate": 3.4868939089235666e-06, "loss": 1.7458, "step": 2354 }, { "epoch": 2.94, "learning_rate": 3.471562043822957e-06, "loss": 1.8129, "step": 2356 }, { "epoch": 2.94, "learning_rate": 3.456256877191684e-06, "loss": 1.7321, "step": 2358 }, { "epoch": 2.94, "learning_rate": 3.4409784716212124e-06, "loss": 1.773, "step": 2360 }, { "epoch": 2.95, "learning_rate": 3.425726889593577e-06, "loss": 1.8245, "step": 2362 }, { "epoch": 2.95, "learning_rate": 3.4105021934811e-06, "loss": 1.7942, "step": 2364 }, { "epoch": 2.95, "learning_rate": 3.3953044455461705e-06, "loss": 1.7436, "step": 2366 }, { "epoch": 2.95, "learning_rate": 3.3801337079409566e-06, "loss": 1.8017, "step": 2368 }, { "epoch": 2.96, "learning_rate": 3.364990042707168e-06, "loss": 1.773, "step": 2370 }, { "epoch": 2.96, "learning_rate": 3.3498735117758107e-06, "loss": 1.7625, "step": 2372 }, { "epoch": 2.96, "learning_rate": 3.334784176966912e-06, "loss": 1.7515, "step": 2374 }, { "epoch": 2.96, "learning_rate": 3.3197220999892785e-06, "loss": 1.7847, "step": 2376 }, { "epoch": 2.97, "learning_rate": 3.304687342440257e-06, "loss": 1.7762, "step": 2378 }, { "epoch": 2.97, "learning_rate": 3.289679965805457e-06, "loss": 1.7914, "step": 2380 }, { "epoch": 2.97, "learning_rate": 3.274700031458514e-06, "loss": 1.771, "step": 2382 }, { "epoch": 2.97, "learning_rate": 3.2597476006608388e-06, "loss": 1.7516, "step": 2384 }, { "epoch": 2.98, "learning_rate": 3.244822734561368e-06, "loss": 1.7604, "step": 2386 }, { "epoch": 2.98, "learning_rate": 3.2299254941963055e-06, "loss": 1.7139, "step": 2388 }, { "epoch": 2.98, "learning_rate": 3.215055940488875e-06, "loss": 1.7367, "step": 2390 }, { "epoch": 2.98, "learning_rate": 3.2002141342490854e-06, "loss": 1.7927, "step": 2392 }, { "epoch": 2.99, "learning_rate": 3.1854001361734564e-06, "loss": 1.6907, "step": 2394 }, { "epoch": 2.99, "learning_rate": 3.170614006844799e-06, "loss": 1.7805, "step": 2396 }, { "epoch": 2.99, "learning_rate": 3.155855806731938e-06, "loss": 1.783, "step": 2398 }, { "epoch": 2.99, "learning_rate": 3.141125596189494e-06, "loss": 1.7553, "step": 2400 }, { "epoch": 3.0, "learning_rate": 3.126423435457614e-06, "loss": 1.7994, "step": 2402 }, { "epoch": 3.0, "learning_rate": 3.11174938466173e-06, "loss": 1.7779, "step": 2404 }, { "epoch": 3.0, "learning_rate": 3.0971035038123297e-06, "loss": 1.7561, "step": 2406 }, { "epoch": 3.0, "learning_rate": 3.0824858528046873e-06, "loss": 1.7267, "step": 2408 }, { "epoch": 3.01, "learning_rate": 3.0678964914186282e-06, "loss": 1.7596, "step": 2410 }, { "epoch": 3.01, "learning_rate": 3.053335479318297e-06, "loss": 1.7735, "step": 2412 }, { "epoch": 3.01, "learning_rate": 3.038802876051891e-06, "loss": 1.8203, "step": 2414 }, { "epoch": 3.01, "learning_rate": 3.024298741051429e-06, "loss": 1.7813, "step": 2416 }, { "epoch": 3.02, "learning_rate": 3.00982313363251e-06, "loss": 1.7504, "step": 2418 }, { "epoch": 3.02, "learning_rate": 2.9953761129940706e-06, "loss": 1.7908, "step": 2420 }, { "epoch": 3.02, "learning_rate": 2.9809577382181344e-06, "loss": 1.7508, "step": 2422 }, { "epoch": 3.02, "learning_rate": 2.966568068269574e-06, "loss": 1.7996, "step": 2424 }, { "epoch": 3.03, "learning_rate": 2.952207161995879e-06, "loss": 1.8341, "step": 2426 }, { "epoch": 3.03, "learning_rate": 2.937875078126907e-06, "loss": 1.7176, "step": 2428 }, { "epoch": 3.03, "learning_rate": 2.92357187527464e-06, "loss": 1.689, "step": 2430 }, { "epoch": 3.03, "learning_rate": 2.9092976119329485e-06, "loss": 1.7535, "step": 2432 }, { "epoch": 3.04, "learning_rate": 2.8950523464773604e-06, "loss": 1.8065, "step": 2434 }, { "epoch": 3.04, "learning_rate": 2.8808361371648073e-06, "loss": 1.7231, "step": 2436 }, { "epoch": 3.04, "learning_rate": 2.866649042133396e-06, "loss": 1.7789, "step": 2438 }, { "epoch": 3.04, "learning_rate": 2.852491119402172e-06, "loss": 1.6767, "step": 2440 }, { "epoch": 3.05, "learning_rate": 2.8383624268708766e-06, "loss": 1.6767, "step": 2442 }, { "epoch": 3.05, "learning_rate": 2.8242630223197064e-06, "loss": 1.7596, "step": 2444 }, { "epoch": 3.05, "learning_rate": 2.8101929634090964e-06, "loss": 1.7839, "step": 2446 }, { "epoch": 3.05, "learning_rate": 2.7961523076794584e-06, "loss": 1.7519, "step": 2448 }, { "epoch": 3.06, "learning_rate": 2.782141112550961e-06, "loss": 1.7686, "step": 2450 }, { "epoch": 3.06, "learning_rate": 2.7681594353232934e-06, "loss": 1.7489, "step": 2452 }, { "epoch": 3.06, "learning_rate": 2.7542073331754316e-06, "loss": 1.7261, "step": 2454 }, { "epoch": 3.06, "learning_rate": 2.7402848631653956e-06, "loss": 1.745, "step": 2456 }, { "epoch": 3.07, "learning_rate": 2.726392082230034e-06, "loss": 1.7932, "step": 2458 }, { "epoch": 3.07, "learning_rate": 2.7125290471847653e-06, "loss": 1.7539, "step": 2460 }, { "epoch": 3.07, "learning_rate": 2.6986958147233754e-06, "loss": 1.758, "step": 2462 }, { "epoch": 3.07, "learning_rate": 2.684892441417759e-06, "loss": 1.6835, "step": 2464 }, { "epoch": 3.08, "learning_rate": 2.671118983717702e-06, "loss": 1.7887, "step": 2466 }, { "epoch": 3.08, "learning_rate": 2.6573754979506574e-06, "loss": 1.763, "step": 2468 }, { "epoch": 3.08, "learning_rate": 2.6436620403214953e-06, "loss": 1.7728, "step": 2470 }, { "epoch": 3.08, "learning_rate": 2.629978666912284e-06, "loss": 1.7607, "step": 2472 }, { "epoch": 3.09, "learning_rate": 2.616325433682072e-06, "loss": 1.7733, "step": 2474 }, { "epoch": 3.09, "learning_rate": 2.6027023964666354e-06, "loss": 1.7926, "step": 2476 }, { "epoch": 3.09, "learning_rate": 2.5891096109782644e-06, "loss": 1.7717, "step": 2478 }, { "epoch": 3.09, "learning_rate": 2.5755471328055394e-06, "loss": 1.8207, "step": 2480 }, { "epoch": 3.1, "learning_rate": 2.56201501741309e-06, "loss": 1.7626, "step": 2482 }, { "epoch": 3.1, "learning_rate": 2.548513320141377e-06, "loss": 1.7431, "step": 2484 }, { "epoch": 3.1, "learning_rate": 2.5350420962064614e-06, "loss": 1.7708, "step": 2486 }, { "epoch": 3.1, "learning_rate": 2.5216014006997925e-06, "loss": 1.7373, "step": 2488 }, { "epoch": 3.11, "learning_rate": 2.5081912885879558e-06, "loss": 1.7781, "step": 2490 }, { "epoch": 3.11, "learning_rate": 2.49481181471248e-06, "loss": 1.6714, "step": 2492 }, { "epoch": 3.11, "learning_rate": 2.4814630337895816e-06, "loss": 1.7549, "step": 2494 }, { "epoch": 3.11, "learning_rate": 2.4681450004099715e-06, "loss": 1.7998, "step": 2496 }, { "epoch": 3.12, "learning_rate": 2.4548577690386044e-06, "loss": 1.7495, "step": 2498 }, { "epoch": 3.12, "learning_rate": 2.44160139401447e-06, "loss": 1.7551, "step": 2500 }, { "epoch": 3.12, "learning_rate": 2.428375929550377e-06, "loss": 1.7441, "step": 2502 }, { "epoch": 3.12, "learning_rate": 2.4151814297327157e-06, "loss": 1.7314, "step": 2504 }, { "epoch": 3.13, "learning_rate": 2.4020179485212437e-06, "loss": 1.7881, "step": 2506 }, { "epoch": 3.13, "learning_rate": 2.388885539748873e-06, "loss": 1.7789, "step": 2508 }, { "epoch": 3.13, "learning_rate": 2.3757842571214384e-06, "loss": 1.7356, "step": 2510 }, { "epoch": 3.13, "learning_rate": 2.36271415421748e-06, "loss": 1.7548, "step": 2512 }, { "epoch": 3.14, "learning_rate": 2.349675284488029e-06, "loss": 1.7691, "step": 2514 }, { "epoch": 3.14, "learning_rate": 2.336667701256391e-06, "loss": 1.7612, "step": 2516 }, { "epoch": 3.14, "learning_rate": 2.323691457717916e-06, "loss": 1.6773, "step": 2518 }, { "epoch": 3.14, "learning_rate": 2.3107466069397886e-06, "loss": 1.7832, "step": 2520 }, { "epoch": 3.15, "learning_rate": 2.297833201860816e-06, "loss": 1.7523, "step": 2522 }, { "epoch": 3.15, "learning_rate": 2.284951295291208e-06, "loss": 1.7636, "step": 2524 }, { "epoch": 3.15, "learning_rate": 2.272100939912347e-06, "loss": 1.6904, "step": 2526 }, { "epoch": 3.15, "learning_rate": 2.2592821882766e-06, "loss": 1.7878, "step": 2528 }, { "epoch": 3.16, "learning_rate": 2.246495092807077e-06, "loss": 1.7404, "step": 2530 }, { "epoch": 3.16, "learning_rate": 2.2337397057974343e-06, "loss": 1.7535, "step": 2532 }, { "epoch": 3.16, "learning_rate": 2.2210160794116466e-06, "loss": 1.7139, "step": 2534 }, { "epoch": 3.16, "learning_rate": 2.2083242656838134e-06, "loss": 1.7012, "step": 2536 }, { "epoch": 3.17, "learning_rate": 2.195664316517926e-06, "loss": 1.7612, "step": 2538 }, { "epoch": 3.17, "learning_rate": 2.1830362836876617e-06, "loss": 1.8057, "step": 2540 }, { "epoch": 3.17, "learning_rate": 2.170440218836184e-06, "loss": 1.7806, "step": 2542 }, { "epoch": 3.17, "learning_rate": 2.1578761734759122e-06, "loss": 1.7413, "step": 2544 }, { "epoch": 3.18, "learning_rate": 2.1453441989883215e-06, "loss": 1.7482, "step": 2546 }, { "epoch": 3.18, "learning_rate": 2.132844346623731e-06, "loss": 1.7893, "step": 2548 }, { "epoch": 3.18, "learning_rate": 2.1203766675011007e-06, "loss": 1.7371, "step": 2550 }, { "epoch": 3.18, "learning_rate": 2.1079412126078035e-06, "loss": 1.753, "step": 2552 }, { "epoch": 3.19, "learning_rate": 2.0955380327994445e-06, "loss": 1.7221, "step": 2554 }, { "epoch": 3.19, "learning_rate": 2.083167178799623e-06, "loss": 1.7094, "step": 2556 }, { "epoch": 3.19, "learning_rate": 2.0708287011997528e-06, "loss": 1.7715, "step": 2558 }, { "epoch": 3.19, "learning_rate": 2.0585226504588306e-06, "loss": 1.7701, "step": 2560 }, { "epoch": 3.2, "learning_rate": 2.0462490769032528e-06, "loss": 1.697, "step": 2562 }, { "epoch": 3.2, "learning_rate": 2.0340080307265887e-06, "loss": 1.6981, "step": 2564 }, { "epoch": 3.2, "learning_rate": 2.0217995619893894e-06, "loss": 1.7085, "step": 2566 }, { "epoch": 3.2, "learning_rate": 2.009623720618974e-06, "loss": 1.7761, "step": 2568 }, { "epoch": 3.2, "learning_rate": 1.9974805564092403e-06, "loss": 1.748, "step": 2570 }, { "epoch": 3.21, "learning_rate": 1.9853701190204387e-06, "loss": 1.6954, "step": 2572 }, { "epoch": 3.21, "learning_rate": 1.9732924579789857e-06, "loss": 1.7337, "step": 2574 }, { "epoch": 3.21, "learning_rate": 1.9612476226772627e-06, "loss": 1.7605, "step": 2576 }, { "epoch": 3.21, "learning_rate": 1.9492356623733987e-06, "loss": 1.7695, "step": 2578 }, { "epoch": 3.22, "learning_rate": 1.937256626191083e-06, "loss": 1.78, "step": 2580 }, { "epoch": 3.22, "learning_rate": 1.925310563119358e-06, "loss": 1.7368, "step": 2582 }, { "epoch": 3.22, "learning_rate": 1.9133975220124246e-06, "loss": 1.7892, "step": 2584 }, { "epoch": 3.22, "learning_rate": 1.9015175515894303e-06, "loss": 1.75, "step": 2586 }, { "epoch": 3.23, "learning_rate": 1.8896707004342851e-06, "loss": 1.7678, "step": 2588 }, { "epoch": 3.23, "learning_rate": 1.8778570169954568e-06, "loss": 1.7973, "step": 2590 }, { "epoch": 3.23, "learning_rate": 1.8660765495857648e-06, "loss": 1.7518, "step": 2592 }, { "epoch": 3.23, "learning_rate": 1.8543293463821922e-06, "loss": 1.7464, "step": 2594 }, { "epoch": 3.24, "learning_rate": 1.8426154554256836e-06, "loss": 1.7284, "step": 2596 }, { "epoch": 3.24, "learning_rate": 1.8309349246209607e-06, "loss": 1.7154, "step": 2598 }, { "epoch": 3.24, "learning_rate": 1.8192878017363048e-06, "loss": 1.7579, "step": 2600 }, { "epoch": 3.24, "learning_rate": 1.8076741344033777e-06, "loss": 1.7239, "step": 2602 }, { "epoch": 3.25, "learning_rate": 1.7960939701170278e-06, "loss": 1.8123, "step": 2604 }, { "epoch": 3.25, "learning_rate": 1.7845473562350835e-06, "loss": 1.7921, "step": 2606 }, { "epoch": 3.25, "learning_rate": 1.773034339978167e-06, "loss": 1.7595, "step": 2608 }, { "epoch": 3.25, "learning_rate": 1.7615549684295074e-06, "loss": 1.7653, "step": 2610 }, { "epoch": 3.26, "learning_rate": 1.7501092885347349e-06, "loss": 1.7557, "step": 2612 }, { "epoch": 3.26, "learning_rate": 1.7386973471016954e-06, "loss": 1.7247, "step": 2614 }, { "epoch": 3.26, "learning_rate": 1.7273191908002663e-06, "loss": 1.7685, "step": 2616 }, { "epoch": 3.26, "learning_rate": 1.7159748661621501e-06, "loss": 1.7214, "step": 2618 }, { "epoch": 3.27, "learning_rate": 1.7046644195806995e-06, "loss": 1.7555, "step": 2620 }, { "epoch": 3.27, "learning_rate": 1.6933878973107133e-06, "loss": 1.7323, "step": 2622 }, { "epoch": 3.27, "learning_rate": 1.6821453454682635e-06, "loss": 1.7416, "step": 2624 }, { "epoch": 3.27, "learning_rate": 1.6709368100304911e-06, "loss": 1.7537, "step": 2626 }, { "epoch": 3.28, "learning_rate": 1.6597623368354277e-06, "loss": 1.8397, "step": 2628 }, { "epoch": 3.28, "learning_rate": 1.6486219715817998e-06, "loss": 1.7253, "step": 2630 }, { "epoch": 3.28, "learning_rate": 1.6375157598288572e-06, "loss": 1.7509, "step": 2632 }, { "epoch": 3.28, "learning_rate": 1.6264437469961703e-06, "loss": 1.7857, "step": 2634 }, { "epoch": 3.29, "learning_rate": 1.615405978363447e-06, "loss": 1.7102, "step": 2636 }, { "epoch": 3.29, "learning_rate": 1.6044024990703634e-06, "loss": 1.7563, "step": 2638 }, { "epoch": 3.29, "learning_rate": 1.593433354116356e-06, "loss": 1.786, "step": 2640 }, { "epoch": 3.29, "learning_rate": 1.5824985883604526e-06, "loss": 1.7106, "step": 2642 }, { "epoch": 3.3, "learning_rate": 1.5715982465210844e-06, "loss": 1.7562, "step": 2644 }, { "epoch": 3.3, "learning_rate": 1.560732373175907e-06, "loss": 1.7084, "step": 2646 }, { "epoch": 3.3, "learning_rate": 1.5499010127616087e-06, "loss": 1.6984, "step": 2648 }, { "epoch": 3.3, "learning_rate": 1.539104209573743e-06, "loss": 1.7293, "step": 2650 }, { "epoch": 3.31, "learning_rate": 1.5283420077665312e-06, "loss": 1.7559, "step": 2652 }, { "epoch": 3.31, "learning_rate": 1.517614451352697e-06, "loss": 1.7396, "step": 2654 }, { "epoch": 3.31, "learning_rate": 1.5069215842032725e-06, "loss": 1.7581, "step": 2656 }, { "epoch": 3.31, "learning_rate": 1.4962634500474338e-06, "loss": 1.7492, "step": 2658 }, { "epoch": 3.32, "learning_rate": 1.485640092472308e-06, "loss": 1.7579, "step": 2660 }, { "epoch": 3.32, "learning_rate": 1.475051554922804e-06, "loss": 1.793, "step": 2662 }, { "epoch": 3.32, "learning_rate": 1.4644978807014276e-06, "loss": 1.7413, "step": 2664 }, { "epoch": 3.32, "learning_rate": 1.4539791129681157e-06, "loss": 1.7035, "step": 2666 }, { "epoch": 3.33, "learning_rate": 1.4434952947400505e-06, "loss": 1.6727, "step": 2668 }, { "epoch": 3.33, "learning_rate": 1.4330464688914792e-06, "loss": 1.7254, "step": 2670 }, { "epoch": 3.33, "learning_rate": 1.422632678153557e-06, "loss": 1.7443, "step": 2672 }, { "epoch": 3.33, "learning_rate": 1.412253965114152e-06, "loss": 1.767, "step": 2674 }, { "epoch": 3.34, "learning_rate": 1.401910372217684e-06, "loss": 1.7036, "step": 2676 }, { "epoch": 3.34, "learning_rate": 1.3916019417649418e-06, "loss": 1.7526, "step": 2678 }, { "epoch": 3.34, "learning_rate": 1.3813287159129208e-06, "loss": 1.7227, "step": 2680 }, { "epoch": 3.34, "learning_rate": 1.371090736674644e-06, "loss": 1.7712, "step": 2682 }, { "epoch": 3.35, "learning_rate": 1.3608880459189877e-06, "loss": 1.719, "step": 2684 }, { "epoch": 3.35, "learning_rate": 1.3507206853705178e-06, "loss": 1.8072, "step": 2686 }, { "epoch": 3.35, "learning_rate": 1.340588696609313e-06, "loss": 1.7315, "step": 2688 }, { "epoch": 3.35, "learning_rate": 1.3304921210707922e-06, "loss": 1.6884, "step": 2690 }, { "epoch": 3.36, "learning_rate": 1.3204310000455612e-06, "loss": 1.7803, "step": 2692 }, { "epoch": 3.36, "learning_rate": 1.31040537467922e-06, "loss": 1.7541, "step": 2694 }, { "epoch": 3.36, "learning_rate": 1.3004152859722152e-06, "loss": 1.7513, "step": 2696 }, { "epoch": 3.36, "learning_rate": 1.2904607747796561e-06, "loss": 1.7111, "step": 2698 }, { "epoch": 3.37, "learning_rate": 1.2805418818111658e-06, "loss": 1.811, "step": 2700 }, { "epoch": 3.37, "learning_rate": 1.2706586476306971e-06, "loss": 1.7671, "step": 2702 }, { "epoch": 3.37, "learning_rate": 1.2608111126563715e-06, "loss": 1.7514, "step": 2704 }, { "epoch": 3.37, "learning_rate": 1.2509993171603263e-06, "loss": 1.7736, "step": 2706 }, { "epoch": 3.38, "learning_rate": 1.2412233012685315e-06, "loss": 1.7376, "step": 2708 }, { "epoch": 3.38, "learning_rate": 1.2314831049606325e-06, "loss": 1.7316, "step": 2710 }, { "epoch": 3.38, "learning_rate": 1.221778768069799e-06, "loss": 1.7132, "step": 2712 }, { "epoch": 3.38, "learning_rate": 1.2121103302825388e-06, "loss": 1.7622, "step": 2714 }, { "epoch": 3.39, "learning_rate": 1.2024778311385588e-06, "loss": 1.7271, "step": 2716 }, { "epoch": 3.39, "learning_rate": 1.1928813100305826e-06, "loss": 1.6945, "step": 2718 }, { "epoch": 3.39, "learning_rate": 1.1833208062042078e-06, "loss": 1.7107, "step": 2720 }, { "epoch": 3.39, "learning_rate": 1.1737963587577318e-06, "loss": 1.7716, "step": 2722 }, { "epoch": 3.4, "learning_rate": 1.1643080066419977e-06, "loss": 1.721, "step": 2724 }, { "epoch": 3.4, "learning_rate": 1.154855788660234e-06, "loss": 1.7474, "step": 2726 }, { "epoch": 3.4, "learning_rate": 1.1454397434679022e-06, "loss": 1.6985, "step": 2728 }, { "epoch": 3.4, "learning_rate": 1.1360599095725243e-06, "loss": 1.7498, "step": 2730 }, { "epoch": 3.41, "learning_rate": 1.1267163253335378e-06, "loss": 1.7129, "step": 2732 }, { "epoch": 3.41, "learning_rate": 1.1174090289621386e-06, "loss": 1.756, "step": 2734 }, { "epoch": 3.41, "learning_rate": 1.1081380585211133e-06, "loss": 1.7459, "step": 2736 }, { "epoch": 3.41, "learning_rate": 1.0989034519246956e-06, "loss": 1.7708, "step": 2738 }, { "epoch": 3.42, "learning_rate": 1.0897052469384095e-06, "loss": 1.6919, "step": 2740 }, { "epoch": 3.42, "learning_rate": 1.0805434811789073e-06, "loss": 1.7396, "step": 2742 }, { "epoch": 3.42, "learning_rate": 1.071418192113821e-06, "loss": 1.7501, "step": 2744 }, { "epoch": 3.42, "learning_rate": 1.0623294170616128e-06, "loss": 1.7437, "step": 2746 }, { "epoch": 3.43, "learning_rate": 1.0532771931914177e-06, "loss": 1.765, "step": 2748 }, { "epoch": 3.43, "learning_rate": 1.0442615575228875e-06, "loss": 1.7283, "step": 2750 }, { "epoch": 3.43, "learning_rate": 1.0352825469260485e-06, "loss": 1.7591, "step": 2752 }, { "epoch": 3.43, "learning_rate": 1.0263401981211475e-06, "loss": 1.7487, "step": 2754 }, { "epoch": 3.44, "learning_rate": 1.0174345476784963e-06, "loss": 1.7649, "step": 2756 }, { "epoch": 3.44, "learning_rate": 1.008565632018328e-06, "loss": 1.7493, "step": 2758 }, { "epoch": 3.44, "learning_rate": 9.997334874106468e-07, "loss": 1.7429, "step": 2760 }, { "epoch": 3.44, "learning_rate": 9.909381499750824e-07, "loss": 1.743, "step": 2762 }, { "epoch": 3.45, "learning_rate": 9.82179655680734e-07, "loss": 1.7327, "step": 2764 }, { "epoch": 3.45, "learning_rate": 9.734580403460281e-07, "loss": 1.7447, "step": 2766 }, { "epoch": 3.45, "learning_rate": 9.647733396385794e-07, "loss": 1.7381, "step": 2768 }, { "epoch": 3.45, "learning_rate": 9.56125589075032e-07, "loss": 1.7667, "step": 2770 }, { "epoch": 3.46, "learning_rate": 9.475148240209175e-07, "loss": 1.7485, "step": 2772 }, { "epoch": 3.46, "learning_rate": 9.389410796905229e-07, "loss": 1.7752, "step": 2774 }, { "epoch": 3.46, "learning_rate": 9.304043911467242e-07, "loss": 1.7736, "step": 2776 }, { "epoch": 3.46, "learning_rate": 9.219047933008662e-07, "loss": 1.7809, "step": 2778 }, { "epoch": 3.47, "learning_rate": 9.134423209125998e-07, "loss": 1.7512, "step": 2780 }, { "epoch": 3.47, "learning_rate": 9.05017008589758e-07, "loss": 1.7496, "step": 2782 }, { "epoch": 3.47, "learning_rate": 8.966288907881981e-07, "loss": 1.7285, "step": 2784 }, { "epoch": 3.47, "learning_rate": 8.882780018116688e-07, "loss": 1.7315, "step": 2786 }, { "epoch": 3.48, "learning_rate": 8.799643758116739e-07, "loss": 1.7887, "step": 2788 }, { "epoch": 3.48, "learning_rate": 8.716880467873235e-07, "loss": 1.7173, "step": 2790 }, { "epoch": 3.48, "learning_rate": 8.634490485851998e-07, "loss": 1.7744, "step": 2792 }, { "epoch": 3.48, "learning_rate": 8.552474148992174e-07, "loss": 1.7322, "step": 2794 }, { "epoch": 3.49, "learning_rate": 8.470831792704925e-07, "loss": 1.7163, "step": 2796 }, { "epoch": 3.49, "learning_rate": 8.389563750871921e-07, "loss": 1.7543, "step": 2798 }, { "epoch": 3.49, "learning_rate": 8.308670355844051e-07, "loss": 1.7267, "step": 2800 }, { "epoch": 3.49, "learning_rate": 8.228151938440132e-07, "loss": 1.7467, "step": 2802 }, { "epoch": 3.5, "learning_rate": 8.148008827945431e-07, "loss": 1.7355, "step": 2804 }, { "epoch": 3.5, "learning_rate": 8.06824135211034e-07, "loss": 1.7533, "step": 2806 }, { "epoch": 3.5, "learning_rate": 7.98884983714917e-07, "loss": 1.8103, "step": 2808 }, { "epoch": 3.5, "learning_rate": 7.90983460773862e-07, "loss": 1.8002, "step": 2810 }, { "epoch": 3.51, "learning_rate": 7.831195987016604e-07, "loss": 1.7631, "step": 2812 }, { "epoch": 3.51, "learning_rate": 7.752934296580816e-07, "loss": 1.7571, "step": 2814 }, { "epoch": 3.51, "learning_rate": 7.675049856487549e-07, "loss": 1.7618, "step": 2816 }, { "epoch": 3.51, "learning_rate": 7.597542985250228e-07, "loss": 1.7473, "step": 2818 }, { "epoch": 3.52, "learning_rate": 7.520413999838205e-07, "loss": 1.7714, "step": 2820 }, { "epoch": 3.52, "learning_rate": 7.44366321567549e-07, "loss": 1.7322, "step": 2822 }, { "epoch": 3.52, "learning_rate": 7.36729094663936e-07, "loss": 1.7755, "step": 2824 }, { "epoch": 3.52, "learning_rate": 7.291297505059158e-07, "loss": 1.7062, "step": 2826 }, { "epoch": 3.53, "learning_rate": 7.215683201714951e-07, "loss": 1.7667, "step": 2828 }, { "epoch": 3.53, "learning_rate": 7.14044834583637e-07, "loss": 1.7912, "step": 2830 }, { "epoch": 3.53, "learning_rate": 7.065593245101188e-07, "loss": 1.6748, "step": 2832 }, { "epoch": 3.53, "learning_rate": 6.991118205634184e-07, "loss": 1.8296, "step": 2834 }, { "epoch": 3.54, "learning_rate": 6.917023532005885e-07, "loss": 1.6914, "step": 2836 }, { "epoch": 3.54, "learning_rate": 6.843309527231212e-07, "loss": 1.7107, "step": 2838 }, { "epoch": 3.54, "learning_rate": 6.76997649276836e-07, "loss": 1.7289, "step": 2840 }, { "epoch": 3.54, "learning_rate": 6.697024728517531e-07, "loss": 1.7092, "step": 2842 }, { "epoch": 3.55, "learning_rate": 6.624454532819702e-07, "loss": 1.7089, "step": 2844 }, { "epoch": 3.55, "learning_rate": 6.552266202455348e-07, "loss": 1.7465, "step": 2846 }, { "epoch": 3.55, "learning_rate": 6.480460032643321e-07, "loss": 1.7386, "step": 2848 }, { "epoch": 3.55, "learning_rate": 6.409036317039619e-07, "loss": 1.7095, "step": 2850 }, { "epoch": 3.56, "learning_rate": 6.337995347736137e-07, "loss": 1.8104, "step": 2852 }, { "epoch": 3.56, "learning_rate": 6.2673374152595e-07, "loss": 1.76, "step": 2854 }, { "epoch": 3.56, "learning_rate": 6.197062808569909e-07, "loss": 1.7191, "step": 2856 }, { "epoch": 3.56, "learning_rate": 6.127171815059918e-07, "loss": 1.7726, "step": 2858 }, { "epoch": 3.57, "learning_rate": 6.057664720553258e-07, "loss": 1.7279, "step": 2860 }, { "epoch": 3.57, "learning_rate": 5.988541809303671e-07, "loss": 1.7625, "step": 2862 }, { "epoch": 3.57, "learning_rate": 5.919803363993815e-07, "loss": 1.7329, "step": 2864 }, { "epoch": 3.57, "learning_rate": 5.851449665733977e-07, "loss": 1.7362, "step": 2866 }, { "epoch": 3.58, "learning_rate": 5.783480994061019e-07, "loss": 1.7445, "step": 2868 }, { "epoch": 3.58, "learning_rate": 5.715897626937261e-07, "loss": 1.816, "step": 2870 }, { "epoch": 3.58, "learning_rate": 5.648699840749205e-07, "loss": 1.7441, "step": 2872 }, { "epoch": 3.58, "learning_rate": 5.581887910306594e-07, "loss": 1.7526, "step": 2874 }, { "epoch": 3.59, "learning_rate": 5.515462108841107e-07, "loss": 1.7149, "step": 2876 }, { "epoch": 3.59, "learning_rate": 5.44942270800537e-07, "loss": 1.6937, "step": 2878 }, { "epoch": 3.59, "learning_rate": 5.383769977871778e-07, "loss": 1.748, "step": 2880 }, { "epoch": 3.59, "learning_rate": 5.318504186931416e-07, "loss": 1.7413, "step": 2882 }, { "epoch": 3.6, "learning_rate": 5.253625602092971e-07, "loss": 1.7593, "step": 2884 }, { "epoch": 3.6, "learning_rate": 5.189134488681602e-07, "loss": 1.7053, "step": 2886 }, { "epoch": 3.6, "learning_rate": 5.125031110437883e-07, "loss": 1.7037, "step": 2888 }, { "epoch": 3.6, "learning_rate": 5.061315729516736e-07, "loss": 1.6846, "step": 2890 }, { "epoch": 3.61, "learning_rate": 4.997988606486336e-07, "loss": 1.7661, "step": 2892 }, { "epoch": 3.61, "learning_rate": 4.935050000327046e-07, "loss": 1.7705, "step": 2894 }, { "epoch": 3.61, "learning_rate": 4.87250016843035e-07, "loss": 1.7635, "step": 2896 }, { "epoch": 3.61, "learning_rate": 4.81033936659786e-07, "loss": 1.7525, "step": 2898 }, { "epoch": 3.62, "learning_rate": 4.7485678490401755e-07, "loss": 1.7974, "step": 2900 }, { "epoch": 3.62, "learning_rate": 4.6871858683759206e-07, "loss": 1.6898, "step": 2902 }, { "epoch": 3.62, "learning_rate": 4.6261936756306746e-07, "loss": 1.7686, "step": 2904 }, { "epoch": 3.62, "learning_rate": 4.565591520235957e-07, "loss": 1.7554, "step": 2906 }, { "epoch": 3.63, "learning_rate": 4.5053796500282076e-07, "loss": 1.7925, "step": 2908 }, { "epoch": 3.63, "learning_rate": 4.445558311247755e-07, "loss": 1.738, "step": 2910 }, { "epoch": 3.63, "learning_rate": 4.3861277485378384e-07, "loss": 1.778, "step": 2912 }, { "epoch": 3.63, "learning_rate": 4.327088204943597e-07, "loss": 1.7361, "step": 2914 }, { "epoch": 3.64, "learning_rate": 4.2684399219110493e-07, "loss": 1.7336, "step": 2916 }, { "epoch": 3.64, "learning_rate": 4.2101831392861505e-07, "loss": 1.7825, "step": 2918 }, { "epoch": 3.64, "learning_rate": 4.1523180953137785e-07, "loss": 1.7401, "step": 2920 }, { "epoch": 3.64, "learning_rate": 4.094845026636773e-07, "loss": 1.7292, "step": 2922 }, { "epoch": 3.65, "learning_rate": 4.0377641682949667e-07, "loss": 1.7585, "step": 2924 }, { "epoch": 3.65, "learning_rate": 3.9810757537242175e-07, "loss": 1.7571, "step": 2926 }, { "epoch": 3.65, "learning_rate": 3.9247800147554805e-07, "loss": 1.7688, "step": 2928 }, { "epoch": 3.65, "learning_rate": 3.868877181613806e-07, "loss": 1.7733, "step": 2930 }, { "epoch": 3.66, "learning_rate": 3.8133674829174515e-07, "loss": 1.7647, "step": 2932 }, { "epoch": 3.66, "learning_rate": 3.7582511456769165e-07, "loss": 1.7717, "step": 2934 }, { "epoch": 3.66, "learning_rate": 3.703528395294043e-07, "loss": 1.7208, "step": 2936 }, { "epoch": 3.66, "learning_rate": 3.6491994555610257e-07, "loss": 1.6513, "step": 2938 }, { "epoch": 3.67, "learning_rate": 3.595264548659616e-07, "loss": 1.7232, "step": 2940 }, { "epoch": 3.67, "learning_rate": 3.5417238951600986e-07, "loss": 1.7238, "step": 2942 }, { "epoch": 3.67, "learning_rate": 3.4885777140204025e-07, "loss": 1.7453, "step": 2944 }, { "epoch": 3.67, "learning_rate": 3.4358262225853255e-07, "loss": 1.7241, "step": 2946 }, { "epoch": 3.68, "learning_rate": 3.383469636585468e-07, "loss": 1.8228, "step": 2948 }, { "epoch": 3.68, "learning_rate": 3.331508170136477e-07, "loss": 1.6952, "step": 2950 }, { "epoch": 3.68, "learning_rate": 3.2799420357381486e-07, "loss": 1.7492, "step": 2952 }, { "epoch": 3.68, "learning_rate": 3.2287714442735264e-07, "loss": 1.6934, "step": 2954 }, { "epoch": 3.69, "learning_rate": 3.177996605008038e-07, "loss": 1.7871, "step": 2956 }, { "epoch": 3.69, "learning_rate": 3.1276177255886606e-07, "loss": 1.7564, "step": 2958 }, { "epoch": 3.69, "learning_rate": 3.0776350120431233e-07, "loss": 1.7099, "step": 2960 }, { "epoch": 3.69, "learning_rate": 3.028048668778938e-07, "loss": 1.7299, "step": 2962 }, { "epoch": 3.7, "learning_rate": 2.978858898582659e-07, "loss": 1.7445, "step": 2964 }, { "epoch": 3.7, "learning_rate": 2.9300659026190504e-07, "loss": 1.7694, "step": 2966 }, { "epoch": 3.7, "learning_rate": 2.8816698804302043e-07, "loss": 1.7813, "step": 2968 }, { "epoch": 3.7, "learning_rate": 2.8336710299348034e-07, "loss": 1.7317, "step": 2970 }, { "epoch": 3.71, "learning_rate": 2.786069547427239e-07, "loss": 1.7423, "step": 2972 }, { "epoch": 3.71, "learning_rate": 2.738865627576881e-07, "loss": 1.7888, "step": 2974 }, { "epoch": 3.71, "learning_rate": 2.692059463427177e-07, "loss": 1.7366, "step": 2976 }, { "epoch": 3.71, "learning_rate": 2.645651246394976e-07, "loss": 1.7844, "step": 2978 }, { "epoch": 3.72, "learning_rate": 2.599641166269684e-07, "loss": 1.6804, "step": 2980 }, { "epoch": 3.72, "learning_rate": 2.5540294112125107e-07, "loss": 1.76, "step": 2982 }, { "epoch": 3.72, "learning_rate": 2.508816167755668e-07, "loss": 1.7709, "step": 2984 }, { "epoch": 3.72, "learning_rate": 2.464001620801637e-07, "loss": 1.7451, "step": 2986 }, { "epoch": 3.73, "learning_rate": 2.4195859536224165e-07, "loss": 1.7944, "step": 2988 }, { "epoch": 3.73, "learning_rate": 2.3755693478587416e-07, "loss": 1.6974, "step": 2990 }, { "epoch": 3.73, "learning_rate": 2.331951983519365e-07, "loss": 1.7314, "step": 2992 }, { "epoch": 3.73, "learning_rate": 2.2887340389803338e-07, "loss": 1.7597, "step": 2994 }, { "epoch": 3.74, "learning_rate": 2.245915690984224e-07, "loss": 1.7064, "step": 2996 }, { "epoch": 3.74, "learning_rate": 2.2034971146394302e-07, "loss": 1.7006, "step": 2998 }, { "epoch": 3.74, "learning_rate": 2.1614784834194658e-07, "loss": 1.7599, "step": 3000 }, { "epoch": 3.74, "learning_rate": 2.1198599691622634e-07, "loss": 1.7349, "step": 3002 }, { "epoch": 3.75, "learning_rate": 2.0786417420693982e-07, "loss": 1.721, "step": 3004 }, { "epoch": 3.75, "learning_rate": 2.0378239707054882e-07, "loss": 1.7761, "step": 3006 }, { "epoch": 3.75, "learning_rate": 1.9974068219974607e-07, "loss": 1.7402, "step": 3008 }, { "epoch": 3.75, "learning_rate": 1.9573904612338545e-07, "loss": 1.7401, "step": 3010 }, { "epoch": 3.76, "learning_rate": 1.9177750520641525e-07, "loss": 1.7541, "step": 3012 }, { "epoch": 3.76, "learning_rate": 1.878560756498149e-07, "loss": 1.6704, "step": 3014 }, { "epoch": 3.76, "learning_rate": 1.8397477349052395e-07, "loss": 1.776, "step": 3016 }, { "epoch": 3.76, "learning_rate": 1.801336146013777e-07, "loss": 1.7534, "step": 3018 }, { "epoch": 3.77, "learning_rate": 1.7633261469104378e-07, "loss": 1.7372, "step": 3020 }, { "epoch": 3.77, "learning_rate": 1.7257178930395912e-07, "loss": 1.7403, "step": 3022 }, { "epoch": 3.77, "learning_rate": 1.6885115382026084e-07, "loss": 1.8037, "step": 3024 }, { "epoch": 3.77, "learning_rate": 1.651707234557287e-07, "loss": 1.7416, "step": 3026 }, { "epoch": 3.78, "learning_rate": 1.6153051326172063e-07, "loss": 1.7563, "step": 3028 }, { "epoch": 3.78, "learning_rate": 1.5793053812511172e-07, "loss": 1.7859, "step": 3030 }, { "epoch": 3.78, "learning_rate": 1.5437081276823417e-07, "loss": 1.7562, "step": 3032 }, { "epoch": 3.78, "learning_rate": 1.5085135174881416e-07, "loss": 1.7266, "step": 3034 }, { "epoch": 3.79, "learning_rate": 1.4737216945991505e-07, "loss": 1.7662, "step": 3036 }, { "epoch": 3.79, "learning_rate": 1.4393328012987872e-07, "loss": 1.8232, "step": 3038 }, { "epoch": 3.79, "learning_rate": 1.4053469782226437e-07, "loss": 1.7156, "step": 3040 }, { "epoch": 3.79, "learning_rate": 1.3717643643579525e-07, "loss": 1.7574, "step": 3042 }, { "epoch": 3.8, "learning_rate": 1.3385850970429882e-07, "loss": 1.7577, "step": 3044 }, { "epoch": 3.8, "learning_rate": 1.3058093119664882e-07, "loss": 1.7602, "step": 3046 }, { "epoch": 3.8, "learning_rate": 1.2734371431671777e-07, "loss": 1.7145, "step": 3048 }, { "epoch": 3.8, "learning_rate": 1.2414687230331124e-07, "loss": 1.7752, "step": 3050 }, { "epoch": 3.81, "learning_rate": 1.2099041823012136e-07, "loss": 1.7904, "step": 3052 }, { "epoch": 3.81, "learning_rate": 1.178743650056724e-07, "loss": 1.8119, "step": 3054 }, { "epoch": 3.81, "learning_rate": 1.147987253732652e-07, "loss": 1.7413, "step": 3056 }, { "epoch": 3.81, "learning_rate": 1.1176351191092727e-07, "loss": 1.7737, "step": 3058 }, { "epoch": 3.82, "learning_rate": 1.0876873703135949e-07, "loss": 1.7112, "step": 3060 }, { "epoch": 3.82, "learning_rate": 1.0581441298188944e-07, "loss": 1.7561, "step": 3062 }, { "epoch": 3.82, "learning_rate": 1.0290055184441372e-07, "loss": 1.7256, "step": 3064 }, { "epoch": 3.82, "learning_rate": 1.0002716553535685e-07, "loss": 1.728, "step": 3066 }, { "epoch": 3.83, "learning_rate": 9.719426580561908e-08, "loss": 1.7338, "step": 3068 }, { "epoch": 3.83, "learning_rate": 9.440186424052755e-08, "loss": 1.7488, "step": 3070 }, { "epoch": 3.83, "learning_rate": 9.164997225978745e-08, "loss": 1.7515, "step": 3072 }, { "epoch": 3.83, "learning_rate": 8.893860111743868e-08, "loss": 1.7867, "step": 3074 }, { "epoch": 3.84, "learning_rate": 8.62677619018104e-08, "loss": 1.7451, "step": 3076 }, { "epoch": 3.84, "learning_rate": 8.363746553547214e-08, "loss": 1.744, "step": 3078 }, { "epoch": 3.84, "learning_rate": 8.104772277519047e-08, "loss": 1.7599, "step": 3080 }, { "epoch": 3.84, "learning_rate": 7.849854421188574e-08, "loss": 1.7123, "step": 3082 }, { "epoch": 3.85, "learning_rate": 7.598994027058992e-08, "loss": 1.7468, "step": 3084 }, { "epoch": 3.85, "learning_rate": 7.352192121039992e-08, "loss": 1.6844, "step": 3086 }, { "epoch": 3.85, "learning_rate": 7.109449712443873e-08, "loss": 1.7591, "step": 3088 }, { "epoch": 3.85, "learning_rate": 6.870767793981658e-08, "loss": 1.7483, "step": 3090 }, { "epoch": 3.86, "learning_rate": 6.636147341758215e-08, "loss": 1.7963, "step": 3092 }, { "epoch": 3.86, "learning_rate": 6.405589315269246e-08, "loss": 1.7774, "step": 3094 }, { "epoch": 3.86, "learning_rate": 6.179094657396634e-08, "loss": 1.7685, "step": 3096 }, { "epoch": 3.86, "learning_rate": 5.9566642944050015e-08, "loss": 1.7408, "step": 3098 }, { "epoch": 3.87, "learning_rate": 5.7382991359375975e-08, "loss": 1.7275, "step": 3100 }, { "epoch": 3.87, "learning_rate": 5.5240000750129695e-08, "loss": 1.7765, "step": 3102 }, { "epoch": 3.87, "learning_rate": 5.313767988020857e-08, "loss": 1.7012, "step": 3104 }, { "epoch": 3.87, "learning_rate": 5.107603734719191e-08, "loss": 1.6892, "step": 3106 }, { "epoch": 3.88, "learning_rate": 4.905508158229877e-08, "loss": 1.7605, "step": 3108 }, { "epoch": 3.88, "learning_rate": 4.7074820850357974e-08, "loss": 1.7936, "step": 3110 }, { "epoch": 3.88, "learning_rate": 4.513526324977591e-08, "loss": 1.7406, "step": 3112 }, { "epoch": 3.88, "learning_rate": 4.3236416712496565e-08, "loss": 1.69, "step": 3114 }, { "epoch": 3.89, "learning_rate": 4.1378289003977115e-08, "loss": 1.7447, "step": 3116 }, { "epoch": 3.89, "learning_rate": 3.956088772315014e-08, "loss": 1.7381, "step": 3118 }, { "epoch": 3.89, "learning_rate": 3.7784220302397036e-08, "loss": 1.7742, "step": 3120 }, { "epoch": 3.89, "learning_rate": 3.604829400751242e-08, "loss": 1.7281, "step": 3122 }, { "epoch": 3.9, "learning_rate": 3.435311593768087e-08, "loss": 1.782, "step": 3124 }, { "epoch": 3.9, "learning_rate": 3.2698693025441374e-08, "loss": 1.7599, "step": 3126 }, { "epoch": 3.9, "learning_rate": 3.108503203666402e-08, "loss": 1.7887, "step": 3128 }, { "epoch": 3.9, "learning_rate": 2.9512139570520016e-08, "loss": 1.7642, "step": 3130 }, { "epoch": 3.91, "learning_rate": 2.7980022059453938e-08, "loss": 1.776, "step": 3132 }, { "epoch": 3.91, "learning_rate": 2.6488685769161528e-08, "loss": 1.7862, "step": 3134 }, { "epoch": 3.91, "learning_rate": 2.5038136798556377e-08, "loss": 1.8244, "step": 3136 }, { "epoch": 3.91, "learning_rate": 2.3628381079754403e-08, "loss": 1.7425, "step": 3138 }, { "epoch": 3.92, "learning_rate": 2.2259424378041628e-08, "loss": 1.7029, "step": 3140 }, { "epoch": 3.92, "learning_rate": 2.093127229185532e-08, "loss": 1.7273, "step": 3142 }, { "epoch": 3.92, "learning_rate": 1.9643930252760678e-08, "loss": 1.7155, "step": 3144 }, { "epoch": 3.92, "learning_rate": 1.8397403525424184e-08, "loss": 1.7583, "step": 3146 }, { "epoch": 3.93, "learning_rate": 1.7789447503233638e-08, "loss": 1.7538, "step": 3148 }, { "epoch": 3.93, "learning_rate": 1.6604153249651923e-08, "loss": 1.7628, "step": 3150 }, { "epoch": 3.93, "learning_rate": 1.54596867391954e-08, "loss": 1.7237, "step": 3152 }, { "epoch": 3.93, "learning_rate": 1.4356052652231277e-08, "loss": 1.8494, "step": 3154 }, { "epoch": 3.94, "learning_rate": 1.3293255502144776e-08, "loss": 1.7827, "step": 3156 }, { "epoch": 3.94, "learning_rate": 1.2271299635311373e-08, "loss": 1.7227, "step": 3158 }, { "epoch": 3.94, "learning_rate": 1.1290189231087934e-08, "loss": 1.7628, "step": 3160 }, { "epoch": 3.94, "learning_rate": 1.034992830178716e-08, "loss": 1.7185, "step": 3162 }, { "epoch": 3.95, "learning_rate": 9.450520692667609e-09, "loss": 1.7533, "step": 3164 }, { "epoch": 3.95, "learning_rate": 8.591970081914813e-09, "loss": 1.7421, "step": 3166 }, { "epoch": 3.95, "learning_rate": 7.774279980626853e-09, "loss": 1.8076, "step": 3168 }, { "epoch": 3.95, "learning_rate": 6.9974537328010295e-09, "loss": 1.777, "step": 3170 }, { "epoch": 3.96, "learning_rate": 6.261494515317212e-09, "loss": 1.6956, "step": 3172 }, { "epoch": 3.96, "learning_rate": 5.566405337930069e-09, "loss": 1.7326, "step": 3174 }, { "epoch": 3.96, "learning_rate": 4.912189043250193e-09, "loss": 1.7357, "step": 3176 }, { "epoch": 3.96, "learning_rate": 4.2988483067374355e-09, "loss": 1.7927, "step": 3178 }, { "epoch": 3.97, "learning_rate": 3.726385636689811e-09, "loss": 1.6958, "step": 3180 }, { "epoch": 3.97, "learning_rate": 3.1948033742290606e-09, "loss": 1.7589, "step": 3182 }, { "epoch": 3.97, "learning_rate": 2.7041036932962117e-09, "loss": 1.7557, "step": 3184 }, { "epoch": 3.97, "learning_rate": 2.2542886006382547e-09, "loss": 1.7444, "step": 3186 }, { "epoch": 3.98, "learning_rate": 1.8453599358048136e-09, "loss": 1.7866, "step": 3188 }, { "epoch": 3.98, "learning_rate": 1.477319371133712e-09, "loss": 1.768, "step": 3190 }, { "epoch": 3.98, "learning_rate": 1.1501684117531941e-09, "loss": 1.7777, "step": 3192 }, { "epoch": 3.98, "learning_rate": 8.639083955663819e-10, "loss": 1.7533, "step": 3194 }, { "epoch": 3.99, "learning_rate": 6.185404932523841e-10, "loss": 1.7906, "step": 3196 }, { "epoch": 3.99, "learning_rate": 4.140657082607469e-10, "loss": 1.6971, "step": 3198 }, { "epoch": 3.99, "learning_rate": 2.504848768025703e-10, "loss": 1.7139, "step": 3200 }, { "epoch": 3.99, "learning_rate": 1.2779866785161966e-10, "loss": 1.7268, "step": 3202 }, { "epoch": 4.0, "learning_rate": 4.600758314321496e-11, "loss": 1.744, "step": 3204 }, { "epoch": 4.0, "step": 3204, "total_flos": 1.1937678110176051e+17, "train_loss": 1.9303735846586143, "train_runtime": 18412.5115, "train_samples_per_second": 11.148, "train_steps_per_second": 0.174 } ], "logging_steps": 2, "max_steps": 3204, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 1000, "total_flos": 1.1937678110176051e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }