{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9944341372912802, "eval_steps": 800, "global_step": 4300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0, "loss": 2.4801, "step": 4 }, { "epoch": 0.0, "learning_rate": 0, "loss": 2.4284, "step": 8 }, { "epoch": 0.01, "learning_rate": 0, "loss": 2.2651, "step": 12 }, { "epoch": 0.01, "learning_rate": 0, "loss": 2.411, "step": 16 }, { "epoch": 0.01, "learning_rate": 0, "loss": 2.8299, "step": 20 }, { "epoch": 0.01, "learning_rate": 0.0, "loss": 2.2188, "step": 24 }, { "epoch": 0.01, "learning_rate": 1.3082402064781276e-06, "loss": 1.345, "step": 28 }, { "epoch": 0.01, "learning_rate": 1.9623603097171917e-06, "loss": 0.5695, "step": 32 }, { "epoch": 0.02, "learning_rate": 2.262883767531511e-06, "loss": 0.8812, "step": 36 }, { "epoch": 0.02, "learning_rate": 2.5555756797431724e-06, "loss": 0.8725, "step": 40 }, { "epoch": 0.02, "learning_rate": 2.7786547836457785e-06, "loss": 0.4851, "step": 44 }, { "epoch": 0.02, "learning_rate": 2.9589528137043157e-06, "loss": 0.8354, "step": 48 }, { "epoch": 0.02, "learning_rate": 3.110267503805303e-06, "loss": 0.4873, "step": 52 }, { "epoch": 0.03, "learning_rate": 3.2406394020168525e-06, "loss": 0.6584, "step": 56 }, { "epoch": 0.03, "learning_rate": 3.3551671365864186e-06, "loss": 0.529, "step": 60 }, { "epoch": 0.03, "learning_rate": 3.4572878450621517e-06, "loss": 0.5812, "step": 64 }, { "epoch": 0.03, "learning_rate": 3.5494288615482305e-06, "loss": 0.5163, "step": 68 }, { "epoch": 0.03, "learning_rate": 3.6333682331099297e-06, "loss": 0.6595, "step": 72 }, { "epoch": 0.04, "learning_rate": 3.710447450306277e-06, "loss": 0.4775, "step": 76 }, { "epoch": 0.04, "learning_rate": 3.7817036126729157e-06, "loss": 0.682, "step": 80 }, { "epoch": 0.04, "learning_rate": 3.84795507876713e-06, "loss": 0.7048, "step": 84 }, { "epoch": 0.04, "learning_rate": 3.909858960648549e-06, "loss": 0.9478, "step": 88 }, { "epoch": 0.04, "learning_rate": 3.9679508875196075e-06, "loss": 0.5932, "step": 92 }, { "epoch": 0.04, "learning_rate": 4.022673220704539e-06, "loss": 0.5733, "step": 96 }, { "epoch": 0.05, "learning_rate": 4.074395524884577e-06, "loss": 0.5817, "step": 100 }, { "epoch": 0.05, "learning_rate": 4.123429713794031e-06, "loss": 0.6372, "step": 104 }, { "epoch": 0.05, "learning_rate": 4.170041450985754e-06, "loss": 0.5108, "step": 108 }, { "epoch": 0.05, "learning_rate": 4.214458864668026e-06, "loss": 0.6262, "step": 112 }, { "epoch": 0.05, "learning_rate": 4.256879301905398e-06, "loss": 0.8594, "step": 116 }, { "epoch": 0.06, "learning_rate": 4.297474628787183e-06, "loss": 0.7241, "step": 120 }, { "epoch": 0.06, "learning_rate": 4.336395436735046e-06, "loss": 0.6471, "step": 124 }, { "epoch": 0.06, "learning_rate": 4.373774415149143e-06, "loss": 0.6144, "step": 128 }, { "epoch": 0.06, "learning_rate": 4.409729081127459e-06, "loss": 0.672, "step": 132 }, { "epoch": 0.06, "learning_rate": 4.444364007946065e-06, "loss": 0.6802, "step": 136 }, { "epoch": 0.06, "learning_rate": 4.4777726588457195e-06, "loss": 0.514, "step": 140 }, { "epoch": 0.07, "learning_rate": 4.510038907149524e-06, "loss": 0.715, "step": 144 }, { "epoch": 0.07, "learning_rate": 4.541238304971202e-06, "loss": 0.5189, "step": 148 }, { "epoch": 0.07, "learning_rate": 4.5714391488166745e-06, "loss": 0.7188, "step": 152 }, { "epoch": 0.07, "learning_rate": 4.600703379889684e-06, "loss": 0.5829, "step": 156 }, { "epoch": 0.07, "learning_rate": 4.629087348946707e-06, "loss": 0.5551, "step": 160 }, { "epoch": 0.08, "learning_rate": 4.656642469442713e-06, "loss": 0.7016, "step": 164 }, { "epoch": 0.08, "learning_rate": 4.683415777991895e-06, "loss": 0.5357, "step": 168 }, { "epoch": 0.08, "learning_rate": 4.709450417491796e-06, "loss": 0.6232, "step": 172 }, { "epoch": 0.08, "learning_rate": 4.734786055373451e-06, "loss": 0.7218, "step": 176 }, { "epoch": 0.08, "learning_rate": 4.759459247158257e-06, "loss": 0.51, "step": 180 }, { "epoch": 0.09, "learning_rate": 4.783503753685794e-06, "loss": 0.8871, "step": 184 }, { "epoch": 0.09, "learning_rate": 4.806950818921448e-06, "loss": 0.8177, "step": 188 }, { "epoch": 0.09, "learning_rate": 4.8298294140798465e-06, "loss": 0.5602, "step": 192 }, { "epoch": 0.09, "learning_rate": 4.852166452849314e-06, "loss": 0.7395, "step": 196 }, { "epoch": 0.09, "learning_rate": 4.8739869817278244e-06, "loss": 0.7008, "step": 200 }, { "epoch": 0.09, "learning_rate": 4.89531434884623e-06, "loss": 0.7096, "step": 204 }, { "epoch": 0.1, "learning_rate": 4.916170354132174e-06, "loss": 0.7617, "step": 208 }, { "epoch": 0.1, "learning_rate": 4.936575383236021e-06, "loss": 0.637, "step": 212 }, { "epoch": 0.1, "learning_rate": 4.956548527281403e-06, "loss": 0.7149, "step": 216 }, { "epoch": 0.1, "learning_rate": 4.976107690203556e-06, "loss": 0.585, "step": 220 }, { "epoch": 0.1, "learning_rate": 4.995269685187989e-06, "loss": 0.4153, "step": 224 }, { "epoch": 0.11, "learning_rate": 4.998404594767071e-06, "loss": 0.8215, "step": 228 }, { "epoch": 0.11, "learning_rate": 4.995213784301213e-06, "loss": 0.6051, "step": 232 }, { "epoch": 0.11, "learning_rate": 4.992022973835355e-06, "loss": 0.6785, "step": 236 }, { "epoch": 0.11, "learning_rate": 4.988832163369496e-06, "loss": 0.6774, "step": 240 }, { "epoch": 0.11, "learning_rate": 4.985641352903638e-06, "loss": 0.5039, "step": 244 }, { "epoch": 0.12, "learning_rate": 4.982450542437779e-06, "loss": 0.5724, "step": 248 }, { "epoch": 0.12, "learning_rate": 4.979259731971921e-06, "loss": 0.8469, "step": 252 }, { "epoch": 0.12, "learning_rate": 4.976068921506063e-06, "loss": 0.9735, "step": 256 }, { "epoch": 0.12, "learning_rate": 4.972878111040205e-06, "loss": 0.8594, "step": 260 }, { "epoch": 0.12, "learning_rate": 4.969687300574346e-06, "loss": 0.6523, "step": 264 }, { "epoch": 0.12, "learning_rate": 4.9664964901084875e-06, "loss": 0.6641, "step": 268 }, { "epoch": 0.13, "learning_rate": 4.96330567964263e-06, "loss": 0.6586, "step": 272 }, { "epoch": 0.13, "learning_rate": 4.960114869176771e-06, "loss": 0.6019, "step": 276 }, { "epoch": 0.13, "learning_rate": 4.956924058710913e-06, "loss": 0.7117, "step": 280 }, { "epoch": 0.13, "learning_rate": 4.953733248245054e-06, "loss": 0.4632, "step": 284 }, { "epoch": 0.13, "learning_rate": 4.950542437779196e-06, "loss": 0.733, "step": 288 }, { "epoch": 0.14, "learning_rate": 4.947351627313338e-06, "loss": 0.7205, "step": 292 }, { "epoch": 0.14, "learning_rate": 4.9441608168474795e-06, "loss": 0.6121, "step": 296 }, { "epoch": 0.14, "learning_rate": 4.940970006381621e-06, "loss": 0.7859, "step": 300 }, { "epoch": 0.14, "learning_rate": 4.9377791959157625e-06, "loss": 0.8041, "step": 304 }, { "epoch": 0.14, "learning_rate": 4.934588385449905e-06, "loss": 0.7834, "step": 308 }, { "epoch": 0.14, "learning_rate": 4.931397574984046e-06, "loss": 0.6704, "step": 312 }, { "epoch": 0.15, "learning_rate": 4.928206764518188e-06, "loss": 0.8402, "step": 316 }, { "epoch": 0.15, "learning_rate": 4.925015954052329e-06, "loss": 0.7851, "step": 320 }, { "epoch": 0.15, "learning_rate": 4.9218251435864715e-06, "loss": 0.501, "step": 324 }, { "epoch": 0.15, "learning_rate": 4.918634333120613e-06, "loss": 0.6039, "step": 328 }, { "epoch": 0.15, "learning_rate": 4.9154435226547544e-06, "loss": 0.64, "step": 332 }, { "epoch": 0.16, "learning_rate": 4.912252712188896e-06, "loss": 0.5726, "step": 336 }, { "epoch": 0.16, "learning_rate": 4.909061901723038e-06, "loss": 0.6605, "step": 340 }, { "epoch": 0.16, "learning_rate": 4.90587109125718e-06, "loss": 0.8105, "step": 344 }, { "epoch": 0.16, "learning_rate": 4.902680280791321e-06, "loss": 0.8422, "step": 348 }, { "epoch": 0.16, "learning_rate": 4.8994894703254635e-06, "loss": 0.5242, "step": 352 }, { "epoch": 0.17, "learning_rate": 4.896298659859605e-06, "loss": 0.6062, "step": 356 }, { "epoch": 0.17, "learning_rate": 4.8931078493937464e-06, "loss": 0.7289, "step": 360 }, { "epoch": 0.17, "learning_rate": 4.889917038927888e-06, "loss": 0.6916, "step": 364 }, { "epoch": 0.17, "learning_rate": 4.88672622846203e-06, "loss": 0.8526, "step": 368 }, { "epoch": 0.17, "learning_rate": 4.883535417996172e-06, "loss": 1.0668, "step": 372 }, { "epoch": 0.17, "learning_rate": 4.880344607530313e-06, "loss": 0.6912, "step": 376 }, { "epoch": 0.18, "learning_rate": 4.877153797064455e-06, "loss": 0.7383, "step": 380 }, { "epoch": 0.18, "learning_rate": 4.873962986598597e-06, "loss": 0.77, "step": 384 }, { "epoch": 0.18, "learning_rate": 4.870772176132738e-06, "loss": 0.8328, "step": 388 }, { "epoch": 0.18, "learning_rate": 4.86758136566688e-06, "loss": 0.7135, "step": 392 }, { "epoch": 0.18, "learning_rate": 4.864390555201021e-06, "loss": 0.7976, "step": 396 }, { "epoch": 0.19, "learning_rate": 4.861199744735164e-06, "loss": 0.5799, "step": 400 }, { "epoch": 0.19, "learning_rate": 4.858008934269305e-06, "loss": 0.5246, "step": 404 }, { "epoch": 0.19, "learning_rate": 4.854818123803447e-06, "loss": 0.5895, "step": 408 }, { "epoch": 0.19, "learning_rate": 4.851627313337588e-06, "loss": 0.7751, "step": 412 }, { "epoch": 0.19, "learning_rate": 4.84843650287173e-06, "loss": 0.7469, "step": 416 }, { "epoch": 0.19, "learning_rate": 4.845245692405872e-06, "loss": 0.5013, "step": 420 }, { "epoch": 0.2, "learning_rate": 4.842054881940013e-06, "loss": 0.5398, "step": 424 }, { "epoch": 0.2, "learning_rate": 4.838864071474155e-06, "loss": 0.4547, "step": 428 }, { "epoch": 0.2, "learning_rate": 4.835673261008296e-06, "loss": 0.8732, "step": 432 }, { "epoch": 0.2, "learning_rate": 4.832482450542439e-06, "loss": 0.7671, "step": 436 }, { "epoch": 0.2, "learning_rate": 4.82929164007658e-06, "loss": 0.6574, "step": 440 }, { "epoch": 0.21, "learning_rate": 4.8261008296107215e-06, "loss": 0.7173, "step": 444 }, { "epoch": 0.21, "learning_rate": 4.822910019144863e-06, "loss": 0.4371, "step": 448 }, { "epoch": 0.21, "learning_rate": 4.819719208679005e-06, "loss": 0.6992, "step": 452 }, { "epoch": 0.21, "learning_rate": 4.816528398213147e-06, "loss": 0.6827, "step": 456 }, { "epoch": 0.21, "learning_rate": 4.813337587747288e-06, "loss": 0.4919, "step": 460 }, { "epoch": 0.22, "learning_rate": 4.81014677728143e-06, "loss": 0.9571, "step": 464 }, { "epoch": 0.22, "learning_rate": 4.806955966815571e-06, "loss": 0.5202, "step": 468 }, { "epoch": 0.22, "learning_rate": 4.8037651563497135e-06, "loss": 0.7919, "step": 472 }, { "epoch": 0.22, "learning_rate": 4.800574345883855e-06, "loss": 0.5517, "step": 476 }, { "epoch": 0.22, "learning_rate": 4.7973835354179965e-06, "loss": 0.3889, "step": 480 }, { "epoch": 0.22, "learning_rate": 4.794192724952138e-06, "loss": 0.5933, "step": 484 }, { "epoch": 0.23, "learning_rate": 4.79100191448628e-06, "loss": 0.9298, "step": 488 }, { "epoch": 0.23, "learning_rate": 4.787811104020422e-06, "loss": 0.4758, "step": 492 }, { "epoch": 0.23, "learning_rate": 4.784620293554563e-06, "loss": 0.5162, "step": 496 }, { "epoch": 0.23, "learning_rate": 4.781429483088705e-06, "loss": 0.6675, "step": 500 }, { "epoch": 0.23, "learning_rate": 4.778238672622846e-06, "loss": 0.8493, "step": 504 }, { "epoch": 0.24, "learning_rate": 4.7750478621569885e-06, "loss": 0.6583, "step": 508 }, { "epoch": 0.24, "learning_rate": 4.77185705169113e-06, "loss": 0.4897, "step": 512 }, { "epoch": 0.24, "learning_rate": 4.768666241225271e-06, "loss": 0.6633, "step": 516 }, { "epoch": 0.24, "learning_rate": 4.765475430759413e-06, "loss": 0.782, "step": 520 }, { "epoch": 0.24, "learning_rate": 4.762284620293555e-06, "loss": 0.815, "step": 524 }, { "epoch": 0.24, "learning_rate": 4.759093809827697e-06, "loss": 0.4498, "step": 528 }, { "epoch": 0.25, "learning_rate": 4.755902999361838e-06, "loss": 0.6006, "step": 532 }, { "epoch": 0.25, "learning_rate": 4.75271218889598e-06, "loss": 0.9473, "step": 536 }, { "epoch": 0.25, "learning_rate": 4.749521378430121e-06, "loss": 0.4036, "step": 540 }, { "epoch": 0.25, "learning_rate": 4.746330567964263e-06, "loss": 0.555, "step": 544 }, { "epoch": 0.25, "learning_rate": 4.743139757498405e-06, "loss": 0.7843, "step": 548 }, { "epoch": 0.26, "learning_rate": 4.739948947032546e-06, "loss": 0.8376, "step": 552 }, { "epoch": 0.26, "learning_rate": 4.736758136566688e-06, "loss": 0.5423, "step": 556 }, { "epoch": 0.26, "learning_rate": 4.73356732610083e-06, "loss": 0.5533, "step": 560 }, { "epoch": 0.26, "learning_rate": 4.7303765156349716e-06, "loss": 0.5212, "step": 564 }, { "epoch": 0.26, "learning_rate": 4.727185705169113e-06, "loss": 0.8054, "step": 568 }, { "epoch": 0.27, "learning_rate": 4.7239948947032545e-06, "loss": 0.438, "step": 572 }, { "epoch": 0.27, "learning_rate": 4.720804084237397e-06, "loss": 0.6025, "step": 576 }, { "epoch": 0.27, "learning_rate": 4.717613273771538e-06, "loss": 0.8118, "step": 580 }, { "epoch": 0.27, "learning_rate": 4.71442246330568e-06, "loss": 0.6911, "step": 584 }, { "epoch": 0.27, "learning_rate": 4.711231652839821e-06, "loss": 0.7022, "step": 588 }, { "epoch": 0.27, "learning_rate": 4.7080408423739636e-06, "loss": 0.5918, "step": 592 }, { "epoch": 0.28, "learning_rate": 4.704850031908105e-06, "loss": 0.6012, "step": 596 }, { "epoch": 0.28, "learning_rate": 4.7016592214422465e-06, "loss": 0.8031, "step": 600 }, { "epoch": 0.28, "learning_rate": 4.698468410976389e-06, "loss": 0.7864, "step": 604 }, { "epoch": 0.28, "learning_rate": 4.69527760051053e-06, "loss": 0.6361, "step": 608 }, { "epoch": 0.28, "learning_rate": 4.692086790044672e-06, "loss": 0.6619, "step": 612 }, { "epoch": 0.29, "learning_rate": 4.688895979578813e-06, "loss": 0.5132, "step": 616 }, { "epoch": 0.29, "learning_rate": 4.6857051691129555e-06, "loss": 0.6111, "step": 620 }, { "epoch": 0.29, "learning_rate": 4.682514358647097e-06, "loss": 0.7884, "step": 624 }, { "epoch": 0.29, "learning_rate": 4.6793235481812385e-06, "loss": 0.4355, "step": 628 }, { "epoch": 0.29, "learning_rate": 4.67613273771538e-06, "loss": 0.7325, "step": 632 }, { "epoch": 0.29, "learning_rate": 4.672941927249522e-06, "loss": 0.5633, "step": 636 }, { "epoch": 0.3, "learning_rate": 4.669751116783664e-06, "loss": 0.6415, "step": 640 }, { "epoch": 0.3, "learning_rate": 4.666560306317805e-06, "loss": 0.6508, "step": 644 }, { "epoch": 0.3, "learning_rate": 4.663369495851947e-06, "loss": 0.5909, "step": 648 }, { "epoch": 0.3, "learning_rate": 4.660178685386089e-06, "loss": 0.5651, "step": 652 }, { "epoch": 0.3, "learning_rate": 4.6569878749202305e-06, "loss": 0.6729, "step": 656 }, { "epoch": 0.31, "learning_rate": 4.653797064454372e-06, "loss": 0.842, "step": 660 }, { "epoch": 0.31, "learning_rate": 4.650606253988513e-06, "loss": 0.5844, "step": 664 }, { "epoch": 0.31, "learning_rate": 4.647415443522656e-06, "loss": 0.7394, "step": 668 }, { "epoch": 0.31, "learning_rate": 4.644224633056797e-06, "loss": 0.6725, "step": 672 }, { "epoch": 0.31, "learning_rate": 4.641033822590939e-06, "loss": 0.6416, "step": 676 }, { "epoch": 0.32, "learning_rate": 4.63784301212508e-06, "loss": 0.7926, "step": 680 }, { "epoch": 0.32, "learning_rate": 4.634652201659222e-06, "loss": 0.5941, "step": 684 }, { "epoch": 0.32, "learning_rate": 4.631461391193364e-06, "loss": 0.9582, "step": 688 }, { "epoch": 0.32, "learning_rate": 4.628270580727505e-06, "loss": 0.4289, "step": 692 }, { "epoch": 0.32, "learning_rate": 4.625079770261647e-06, "loss": 0.6518, "step": 696 }, { "epoch": 0.32, "learning_rate": 4.621888959795788e-06, "loss": 0.8722, "step": 700 }, { "epoch": 0.33, "learning_rate": 4.618698149329931e-06, "loss": 0.5419, "step": 704 }, { "epoch": 0.33, "learning_rate": 4.615507338864072e-06, "loss": 0.6891, "step": 708 }, { "epoch": 0.33, "learning_rate": 4.612316528398214e-06, "loss": 0.5157, "step": 712 }, { "epoch": 0.33, "learning_rate": 4.609125717932355e-06, "loss": 0.7015, "step": 716 }, { "epoch": 0.33, "learning_rate": 4.6059349074664965e-06, "loss": 0.546, "step": 720 }, { "epoch": 0.34, "learning_rate": 4.602744097000639e-06, "loss": 0.6735, "step": 724 }, { "epoch": 0.34, "learning_rate": 4.59955328653478e-06, "loss": 0.5564, "step": 728 }, { "epoch": 0.34, "learning_rate": 4.596362476068922e-06, "loss": 0.5182, "step": 732 }, { "epoch": 0.34, "learning_rate": 4.593171665603063e-06, "loss": 0.4053, "step": 736 }, { "epoch": 0.34, "learning_rate": 4.5899808551372056e-06, "loss": 0.4039, "step": 740 }, { "epoch": 0.35, "learning_rate": 4.586790044671347e-06, "loss": 0.6502, "step": 744 }, { "epoch": 0.35, "learning_rate": 4.5835992342054885e-06, "loss": 0.8062, "step": 748 }, { "epoch": 0.35, "learning_rate": 4.58040842373963e-06, "loss": 0.4143, "step": 752 }, { "epoch": 0.35, "learning_rate": 4.5772176132737715e-06, "loss": 0.5539, "step": 756 }, { "epoch": 0.35, "learning_rate": 4.574026802807914e-06, "loss": 0.5926, "step": 760 }, { "epoch": 0.35, "learning_rate": 4.570835992342055e-06, "loss": 0.751, "step": 764 }, { "epoch": 0.36, "learning_rate": 4.567645181876197e-06, "loss": 0.5886, "step": 768 }, { "epoch": 0.36, "learning_rate": 4.564454371410338e-06, "loss": 0.677, "step": 772 }, { "epoch": 0.36, "learning_rate": 4.5612635609444805e-06, "loss": 0.7097, "step": 776 }, { "epoch": 0.36, "learning_rate": 4.558072750478622e-06, "loss": 0.56, "step": 780 }, { "epoch": 0.36, "learning_rate": 4.5548819400127634e-06, "loss": 0.4481, "step": 784 }, { "epoch": 0.37, "learning_rate": 4.551691129546905e-06, "loss": 0.4959, "step": 788 }, { "epoch": 0.37, "learning_rate": 4.548500319081046e-06, "loss": 0.8399, "step": 792 }, { "epoch": 0.37, "learning_rate": 4.545309508615189e-06, "loss": 0.6904, "step": 796 }, { "epoch": 0.37, "learning_rate": 4.54211869814933e-06, "loss": 0.8689, "step": 800 }, { "epoch": 0.37, "learning_rate": 4.538927887683472e-06, "loss": 0.6232, "step": 804 }, { "epoch": 0.37, "learning_rate": 4.535737077217613e-06, "loss": 0.6428, "step": 808 }, { "epoch": 0.38, "learning_rate": 4.5325462667517554e-06, "loss": 0.7462, "step": 812 }, { "epoch": 0.38, "learning_rate": 4.529355456285897e-06, "loss": 0.529, "step": 816 }, { "epoch": 0.38, "learning_rate": 4.526164645820038e-06, "loss": 0.4875, "step": 820 }, { "epoch": 0.38, "learning_rate": 4.52297383535418e-06, "loss": 0.6747, "step": 824 }, { "epoch": 0.38, "learning_rate": 4.519783024888322e-06, "loss": 0.7061, "step": 828 }, { "epoch": 0.39, "learning_rate": 4.516592214422464e-06, "loss": 0.7865, "step": 832 }, { "epoch": 0.39, "learning_rate": 4.513401403956605e-06, "loss": 0.5122, "step": 836 }, { "epoch": 0.39, "learning_rate": 4.5102105934907466e-06, "loss": 0.4014, "step": 840 }, { "epoch": 0.39, "learning_rate": 4.507019783024889e-06, "loss": 0.7509, "step": 844 }, { "epoch": 0.39, "learning_rate": 4.50382897255903e-06, "loss": 0.8073, "step": 848 }, { "epoch": 0.4, "learning_rate": 4.500638162093172e-06, "loss": 0.3459, "step": 852 }, { "epoch": 0.4, "learning_rate": 4.497447351627314e-06, "loss": 0.6814, "step": 856 }, { "epoch": 0.4, "learning_rate": 4.494256541161456e-06, "loss": 1.1027, "step": 860 }, { "epoch": 0.4, "learning_rate": 4.491065730695597e-06, "loss": 0.5254, "step": 864 }, { "epoch": 0.4, "learning_rate": 4.4878749202297385e-06, "loss": 0.7436, "step": 868 }, { "epoch": 0.4, "learning_rate": 4.484684109763881e-06, "loss": 0.4877, "step": 872 }, { "epoch": 0.41, "learning_rate": 4.481493299298022e-06, "loss": 0.657, "step": 876 }, { "epoch": 0.41, "learning_rate": 4.478302488832164e-06, "loss": 0.7193, "step": 880 }, { "epoch": 0.41, "learning_rate": 4.475111678366305e-06, "loss": 0.5461, "step": 884 }, { "epoch": 0.41, "learning_rate": 4.471920867900448e-06, "loss": 0.5707, "step": 888 }, { "epoch": 0.41, "learning_rate": 4.468730057434589e-06, "loss": 0.9755, "step": 892 }, { "epoch": 0.42, "learning_rate": 4.4655392469687305e-06, "loss": 0.551, "step": 896 }, { "epoch": 0.42, "learning_rate": 4.462348436502872e-06, "loss": 0.499, "step": 900 }, { "epoch": 0.42, "learning_rate": 4.459157626037014e-06, "loss": 0.4268, "step": 904 }, { "epoch": 0.42, "learning_rate": 4.455966815571156e-06, "loss": 0.6658, "step": 908 }, { "epoch": 0.42, "learning_rate": 4.452776005105297e-06, "loss": 0.5642, "step": 912 }, { "epoch": 0.42, "learning_rate": 4.449585194639439e-06, "loss": 0.6943, "step": 916 }, { "epoch": 0.43, "learning_rate": 4.446394384173581e-06, "loss": 0.5404, "step": 920 }, { "epoch": 0.43, "learning_rate": 4.4432035737077225e-06, "loss": 0.7934, "step": 924 }, { "epoch": 0.43, "learning_rate": 4.440012763241864e-06, "loss": 0.7138, "step": 928 }, { "epoch": 0.43, "learning_rate": 4.4368219527760055e-06, "loss": 0.5249, "step": 932 }, { "epoch": 0.43, "learning_rate": 4.433631142310147e-06, "loss": 0.9614, "step": 936 }, { "epoch": 0.44, "learning_rate": 4.430440331844289e-06, "loss": 0.5915, "step": 940 }, { "epoch": 0.44, "learning_rate": 4.427249521378431e-06, "loss": 0.6766, "step": 944 }, { "epoch": 0.44, "learning_rate": 4.424058710912572e-06, "loss": 0.6641, "step": 948 }, { "epoch": 0.44, "learning_rate": 4.420867900446714e-06, "loss": 0.4849, "step": 952 }, { "epoch": 0.44, "learning_rate": 4.417677089980856e-06, "loss": 0.7182, "step": 956 }, { "epoch": 0.45, "learning_rate": 4.4144862795149974e-06, "loss": 0.6782, "step": 960 }, { "epoch": 0.45, "learning_rate": 4.411295469049139e-06, "loss": 0.4837, "step": 964 }, { "epoch": 0.45, "learning_rate": 4.40810465858328e-06, "loss": 0.7323, "step": 968 }, { "epoch": 0.45, "learning_rate": 4.404913848117422e-06, "loss": 0.5807, "step": 972 }, { "epoch": 0.45, "learning_rate": 4.401723037651564e-06, "loss": 0.373, "step": 976 }, { "epoch": 0.45, "learning_rate": 4.398532227185706e-06, "loss": 0.5072, "step": 980 }, { "epoch": 0.46, "learning_rate": 4.395341416719847e-06, "loss": 0.5952, "step": 984 }, { "epoch": 0.46, "learning_rate": 4.392150606253989e-06, "loss": 0.549, "step": 988 }, { "epoch": 0.46, "learning_rate": 4.388959795788131e-06, "loss": 0.5918, "step": 992 }, { "epoch": 0.46, "learning_rate": 4.385768985322272e-06, "loss": 0.4411, "step": 996 }, { "epoch": 0.46, "learning_rate": 4.382578174856414e-06, "loss": 0.7001, "step": 1000 }, { "epoch": 0.47, "learning_rate": 4.379387364390555e-06, "loss": 0.744, "step": 1004 }, { "epoch": 0.47, "learning_rate": 4.376196553924697e-06, "loss": 0.4091, "step": 1008 }, { "epoch": 0.47, "learning_rate": 4.373005743458839e-06, "loss": 0.7464, "step": 1012 }, { "epoch": 0.47, "learning_rate": 4.3698149329929806e-06, "loss": 0.6164, "step": 1016 }, { "epoch": 0.47, "learning_rate": 4.366624122527122e-06, "loss": 0.6213, "step": 1020 }, { "epoch": 0.47, "learning_rate": 4.3634333120612635e-06, "loss": 0.6991, "step": 1024 }, { "epoch": 0.48, "learning_rate": 4.360242501595406e-06, "loss": 0.5268, "step": 1028 }, { "epoch": 0.48, "learning_rate": 4.357051691129547e-06, "loss": 0.7768, "step": 1032 }, { "epoch": 0.48, "learning_rate": 4.353860880663689e-06, "loss": 0.9204, "step": 1036 }, { "epoch": 0.48, "learning_rate": 4.35067007019783e-06, "loss": 0.5844, "step": 1040 }, { "epoch": 0.48, "learning_rate": 4.347479259731972e-06, "loss": 0.5198, "step": 1044 }, { "epoch": 0.49, "learning_rate": 4.344288449266114e-06, "loss": 0.3069, "step": 1048 }, { "epoch": 0.49, "learning_rate": 4.3410976388002555e-06, "loss": 0.5465, "step": 1052 }, { "epoch": 0.49, "learning_rate": 4.337906828334397e-06, "loss": 0.4729, "step": 1056 }, { "epoch": 0.49, "learning_rate": 4.3347160178685384e-06, "loss": 0.6514, "step": 1060 }, { "epoch": 0.49, "learning_rate": 4.331525207402681e-06, "loss": 0.8142, "step": 1064 }, { "epoch": 0.5, "learning_rate": 4.328334396936822e-06, "loss": 0.6477, "step": 1068 }, { "epoch": 0.5, "learning_rate": 4.325143586470964e-06, "loss": 0.4601, "step": 1072 }, { "epoch": 0.5, "learning_rate": 4.321952776005105e-06, "loss": 0.6687, "step": 1076 }, { "epoch": 0.5, "learning_rate": 4.3187619655392475e-06, "loss": 0.4565, "step": 1080 }, { "epoch": 0.5, "learning_rate": 4.315571155073389e-06, "loss": 0.646, "step": 1084 }, { "epoch": 0.5, "learning_rate": 4.31238034460753e-06, "loss": 0.6145, "step": 1088 }, { "epoch": 0.51, "learning_rate": 4.309189534141672e-06, "loss": 0.3854, "step": 1092 }, { "epoch": 0.51, "learning_rate": 4.305998723675814e-06, "loss": 0.6016, "step": 1096 }, { "epoch": 0.51, "learning_rate": 4.302807913209956e-06, "loss": 0.5223, "step": 1100 }, { "epoch": 0.51, "learning_rate": 4.299617102744097e-06, "loss": 0.6356, "step": 1104 }, { "epoch": 0.51, "learning_rate": 4.2964262922782395e-06, "loss": 0.4599, "step": 1108 }, { "epoch": 0.52, "learning_rate": 4.293235481812381e-06, "loss": 0.6452, "step": 1112 }, { "epoch": 0.52, "learning_rate": 4.290044671346522e-06, "loss": 0.386, "step": 1116 }, { "epoch": 0.52, "learning_rate": 4.286853860880664e-06, "loss": 0.6384, "step": 1120 }, { "epoch": 0.52, "learning_rate": 4.283663050414806e-06, "loss": 0.7654, "step": 1124 }, { "epoch": 0.52, "learning_rate": 4.280472239948948e-06, "loss": 0.6019, "step": 1128 }, { "epoch": 0.53, "learning_rate": 4.277281429483089e-06, "loss": 0.6078, "step": 1132 }, { "epoch": 0.53, "learning_rate": 4.274090619017231e-06, "loss": 0.5181, "step": 1136 }, { "epoch": 0.53, "learning_rate": 4.270899808551373e-06, "loss": 0.6731, "step": 1140 }, { "epoch": 0.53, "learning_rate": 4.267708998085514e-06, "loss": 0.4956, "step": 1144 }, { "epoch": 0.53, "learning_rate": 4.264518187619656e-06, "loss": 0.6115, "step": 1148 }, { "epoch": 0.53, "learning_rate": 4.261327377153797e-06, "loss": 0.7712, "step": 1152 }, { "epoch": 0.54, "learning_rate": 4.25813656668794e-06, "loss": 0.5086, "step": 1156 }, { "epoch": 0.54, "learning_rate": 4.254945756222081e-06, "loss": 0.7241, "step": 1160 }, { "epoch": 0.54, "learning_rate": 4.251754945756223e-06, "loss": 0.5275, "step": 1164 }, { "epoch": 0.54, "learning_rate": 4.248564135290364e-06, "loss": 0.7552, "step": 1168 }, { "epoch": 0.54, "learning_rate": 4.245373324824506e-06, "loss": 0.4292, "step": 1172 }, { "epoch": 0.55, "learning_rate": 4.242182514358648e-06, "loss": 0.7575, "step": 1176 }, { "epoch": 0.55, "learning_rate": 4.238991703892789e-06, "loss": 0.5653, "step": 1180 }, { "epoch": 0.55, "learning_rate": 4.235800893426931e-06, "loss": 0.6882, "step": 1184 }, { "epoch": 0.55, "learning_rate": 4.232610082961072e-06, "loss": 0.6488, "step": 1188 }, { "epoch": 0.55, "learning_rate": 4.2294192724952146e-06, "loss": 0.5522, "step": 1192 }, { "epoch": 0.55, "learning_rate": 4.226228462029356e-06, "loss": 0.578, "step": 1196 }, { "epoch": 0.56, "learning_rate": 4.2230376515634975e-06, "loss": 0.7412, "step": 1200 }, { "epoch": 0.56, "learning_rate": 4.219846841097639e-06, "loss": 0.5138, "step": 1204 }, { "epoch": 0.56, "learning_rate": 4.216656030631781e-06, "loss": 0.6943, "step": 1208 }, { "epoch": 0.56, "learning_rate": 4.213465220165923e-06, "loss": 0.4599, "step": 1212 }, { "epoch": 0.56, "learning_rate": 4.210274409700064e-06, "loss": 0.8815, "step": 1216 }, { "epoch": 0.57, "learning_rate": 4.207083599234206e-06, "loss": 0.6245, "step": 1220 }, { "epoch": 0.57, "learning_rate": 4.203892788768347e-06, "loss": 0.5513, "step": 1224 }, { "epoch": 0.57, "learning_rate": 4.2007019783024895e-06, "loss": 0.4635, "step": 1228 }, { "epoch": 0.57, "learning_rate": 4.197511167836631e-06, "loss": 0.5711, "step": 1232 }, { "epoch": 0.57, "learning_rate": 4.1943203573707724e-06, "loss": 0.5078, "step": 1236 }, { "epoch": 0.58, "learning_rate": 4.191129546904914e-06, "loss": 0.4304, "step": 1240 }, { "epoch": 0.58, "learning_rate": 4.187938736439056e-06, "loss": 0.715, "step": 1244 }, { "epoch": 0.58, "learning_rate": 4.184747925973198e-06, "loss": 0.6305, "step": 1248 }, { "epoch": 0.58, "learning_rate": 4.181557115507339e-06, "loss": 0.6243, "step": 1252 }, { "epoch": 0.58, "learning_rate": 4.178366305041481e-06, "loss": 0.6439, "step": 1256 }, { "epoch": 0.58, "learning_rate": 4.175175494575622e-06, "loss": 0.4782, "step": 1260 }, { "epoch": 0.59, "learning_rate": 4.171984684109764e-06, "loss": 0.4523, "step": 1264 }, { "epoch": 0.59, "learning_rate": 4.168793873643906e-06, "loss": 0.4884, "step": 1268 }, { "epoch": 0.59, "learning_rate": 4.165603063178047e-06, "loss": 0.3461, "step": 1272 }, { "epoch": 0.59, "learning_rate": 4.162412252712189e-06, "loss": 0.2459, "step": 1276 }, { "epoch": 0.59, "learning_rate": 4.159221442246331e-06, "loss": 0.8138, "step": 1280 }, { "epoch": 0.6, "learning_rate": 4.156030631780473e-06, "loss": 0.6026, "step": 1284 }, { "epoch": 0.6, "learning_rate": 4.152839821314614e-06, "loss": 0.5463, "step": 1288 }, { "epoch": 0.6, "learning_rate": 4.1496490108487556e-06, "loss": 0.4317, "step": 1292 }, { "epoch": 0.6, "learning_rate": 4.146458200382897e-06, "loss": 0.6244, "step": 1296 }, { "epoch": 0.6, "learning_rate": 4.143267389917039e-06, "loss": 0.554, "step": 1300 }, { "epoch": 0.6, "learning_rate": 4.140076579451181e-06, "loss": 0.6441, "step": 1304 }, { "epoch": 0.61, "learning_rate": 4.136885768985322e-06, "loss": 0.6233, "step": 1308 }, { "epoch": 0.61, "learning_rate": 4.133694958519464e-06, "loss": 0.5561, "step": 1312 }, { "epoch": 0.61, "learning_rate": 4.130504148053606e-06, "loss": 0.7524, "step": 1316 }, { "epoch": 0.61, "learning_rate": 4.1273133375877475e-06, "loss": 0.4338, "step": 1320 }, { "epoch": 0.61, "learning_rate": 4.124122527121889e-06, "loss": 0.4495, "step": 1324 }, { "epoch": 0.62, "learning_rate": 4.1209317166560305e-06, "loss": 0.5139, "step": 1328 }, { "epoch": 0.62, "learning_rate": 4.117740906190173e-06, "loss": 0.6545, "step": 1332 }, { "epoch": 0.62, "learning_rate": 4.114550095724314e-06, "loss": 0.5588, "step": 1336 }, { "epoch": 0.62, "learning_rate": 4.111359285258456e-06, "loss": 0.609, "step": 1340 }, { "epoch": 0.62, "learning_rate": 4.108168474792597e-06, "loss": 0.553, "step": 1344 }, { "epoch": 0.63, "learning_rate": 4.1049776643267395e-06, "loss": 0.5844, "step": 1348 }, { "epoch": 0.63, "learning_rate": 4.101786853860881e-06, "loss": 0.5779, "step": 1352 }, { "epoch": 0.63, "learning_rate": 4.0985960433950225e-06, "loss": 0.4207, "step": 1356 }, { "epoch": 0.63, "learning_rate": 4.095405232929165e-06, "loss": 0.4617, "step": 1360 }, { "epoch": 0.63, "learning_rate": 4.092214422463306e-06, "loss": 0.6092, "step": 1364 }, { "epoch": 0.63, "learning_rate": 4.089023611997448e-06, "loss": 0.4607, "step": 1368 }, { "epoch": 0.64, "learning_rate": 4.085832801531589e-06, "loss": 0.4239, "step": 1372 }, { "epoch": 0.64, "learning_rate": 4.0826419910657315e-06, "loss": 0.5438, "step": 1376 }, { "epoch": 0.64, "learning_rate": 4.079451180599873e-06, "loss": 0.5006, "step": 1380 }, { "epoch": 0.64, "learning_rate": 4.0762603701340144e-06, "loss": 0.6889, "step": 1384 }, { "epoch": 0.64, "learning_rate": 4.073069559668156e-06, "loss": 0.5742, "step": 1388 }, { "epoch": 0.65, "learning_rate": 4.069878749202298e-06, "loss": 0.8366, "step": 1392 }, { "epoch": 0.65, "learning_rate": 4.06668793873644e-06, "loss": 0.5182, "step": 1396 }, { "epoch": 0.65, "learning_rate": 4.063497128270581e-06, "loss": 0.4807, "step": 1400 }, { "epoch": 0.65, "learning_rate": 4.060306317804723e-06, "loss": 0.3995, "step": 1404 }, { "epoch": 0.65, "learning_rate": 4.057115507338865e-06, "loss": 0.5958, "step": 1408 }, { "epoch": 0.65, "learning_rate": 4.0539246968730064e-06, "loss": 0.4855, "step": 1412 }, { "epoch": 0.66, "learning_rate": 4.050733886407148e-06, "loss": 0.5908, "step": 1416 }, { "epoch": 0.66, "learning_rate": 4.047543075941289e-06, "loss": 0.7867, "step": 1420 }, { "epoch": 0.66, "learning_rate": 4.044352265475432e-06, "loss": 0.7617, "step": 1424 }, { "epoch": 0.66, "learning_rate": 4.041161455009573e-06, "loss": 0.4752, "step": 1428 }, { "epoch": 0.66, "learning_rate": 4.037970644543715e-06, "loss": 0.4732, "step": 1432 }, { "epoch": 0.67, "learning_rate": 4.034779834077856e-06, "loss": 0.635, "step": 1436 }, { "epoch": 0.67, "learning_rate": 4.0315890236119976e-06, "loss": 0.4924, "step": 1440 }, { "epoch": 0.67, "learning_rate": 4.02839821314614e-06, "loss": 0.4416, "step": 1444 }, { "epoch": 0.67, "learning_rate": 4.025207402680281e-06, "loss": 0.4448, "step": 1448 }, { "epoch": 0.67, "learning_rate": 4.022016592214423e-06, "loss": 0.7631, "step": 1452 }, { "epoch": 0.68, "learning_rate": 4.018825781748564e-06, "loss": 0.5035, "step": 1456 }, { "epoch": 0.68, "learning_rate": 4.015634971282707e-06, "loss": 0.3779, "step": 1460 }, { "epoch": 0.68, "learning_rate": 4.012444160816848e-06, "loss": 0.4924, "step": 1464 }, { "epoch": 0.68, "learning_rate": 4.0092533503509896e-06, "loss": 0.3932, "step": 1468 }, { "epoch": 0.68, "learning_rate": 4.006062539885131e-06, "loss": 0.6974, "step": 1472 }, { "epoch": 0.68, "learning_rate": 4.0028717294192725e-06, "loss": 0.7347, "step": 1476 }, { "epoch": 0.69, "learning_rate": 3.999680918953415e-06, "loss": 0.5564, "step": 1480 }, { "epoch": 0.69, "learning_rate": 3.996490108487556e-06, "loss": 0.4424, "step": 1484 }, { "epoch": 0.69, "learning_rate": 3.993299298021698e-06, "loss": 0.5323, "step": 1488 }, { "epoch": 0.69, "learning_rate": 3.990108487555839e-06, "loss": 0.6138, "step": 1492 }, { "epoch": 0.69, "learning_rate": 3.9869176770899815e-06, "loss": 0.5156, "step": 1496 }, { "epoch": 0.7, "learning_rate": 3.983726866624123e-06, "loss": 0.282, "step": 1500 }, { "epoch": 0.7, "learning_rate": 3.9805360561582645e-06, "loss": 0.5392, "step": 1504 }, { "epoch": 0.7, "learning_rate": 3.977345245692406e-06, "loss": 0.5721, "step": 1508 }, { "epoch": 0.7, "learning_rate": 3.974154435226547e-06, "loss": 0.6967, "step": 1512 }, { "epoch": 0.7, "learning_rate": 3.97096362476069e-06, "loss": 0.5348, "step": 1516 }, { "epoch": 0.71, "learning_rate": 3.967772814294831e-06, "loss": 0.6884, "step": 1520 }, { "epoch": 0.71, "learning_rate": 3.964582003828973e-06, "loss": 0.5065, "step": 1524 }, { "epoch": 0.71, "learning_rate": 3.961391193363114e-06, "loss": 0.4505, "step": 1528 }, { "epoch": 0.71, "learning_rate": 3.9582003828972565e-06, "loss": 0.6881, "step": 1532 }, { "epoch": 0.71, "learning_rate": 3.955009572431398e-06, "loss": 0.5952, "step": 1536 }, { "epoch": 0.71, "learning_rate": 3.951818761965539e-06, "loss": 0.5656, "step": 1540 }, { "epoch": 0.72, "learning_rate": 3.948627951499681e-06, "loss": 0.6437, "step": 1544 }, { "epoch": 0.72, "learning_rate": 3.945437141033822e-06, "loss": 0.5179, "step": 1548 }, { "epoch": 0.72, "learning_rate": 3.942246330567965e-06, "loss": 0.5278, "step": 1552 }, { "epoch": 0.72, "learning_rate": 3.939055520102106e-06, "loss": 0.6951, "step": 1556 }, { "epoch": 0.72, "learning_rate": 3.935864709636248e-06, "loss": 0.5468, "step": 1560 }, { "epoch": 0.73, "learning_rate": 3.932673899170389e-06, "loss": 0.5132, "step": 1564 }, { "epoch": 0.73, "learning_rate": 3.929483088704531e-06, "loss": 0.6297, "step": 1568 }, { "epoch": 0.73, "learning_rate": 3.926292278238673e-06, "loss": 0.5472, "step": 1572 }, { "epoch": 0.73, "learning_rate": 3.923101467772814e-06, "loss": 0.6623, "step": 1576 }, { "epoch": 0.73, "learning_rate": 3.919910657306956e-06, "loss": 0.6216, "step": 1580 }, { "epoch": 0.73, "learning_rate": 3.916719846841098e-06, "loss": 0.5332, "step": 1584 }, { "epoch": 0.74, "learning_rate": 3.91352903637524e-06, "loss": 0.4792, "step": 1588 }, { "epoch": 0.74, "learning_rate": 3.910338225909381e-06, "loss": 0.4573, "step": 1592 }, { "epoch": 0.74, "learning_rate": 3.9071474154435225e-06, "loss": 0.5135, "step": 1596 }, { "epoch": 0.74, "learning_rate": 3.903956604977665e-06, "loss": 0.7619, "step": 1600 }, { "epoch": 0.74, "learning_rate": 3.900765794511806e-06, "loss": 0.6681, "step": 1604 }, { "epoch": 0.75, "learning_rate": 3.897574984045948e-06, "loss": 0.7789, "step": 1608 }, { "epoch": 0.75, "learning_rate": 3.89438417358009e-06, "loss": 0.6078, "step": 1612 }, { "epoch": 0.75, "learning_rate": 3.8911933631142316e-06, "loss": 0.4812, "step": 1616 }, { "epoch": 0.75, "learning_rate": 3.888002552648373e-06, "loss": 0.5893, "step": 1620 }, { "epoch": 0.75, "learning_rate": 3.8848117421825145e-06, "loss": 0.4775, "step": 1624 }, { "epoch": 0.76, "learning_rate": 3.881620931716657e-06, "loss": 0.5012, "step": 1628 }, { "epoch": 0.76, "learning_rate": 3.878430121250798e-06, "loss": 0.4752, "step": 1632 }, { "epoch": 0.76, "learning_rate": 3.87523931078494e-06, "loss": 0.4365, "step": 1636 }, { "epoch": 0.76, "learning_rate": 3.872048500319081e-06, "loss": 0.6722, "step": 1640 }, { "epoch": 0.76, "learning_rate": 3.8688576898532236e-06, "loss": 0.6083, "step": 1644 }, { "epoch": 0.76, "learning_rate": 3.865666879387365e-06, "loss": 0.4533, "step": 1648 }, { "epoch": 0.77, "learning_rate": 3.8624760689215065e-06, "loss": 0.5879, "step": 1652 }, { "epoch": 0.77, "learning_rate": 3.859285258455648e-06, "loss": 0.6564, "step": 1656 }, { "epoch": 0.77, "learning_rate": 3.85609444798979e-06, "loss": 0.5475, "step": 1660 }, { "epoch": 0.77, "learning_rate": 3.852903637523932e-06, "loss": 0.5018, "step": 1664 }, { "epoch": 0.77, "learning_rate": 3.849712827058073e-06, "loss": 0.4544, "step": 1668 }, { "epoch": 0.78, "learning_rate": 3.846522016592215e-06, "loss": 0.6603, "step": 1672 }, { "epoch": 0.78, "learning_rate": 3.843331206126357e-06, "loss": 0.6887, "step": 1676 }, { "epoch": 0.78, "learning_rate": 3.8401403956604985e-06, "loss": 0.7819, "step": 1680 }, { "epoch": 0.78, "learning_rate": 3.83694958519464e-06, "loss": 0.5052, "step": 1684 }, { "epoch": 0.78, "learning_rate": 3.833758774728781e-06, "loss": 0.6689, "step": 1688 }, { "epoch": 0.78, "learning_rate": 3.830567964262923e-06, "loss": 0.5564, "step": 1692 }, { "epoch": 0.79, "learning_rate": 3.827377153797065e-06, "loss": 0.3658, "step": 1696 }, { "epoch": 0.79, "learning_rate": 3.824186343331207e-06, "loss": 0.6376, "step": 1700 }, { "epoch": 0.79, "learning_rate": 3.820995532865348e-06, "loss": 0.5681, "step": 1704 }, { "epoch": 0.79, "learning_rate": 3.81780472239949e-06, "loss": 0.5974, "step": 1708 }, { "epoch": 0.79, "learning_rate": 3.814613911933632e-06, "loss": 0.5623, "step": 1712 }, { "epoch": 0.8, "learning_rate": 3.8114231014677734e-06, "loss": 0.6437, "step": 1716 }, { "epoch": 0.8, "learning_rate": 3.808232291001915e-06, "loss": 0.6442, "step": 1720 }, { "epoch": 0.8, "learning_rate": 3.8050414805360563e-06, "loss": 0.4729, "step": 1724 }, { "epoch": 0.8, "learning_rate": 3.801850670070198e-06, "loss": 0.3677, "step": 1728 }, { "epoch": 0.8, "learning_rate": 3.79865985960434e-06, "loss": 0.4295, "step": 1732 }, { "epoch": 0.81, "learning_rate": 3.7954690491384816e-06, "loss": 0.6049, "step": 1736 }, { "epoch": 0.81, "learning_rate": 3.792278238672623e-06, "loss": 0.6363, "step": 1740 }, { "epoch": 0.81, "learning_rate": 3.7890874282067645e-06, "loss": 0.5939, "step": 1744 }, { "epoch": 0.81, "learning_rate": 3.785896617740907e-06, "loss": 0.5011, "step": 1748 }, { "epoch": 0.81, "learning_rate": 3.7827058072750483e-06, "loss": 0.5177, "step": 1752 }, { "epoch": 0.81, "learning_rate": 3.77951499680919e-06, "loss": 0.7722, "step": 1756 }, { "epoch": 0.82, "learning_rate": 3.7763241863433313e-06, "loss": 0.5204, "step": 1760 }, { "epoch": 0.82, "learning_rate": 3.773133375877473e-06, "loss": 0.455, "step": 1764 }, { "epoch": 0.82, "learning_rate": 3.769942565411615e-06, "loss": 0.5397, "step": 1768 }, { "epoch": 0.82, "learning_rate": 3.7667517549457565e-06, "loss": 0.5528, "step": 1772 }, { "epoch": 0.82, "learning_rate": 3.763560944479898e-06, "loss": 0.5286, "step": 1776 }, { "epoch": 0.83, "learning_rate": 3.76037013401404e-06, "loss": 0.5475, "step": 1780 }, { "epoch": 0.83, "learning_rate": 3.7571793235481818e-06, "loss": 0.3887, "step": 1784 }, { "epoch": 0.83, "learning_rate": 3.7539885130823233e-06, "loss": 0.6288, "step": 1788 }, { "epoch": 0.83, "learning_rate": 3.7507977026164647e-06, "loss": 0.5563, "step": 1792 }, { "epoch": 0.83, "learning_rate": 3.7476068921506066e-06, "loss": 0.6103, "step": 1796 }, { "epoch": 0.83, "learning_rate": 3.744416081684748e-06, "loss": 0.4141, "step": 1800 }, { "epoch": 0.84, "learning_rate": 3.74122527121889e-06, "loss": 0.4075, "step": 1804 }, { "epoch": 0.84, "learning_rate": 3.738034460753032e-06, "loss": 0.3594, "step": 1808 }, { "epoch": 0.84, "learning_rate": 3.7348436502871733e-06, "loss": 0.5157, "step": 1812 }, { "epoch": 0.84, "learning_rate": 3.731652839821315e-06, "loss": 0.4918, "step": 1816 }, { "epoch": 0.84, "learning_rate": 3.7284620293554563e-06, "loss": 0.4456, "step": 1820 }, { "epoch": 0.85, "learning_rate": 3.7252712188895986e-06, "loss": 0.7768, "step": 1824 }, { "epoch": 0.85, "learning_rate": 3.72208040842374e-06, "loss": 0.7511, "step": 1828 }, { "epoch": 0.85, "learning_rate": 3.7188895979578815e-06, "loss": 0.4604, "step": 1832 }, { "epoch": 0.85, "learning_rate": 3.715698787492023e-06, "loss": 0.6048, "step": 1836 }, { "epoch": 0.85, "learning_rate": 3.7125079770261653e-06, "loss": 0.6261, "step": 1840 }, { "epoch": 0.86, "learning_rate": 3.709317166560307e-06, "loss": 0.7588, "step": 1844 }, { "epoch": 0.86, "learning_rate": 3.7061263560944483e-06, "loss": 0.6608, "step": 1848 }, { "epoch": 0.86, "learning_rate": 3.7029355456285897e-06, "loss": 0.5453, "step": 1852 }, { "epoch": 0.86, "learning_rate": 3.6997447351627312e-06, "loss": 0.4361, "step": 1856 }, { "epoch": 0.86, "learning_rate": 3.6965539246968735e-06, "loss": 0.5557, "step": 1860 }, { "epoch": 0.86, "learning_rate": 3.693363114231015e-06, "loss": 0.6371, "step": 1864 }, { "epoch": 0.87, "learning_rate": 3.6901723037651565e-06, "loss": 0.4953, "step": 1868 }, { "epoch": 0.87, "learning_rate": 3.686981493299298e-06, "loss": 0.4157, "step": 1872 }, { "epoch": 0.87, "learning_rate": 3.6837906828334403e-06, "loss": 0.5469, "step": 1876 }, { "epoch": 0.87, "learning_rate": 3.6805998723675817e-06, "loss": 0.4933, "step": 1880 }, { "epoch": 0.87, "learning_rate": 3.677409061901723e-06, "loss": 0.4994, "step": 1884 }, { "epoch": 0.88, "learning_rate": 3.6742182514358647e-06, "loss": 0.3726, "step": 1888 }, { "epoch": 0.88, "learning_rate": 3.6710274409700066e-06, "loss": 0.5413, "step": 1892 }, { "epoch": 0.88, "learning_rate": 3.6678366305041485e-06, "loss": 0.574, "step": 1896 }, { "epoch": 0.88, "learning_rate": 3.66464582003829e-06, "loss": 0.2569, "step": 1900 }, { "epoch": 0.88, "learning_rate": 3.6614550095724314e-06, "loss": 0.5012, "step": 1904 }, { "epoch": 0.88, "learning_rate": 3.6582641991065733e-06, "loss": 0.586, "step": 1908 }, { "epoch": 0.89, "learning_rate": 3.655073388640715e-06, "loss": 0.4588, "step": 1912 }, { "epoch": 0.89, "learning_rate": 3.6518825781748567e-06, "loss": 0.3745, "step": 1916 }, { "epoch": 0.89, "learning_rate": 3.6486917677089985e-06, "loss": 0.5444, "step": 1920 }, { "epoch": 0.89, "learning_rate": 3.64550095724314e-06, "loss": 0.5545, "step": 1924 }, { "epoch": 0.89, "learning_rate": 3.6423101467772815e-06, "loss": 0.6965, "step": 1928 }, { "epoch": 0.9, "learning_rate": 3.6391193363114234e-06, "loss": 0.4442, "step": 1932 }, { "epoch": 0.9, "learning_rate": 3.6359285258455653e-06, "loss": 0.4866, "step": 1936 }, { "epoch": 0.9, "learning_rate": 3.6327377153797067e-06, "loss": 0.5114, "step": 1940 }, { "epoch": 0.9, "learning_rate": 3.6295469049138482e-06, "loss": 0.5922, "step": 1944 }, { "epoch": 0.9, "learning_rate": 3.62635609444799e-06, "loss": 0.4787, "step": 1948 }, { "epoch": 0.91, "learning_rate": 3.623165283982132e-06, "loss": 0.6709, "step": 1952 }, { "epoch": 0.91, "learning_rate": 3.6199744735162735e-06, "loss": 0.5078, "step": 1956 }, { "epoch": 0.91, "learning_rate": 3.616783663050415e-06, "loss": 0.5999, "step": 1960 }, { "epoch": 0.91, "learning_rate": 3.6135928525845564e-06, "loss": 0.5051, "step": 1964 }, { "epoch": 0.91, "learning_rate": 3.6111997447351634e-06, "loss": 0.4373, "step": 1968 }, { "epoch": 0.91, "learning_rate": 3.608008934269305e-06, "loss": 0.7497, "step": 1972 }, { "epoch": 0.92, "learning_rate": 3.6048181238034463e-06, "loss": 0.458, "step": 1976 }, { "epoch": 0.92, "learning_rate": 3.6016273133375878e-06, "loss": 0.3981, "step": 1980 }, { "epoch": 0.92, "learning_rate": 3.59843650287173e-06, "loss": 0.4995, "step": 1984 }, { "epoch": 0.92, "learning_rate": 3.5952456924058716e-06, "loss": 0.493, "step": 1988 }, { "epoch": 0.92, "learning_rate": 3.592054881940013e-06, "loss": 0.462, "step": 1992 }, { "epoch": 0.93, "learning_rate": 3.5888640714741545e-06, "loss": 0.5239, "step": 1996 }, { "epoch": 0.93, "learning_rate": 3.585673261008296e-06, "loss": 0.4376, "step": 2000 }, { "epoch": 0.93, "learning_rate": 3.5824824505424383e-06, "loss": 0.6067, "step": 2004 }, { "epoch": 0.93, "learning_rate": 3.5792916400765798e-06, "loss": 0.4091, "step": 2008 }, { "epoch": 0.93, "learning_rate": 3.5761008296107212e-06, "loss": 0.5261, "step": 2012 }, { "epoch": 0.94, "learning_rate": 3.5729100191448627e-06, "loss": 0.5408, "step": 2016 }, { "epoch": 0.94, "learning_rate": 3.569719208679005e-06, "loss": 0.5867, "step": 2020 }, { "epoch": 0.94, "learning_rate": 3.5665283982131465e-06, "loss": 0.636, "step": 2024 }, { "epoch": 0.94, "learning_rate": 3.563337587747288e-06, "loss": 0.4329, "step": 2028 }, { "epoch": 0.94, "learning_rate": 3.56014677728143e-06, "loss": 0.7026, "step": 2032 }, { "epoch": 0.94, "learning_rate": 3.5569559668155713e-06, "loss": 0.5245, "step": 2036 }, { "epoch": 0.95, "learning_rate": 3.553765156349713e-06, "loss": 0.4929, "step": 2040 }, { "epoch": 0.95, "learning_rate": 3.5505743458838547e-06, "loss": 0.4876, "step": 2044 }, { "epoch": 0.95, "learning_rate": 3.5473835354179966e-06, "loss": 0.45, "step": 2048 }, { "epoch": 0.95, "learning_rate": 3.544192724952138e-06, "loss": 0.5068, "step": 2052 }, { "epoch": 0.95, "learning_rate": 3.54100191448628e-06, "loss": 0.5647, "step": 2056 }, { "epoch": 0.96, "learning_rate": 3.5378111040204214e-06, "loss": 0.5048, "step": 2060 }, { "epoch": 0.96, "learning_rate": 3.5346202935545633e-06, "loss": 0.457, "step": 2064 }, { "epoch": 0.96, "learning_rate": 3.5314294830887048e-06, "loss": 0.4089, "step": 2068 }, { "epoch": 0.96, "learning_rate": 3.5282386726228462e-06, "loss": 0.3521, "step": 2072 }, { "epoch": 0.96, "learning_rate": 3.5250478621569886e-06, "loss": 0.3477, "step": 2076 }, { "epoch": 0.96, "learning_rate": 3.52185705169113e-06, "loss": 0.6625, "step": 2080 }, { "epoch": 0.97, "learning_rate": 3.5186662412252715e-06, "loss": 0.3829, "step": 2084 }, { "epoch": 0.97, "learning_rate": 3.515475430759413e-06, "loss": 0.4733, "step": 2088 }, { "epoch": 0.97, "learning_rate": 3.5122846202935553e-06, "loss": 0.4024, "step": 2092 }, { "epoch": 0.97, "learning_rate": 3.5090938098276968e-06, "loss": 0.5733, "step": 2096 }, { "epoch": 0.97, "learning_rate": 3.5059029993618382e-06, "loss": 0.5788, "step": 2100 }, { "epoch": 0.98, "learning_rate": 3.5027121888959797e-06, "loss": 0.4806, "step": 2104 }, { "epoch": 0.98, "learning_rate": 3.499521378430121e-06, "loss": 0.5091, "step": 2108 }, { "epoch": 0.98, "learning_rate": 3.4963305679642635e-06, "loss": 0.6465, "step": 2112 }, { "epoch": 0.98, "learning_rate": 3.493139757498405e-06, "loss": 0.4821, "step": 2116 }, { "epoch": 0.98, "learning_rate": 3.4899489470325464e-06, "loss": 0.3563, "step": 2120 }, { "epoch": 0.99, "learning_rate": 3.486758136566688e-06, "loss": 0.7174, "step": 2124 }, { "epoch": 0.99, "learning_rate": 3.4835673261008302e-06, "loss": 0.3833, "step": 2128 }, { "epoch": 0.99, "learning_rate": 3.4803765156349717e-06, "loss": 0.6688, "step": 2132 }, { "epoch": 0.99, "learning_rate": 3.477185705169113e-06, "loss": 0.5733, "step": 2136 }, { "epoch": 0.99, "learning_rate": 3.4739948947032546e-06, "loss": 0.5743, "step": 2140 }, { "epoch": 0.99, "learning_rate": 3.4708040842373965e-06, "loss": 0.5219, "step": 2144 }, { "epoch": 1.0, "learning_rate": 3.4676132737715384e-06, "loss": 0.5964, "step": 2148 }, { "epoch": 1.0, "learning_rate": 3.46442246330568e-06, "loss": 0.5981, "step": 2152 }, { "epoch": 1.0, "learning_rate": 3.4612316528398214e-06, "loss": 0.4068, "step": 2156 }, { "epoch": 1.0, "learning_rate": 3.4580408423739632e-06, "loss": 0.3966, "step": 2160 }, { "epoch": 1.0, "learning_rate": 3.454850031908105e-06, "loss": 0.2291, "step": 2164 }, { "epoch": 1.01, "learning_rate": 3.4516592214422466e-06, "loss": 0.4695, "step": 2168 }, { "epoch": 1.01, "learning_rate": 3.4484684109763885e-06, "loss": 0.5594, "step": 2172 }, { "epoch": 1.01, "learning_rate": 3.44527760051053e-06, "loss": 0.5603, "step": 2176 }, { "epoch": 1.01, "learning_rate": 3.4420867900446714e-06, "loss": 0.4934, "step": 2180 }, { "epoch": 1.01, "learning_rate": 3.4388959795788133e-06, "loss": 0.6316, "step": 2184 }, { "epoch": 1.01, "learning_rate": 3.4357051691129552e-06, "loss": 0.3424, "step": 2188 }, { "epoch": 1.02, "learning_rate": 3.4325143586470967e-06, "loss": 0.566, "step": 2192 }, { "epoch": 1.02, "learning_rate": 3.429323548181238e-06, "loss": 0.3565, "step": 2196 }, { "epoch": 1.02, "learning_rate": 3.42613273771538e-06, "loss": 0.5191, "step": 2200 }, { "epoch": 1.02, "learning_rate": 3.422941927249522e-06, "loss": 0.3848, "step": 2204 }, { "epoch": 1.02, "learning_rate": 3.4197511167836634e-06, "loss": 0.6962, "step": 2208 }, { "epoch": 1.03, "learning_rate": 3.416560306317805e-06, "loss": 0.3646, "step": 2212 }, { "epoch": 1.03, "learning_rate": 3.4133694958519464e-06, "loss": 0.3756, "step": 2216 }, { "epoch": 1.03, "learning_rate": 3.4101786853860887e-06, "loss": 0.2853, "step": 2220 }, { "epoch": 1.03, "learning_rate": 3.40698787492023e-06, "loss": 0.2925, "step": 2224 }, { "epoch": 1.03, "learning_rate": 3.4037970644543716e-06, "loss": 0.3838, "step": 2228 }, { "epoch": 1.04, "learning_rate": 3.400606253988513e-06, "loss": 0.4479, "step": 2232 }, { "epoch": 1.04, "learning_rate": 3.3974154435226554e-06, "loss": 0.5207, "step": 2236 }, { "epoch": 1.04, "learning_rate": 3.394224633056797e-06, "loss": 0.3813, "step": 2240 }, { "epoch": 1.04, "learning_rate": 3.3910338225909384e-06, "loss": 0.4028, "step": 2244 }, { "epoch": 1.04, "learning_rate": 3.38784301212508e-06, "loss": 0.3406, "step": 2248 }, { "epoch": 1.04, "learning_rate": 3.3846522016592213e-06, "loss": 0.4495, "step": 2252 }, { "epoch": 1.05, "learning_rate": 3.3814613911933636e-06, "loss": 0.5411, "step": 2256 }, { "epoch": 1.05, "learning_rate": 3.378270580727505e-06, "loss": 0.3533, "step": 2260 }, { "epoch": 1.05, "learning_rate": 3.3750797702616465e-06, "loss": 0.5577, "step": 2264 }, { "epoch": 1.05, "learning_rate": 3.371888959795788e-06, "loss": 0.4198, "step": 2268 }, { "epoch": 1.05, "learning_rate": 3.3686981493299303e-06, "loss": 0.2956, "step": 2272 }, { "epoch": 1.06, "learning_rate": 3.365507338864072e-06, "loss": 0.5714, "step": 2276 }, { "epoch": 1.06, "learning_rate": 3.3623165283982133e-06, "loss": 0.3393, "step": 2280 }, { "epoch": 1.06, "learning_rate": 3.359125717932355e-06, "loss": 0.3448, "step": 2284 }, { "epoch": 1.06, "learning_rate": 3.3559349074664966e-06, "loss": 0.4956, "step": 2288 }, { "epoch": 1.06, "learning_rate": 3.3527440970006385e-06, "loss": 0.4609, "step": 2292 }, { "epoch": 1.06, "learning_rate": 3.34955328653478e-06, "loss": 0.4499, "step": 2296 }, { "epoch": 1.07, "learning_rate": 3.346362476068922e-06, "loss": 0.3638, "step": 2300 }, { "epoch": 1.07, "learning_rate": 3.3431716656030634e-06, "loss": 0.6062, "step": 2304 }, { "epoch": 1.07, "learning_rate": 3.3399808551372053e-06, "loss": 0.319, "step": 2308 }, { "epoch": 1.07, "learning_rate": 3.3367900446713467e-06, "loss": 0.3106, "step": 2312 }, { "epoch": 1.07, "learning_rate": 3.3335992342054886e-06, "loss": 0.6715, "step": 2316 }, { "epoch": 1.08, "learning_rate": 3.33040842373963e-06, "loss": 0.4007, "step": 2320 }, { "epoch": 1.08, "learning_rate": 3.3272176132737716e-06, "loss": 0.5854, "step": 2324 }, { "epoch": 1.08, "learning_rate": 3.324026802807914e-06, "loss": 0.4384, "step": 2328 }, { "epoch": 1.08, "learning_rate": 3.3208359923420554e-06, "loss": 0.5186, "step": 2332 }, { "epoch": 1.08, "learning_rate": 3.317645181876197e-06, "loss": 0.2793, "step": 2336 }, { "epoch": 1.09, "learning_rate": 3.3144543714103383e-06, "loss": 0.3945, "step": 2340 }, { "epoch": 1.09, "learning_rate": 3.3112635609444806e-06, "loss": 0.4488, "step": 2344 }, { "epoch": 1.09, "learning_rate": 3.308072750478622e-06, "loss": 0.2692, "step": 2348 }, { "epoch": 1.09, "learning_rate": 3.3048819400127635e-06, "loss": 0.4689, "step": 2352 }, { "epoch": 1.09, "learning_rate": 3.301691129546905e-06, "loss": 0.2162, "step": 2356 }, { "epoch": 1.09, "learning_rate": 3.2985003190810465e-06, "loss": 0.3339, "step": 2360 }, { "epoch": 1.1, "learning_rate": 3.295309508615189e-06, "loss": 0.5855, "step": 2364 }, { "epoch": 1.1, "learning_rate": 3.2921186981493303e-06, "loss": 0.4823, "step": 2368 }, { "epoch": 1.1, "learning_rate": 3.2889278876834717e-06, "loss": 0.3587, "step": 2372 }, { "epoch": 1.1, "learning_rate": 3.2857370772176132e-06, "loss": 0.3903, "step": 2376 }, { "epoch": 1.1, "learning_rate": 3.2825462667517555e-06, "loss": 0.5669, "step": 2380 }, { "epoch": 1.11, "learning_rate": 3.279355456285897e-06, "loss": 0.413, "step": 2384 }, { "epoch": 1.11, "learning_rate": 3.2761646458200385e-06, "loss": 0.3735, "step": 2388 }, { "epoch": 1.11, "learning_rate": 3.27297383535418e-06, "loss": 0.5467, "step": 2392 }, { "epoch": 1.11, "learning_rate": 3.269783024888322e-06, "loss": 0.3738, "step": 2396 }, { "epoch": 1.11, "learning_rate": 3.2665922144224637e-06, "loss": 0.4619, "step": 2400 }, { "epoch": 1.12, "learning_rate": 3.263401403956605e-06, "loss": 0.3739, "step": 2404 }, { "epoch": 1.12, "learning_rate": 3.2602105934907467e-06, "loss": 0.4065, "step": 2408 }, { "epoch": 1.12, "learning_rate": 3.2570197830248886e-06, "loss": 0.3406, "step": 2412 }, { "epoch": 1.12, "learning_rate": 3.2538289725590305e-06, "loss": 0.4554, "step": 2416 }, { "epoch": 1.12, "learning_rate": 3.250638162093172e-06, "loss": 0.799, "step": 2420 }, { "epoch": 1.12, "learning_rate": 3.247447351627314e-06, "loss": 0.4552, "step": 2424 }, { "epoch": 1.13, "learning_rate": 3.2442565411614553e-06, "loss": 0.3708, "step": 2428 }, { "epoch": 1.13, "learning_rate": 3.2410657306955968e-06, "loss": 0.3309, "step": 2432 }, { "epoch": 1.13, "learning_rate": 3.2378749202297387e-06, "loss": 0.4065, "step": 2436 }, { "epoch": 1.13, "learning_rate": 3.2346841097638806e-06, "loss": 0.627, "step": 2440 }, { "epoch": 1.13, "learning_rate": 3.231493299298022e-06, "loss": 0.3551, "step": 2444 }, { "epoch": 1.14, "learning_rate": 3.2283024888321635e-06, "loss": 0.2465, "step": 2448 }, { "epoch": 1.14, "learning_rate": 3.2251116783663054e-06, "loss": 0.5104, "step": 2452 }, { "epoch": 1.14, "learning_rate": 3.2219208679004473e-06, "loss": 0.5923, "step": 2456 }, { "epoch": 1.14, "learning_rate": 3.2187300574345887e-06, "loss": 0.3455, "step": 2460 }, { "epoch": 1.14, "learning_rate": 3.2155392469687302e-06, "loss": 0.3767, "step": 2464 }, { "epoch": 1.14, "learning_rate": 3.2123484365028717e-06, "loss": 0.4728, "step": 2468 }, { "epoch": 1.15, "learning_rate": 3.209157626037014e-06, "loss": 0.4427, "step": 2472 }, { "epoch": 1.15, "learning_rate": 3.2059668155711555e-06, "loss": 0.3805, "step": 2476 }, { "epoch": 1.15, "learning_rate": 3.202776005105297e-06, "loss": 0.2417, "step": 2480 }, { "epoch": 1.15, "learning_rate": 3.1995851946394384e-06, "loss": 0.4459, "step": 2484 }, { "epoch": 1.15, "learning_rate": 3.1963943841735807e-06, "loss": 0.5951, "step": 2488 }, { "epoch": 1.16, "learning_rate": 3.193203573707722e-06, "loss": 0.4512, "step": 2492 }, { "epoch": 1.16, "learning_rate": 3.1900127632418637e-06, "loss": 0.4038, "step": 2496 }, { "epoch": 1.16, "learning_rate": 3.186821952776005e-06, "loss": 0.5716, "step": 2500 }, { "epoch": 1.16, "learning_rate": 3.1836311423101466e-06, "loss": 0.3122, "step": 2504 }, { "epoch": 1.16, "learning_rate": 3.180440331844289e-06, "loss": 0.6523, "step": 2508 }, { "epoch": 1.17, "learning_rate": 3.1772495213784304e-06, "loss": 0.4129, "step": 2512 }, { "epoch": 1.17, "learning_rate": 3.174058710912572e-06, "loss": 0.7674, "step": 2516 }, { "epoch": 1.17, "learning_rate": 3.1708679004467138e-06, "loss": 0.5227, "step": 2520 }, { "epoch": 1.17, "learning_rate": 3.1676770899808557e-06, "loss": 0.457, "step": 2524 }, { "epoch": 1.17, "learning_rate": 3.164486279514997e-06, "loss": 0.3279, "step": 2528 }, { "epoch": 1.17, "learning_rate": 3.1612954690491386e-06, "loss": 0.4809, "step": 2532 }, { "epoch": 1.18, "learning_rate": 3.1581046585832805e-06, "loss": 0.3513, "step": 2536 }, { "epoch": 1.18, "learning_rate": 3.154913848117422e-06, "loss": 0.5097, "step": 2540 }, { "epoch": 1.18, "learning_rate": 3.151723037651564e-06, "loss": 0.4727, "step": 2544 }, { "epoch": 1.18, "learning_rate": 3.1485322271857053e-06, "loss": 0.3848, "step": 2548 }, { "epoch": 1.18, "learning_rate": 3.1453414167198472e-06, "loss": 0.49, "step": 2552 }, { "epoch": 1.19, "learning_rate": 3.1421506062539887e-06, "loss": 0.4166, "step": 2556 }, { "epoch": 1.19, "learning_rate": 3.1389597957881306e-06, "loss": 0.4397, "step": 2560 }, { "epoch": 1.19, "learning_rate": 3.135768985322272e-06, "loss": 0.4295, "step": 2564 }, { "epoch": 1.19, "learning_rate": 3.132578174856414e-06, "loss": 0.3807, "step": 2568 }, { "epoch": 1.19, "learning_rate": 3.1293873643905554e-06, "loss": 0.5155, "step": 2572 }, { "epoch": 1.19, "learning_rate": 3.126196553924697e-06, "loss": 0.4183, "step": 2576 }, { "epoch": 1.2, "learning_rate": 3.123005743458839e-06, "loss": 0.4173, "step": 2580 }, { "epoch": 1.2, "learning_rate": 3.1198149329929807e-06, "loss": 0.5842, "step": 2584 }, { "epoch": 1.2, "learning_rate": 3.116624122527122e-06, "loss": 0.6673, "step": 2588 }, { "epoch": 1.2, "learning_rate": 3.1134333120612636e-06, "loss": 0.4492, "step": 2592 }, { "epoch": 1.2, "learning_rate": 3.110242501595406e-06, "loss": 0.4747, "step": 2596 }, { "epoch": 1.21, "learning_rate": 3.1070516911295474e-06, "loss": 0.5746, "step": 2600 }, { "epoch": 1.21, "learning_rate": 3.103860880663689e-06, "loss": 0.5708, "step": 2604 }, { "epoch": 1.21, "learning_rate": 3.1006700701978303e-06, "loss": 0.601, "step": 2608 }, { "epoch": 1.21, "learning_rate": 3.097479259731972e-06, "loss": 0.7205, "step": 2612 }, { "epoch": 1.21, "learning_rate": 3.094288449266114e-06, "loss": 0.4023, "step": 2616 }, { "epoch": 1.22, "learning_rate": 3.0910976388002556e-06, "loss": 0.5012, "step": 2620 }, { "epoch": 1.22, "learning_rate": 3.087906828334397e-06, "loss": 0.4779, "step": 2624 }, { "epoch": 1.22, "learning_rate": 3.0847160178685385e-06, "loss": 0.5062, "step": 2628 }, { "epoch": 1.22, "learning_rate": 3.081525207402681e-06, "loss": 0.2888, "step": 2632 }, { "epoch": 1.22, "learning_rate": 3.0783343969368223e-06, "loss": 0.4121, "step": 2636 }, { "epoch": 1.22, "learning_rate": 3.075143586470964e-06, "loss": 0.4721, "step": 2640 }, { "epoch": 1.23, "learning_rate": 3.0719527760051053e-06, "loss": 0.3422, "step": 2644 }, { "epoch": 1.23, "learning_rate": 3.068761965539247e-06, "loss": 0.5018, "step": 2648 }, { "epoch": 1.23, "learning_rate": 3.065571155073389e-06, "loss": 0.5165, "step": 2652 }, { "epoch": 1.23, "learning_rate": 3.0623803446075305e-06, "loss": 0.3574, "step": 2656 }, { "epoch": 1.23, "learning_rate": 3.059189534141672e-06, "loss": 0.579, "step": 2660 }, { "epoch": 1.24, "learning_rate": 3.055998723675814e-06, "loss": 0.4961, "step": 2664 }, { "epoch": 1.24, "learning_rate": 3.0528079132099558e-06, "loss": 0.3949, "step": 2668 }, { "epoch": 1.24, "learning_rate": 3.0496171027440973e-06, "loss": 0.3999, "step": 2672 }, { "epoch": 1.24, "learning_rate": 3.046426292278239e-06, "loss": 0.524, "step": 2676 }, { "epoch": 1.24, "learning_rate": 3.0432354818123806e-06, "loss": 0.3688, "step": 2680 }, { "epoch": 1.24, "learning_rate": 3.040044671346522e-06, "loss": 0.403, "step": 2684 }, { "epoch": 1.25, "learning_rate": 3.036853860880664e-06, "loss": 0.3388, "step": 2688 }, { "epoch": 1.25, "learning_rate": 3.033663050414806e-06, "loss": 0.4706, "step": 2692 }, { "epoch": 1.25, "learning_rate": 3.0304722399489473e-06, "loss": 0.6817, "step": 2696 }, { "epoch": 1.25, "learning_rate": 3.027281429483089e-06, "loss": 0.3896, "step": 2700 }, { "epoch": 1.25, "learning_rate": 3.0240906190172307e-06, "loss": 0.358, "step": 2704 }, { "epoch": 1.26, "learning_rate": 3.0208998085513726e-06, "loss": 0.3115, "step": 2708 }, { "epoch": 1.26, "learning_rate": 3.017708998085514e-06, "loss": 0.5322, "step": 2712 }, { "epoch": 1.26, "learning_rate": 3.0145181876196555e-06, "loss": 0.4613, "step": 2716 }, { "epoch": 1.26, "learning_rate": 3.011327377153797e-06, "loss": 0.4374, "step": 2720 }, { "epoch": 1.26, "learning_rate": 3.0081365666879393e-06, "loss": 0.4775, "step": 2724 }, { "epoch": 1.27, "learning_rate": 3.004945756222081e-06, "loss": 0.349, "step": 2728 }, { "epoch": 1.27, "learning_rate": 3.0017549457562223e-06, "loss": 0.5114, "step": 2732 }, { "epoch": 1.27, "learning_rate": 2.9985641352903637e-06, "loss": 0.3901, "step": 2736 }, { "epoch": 1.27, "learning_rate": 2.995373324824506e-06, "loss": 0.4756, "step": 2740 }, { "epoch": 1.27, "learning_rate": 2.9921825143586475e-06, "loss": 0.4669, "step": 2744 }, { "epoch": 1.27, "learning_rate": 2.988991703892789e-06, "loss": 0.5554, "step": 2748 }, { "epoch": 1.28, "learning_rate": 2.9858008934269305e-06, "loss": 0.3345, "step": 2752 }, { "epoch": 1.28, "learning_rate": 2.982610082961072e-06, "loss": 0.3653, "step": 2756 }, { "epoch": 1.28, "learning_rate": 2.9794192724952143e-06, "loss": 0.4543, "step": 2760 }, { "epoch": 1.28, "learning_rate": 2.9762284620293557e-06, "loss": 0.382, "step": 2764 }, { "epoch": 1.28, "learning_rate": 2.973037651563497e-06, "loss": 0.2821, "step": 2768 }, { "epoch": 1.29, "learning_rate": 2.969846841097639e-06, "loss": 0.4392, "step": 2772 }, { "epoch": 1.29, "learning_rate": 2.966656030631781e-06, "loss": 0.3785, "step": 2776 }, { "epoch": 1.29, "learning_rate": 2.9634652201659224e-06, "loss": 0.4799, "step": 2780 }, { "epoch": 1.29, "learning_rate": 2.960274409700064e-06, "loss": 0.4004, "step": 2784 }, { "epoch": 1.29, "learning_rate": 2.957083599234206e-06, "loss": 0.4598, "step": 2788 }, { "epoch": 1.29, "learning_rate": 2.9538927887683473e-06, "loss": 0.6889, "step": 2792 }, { "epoch": 1.3, "learning_rate": 2.950701978302489e-06, "loss": 0.3401, "step": 2796 }, { "epoch": 1.3, "learning_rate": 2.9475111678366306e-06, "loss": 0.5162, "step": 2800 }, { "epoch": 1.3, "learning_rate": 2.9443203573707725e-06, "loss": 0.3811, "step": 2804 }, { "epoch": 1.3, "learning_rate": 2.941129546904914e-06, "loss": 0.3048, "step": 2808 }, { "epoch": 1.3, "learning_rate": 2.937938736439056e-06, "loss": 0.5528, "step": 2812 }, { "epoch": 1.31, "learning_rate": 2.9347479259731974e-06, "loss": 0.3721, "step": 2816 }, { "epoch": 1.31, "learning_rate": 2.9315571155073393e-06, "loss": 0.4877, "step": 2820 }, { "epoch": 1.31, "learning_rate": 2.9283663050414807e-06, "loss": 0.3101, "step": 2824 }, { "epoch": 1.31, "learning_rate": 2.925175494575622e-06, "loss": 0.3458, "step": 2828 }, { "epoch": 1.31, "learning_rate": 2.9219846841097645e-06, "loss": 0.3741, "step": 2832 }, { "epoch": 1.32, "learning_rate": 2.918793873643906e-06, "loss": 0.7428, "step": 2836 }, { "epoch": 1.32, "learning_rate": 2.9156030631780475e-06, "loss": 0.3487, "step": 2840 }, { "epoch": 1.32, "learning_rate": 2.912412252712189e-06, "loss": 0.3184, "step": 2844 }, { "epoch": 1.32, "learning_rate": 2.9092214422463313e-06, "loss": 0.3778, "step": 2848 }, { "epoch": 1.32, "learning_rate": 2.9060306317804727e-06, "loss": 0.4085, "step": 2852 }, { "epoch": 1.32, "learning_rate": 2.902839821314614e-06, "loss": 0.4859, "step": 2856 }, { "epoch": 1.33, "learning_rate": 2.8996490108487557e-06, "loss": 0.5783, "step": 2860 }, { "epoch": 1.33, "learning_rate": 2.896458200382897e-06, "loss": 0.2396, "step": 2864 }, { "epoch": 1.33, "learning_rate": 2.8932673899170395e-06, "loss": 0.6563, "step": 2868 }, { "epoch": 1.33, "learning_rate": 2.890076579451181e-06, "loss": 0.3048, "step": 2872 }, { "epoch": 1.33, "learning_rate": 2.8868857689853224e-06, "loss": 0.3935, "step": 2876 }, { "epoch": 1.34, "learning_rate": 2.883694958519464e-06, "loss": 0.5063, "step": 2880 }, { "epoch": 1.34, "learning_rate": 2.880504148053606e-06, "loss": 0.5056, "step": 2884 }, { "epoch": 1.34, "learning_rate": 2.8773133375877476e-06, "loss": 0.2203, "step": 2888 }, { "epoch": 1.34, "learning_rate": 2.874122527121889e-06, "loss": 0.3044, "step": 2892 }, { "epoch": 1.34, "learning_rate": 2.8709317166560306e-06, "loss": 0.4444, "step": 2896 }, { "epoch": 1.35, "learning_rate": 2.8677409061901725e-06, "loss": 0.3645, "step": 2900 }, { "epoch": 1.35, "learning_rate": 2.8645500957243144e-06, "loss": 0.4594, "step": 2904 }, { "epoch": 1.35, "learning_rate": 2.861359285258456e-06, "loss": 0.4897, "step": 2908 }, { "epoch": 1.35, "learning_rate": 2.8581684747925973e-06, "loss": 0.4772, "step": 2912 }, { "epoch": 1.35, "learning_rate": 2.854977664326739e-06, "loss": 0.388, "step": 2916 }, { "epoch": 1.35, "learning_rate": 2.851786853860881e-06, "loss": 0.3869, "step": 2920 }, { "epoch": 1.36, "learning_rate": 2.8485960433950226e-06, "loss": 0.4853, "step": 2924 }, { "epoch": 1.36, "learning_rate": 2.8454052329291645e-06, "loss": 0.4467, "step": 2928 }, { "epoch": 1.36, "learning_rate": 2.842214422463306e-06, "loss": 0.2356, "step": 2932 }, { "epoch": 1.36, "learning_rate": 2.8390236119974474e-06, "loss": 0.4614, "step": 2936 }, { "epoch": 1.36, "learning_rate": 2.8358328015315893e-06, "loss": 0.3212, "step": 2940 }, { "epoch": 1.37, "learning_rate": 2.832641991065731e-06, "loss": 0.5037, "step": 2944 }, { "epoch": 1.37, "learning_rate": 2.8294511805998727e-06, "loss": 0.4957, "step": 2948 }, { "epoch": 1.37, "learning_rate": 2.826260370134014e-06, "loss": 0.418, "step": 2952 }, { "epoch": 1.37, "learning_rate": 2.823069559668156e-06, "loss": 0.2996, "step": 2956 }, { "epoch": 1.37, "learning_rate": 2.819878749202298e-06, "loss": 0.5421, "step": 2960 }, { "epoch": 1.37, "learning_rate": 2.8166879387364394e-06, "loss": 0.5049, "step": 2964 }, { "epoch": 1.38, "learning_rate": 2.813497128270581e-06, "loss": 0.3929, "step": 2968 }, { "epoch": 1.38, "learning_rate": 2.8103063178047223e-06, "loss": 0.4045, "step": 2972 }, { "epoch": 1.38, "learning_rate": 2.8071155073388646e-06, "loss": 0.3494, "step": 2976 }, { "epoch": 1.38, "learning_rate": 2.803924696873006e-06, "loss": 0.3782, "step": 2980 }, { "epoch": 1.38, "learning_rate": 2.8007338864071476e-06, "loss": 0.2768, "step": 2984 }, { "epoch": 1.39, "learning_rate": 2.797543075941289e-06, "loss": 0.531, "step": 2988 }, { "epoch": 1.39, "learning_rate": 2.7943522654754314e-06, "loss": 0.4958, "step": 2992 }, { "epoch": 1.39, "learning_rate": 2.791161455009573e-06, "loss": 0.6183, "step": 2996 }, { "epoch": 1.39, "learning_rate": 2.7879706445437143e-06, "loss": 0.3521, "step": 3000 }, { "epoch": 1.39, "learning_rate": 2.7847798340778558e-06, "loss": 0.4406, "step": 3004 }, { "epoch": 1.4, "learning_rate": 2.7815890236119973e-06, "loss": 0.4131, "step": 3008 }, { "epoch": 1.4, "learning_rate": 2.7783982131461396e-06, "loss": 0.5107, "step": 3012 }, { "epoch": 1.4, "learning_rate": 2.775207402680281e-06, "loss": 0.2735, "step": 3016 }, { "epoch": 1.4, "learning_rate": 2.7720165922144225e-06, "loss": 0.3788, "step": 3020 }, { "epoch": 1.4, "learning_rate": 2.7696234843650286e-06, "loss": 0.5599, "step": 3024 }, { "epoch": 1.4, "learning_rate": 2.7664326738991705e-06, "loss": 0.2355, "step": 3028 }, { "epoch": 1.41, "learning_rate": 2.7632418634333124e-06, "loss": 0.5358, "step": 3032 }, { "epoch": 1.41, "learning_rate": 2.760051052967454e-06, "loss": 0.3283, "step": 3036 }, { "epoch": 1.41, "learning_rate": 2.7568602425015958e-06, "loss": 0.4093, "step": 3040 }, { "epoch": 1.41, "learning_rate": 2.7536694320357372e-06, "loss": 0.287, "step": 3044 }, { "epoch": 1.41, "learning_rate": 2.750478621569879e-06, "loss": 0.5271, "step": 3048 }, { "epoch": 1.42, "learning_rate": 2.7472878111040206e-06, "loss": 0.3372, "step": 3052 }, { "epoch": 1.42, "learning_rate": 2.7440970006381625e-06, "loss": 0.5649, "step": 3056 }, { "epoch": 1.42, "learning_rate": 2.740906190172304e-06, "loss": 0.5017, "step": 3060 }, { "epoch": 1.42, "learning_rate": 2.7377153797064454e-06, "loss": 0.6057, "step": 3064 }, { "epoch": 1.42, "learning_rate": 2.7345245692405873e-06, "loss": 0.4184, "step": 3068 }, { "epoch": 1.42, "learning_rate": 2.7313337587747292e-06, "loss": 0.2892, "step": 3072 }, { "epoch": 1.43, "learning_rate": 2.7281429483088707e-06, "loss": 0.5914, "step": 3076 }, { "epoch": 1.43, "learning_rate": 2.724952137843012e-06, "loss": 0.472, "step": 3080 }, { "epoch": 1.43, "learning_rate": 2.721761327377154e-06, "loss": 0.3773, "step": 3084 }, { "epoch": 1.43, "learning_rate": 2.718570516911296e-06, "loss": 0.2942, "step": 3088 }, { "epoch": 1.43, "learning_rate": 2.7153797064454374e-06, "loss": 0.3445, "step": 3092 }, { "epoch": 1.44, "learning_rate": 2.712188895979579e-06, "loss": 0.2773, "step": 3096 }, { "epoch": 1.44, "learning_rate": 2.7089980855137204e-06, "loss": 0.4007, "step": 3100 }, { "epoch": 1.44, "learning_rate": 2.7058072750478627e-06, "loss": 0.3083, "step": 3104 }, { "epoch": 1.44, "learning_rate": 2.702616464582004e-06, "loss": 0.4782, "step": 3108 }, { "epoch": 1.44, "learning_rate": 2.6994256541161456e-06, "loss": 0.5419, "step": 3112 }, { "epoch": 1.45, "learning_rate": 2.696234843650287e-06, "loss": 0.5713, "step": 3116 }, { "epoch": 1.45, "learning_rate": 2.6930440331844294e-06, "loss": 0.3722, "step": 3120 }, { "epoch": 1.45, "learning_rate": 2.689853222718571e-06, "loss": 0.4663, "step": 3124 }, { "epoch": 1.45, "learning_rate": 2.6866624122527123e-06, "loss": 0.3208, "step": 3128 }, { "epoch": 1.45, "learning_rate": 2.683471601786854e-06, "loss": 0.351, "step": 3132 }, { "epoch": 1.45, "learning_rate": 2.6802807913209957e-06, "loss": 0.513, "step": 3136 }, { "epoch": 1.46, "learning_rate": 2.6770899808551376e-06, "loss": 0.4409, "step": 3140 }, { "epoch": 1.46, "learning_rate": 2.673899170389279e-06, "loss": 0.3335, "step": 3144 }, { "epoch": 1.46, "learning_rate": 2.6707083599234205e-06, "loss": 0.3706, "step": 3148 }, { "epoch": 1.46, "learning_rate": 2.6675175494575624e-06, "loss": 0.4404, "step": 3152 }, { "epoch": 1.46, "learning_rate": 2.6643267389917043e-06, "loss": 0.4186, "step": 3156 }, { "epoch": 1.47, "learning_rate": 2.661135928525846e-06, "loss": 0.3666, "step": 3160 }, { "epoch": 1.47, "learning_rate": 2.6579451180599873e-06, "loss": 0.3849, "step": 3164 }, { "epoch": 1.47, "learning_rate": 2.654754307594129e-06, "loss": 0.4564, "step": 3168 }, { "epoch": 1.47, "learning_rate": 2.6515634971282706e-06, "loss": 0.3534, "step": 3172 }, { "epoch": 1.47, "learning_rate": 2.6483726866624125e-06, "loss": 0.3735, "step": 3176 }, { "epoch": 1.47, "learning_rate": 2.645181876196554e-06, "loss": 0.4449, "step": 3180 }, { "epoch": 1.48, "learning_rate": 2.641991065730696e-06, "loss": 0.5032, "step": 3184 }, { "epoch": 1.48, "learning_rate": 2.6388002552648374e-06, "loss": 0.3677, "step": 3188 }, { "epoch": 1.48, "learning_rate": 2.6356094447989793e-06, "loss": 0.5004, "step": 3192 }, { "epoch": 1.48, "learning_rate": 2.632418634333121e-06, "loss": 0.1972, "step": 3196 }, { "epoch": 1.48, "learning_rate": 2.6292278238672626e-06, "loss": 0.4606, "step": 3200 }, { "epoch": 1.49, "learning_rate": 2.626037013401404e-06, "loss": 0.3533, "step": 3204 }, { "epoch": 1.49, "learning_rate": 2.6228462029355456e-06, "loss": 0.3607, "step": 3208 }, { "epoch": 1.49, "learning_rate": 2.619655392469688e-06, "loss": 0.5767, "step": 3212 }, { "epoch": 1.49, "learning_rate": 2.6164645820038293e-06, "loss": 0.5316, "step": 3216 }, { "epoch": 1.49, "learning_rate": 2.613273771537971e-06, "loss": 0.2474, "step": 3220 }, { "epoch": 1.5, "learning_rate": 2.6100829610721123e-06, "loss": 0.3168, "step": 3224 }, { "epoch": 1.5, "learning_rate": 2.6068921506062546e-06, "loss": 0.4029, "step": 3228 }, { "epoch": 1.5, "learning_rate": 2.603701340140396e-06, "loss": 0.2693, "step": 3232 }, { "epoch": 1.5, "learning_rate": 2.6005105296745375e-06, "loss": 0.3756, "step": 3236 }, { "epoch": 1.5, "learning_rate": 2.597319719208679e-06, "loss": 0.3712, "step": 3240 }, { "epoch": 1.5, "learning_rate": 2.5941289087428205e-06, "loss": 0.366, "step": 3244 }, { "epoch": 1.51, "learning_rate": 2.590938098276963e-06, "loss": 0.3813, "step": 3248 }, { "epoch": 1.51, "learning_rate": 2.5877472878111043e-06, "loss": 0.4442, "step": 3252 }, { "epoch": 1.51, "learning_rate": 2.5845564773452457e-06, "loss": 0.4061, "step": 3256 }, { "epoch": 1.51, "learning_rate": 2.581365666879387e-06, "loss": 0.3679, "step": 3260 }, { "epoch": 1.51, "learning_rate": 2.5781748564135295e-06, "loss": 0.2641, "step": 3264 }, { "epoch": 1.52, "learning_rate": 2.574984045947671e-06, "loss": 0.5656, "step": 3268 }, { "epoch": 1.52, "learning_rate": 2.5717932354818125e-06, "loss": 0.3672, "step": 3272 }, { "epoch": 1.52, "learning_rate": 2.568602425015954e-06, "loss": 0.3395, "step": 3276 }, { "epoch": 1.52, "learning_rate": 2.565411614550096e-06, "loss": 0.5946, "step": 3280 }, { "epoch": 1.52, "learning_rate": 2.5622208040842377e-06, "loss": 0.3526, "step": 3284 }, { "epoch": 1.53, "learning_rate": 2.559029993618379e-06, "loss": 0.3365, "step": 3288 }, { "epoch": 1.53, "learning_rate": 2.555839183152521e-06, "loss": 0.4003, "step": 3292 }, { "epoch": 1.53, "learning_rate": 2.5526483726866626e-06, "loss": 0.3994, "step": 3296 }, { "epoch": 1.53, "learning_rate": 2.5494575622208045e-06, "loss": 0.3623, "step": 3300 }, { "epoch": 1.53, "learning_rate": 2.546266751754946e-06, "loss": 0.5994, "step": 3304 }, { "epoch": 1.53, "learning_rate": 2.543075941289088e-06, "loss": 0.3717, "step": 3308 }, { "epoch": 1.54, "learning_rate": 2.5398851308232293e-06, "loss": 0.2424, "step": 3312 }, { "epoch": 1.54, "learning_rate": 2.5366943203573708e-06, "loss": 0.5083, "step": 3316 }, { "epoch": 1.54, "learning_rate": 2.5335035098915127e-06, "loss": 0.2865, "step": 3320 }, { "epoch": 1.54, "learning_rate": 2.5303126994256545e-06, "loss": 0.2184, "step": 3324 }, { "epoch": 1.54, "learning_rate": 2.527121888959796e-06, "loss": 0.5697, "step": 3328 }, { "epoch": 1.55, "learning_rate": 2.5239310784939375e-06, "loss": 0.3524, "step": 3332 }, { "epoch": 1.55, "learning_rate": 2.5207402680280794e-06, "loss": 0.3922, "step": 3336 }, { "epoch": 1.55, "learning_rate": 2.5175494575622213e-06, "loss": 0.3364, "step": 3340 }, { "epoch": 1.55, "learning_rate": 2.5143586470963627e-06, "loss": 0.3983, "step": 3344 }, { "epoch": 1.55, "learning_rate": 2.5111678366305042e-06, "loss": 0.3812, "step": 3348 }, { "epoch": 1.55, "learning_rate": 2.5079770261646457e-06, "loss": 0.3001, "step": 3352 }, { "epoch": 1.56, "learning_rate": 2.504786215698788e-06, "loss": 0.3159, "step": 3356 }, { "epoch": 1.56, "learning_rate": 2.5015954052329295e-06, "loss": 0.316, "step": 3360 }, { "epoch": 1.56, "learning_rate": 2.498404594767071e-06, "loss": 0.2347, "step": 3364 }, { "epoch": 1.56, "learning_rate": 2.495213784301213e-06, "loss": 0.4728, "step": 3368 }, { "epoch": 1.56, "learning_rate": 2.4920229738353543e-06, "loss": 0.4451, "step": 3372 }, { "epoch": 1.57, "learning_rate": 2.488832163369496e-06, "loss": 0.6876, "step": 3376 }, { "epoch": 1.57, "learning_rate": 2.4856413529036377e-06, "loss": 0.4799, "step": 3380 }, { "epoch": 1.57, "learning_rate": 2.482450542437779e-06, "loss": 0.3912, "step": 3384 }, { "epoch": 1.57, "learning_rate": 2.479259731971921e-06, "loss": 0.2295, "step": 3388 }, { "epoch": 1.57, "learning_rate": 2.4760689215060625e-06, "loss": 0.2529, "step": 3392 }, { "epoch": 1.58, "learning_rate": 2.4728781110402044e-06, "loss": 0.454, "step": 3396 }, { "epoch": 1.58, "learning_rate": 2.469687300574346e-06, "loss": 0.3894, "step": 3400 }, { "epoch": 1.58, "learning_rate": 2.4664964901084878e-06, "loss": 0.2908, "step": 3404 }, { "epoch": 1.58, "learning_rate": 2.4633056796426292e-06, "loss": 0.499, "step": 3408 }, { "epoch": 1.58, "learning_rate": 2.460114869176771e-06, "loss": 0.3336, "step": 3412 }, { "epoch": 1.58, "learning_rate": 2.4569240587109126e-06, "loss": 0.3016, "step": 3416 }, { "epoch": 1.59, "learning_rate": 2.4537332482450545e-06, "loss": 0.4519, "step": 3420 }, { "epoch": 1.59, "learning_rate": 2.450542437779196e-06, "loss": 0.2589, "step": 3424 }, { "epoch": 1.59, "learning_rate": 2.447351627313338e-06, "loss": 0.404, "step": 3428 }, { "epoch": 1.59, "learning_rate": 2.4441608168474793e-06, "loss": 0.335, "step": 3432 }, { "epoch": 1.59, "learning_rate": 2.4409700063816212e-06, "loss": 0.4312, "step": 3436 }, { "epoch": 1.6, "learning_rate": 2.437779195915763e-06, "loss": 0.2877, "step": 3440 }, { "epoch": 1.6, "learning_rate": 2.4345883854499046e-06, "loss": 0.3591, "step": 3444 }, { "epoch": 1.6, "learning_rate": 2.4313975749840465e-06, "loss": 0.3149, "step": 3448 }, { "epoch": 1.6, "learning_rate": 2.428206764518188e-06, "loss": 0.3785, "step": 3452 }, { "epoch": 1.6, "learning_rate": 2.4250159540523294e-06, "loss": 0.3654, "step": 3456 }, { "epoch": 1.6, "learning_rate": 2.4218251435864713e-06, "loss": 0.2894, "step": 3460 }, { "epoch": 1.61, "learning_rate": 2.4186343331206128e-06, "loss": 0.5198, "step": 3464 }, { "epoch": 1.61, "learning_rate": 2.4154435226547547e-06, "loss": 0.4666, "step": 3468 }, { "epoch": 1.61, "learning_rate": 2.412252712188896e-06, "loss": 0.3899, "step": 3472 }, { "epoch": 1.61, "learning_rate": 2.409061901723038e-06, "loss": 0.4248, "step": 3476 }, { "epoch": 1.61, "learning_rate": 2.4058710912571795e-06, "loss": 0.3144, "step": 3480 }, { "epoch": 1.62, "learning_rate": 2.4026802807913214e-06, "loss": 0.3294, "step": 3484 }, { "epoch": 1.62, "learning_rate": 2.399489470325463e-06, "loss": 0.3395, "step": 3488 }, { "epoch": 1.62, "learning_rate": 2.3962986598596043e-06, "loss": 0.4384, "step": 3492 }, { "epoch": 1.62, "learning_rate": 2.3931078493937462e-06, "loss": 0.3029, "step": 3496 }, { "epoch": 1.62, "learning_rate": 2.3899170389278877e-06, "loss": 0.3868, "step": 3500 }, { "epoch": 1.63, "learning_rate": 2.3867262284620296e-06, "loss": 0.233, "step": 3504 }, { "epoch": 1.63, "learning_rate": 2.383535417996171e-06, "loss": 0.4025, "step": 3508 }, { "epoch": 1.63, "learning_rate": 2.380344607530313e-06, "loss": 0.2714, "step": 3512 }, { "epoch": 1.63, "learning_rate": 2.3771537970644544e-06, "loss": 0.4694, "step": 3516 }, { "epoch": 1.63, "learning_rate": 2.3739629865985963e-06, "loss": 0.3092, "step": 3520 }, { "epoch": 1.63, "learning_rate": 2.370772176132738e-06, "loss": 0.3375, "step": 3524 }, { "epoch": 1.64, "learning_rate": 2.3675813656668793e-06, "loss": 0.2356, "step": 3528 }, { "epoch": 1.64, "learning_rate": 2.364390555201021e-06, "loss": 0.4403, "step": 3532 }, { "epoch": 1.64, "learning_rate": 2.3611997447351626e-06, "loss": 0.4015, "step": 3536 }, { "epoch": 1.64, "learning_rate": 2.3580089342693045e-06, "loss": 0.5201, "step": 3540 }, { "epoch": 1.64, "learning_rate": 2.3548181238034464e-06, "loss": 0.4203, "step": 3544 }, { "epoch": 1.65, "learning_rate": 2.351627313337588e-06, "loss": 0.4869, "step": 3548 }, { "epoch": 1.65, "learning_rate": 2.3484365028717298e-06, "loss": 0.3923, "step": 3552 }, { "epoch": 1.65, "learning_rate": 2.3452456924058712e-06, "loss": 0.6743, "step": 3556 }, { "epoch": 1.65, "learning_rate": 2.342054881940013e-06, "loss": 0.2588, "step": 3560 }, { "epoch": 1.65, "learning_rate": 2.3388640714741546e-06, "loss": 0.323, "step": 3564 }, { "epoch": 1.65, "learning_rate": 2.3356732610082965e-06, "loss": 0.2859, "step": 3568 }, { "epoch": 1.66, "learning_rate": 2.332482450542438e-06, "loss": 0.2747, "step": 3572 }, { "epoch": 1.66, "learning_rate": 2.32929164007658e-06, "loss": 0.2221, "step": 3576 }, { "epoch": 1.66, "learning_rate": 2.3261008296107213e-06, "loss": 0.3744, "step": 3580 }, { "epoch": 1.66, "learning_rate": 2.3229100191448632e-06, "loss": 0.3965, "step": 3584 }, { "epoch": 1.66, "learning_rate": 2.3197192086790047e-06, "loss": 0.4889, "step": 3588 }, { "epoch": 1.67, "learning_rate": 2.3165283982131466e-06, "loss": 0.4218, "step": 3592 }, { "epoch": 1.67, "learning_rate": 2.313337587747288e-06, "loss": 0.3016, "step": 3596 }, { "epoch": 1.67, "learning_rate": 2.3101467772814295e-06, "loss": 0.3408, "step": 3600 }, { "epoch": 1.67, "learning_rate": 2.3069559668155714e-06, "loss": 0.387, "step": 3604 }, { "epoch": 1.67, "learning_rate": 2.303765156349713e-06, "loss": 0.3845, "step": 3608 }, { "epoch": 1.68, "learning_rate": 2.300574345883855e-06, "loss": 0.2885, "step": 3612 }, { "epoch": 1.68, "learning_rate": 2.2973835354179963e-06, "loss": 0.1871, "step": 3616 }, { "epoch": 1.68, "learning_rate": 2.294192724952138e-06, "loss": 0.3516, "step": 3620 }, { "epoch": 1.68, "learning_rate": 2.2910019144862796e-06, "loss": 0.4165, "step": 3624 }, { "epoch": 1.68, "learning_rate": 2.2878111040204215e-06, "loss": 0.2891, "step": 3628 }, { "epoch": 1.68, "learning_rate": 2.284620293554563e-06, "loss": 0.3616, "step": 3632 }, { "epoch": 1.69, "learning_rate": 2.2814294830887045e-06, "loss": 0.4057, "step": 3636 }, { "epoch": 1.69, "learning_rate": 2.2782386726228464e-06, "loss": 0.5166, "step": 3640 }, { "epoch": 1.69, "learning_rate": 2.275047862156988e-06, "loss": 0.3279, "step": 3644 }, { "epoch": 1.69, "learning_rate": 2.2718570516911297e-06, "loss": 0.3537, "step": 3648 }, { "epoch": 1.69, "learning_rate": 2.268666241225271e-06, "loss": 0.3187, "step": 3652 }, { "epoch": 1.7, "learning_rate": 2.265475430759413e-06, "loss": 0.4043, "step": 3656 }, { "epoch": 1.7, "learning_rate": 2.2622846202935546e-06, "loss": 0.2799, "step": 3660 }, { "epoch": 1.7, "learning_rate": 2.2590938098276964e-06, "loss": 0.3363, "step": 3664 }, { "epoch": 1.7, "learning_rate": 2.255902999361838e-06, "loss": 0.6477, "step": 3668 }, { "epoch": 1.7, "learning_rate": 2.25271218889598e-06, "loss": 0.4967, "step": 3672 }, { "epoch": 1.71, "learning_rate": 2.2495213784301213e-06, "loss": 0.4474, "step": 3676 }, { "epoch": 1.71, "learning_rate": 2.246330567964263e-06, "loss": 0.2501, "step": 3680 }, { "epoch": 1.71, "learning_rate": 2.2431397574984046e-06, "loss": 0.3448, "step": 3684 }, { "epoch": 1.71, "learning_rate": 2.2399489470325465e-06, "loss": 0.3084, "step": 3688 }, { "epoch": 1.71, "learning_rate": 2.2367581365666884e-06, "loss": 0.3165, "step": 3692 }, { "epoch": 1.71, "learning_rate": 2.23356732610083e-06, "loss": 0.405, "step": 3696 }, { "epoch": 1.72, "learning_rate": 2.230376515634972e-06, "loss": 0.3648, "step": 3700 }, { "epoch": 1.72, "learning_rate": 2.2271857051691133e-06, "loss": 0.2938, "step": 3704 }, { "epoch": 1.72, "learning_rate": 2.2239948947032547e-06, "loss": 0.336, "step": 3708 }, { "epoch": 1.72, "learning_rate": 2.2208040842373966e-06, "loss": 0.4741, "step": 3712 }, { "epoch": 1.72, "learning_rate": 2.217613273771538e-06, "loss": 0.4006, "step": 3716 }, { "epoch": 1.73, "learning_rate": 2.21442246330568e-06, "loss": 0.3443, "step": 3720 }, { "epoch": 1.73, "learning_rate": 2.2112316528398215e-06, "loss": 0.2771, "step": 3724 }, { "epoch": 1.73, "learning_rate": 2.2080408423739634e-06, "loss": 0.2515, "step": 3728 }, { "epoch": 1.73, "learning_rate": 2.204850031908105e-06, "loss": 0.3897, "step": 3732 }, { "epoch": 1.73, "learning_rate": 2.2016592214422467e-06, "loss": 0.182, "step": 3736 }, { "epoch": 1.73, "learning_rate": 2.198468410976388e-06, "loss": 0.3575, "step": 3740 }, { "epoch": 1.74, "learning_rate": 2.1952776005105297e-06, "loss": 0.3662, "step": 3744 }, { "epoch": 1.74, "learning_rate": 2.1920867900446716e-06, "loss": 0.4394, "step": 3748 }, { "epoch": 1.74, "learning_rate": 2.188895979578813e-06, "loss": 0.3541, "step": 3752 }, { "epoch": 1.74, "learning_rate": 2.185705169112955e-06, "loss": 0.3837, "step": 3756 }, { "epoch": 1.74, "learning_rate": 2.1825143586470964e-06, "loss": 0.2765, "step": 3760 }, { "epoch": 1.75, "learning_rate": 2.1793235481812383e-06, "loss": 0.3349, "step": 3764 }, { "epoch": 1.75, "learning_rate": 2.1761327377153797e-06, "loss": 0.3141, "step": 3768 }, { "epoch": 1.75, "learning_rate": 2.1729419272495216e-06, "loss": 0.3836, "step": 3772 }, { "epoch": 1.75, "learning_rate": 2.169751116783663e-06, "loss": 0.417, "step": 3776 }, { "epoch": 1.75, "learning_rate": 2.1665603063178046e-06, "loss": 0.339, "step": 3780 }, { "epoch": 1.76, "learning_rate": 2.1633694958519465e-06, "loss": 0.4287, "step": 3784 }, { "epoch": 1.76, "learning_rate": 2.1601786853860884e-06, "loss": 0.3423, "step": 3788 }, { "epoch": 1.76, "learning_rate": 2.15698787492023e-06, "loss": 0.3367, "step": 3792 }, { "epoch": 1.76, "learning_rate": 2.1537970644543717e-06, "loss": 0.2519, "step": 3796 }, { "epoch": 1.76, "learning_rate": 2.150606253988513e-06, "loss": 0.3884, "step": 3800 }, { "epoch": 1.76, "learning_rate": 2.147415443522655e-06, "loss": 0.2767, "step": 3804 }, { "epoch": 1.77, "learning_rate": 2.1442246330567966e-06, "loss": 0.3162, "step": 3808 }, { "epoch": 1.77, "learning_rate": 2.1410338225909385e-06, "loss": 0.3722, "step": 3812 }, { "epoch": 1.77, "learning_rate": 2.13784301212508e-06, "loss": 0.462, "step": 3816 }, { "epoch": 1.77, "learning_rate": 2.134652201659222e-06, "loss": 0.4508, "step": 3820 }, { "epoch": 1.77, "learning_rate": 2.1314613911933633e-06, "loss": 0.309, "step": 3824 }, { "epoch": 1.78, "learning_rate": 2.128270580727505e-06, "loss": 0.4566, "step": 3828 }, { "epoch": 1.78, "learning_rate": 2.1250797702616467e-06, "loss": 0.3216, "step": 3832 }, { "epoch": 1.78, "learning_rate": 2.1218889597957886e-06, "loss": 0.4669, "step": 3836 }, { "epoch": 1.78, "learning_rate": 2.11869814932993e-06, "loss": 0.4764, "step": 3840 }, { "epoch": 1.78, "learning_rate": 2.115507338864072e-06, "loss": 0.3011, "step": 3844 }, { "epoch": 1.78, "learning_rate": 2.1123165283982134e-06, "loss": 0.3308, "step": 3848 }, { "epoch": 1.79, "learning_rate": 2.109125717932355e-06, "loss": 0.4038, "step": 3852 }, { "epoch": 1.79, "learning_rate": 2.1059349074664967e-06, "loss": 0.2768, "step": 3856 }, { "epoch": 1.79, "learning_rate": 2.1027440970006382e-06, "loss": 0.374, "step": 3860 }, { "epoch": 1.79, "learning_rate": 2.09955328653478e-06, "loss": 0.3393, "step": 3864 }, { "epoch": 1.79, "learning_rate": 2.0963624760689216e-06, "loss": 0.3846, "step": 3868 }, { "epoch": 1.8, "learning_rate": 2.0931716656030635e-06, "loss": 0.308, "step": 3872 }, { "epoch": 1.8, "learning_rate": 2.089980855137205e-06, "loss": 0.4816, "step": 3876 }, { "epoch": 1.8, "learning_rate": 2.086790044671347e-06, "loss": 0.2121, "step": 3880 }, { "epoch": 1.8, "learning_rate": 2.0835992342054883e-06, "loss": 0.3698, "step": 3884 }, { "epoch": 1.8, "learning_rate": 2.0804084237396298e-06, "loss": 0.3615, "step": 3888 }, { "epoch": 1.81, "learning_rate": 2.0772176132737717e-06, "loss": 0.2294, "step": 3892 }, { "epoch": 1.81, "learning_rate": 2.074026802807913e-06, "loss": 0.2515, "step": 3896 }, { "epoch": 1.81, "learning_rate": 2.070835992342055e-06, "loss": 0.3559, "step": 3900 }, { "epoch": 1.81, "learning_rate": 2.0676451818761965e-06, "loss": 0.4243, "step": 3904 }, { "epoch": 1.81, "learning_rate": 2.0644543714103384e-06, "loss": 0.3622, "step": 3908 }, { "epoch": 1.81, "learning_rate": 2.06126356094448e-06, "loss": 0.5588, "step": 3912 }, { "epoch": 1.82, "learning_rate": 2.0580727504786218e-06, "loss": 0.2169, "step": 3916 }, { "epoch": 1.82, "learning_rate": 2.0548819400127632e-06, "loss": 0.4732, "step": 3920 }, { "epoch": 1.82, "learning_rate": 2.051691129546905e-06, "loss": 0.2331, "step": 3924 }, { "epoch": 1.82, "learning_rate": 2.0485003190810466e-06, "loss": 0.3388, "step": 3928 }, { "epoch": 1.82, "learning_rate": 2.0453095086151885e-06, "loss": 0.4545, "step": 3932 }, { "epoch": 1.83, "learning_rate": 2.04211869814933e-06, "loss": 0.3886, "step": 3936 }, { "epoch": 1.83, "learning_rate": 2.038927887683472e-06, "loss": 0.2233, "step": 3940 }, { "epoch": 1.83, "learning_rate": 2.0357370772176138e-06, "loss": 0.3658, "step": 3944 }, { "epoch": 1.83, "learning_rate": 2.0325462667517552e-06, "loss": 0.3229, "step": 3948 }, { "epoch": 1.83, "learning_rate": 2.029355456285897e-06, "loss": 0.1759, "step": 3952 }, { "epoch": 1.83, "learning_rate": 2.0261646458200386e-06, "loss": 0.3737, "step": 3956 }, { "epoch": 1.84, "learning_rate": 2.02297383535418e-06, "loss": 0.3362, "step": 3960 }, { "epoch": 1.84, "learning_rate": 2.019783024888322e-06, "loss": 0.2873, "step": 3964 }, { "epoch": 1.84, "learning_rate": 2.0165922144224634e-06, "loss": 0.3454, "step": 3968 }, { "epoch": 1.84, "learning_rate": 2.0134014039566053e-06, "loss": 0.3428, "step": 3972 }, { "epoch": 1.84, "learning_rate": 2.0102105934907468e-06, "loss": 0.4089, "step": 3976 }, { "epoch": 1.85, "learning_rate": 2.0070197830248887e-06, "loss": 0.3472, "step": 3980 }, { "epoch": 1.85, "learning_rate": 2.00382897255903e-06, "loss": 0.2868, "step": 3984 }, { "epoch": 1.85, "learning_rate": 2.000638162093172e-06, "loss": 0.3088, "step": 3988 }, { "epoch": 1.85, "learning_rate": 1.9974473516273135e-06, "loss": 0.2471, "step": 3992 }, { "epoch": 1.85, "learning_rate": 1.994256541161455e-06, "loss": 0.2816, "step": 3996 }, { "epoch": 1.86, "learning_rate": 1.991065730695597e-06, "loss": 0.3135, "step": 4000 }, { "epoch": 1.86, "learning_rate": 1.9878749202297383e-06, "loss": 0.379, "step": 4004 }, { "epoch": 1.86, "learning_rate": 1.9846841097638802e-06, "loss": 0.5225, "step": 4008 }, { "epoch": 1.86, "learning_rate": 1.9814932992980217e-06, "loss": 0.3229, "step": 4012 }, { "epoch": 1.86, "learning_rate": 1.9783024888321636e-06, "loss": 0.3573, "step": 4016 }, { "epoch": 1.86, "learning_rate": 1.975111678366305e-06, "loss": 0.2219, "step": 4020 }, { "epoch": 1.87, "learning_rate": 1.971920867900447e-06, "loss": 0.2133, "step": 4024 }, { "epoch": 1.87, "learning_rate": 1.9687300574345884e-06, "loss": 0.4303, "step": 4028 }, { "epoch": 1.87, "learning_rate": 1.96553924696873e-06, "loss": 0.4735, "step": 4032 }, { "epoch": 1.87, "learning_rate": 1.9631461391193364e-06, "loss": 0.3223, "step": 4036 }, { "epoch": 1.87, "learning_rate": 1.959955328653478e-06, "loss": 0.3124, "step": 4040 }, { "epoch": 1.88, "learning_rate": 1.95676451818762e-06, "loss": 0.4547, "step": 4044 }, { "epoch": 1.88, "learning_rate": 1.9535737077217613e-06, "loss": 0.3089, "step": 4048 }, { "epoch": 1.88, "learning_rate": 1.950382897255903e-06, "loss": 0.344, "step": 4052 }, { "epoch": 1.88, "learning_rate": 1.947192086790045e-06, "loss": 0.1488, "step": 4056 }, { "epoch": 1.88, "learning_rate": 1.9440012763241865e-06, "loss": 0.4715, "step": 4060 }, { "epoch": 1.88, "learning_rate": 1.9408104658583284e-06, "loss": 0.2866, "step": 4064 }, { "epoch": 1.89, "learning_rate": 1.93761965539247e-06, "loss": 0.3207, "step": 4068 }, { "epoch": 1.89, "learning_rate": 1.9344288449266118e-06, "loss": 0.3532, "step": 4072 }, { "epoch": 1.89, "learning_rate": 1.9312380344607532e-06, "loss": 0.3416, "step": 4076 }, { "epoch": 1.89, "learning_rate": 1.928047223994895e-06, "loss": 0.6239, "step": 4080 }, { "epoch": 1.89, "learning_rate": 1.9248564135290366e-06, "loss": 0.1806, "step": 4084 }, { "epoch": 1.9, "learning_rate": 1.9216656030631785e-06, "loss": 0.3065, "step": 4088 }, { "epoch": 1.9, "learning_rate": 1.91847479259732e-06, "loss": 0.2393, "step": 4092 }, { "epoch": 1.9, "learning_rate": 1.9152839821314614e-06, "loss": 0.4581, "step": 4096 }, { "epoch": 1.9, "learning_rate": 1.9120931716656033e-06, "loss": 0.2407, "step": 4100 }, { "epoch": 1.9, "learning_rate": 1.908902361199745e-06, "loss": 0.3328, "step": 4104 }, { "epoch": 1.91, "learning_rate": 1.9057115507338867e-06, "loss": 0.2898, "step": 4108 }, { "epoch": 1.91, "learning_rate": 1.9025207402680282e-06, "loss": 0.5888, "step": 4112 }, { "epoch": 1.91, "learning_rate": 1.89932992980217e-06, "loss": 0.3909, "step": 4116 }, { "epoch": 1.91, "learning_rate": 1.8961391193363115e-06, "loss": 0.2613, "step": 4120 }, { "epoch": 1.91, "learning_rate": 1.8929483088704534e-06, "loss": 0.2594, "step": 4124 }, { "epoch": 1.91, "learning_rate": 1.889757498404595e-06, "loss": 0.3601, "step": 4128 }, { "epoch": 1.92, "learning_rate": 1.8865666879387366e-06, "loss": 0.1791, "step": 4132 }, { "epoch": 1.92, "learning_rate": 1.8833758774728783e-06, "loss": 0.3714, "step": 4136 }, { "epoch": 1.92, "learning_rate": 1.88018506700702e-06, "loss": 0.3601, "step": 4140 }, { "epoch": 1.92, "learning_rate": 1.8769942565411616e-06, "loss": 0.4697, "step": 4144 }, { "epoch": 1.92, "learning_rate": 1.8738034460753033e-06, "loss": 0.4277, "step": 4148 }, { "epoch": 1.93, "learning_rate": 1.870612635609445e-06, "loss": 0.4183, "step": 4152 }, { "epoch": 1.93, "learning_rate": 1.8674218251435867e-06, "loss": 0.2764, "step": 4156 }, { "epoch": 1.93, "learning_rate": 1.8642310146777281e-06, "loss": 0.3209, "step": 4160 }, { "epoch": 1.93, "learning_rate": 1.86104020421187e-06, "loss": 0.328, "step": 4164 }, { "epoch": 1.93, "learning_rate": 1.8578493937460115e-06, "loss": 0.3673, "step": 4168 }, { "epoch": 1.94, "learning_rate": 1.8546585832801534e-06, "loss": 0.2856, "step": 4172 }, { "epoch": 1.94, "learning_rate": 1.8514677728142949e-06, "loss": 0.4248, "step": 4176 }, { "epoch": 1.94, "learning_rate": 1.8482769623484368e-06, "loss": 0.419, "step": 4180 }, { "epoch": 1.94, "learning_rate": 1.8450861518825782e-06, "loss": 0.3315, "step": 4184 }, { "epoch": 1.94, "learning_rate": 1.8418953414167201e-06, "loss": 0.3508, "step": 4188 }, { "epoch": 1.94, "learning_rate": 1.8387045309508616e-06, "loss": 0.2016, "step": 4192 }, { "epoch": 1.95, "learning_rate": 1.8355137204850033e-06, "loss": 0.2352, "step": 4196 }, { "epoch": 1.95, "learning_rate": 1.832322910019145e-06, "loss": 0.4638, "step": 4200 }, { "epoch": 1.95, "learning_rate": 1.8291320995532866e-06, "loss": 0.4352, "step": 4204 }, { "epoch": 1.95, "learning_rate": 1.8259412890874283e-06, "loss": 0.4832, "step": 4208 }, { "epoch": 1.95, "learning_rate": 1.82275047862157e-06, "loss": 0.295, "step": 4212 }, { "epoch": 1.96, "learning_rate": 1.8195596681557117e-06, "loss": 0.3176, "step": 4216 }, { "epoch": 1.96, "learning_rate": 1.8163688576898534e-06, "loss": 0.0922, "step": 4220 }, { "epoch": 1.96, "learning_rate": 1.813178047223995e-06, "loss": 0.2375, "step": 4224 }, { "epoch": 1.96, "learning_rate": 1.8099872367581367e-06, "loss": 0.3374, "step": 4228 }, { "epoch": 1.96, "learning_rate": 1.8067964262922782e-06, "loss": 0.2551, "step": 4232 }, { "epoch": 1.96, "learning_rate": 1.80360561582642e-06, "loss": 0.3228, "step": 4236 }, { "epoch": 1.97, "learning_rate": 1.8004148053605616e-06, "loss": 0.3102, "step": 4240 }, { "epoch": 1.97, "learning_rate": 1.7972239948947035e-06, "loss": 0.2471, "step": 4244 }, { "epoch": 1.97, "learning_rate": 1.794033184428845e-06, "loss": 0.285, "step": 4248 }, { "epoch": 1.97, "learning_rate": 1.7908423739629868e-06, "loss": 0.3468, "step": 4252 }, { "epoch": 1.97, "learning_rate": 1.7876515634971283e-06, "loss": 0.2877, "step": 4256 }, { "epoch": 1.98, "learning_rate": 1.7844607530312702e-06, "loss": 0.4362, "step": 4260 }, { "epoch": 1.98, "learning_rate": 1.7812699425654117e-06, "loss": 0.1789, "step": 4264 }, { "epoch": 1.98, "learning_rate": 1.7780791320995533e-06, "loss": 0.3056, "step": 4268 }, { "epoch": 1.98, "learning_rate": 1.774888321633695e-06, "loss": 0.478, "step": 4272 }, { "epoch": 1.98, "learning_rate": 1.7716975111678367e-06, "loss": 0.3405, "step": 4276 }, { "epoch": 1.99, "learning_rate": 1.7685067007019786e-06, "loss": 0.2038, "step": 4280 }, { "epoch": 1.99, "learning_rate": 1.76531589023612e-06, "loss": 0.2301, "step": 4284 }, { "epoch": 1.99, "learning_rate": 1.762125079770262e-06, "loss": 0.3283, "step": 4288 }, { "epoch": 1.99, "learning_rate": 1.7589342693044034e-06, "loss": 0.1711, "step": 4292 }, { "epoch": 1.99, "learning_rate": 1.7557434588385453e-06, "loss": 0.241, "step": 4296 }, { "epoch": 1.99, "learning_rate": 1.7525526483726868e-06, "loss": 0.2408, "step": 4300 } ], "logging_steps": 4, "max_steps": 6468, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 43550404509696.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }