{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9980787704130645, "eval_steps": 500, "global_step": 845, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 3.921568627450981e-07, "loss": 2.8563, "step": 1 }, { "epoch": 0.0, "learning_rate": 7.843137254901962e-07, "loss": 2.8115, "step": 2 }, { "epoch": 0.01, "learning_rate": 1.1764705882352942e-06, "loss": 3.2218, "step": 3 }, { "epoch": 0.01, "learning_rate": 1.5686274509803923e-06, "loss": 2.4117, "step": 4 }, { "epoch": 0.01, "learning_rate": 1.96078431372549e-06, "loss": 2.3211, "step": 5 }, { "epoch": 0.01, "learning_rate": 2.3529411764705885e-06, "loss": 4.132, "step": 6 }, { "epoch": 0.02, "learning_rate": 2.7450980392156867e-06, "loss": 1.6698, "step": 7 }, { "epoch": 0.02, "learning_rate": 3.1372549019607846e-06, "loss": 1.5128, "step": 8 }, { "epoch": 0.02, "learning_rate": 3.529411764705883e-06, "loss": 1.1126, "step": 9 }, { "epoch": 0.02, "learning_rate": 3.92156862745098e-06, "loss": 1.2495, "step": 10 }, { "epoch": 0.03, "learning_rate": 4.313725490196079e-06, "loss": 1.2488, "step": 11 }, { "epoch": 0.03, "learning_rate": 4.705882352941177e-06, "loss": 0.9805, "step": 12 }, { "epoch": 0.03, "learning_rate": 5.098039215686274e-06, "loss": 1.4122, "step": 13 }, { "epoch": 0.03, "learning_rate": 5.4901960784313735e-06, "loss": 1.2458, "step": 14 }, { "epoch": 0.04, "learning_rate": 5.882352941176471e-06, "loss": 1.1085, "step": 15 }, { "epoch": 0.04, "learning_rate": 6.274509803921569e-06, "loss": 1.2776, "step": 16 }, { "epoch": 0.04, "learning_rate": 6.666666666666667e-06, "loss": 1.1455, "step": 17 }, { "epoch": 0.04, "learning_rate": 7.058823529411766e-06, "loss": 2.7929, "step": 18 }, { "epoch": 0.04, "learning_rate": 7.450980392156863e-06, "loss": 1.0727, "step": 19 }, { "epoch": 0.05, "learning_rate": 7.84313725490196e-06, "loss": 1.0432, "step": 20 }, { "epoch": 0.05, "learning_rate": 8.23529411764706e-06, "loss": 1.3406, "step": 21 }, { "epoch": 0.05, "learning_rate": 8.627450980392157e-06, "loss": 1.2187, "step": 22 }, { "epoch": 0.05, "learning_rate": 9.019607843137256e-06, "loss": 1.2385, "step": 23 }, { "epoch": 0.06, "learning_rate": 9.411764705882354e-06, "loss": 1.0816, "step": 24 }, { "epoch": 0.06, "learning_rate": 9.803921568627451e-06, "loss": 1.2362, "step": 25 }, { "epoch": 0.06, "learning_rate": 1.0196078431372549e-05, "loss": 1.0961, "step": 26 }, { "epoch": 0.06, "learning_rate": 1.0588235294117648e-05, "loss": 3.3536, "step": 27 }, { "epoch": 0.07, "learning_rate": 1.0980392156862747e-05, "loss": 1.2555, "step": 28 }, { "epoch": 0.07, "learning_rate": 1.1372549019607844e-05, "loss": 1.2809, "step": 29 }, { "epoch": 0.07, "learning_rate": 1.1764705882352942e-05, "loss": 1.112, "step": 30 }, { "epoch": 0.07, "learning_rate": 1.215686274509804e-05, "loss": 0.9778, "step": 31 }, { "epoch": 0.08, "learning_rate": 1.2549019607843138e-05, "loss": 1.0019, "step": 32 }, { "epoch": 0.08, "learning_rate": 1.2941176470588238e-05, "loss": 1.111, "step": 33 }, { "epoch": 0.08, "learning_rate": 1.3333333333333333e-05, "loss": 1.6903, "step": 34 }, { "epoch": 0.08, "learning_rate": 1.3725490196078432e-05, "loss": 1.3078, "step": 35 }, { "epoch": 0.09, "learning_rate": 1.4117647058823532e-05, "loss": 1.1624, "step": 36 }, { "epoch": 0.09, "learning_rate": 1.4509803921568629e-05, "loss": 1.0753, "step": 37 }, { "epoch": 0.09, "learning_rate": 1.4901960784313726e-05, "loss": 1.0034, "step": 38 }, { "epoch": 0.09, "learning_rate": 1.5294117647058822e-05, "loss": 0.9425, "step": 39 }, { "epoch": 0.09, "learning_rate": 1.568627450980392e-05, "loss": 1.1524, "step": 40 }, { "epoch": 0.1, "learning_rate": 1.607843137254902e-05, "loss": 1.4117, "step": 41 }, { "epoch": 0.1, "learning_rate": 1.647058823529412e-05, "loss": 0.912, "step": 42 }, { "epoch": 0.1, "learning_rate": 1.686274509803922e-05, "loss": 1.1033, "step": 43 }, { "epoch": 0.1, "learning_rate": 1.7254901960784314e-05, "loss": 0.8739, "step": 44 }, { "epoch": 0.11, "learning_rate": 1.7647058823529414e-05, "loss": 1.0451, "step": 45 }, { "epoch": 0.11, "learning_rate": 1.8039215686274513e-05, "loss": 0.9691, "step": 46 }, { "epoch": 0.11, "learning_rate": 1.843137254901961e-05, "loss": 1.0241, "step": 47 }, { "epoch": 0.11, "learning_rate": 1.8823529411764708e-05, "loss": 1.008, "step": 48 }, { "epoch": 0.12, "learning_rate": 1.9215686274509807e-05, "loss": 1.23, "step": 49 }, { "epoch": 0.12, "learning_rate": 1.9607843137254903e-05, "loss": 1.0657, "step": 50 }, { "epoch": 0.12, "learning_rate": 2e-05, "loss": 0.9921, "step": 51 }, { "epoch": 0.12, "learning_rate": 1.9987782529016497e-05, "loss": 1.2391, "step": 52 }, { "epoch": 0.13, "learning_rate": 1.997556505803299e-05, "loss": 1.0503, "step": 53 }, { "epoch": 0.13, "learning_rate": 1.9963347587049484e-05, "loss": 1.0485, "step": 54 }, { "epoch": 0.13, "learning_rate": 1.9951130116065975e-05, "loss": 1.015, "step": 55 }, { "epoch": 0.13, "learning_rate": 1.993891264508247e-05, "loss": 0.9249, "step": 56 }, { "epoch": 0.13, "learning_rate": 1.9926695174098962e-05, "loss": 0.9354, "step": 57 }, { "epoch": 0.14, "learning_rate": 1.9914477703115457e-05, "loss": 1.0506, "step": 58 }, { "epoch": 0.14, "learning_rate": 1.9902260232131952e-05, "loss": 1.1836, "step": 59 }, { "epoch": 0.14, "learning_rate": 1.9890042761148444e-05, "loss": 1.1941, "step": 60 }, { "epoch": 0.14, "learning_rate": 1.987782529016494e-05, "loss": 1.0519, "step": 61 }, { "epoch": 0.15, "learning_rate": 1.986560781918143e-05, "loss": 1.1899, "step": 62 }, { "epoch": 0.15, "learning_rate": 1.9853390348197926e-05, "loss": 1.0649, "step": 63 }, { "epoch": 0.15, "learning_rate": 1.9841172877214418e-05, "loss": 1.2611, "step": 64 }, { "epoch": 0.15, "learning_rate": 1.9828955406230913e-05, "loss": 1.352, "step": 65 }, { "epoch": 0.16, "learning_rate": 1.9816737935247404e-05, "loss": 1.0917, "step": 66 }, { "epoch": 0.16, "learning_rate": 1.98045204642639e-05, "loss": 1.0388, "step": 67 }, { "epoch": 0.16, "learning_rate": 1.979230299328039e-05, "loss": 1.0571, "step": 68 }, { "epoch": 0.16, "learning_rate": 1.9780085522296886e-05, "loss": 1.3069, "step": 69 }, { "epoch": 0.17, "learning_rate": 1.9767868051313378e-05, "loss": 1.8331, "step": 70 }, { "epoch": 0.17, "learning_rate": 1.9755650580329873e-05, "loss": 0.9785, "step": 71 }, { "epoch": 0.17, "learning_rate": 1.9743433109346365e-05, "loss": 1.1313, "step": 72 }, { "epoch": 0.17, "learning_rate": 1.973121563836286e-05, "loss": 0.9897, "step": 73 }, { "epoch": 0.17, "learning_rate": 1.9718998167379355e-05, "loss": 1.2494, "step": 74 }, { "epoch": 0.18, "learning_rate": 1.970678069639585e-05, "loss": 0.9476, "step": 75 }, { "epoch": 0.18, "learning_rate": 1.9694563225412342e-05, "loss": 1.2027, "step": 76 }, { "epoch": 0.18, "learning_rate": 1.9682345754428837e-05, "loss": 1.2106, "step": 77 }, { "epoch": 0.18, "learning_rate": 1.967012828344533e-05, "loss": 0.9073, "step": 78 }, { "epoch": 0.19, "learning_rate": 1.9657910812461824e-05, "loss": 1.1522, "step": 79 }, { "epoch": 0.19, "learning_rate": 1.9645693341478315e-05, "loss": 1.0476, "step": 80 }, { "epoch": 0.19, "learning_rate": 1.963347587049481e-05, "loss": 0.9493, "step": 81 }, { "epoch": 0.19, "learning_rate": 1.9621258399511302e-05, "loss": 0.9964, "step": 82 }, { "epoch": 0.2, "learning_rate": 1.9609040928527797e-05, "loss": 0.8192, "step": 83 }, { "epoch": 0.2, "learning_rate": 1.959682345754429e-05, "loss": 1.1224, "step": 84 }, { "epoch": 0.2, "learning_rate": 1.9584605986560784e-05, "loss": 1.0489, "step": 85 }, { "epoch": 0.2, "learning_rate": 1.9572388515577276e-05, "loss": 1.0041, "step": 86 }, { "epoch": 0.21, "learning_rate": 1.956017104459377e-05, "loss": 0.9065, "step": 87 }, { "epoch": 0.21, "learning_rate": 1.9547953573610263e-05, "loss": 1.7173, "step": 88 }, { "epoch": 0.21, "learning_rate": 1.9535736102626758e-05, "loss": 1.2975, "step": 89 }, { "epoch": 0.21, "learning_rate": 1.9523518631643253e-05, "loss": 0.8456, "step": 90 }, { "epoch": 0.22, "learning_rate": 1.9511301160659744e-05, "loss": 1.1463, "step": 91 }, { "epoch": 0.22, "learning_rate": 1.949908368967624e-05, "loss": 1.1324, "step": 92 }, { "epoch": 0.22, "learning_rate": 1.948686621869273e-05, "loss": 1.0128, "step": 93 }, { "epoch": 0.22, "learning_rate": 1.9474648747709226e-05, "loss": 1.0593, "step": 94 }, { "epoch": 0.22, "learning_rate": 1.9462431276725718e-05, "loss": 0.9871, "step": 95 }, { "epoch": 0.23, "learning_rate": 1.9450213805742213e-05, "loss": 1.3373, "step": 96 }, { "epoch": 0.23, "learning_rate": 1.9437996334758705e-05, "loss": 1.1009, "step": 97 }, { "epoch": 0.23, "learning_rate": 1.94257788637752e-05, "loss": 1.1652, "step": 98 }, { "epoch": 0.23, "learning_rate": 1.941356139279169e-05, "loss": 0.855, "step": 99 }, { "epoch": 0.24, "learning_rate": 1.9401343921808187e-05, "loss": 1.1619, "step": 100 }, { "epoch": 0.24, "learning_rate": 1.938912645082468e-05, "loss": 0.883, "step": 101 }, { "epoch": 0.24, "learning_rate": 1.9376908979841174e-05, "loss": 1.2267, "step": 102 }, { "epoch": 0.24, "learning_rate": 1.936469150885767e-05, "loss": 0.8898, "step": 103 }, { "epoch": 0.25, "learning_rate": 1.9352474037874164e-05, "loss": 0.9406, "step": 104 }, { "epoch": 0.25, "learning_rate": 1.9340256566890655e-05, "loss": 1.0517, "step": 105 }, { "epoch": 0.25, "learning_rate": 1.932803909590715e-05, "loss": 1.0964, "step": 106 }, { "epoch": 0.25, "learning_rate": 1.9315821624923642e-05, "loss": 0.9225, "step": 107 }, { "epoch": 0.26, "learning_rate": 1.9303604153940137e-05, "loss": 1.0517, "step": 108 }, { "epoch": 0.26, "learning_rate": 1.929138668295663e-05, "loss": 1.0929, "step": 109 }, { "epoch": 0.26, "learning_rate": 1.9279169211973124e-05, "loss": 1.0169, "step": 110 }, { "epoch": 0.26, "learning_rate": 1.9266951740989616e-05, "loss": 0.9908, "step": 111 }, { "epoch": 0.26, "learning_rate": 1.925473427000611e-05, "loss": 0.9025, "step": 112 }, { "epoch": 0.27, "learning_rate": 1.9242516799022603e-05, "loss": 1.2115, "step": 113 }, { "epoch": 0.27, "learning_rate": 1.9230299328039098e-05, "loss": 1.0489, "step": 114 }, { "epoch": 0.27, "learning_rate": 1.921808185705559e-05, "loss": 0.9932, "step": 115 }, { "epoch": 0.27, "learning_rate": 1.9205864386072085e-05, "loss": 1.0111, "step": 116 }, { "epoch": 0.28, "learning_rate": 1.9193646915088576e-05, "loss": 1.4229, "step": 117 }, { "epoch": 0.28, "learning_rate": 1.918142944410507e-05, "loss": 1.1299, "step": 118 }, { "epoch": 0.28, "learning_rate": 1.9169211973121567e-05, "loss": 1.1633, "step": 119 }, { "epoch": 0.28, "learning_rate": 1.9156994502138058e-05, "loss": 0.9408, "step": 120 }, { "epoch": 0.29, "learning_rate": 1.9144777031154553e-05, "loss": 1.0087, "step": 121 }, { "epoch": 0.29, "learning_rate": 1.913255956017105e-05, "loss": 0.9289, "step": 122 }, { "epoch": 0.29, "learning_rate": 1.912034208918754e-05, "loss": 1.1067, "step": 123 }, { "epoch": 0.29, "learning_rate": 1.9108124618204035e-05, "loss": 1.2101, "step": 124 }, { "epoch": 0.3, "learning_rate": 1.9095907147220527e-05, "loss": 0.9399, "step": 125 }, { "epoch": 0.3, "learning_rate": 1.9083689676237022e-05, "loss": 1.0829, "step": 126 }, { "epoch": 0.3, "learning_rate": 1.9071472205253514e-05, "loss": 1.1841, "step": 127 }, { "epoch": 0.3, "learning_rate": 1.905925473427001e-05, "loss": 0.9907, "step": 128 }, { "epoch": 0.31, "learning_rate": 1.90470372632865e-05, "loss": 1.0977, "step": 129 }, { "epoch": 0.31, "learning_rate": 1.9034819792302996e-05, "loss": 1.0854, "step": 130 }, { "epoch": 0.31, "learning_rate": 1.9022602321319487e-05, "loss": 0.9287, "step": 131 }, { "epoch": 0.31, "learning_rate": 1.9010384850335982e-05, "loss": 0.7713, "step": 132 }, { "epoch": 0.31, "learning_rate": 1.8998167379352474e-05, "loss": 0.8631, "step": 133 }, { "epoch": 0.32, "learning_rate": 1.898594990836897e-05, "loss": 0.9045, "step": 134 }, { "epoch": 0.32, "learning_rate": 1.8973732437385464e-05, "loss": 0.7478, "step": 135 }, { "epoch": 0.32, "learning_rate": 1.8961514966401956e-05, "loss": 1.2063, "step": 136 }, { "epoch": 0.32, "learning_rate": 1.894929749541845e-05, "loss": 0.9594, "step": 137 }, { "epoch": 0.33, "learning_rate": 1.8937080024434943e-05, "loss": 1.0198, "step": 138 }, { "epoch": 0.33, "learning_rate": 1.8924862553451438e-05, "loss": 0.9273, "step": 139 }, { "epoch": 0.33, "learning_rate": 1.891264508246793e-05, "loss": 0.8572, "step": 140 }, { "epoch": 0.33, "learning_rate": 1.8900427611484425e-05, "loss": 1.0327, "step": 141 }, { "epoch": 0.34, "learning_rate": 1.8888210140500916e-05, "loss": 0.7909, "step": 142 }, { "epoch": 0.34, "learning_rate": 1.887599266951741e-05, "loss": 0.9771, "step": 143 }, { "epoch": 0.34, "learning_rate": 1.8863775198533903e-05, "loss": 1.7899, "step": 144 }, { "epoch": 0.34, "learning_rate": 1.88515577275504e-05, "loss": 0.8969, "step": 145 }, { "epoch": 0.35, "learning_rate": 1.883934025656689e-05, "loss": 0.9746, "step": 146 }, { "epoch": 0.35, "learning_rate": 1.8827122785583385e-05, "loss": 0.7487, "step": 147 }, { "epoch": 0.35, "learning_rate": 1.8814905314599877e-05, "loss": 0.9903, "step": 148 }, { "epoch": 0.35, "learning_rate": 1.8802687843616375e-05, "loss": 0.9306, "step": 149 }, { "epoch": 0.35, "learning_rate": 1.8790470372632867e-05, "loss": 0.8955, "step": 150 }, { "epoch": 0.36, "learning_rate": 1.8778252901649362e-05, "loss": 0.8673, "step": 151 }, { "epoch": 0.36, "learning_rate": 1.8766035430665854e-05, "loss": 1.12, "step": 152 }, { "epoch": 0.36, "learning_rate": 1.875381795968235e-05, "loss": 0.7883, "step": 153 }, { "epoch": 0.36, "learning_rate": 1.874160048869884e-05, "loss": 0.9563, "step": 154 }, { "epoch": 0.37, "learning_rate": 1.8729383017715336e-05, "loss": 1.0749, "step": 155 }, { "epoch": 0.37, "learning_rate": 1.8717165546731827e-05, "loss": 1.2149, "step": 156 }, { "epoch": 0.37, "learning_rate": 1.8704948075748323e-05, "loss": 1.4591, "step": 157 }, { "epoch": 0.37, "learning_rate": 1.8692730604764814e-05, "loss": 1.2051, "step": 158 }, { "epoch": 0.38, "learning_rate": 1.868051313378131e-05, "loss": 0.9812, "step": 159 }, { "epoch": 0.38, "learning_rate": 1.86682956627978e-05, "loss": 1.1543, "step": 160 }, { "epoch": 0.38, "learning_rate": 1.8656078191814296e-05, "loss": 1.1209, "step": 161 }, { "epoch": 0.38, "learning_rate": 1.8643860720830788e-05, "loss": 1.0923, "step": 162 }, { "epoch": 0.39, "learning_rate": 1.8631643249847283e-05, "loss": 1.0248, "step": 163 }, { "epoch": 0.39, "learning_rate": 1.8619425778863778e-05, "loss": 0.9989, "step": 164 }, { "epoch": 0.39, "learning_rate": 1.860720830788027e-05, "loss": 1.0276, "step": 165 }, { "epoch": 0.39, "learning_rate": 1.8594990836896765e-05, "loss": 1.0172, "step": 166 }, { "epoch": 0.39, "learning_rate": 1.8582773365913257e-05, "loss": 1.0659, "step": 167 }, { "epoch": 0.4, "learning_rate": 1.857055589492975e-05, "loss": 0.9678, "step": 168 }, { "epoch": 0.4, "learning_rate": 1.8558338423946243e-05, "loss": 1.0349, "step": 169 }, { "epoch": 0.4, "learning_rate": 1.854612095296274e-05, "loss": 1.1437, "step": 170 }, { "epoch": 0.4, "learning_rate": 1.853390348197923e-05, "loss": 0.9805, "step": 171 }, { "epoch": 0.41, "learning_rate": 1.8521686010995725e-05, "loss": 1.1063, "step": 172 }, { "epoch": 0.41, "learning_rate": 1.850946854001222e-05, "loss": 1.0671, "step": 173 }, { "epoch": 0.41, "learning_rate": 1.8497251069028712e-05, "loss": 1.0975, "step": 174 }, { "epoch": 0.41, "learning_rate": 1.8485033598045207e-05, "loss": 0.9989, "step": 175 }, { "epoch": 0.42, "learning_rate": 1.84728161270617e-05, "loss": 1.033, "step": 176 }, { "epoch": 0.42, "learning_rate": 1.8460598656078194e-05, "loss": 0.8797, "step": 177 }, { "epoch": 0.42, "learning_rate": 1.8448381185094686e-05, "loss": 1.0079, "step": 178 }, { "epoch": 0.42, "learning_rate": 1.843616371411118e-05, "loss": 1.0638, "step": 179 }, { "epoch": 0.43, "learning_rate": 1.8423946243127676e-05, "loss": 0.907, "step": 180 }, { "epoch": 0.43, "learning_rate": 1.8411728772144168e-05, "loss": 0.8491, "step": 181 }, { "epoch": 0.43, "learning_rate": 1.8399511301160663e-05, "loss": 1.0359, "step": 182 }, { "epoch": 0.43, "learning_rate": 1.8387293830177154e-05, "loss": 0.9798, "step": 183 }, { "epoch": 0.44, "learning_rate": 1.837507635919365e-05, "loss": 0.8019, "step": 184 }, { "epoch": 0.44, "learning_rate": 1.836285888821014e-05, "loss": 1.2839, "step": 185 }, { "epoch": 0.44, "learning_rate": 1.8350641417226636e-05, "loss": 1.007, "step": 186 }, { "epoch": 0.44, "learning_rate": 1.8338423946243128e-05, "loss": 0.8493, "step": 187 }, { "epoch": 0.44, "learning_rate": 1.8326206475259623e-05, "loss": 0.9633, "step": 188 }, { "epoch": 0.45, "learning_rate": 1.8313989004276115e-05, "loss": 0.7577, "step": 189 }, { "epoch": 0.45, "learning_rate": 1.830177153329261e-05, "loss": 2.1476, "step": 190 }, { "epoch": 0.45, "learning_rate": 1.82895540623091e-05, "loss": 0.7755, "step": 191 }, { "epoch": 0.45, "learning_rate": 1.8277336591325597e-05, "loss": 1.1204, "step": 192 }, { "epoch": 0.46, "learning_rate": 1.826511912034209e-05, "loss": 1.0931, "step": 193 }, { "epoch": 0.46, "learning_rate": 1.8252901649358587e-05, "loss": 0.8763, "step": 194 }, { "epoch": 0.46, "learning_rate": 1.824068417837508e-05, "loss": 0.9134, "step": 195 }, { "epoch": 0.46, "learning_rate": 1.8228466707391574e-05, "loss": 0.7959, "step": 196 }, { "epoch": 0.47, "learning_rate": 1.8216249236408065e-05, "loss": 0.7246, "step": 197 }, { "epoch": 0.47, "learning_rate": 1.820403176542456e-05, "loss": 1.1064, "step": 198 }, { "epoch": 0.47, "learning_rate": 1.8191814294441052e-05, "loss": 1.5641, "step": 199 }, { "epoch": 0.47, "learning_rate": 1.8179596823457547e-05, "loss": 1.2576, "step": 200 }, { "epoch": 0.48, "learning_rate": 1.816737935247404e-05, "loss": 1.1123, "step": 201 }, { "epoch": 0.48, "learning_rate": 1.8155161881490534e-05, "loss": 0.8261, "step": 202 }, { "epoch": 0.48, "learning_rate": 1.8142944410507026e-05, "loss": 1.1329, "step": 203 }, { "epoch": 0.48, "learning_rate": 1.813072693952352e-05, "loss": 0.7938, "step": 204 }, { "epoch": 0.48, "learning_rate": 1.8118509468540013e-05, "loss": 0.9276, "step": 205 }, { "epoch": 0.49, "learning_rate": 1.8106291997556508e-05, "loss": 0.8416, "step": 206 }, { "epoch": 0.49, "learning_rate": 1.8094074526573e-05, "loss": 1.0415, "step": 207 }, { "epoch": 0.49, "learning_rate": 1.8081857055589494e-05, "loss": 1.0821, "step": 208 }, { "epoch": 0.49, "learning_rate": 1.806963958460599e-05, "loss": 0.9006, "step": 209 }, { "epoch": 0.5, "learning_rate": 1.805742211362248e-05, "loss": 0.9101, "step": 210 }, { "epoch": 0.5, "learning_rate": 1.8045204642638976e-05, "loss": 1.0872, "step": 211 }, { "epoch": 0.5, "learning_rate": 1.8032987171655468e-05, "loss": 0.9797, "step": 212 }, { "epoch": 0.5, "learning_rate": 1.8020769700671963e-05, "loss": 1.0248, "step": 213 }, { "epoch": 0.51, "learning_rate": 1.8008552229688455e-05, "loss": 1.1993, "step": 214 }, { "epoch": 0.51, "learning_rate": 1.799633475870495e-05, "loss": 0.8838, "step": 215 }, { "epoch": 0.51, "learning_rate": 1.7984117287721442e-05, "loss": 0.8972, "step": 216 }, { "epoch": 0.51, "learning_rate": 1.7971899816737937e-05, "loss": 1.1412, "step": 217 }, { "epoch": 0.52, "learning_rate": 1.795968234575443e-05, "loss": 1.0733, "step": 218 }, { "epoch": 0.52, "learning_rate": 1.7947464874770924e-05, "loss": 1.0435, "step": 219 }, { "epoch": 0.52, "learning_rate": 1.7935247403787415e-05, "loss": 1.0862, "step": 220 }, { "epoch": 0.52, "learning_rate": 1.792302993280391e-05, "loss": 1.0815, "step": 221 }, { "epoch": 0.52, "learning_rate": 1.7910812461820402e-05, "loss": 1.1491, "step": 222 }, { "epoch": 0.53, "learning_rate": 1.7898594990836897e-05, "loss": 1.0378, "step": 223 }, { "epoch": 0.53, "learning_rate": 1.7886377519853392e-05, "loss": 1.0162, "step": 224 }, { "epoch": 0.53, "learning_rate": 1.7874160048869887e-05, "loss": 0.8259, "step": 225 }, { "epoch": 0.53, "learning_rate": 1.786194257788638e-05, "loss": 0.8914, "step": 226 }, { "epoch": 0.54, "learning_rate": 1.7849725106902874e-05, "loss": 0.8749, "step": 227 }, { "epoch": 0.54, "learning_rate": 1.7837507635919366e-05, "loss": 1.0432, "step": 228 }, { "epoch": 0.54, "learning_rate": 1.782529016493586e-05, "loss": 1.0119, "step": 229 }, { "epoch": 0.54, "learning_rate": 1.7813072693952353e-05, "loss": 1.1508, "step": 230 }, { "epoch": 0.55, "learning_rate": 1.7800855222968848e-05, "loss": 1.1533, "step": 231 }, { "epoch": 0.55, "learning_rate": 1.778863775198534e-05, "loss": 1.2029, "step": 232 }, { "epoch": 0.55, "learning_rate": 1.7776420281001835e-05, "loss": 1.1147, "step": 233 }, { "epoch": 0.55, "learning_rate": 1.7764202810018326e-05, "loss": 0.9685, "step": 234 }, { "epoch": 0.56, "learning_rate": 1.775198533903482e-05, "loss": 0.8694, "step": 235 }, { "epoch": 0.56, "learning_rate": 1.7739767868051313e-05, "loss": 0.9998, "step": 236 }, { "epoch": 0.56, "learning_rate": 1.7727550397067808e-05, "loss": 0.9252, "step": 237 }, { "epoch": 0.56, "learning_rate": 1.77153329260843e-05, "loss": 0.9679, "step": 238 }, { "epoch": 0.57, "learning_rate": 1.7703115455100795e-05, "loss": 1.1989, "step": 239 }, { "epoch": 0.57, "learning_rate": 1.769089798411729e-05, "loss": 1.0249, "step": 240 }, { "epoch": 0.57, "learning_rate": 1.7678680513133785e-05, "loss": 0.9713, "step": 241 }, { "epoch": 0.57, "learning_rate": 1.7666463042150277e-05, "loss": 1.0119, "step": 242 }, { "epoch": 0.57, "learning_rate": 1.7654245571166772e-05, "loss": 1.0165, "step": 243 }, { "epoch": 0.58, "learning_rate": 1.7642028100183264e-05, "loss": 0.848, "step": 244 }, { "epoch": 0.58, "learning_rate": 1.762981062919976e-05, "loss": 0.9093, "step": 245 }, { "epoch": 0.58, "learning_rate": 1.761759315821625e-05, "loss": 0.929, "step": 246 }, { "epoch": 0.58, "learning_rate": 1.7605375687232746e-05, "loss": 1.1163, "step": 247 }, { "epoch": 0.59, "learning_rate": 1.7593158216249237e-05, "loss": 1.4804, "step": 248 }, { "epoch": 0.59, "learning_rate": 1.7580940745265732e-05, "loss": 1.04, "step": 249 }, { "epoch": 0.59, "learning_rate": 1.7568723274282224e-05, "loss": 0.915, "step": 250 }, { "epoch": 0.59, "learning_rate": 1.755650580329872e-05, "loss": 1.1308, "step": 251 }, { "epoch": 0.6, "learning_rate": 1.754428833231521e-05, "loss": 1.029, "step": 252 }, { "epoch": 0.6, "learning_rate": 1.7532070861331706e-05, "loss": 1.1648, "step": 253 }, { "epoch": 0.6, "learning_rate": 1.75198533903482e-05, "loss": 1.0718, "step": 254 }, { "epoch": 0.6, "learning_rate": 1.7507635919364693e-05, "loss": 0.7947, "step": 255 }, { "epoch": 0.61, "learning_rate": 1.7495418448381188e-05, "loss": 1.0397, "step": 256 }, { "epoch": 0.61, "learning_rate": 1.748320097739768e-05, "loss": 0.7579, "step": 257 }, { "epoch": 0.61, "learning_rate": 1.7470983506414175e-05, "loss": 0.8241, "step": 258 }, { "epoch": 0.61, "learning_rate": 1.7458766035430666e-05, "loss": 1.7378, "step": 259 }, { "epoch": 0.61, "learning_rate": 1.744654856444716e-05, "loss": 0.9632, "step": 260 }, { "epoch": 0.62, "learning_rate": 1.7434331093463653e-05, "loss": 0.9171, "step": 261 }, { "epoch": 0.62, "learning_rate": 1.742211362248015e-05, "loss": 0.887, "step": 262 }, { "epoch": 0.62, "learning_rate": 1.740989615149664e-05, "loss": 1.0072, "step": 263 }, { "epoch": 0.62, "learning_rate": 1.7397678680513135e-05, "loss": 1.051, "step": 264 }, { "epoch": 0.63, "learning_rate": 1.7385461209529627e-05, "loss": 0.95, "step": 265 }, { "epoch": 0.63, "learning_rate": 1.7373243738546122e-05, "loss": 0.8019, "step": 266 }, { "epoch": 0.63, "learning_rate": 1.7361026267562614e-05, "loss": 1.014, "step": 267 }, { "epoch": 0.63, "learning_rate": 1.734880879657911e-05, "loss": 0.8493, "step": 268 }, { "epoch": 0.64, "learning_rate": 1.7336591325595604e-05, "loss": 1.2377, "step": 269 }, { "epoch": 0.64, "learning_rate": 1.73243738546121e-05, "loss": 0.9735, "step": 270 }, { "epoch": 0.64, "learning_rate": 1.731215638362859e-05, "loss": 1.1114, "step": 271 }, { "epoch": 0.64, "learning_rate": 1.7299938912645086e-05, "loss": 1.1334, "step": 272 }, { "epoch": 0.65, "learning_rate": 1.7287721441661577e-05, "loss": 0.8667, "step": 273 }, { "epoch": 0.65, "learning_rate": 1.7275503970678073e-05, "loss": 0.9092, "step": 274 }, { "epoch": 0.65, "learning_rate": 1.7263286499694564e-05, "loss": 1.004, "step": 275 }, { "epoch": 0.65, "learning_rate": 1.725106902871106e-05, "loss": 1.027, "step": 276 }, { "epoch": 0.65, "learning_rate": 1.723885155772755e-05, "loss": 0.9408, "step": 277 }, { "epoch": 0.66, "learning_rate": 1.7226634086744046e-05, "loss": 0.8829, "step": 278 }, { "epoch": 0.66, "learning_rate": 1.7214416615760538e-05, "loss": 1.3557, "step": 279 }, { "epoch": 0.66, "learning_rate": 1.7202199144777033e-05, "loss": 0.9898, "step": 280 }, { "epoch": 0.66, "learning_rate": 1.7189981673793525e-05, "loss": 0.9547, "step": 281 }, { "epoch": 0.67, "learning_rate": 1.717776420281002e-05, "loss": 0.9013, "step": 282 }, { "epoch": 0.67, "learning_rate": 1.716554673182651e-05, "loss": 0.9221, "step": 283 }, { "epoch": 0.67, "learning_rate": 1.7153329260843007e-05, "loss": 0.9773, "step": 284 }, { "epoch": 0.67, "learning_rate": 1.71411117898595e-05, "loss": 0.9181, "step": 285 }, { "epoch": 0.68, "learning_rate": 1.7128894318875993e-05, "loss": 1.0452, "step": 286 }, { "epoch": 0.68, "learning_rate": 1.711667684789249e-05, "loss": 1.0132, "step": 287 }, { "epoch": 0.68, "learning_rate": 1.710445937690898e-05, "loss": 0.8378, "step": 288 }, { "epoch": 0.68, "learning_rate": 1.7092241905925475e-05, "loss": 0.9312, "step": 289 }, { "epoch": 0.69, "learning_rate": 1.7080024434941967e-05, "loss": 0.863, "step": 290 }, { "epoch": 0.69, "learning_rate": 1.7067806963958462e-05, "loss": 1.1051, "step": 291 }, { "epoch": 0.69, "learning_rate": 1.7055589492974954e-05, "loss": 0.8351, "step": 292 }, { "epoch": 0.69, "learning_rate": 1.704337202199145e-05, "loss": 1.1086, "step": 293 }, { "epoch": 0.7, "learning_rate": 1.7031154551007944e-05, "loss": 0.9148, "step": 294 }, { "epoch": 0.7, "learning_rate": 1.7018937080024436e-05, "loss": 1.0763, "step": 295 }, { "epoch": 0.7, "learning_rate": 1.700671960904093e-05, "loss": 0.9651, "step": 296 }, { "epoch": 0.7, "learning_rate": 1.6994502138057422e-05, "loss": 0.7762, "step": 297 }, { "epoch": 0.7, "learning_rate": 1.6982284667073918e-05, "loss": 0.8788, "step": 298 }, { "epoch": 0.71, "learning_rate": 1.6970067196090413e-05, "loss": 0.9629, "step": 299 }, { "epoch": 0.71, "learning_rate": 1.6957849725106904e-05, "loss": 0.9064, "step": 300 }, { "epoch": 0.71, "learning_rate": 1.69456322541234e-05, "loss": 0.9137, "step": 301 }, { "epoch": 0.71, "learning_rate": 1.693341478313989e-05, "loss": 1.0518, "step": 302 }, { "epoch": 0.72, "learning_rate": 1.6921197312156386e-05, "loss": 1.1436, "step": 303 }, { "epoch": 0.72, "learning_rate": 1.6908979841172878e-05, "loss": 1.063, "step": 304 }, { "epoch": 0.72, "learning_rate": 1.6896762370189373e-05, "loss": 0.8848, "step": 305 }, { "epoch": 0.72, "learning_rate": 1.6884544899205865e-05, "loss": 0.7027, "step": 306 }, { "epoch": 0.73, "learning_rate": 1.687232742822236e-05, "loss": 0.929, "step": 307 }, { "epoch": 0.73, "learning_rate": 1.686010995723885e-05, "loss": 0.9704, "step": 308 }, { "epoch": 0.73, "learning_rate": 1.6847892486255347e-05, "loss": 0.8492, "step": 309 }, { "epoch": 0.73, "learning_rate": 1.683567501527184e-05, "loss": 0.8724, "step": 310 }, { "epoch": 0.74, "learning_rate": 1.6823457544288334e-05, "loss": 0.8877, "step": 311 }, { "epoch": 0.74, "learning_rate": 1.6811240073304825e-05, "loss": 0.9331, "step": 312 }, { "epoch": 0.74, "learning_rate": 1.679902260232132e-05, "loss": 0.9574, "step": 313 }, { "epoch": 0.74, "learning_rate": 1.6786805131337815e-05, "loss": 1.0783, "step": 314 }, { "epoch": 0.74, "learning_rate": 1.677458766035431e-05, "loss": 0.9318, "step": 315 }, { "epoch": 0.75, "learning_rate": 1.6762370189370802e-05, "loss": 0.783, "step": 316 }, { "epoch": 0.75, "learning_rate": 1.6750152718387297e-05, "loss": 0.8419, "step": 317 }, { "epoch": 0.75, "learning_rate": 1.673793524740379e-05, "loss": 1.1953, "step": 318 }, { "epoch": 0.75, "learning_rate": 1.6725717776420284e-05, "loss": 0.9503, "step": 319 }, { "epoch": 0.76, "learning_rate": 1.6713500305436776e-05, "loss": 0.7846, "step": 320 }, { "epoch": 0.76, "learning_rate": 1.670128283445327e-05, "loss": 0.949, "step": 321 }, { "epoch": 0.76, "learning_rate": 1.6689065363469763e-05, "loss": 1.0251, "step": 322 }, { "epoch": 0.76, "learning_rate": 1.6676847892486258e-05, "loss": 1.1247, "step": 323 }, { "epoch": 0.77, "learning_rate": 1.666463042150275e-05, "loss": 0.9591, "step": 324 }, { "epoch": 0.77, "learning_rate": 1.6652412950519245e-05, "loss": 0.9174, "step": 325 }, { "epoch": 0.77, "learning_rate": 1.6640195479535736e-05, "loss": 0.8984, "step": 326 }, { "epoch": 0.77, "learning_rate": 1.662797800855223e-05, "loss": 0.8926, "step": 327 }, { "epoch": 0.78, "learning_rate": 1.6615760537568723e-05, "loss": 0.9084, "step": 328 }, { "epoch": 0.78, "learning_rate": 1.6603543066585218e-05, "loss": 0.9717, "step": 329 }, { "epoch": 0.78, "learning_rate": 1.6591325595601713e-05, "loss": 1.2356, "step": 330 }, { "epoch": 0.78, "learning_rate": 1.6579108124618205e-05, "loss": 0.8197, "step": 331 }, { "epoch": 0.79, "learning_rate": 1.65668906536347e-05, "loss": 1.1189, "step": 332 }, { "epoch": 0.79, "learning_rate": 1.6554673182651192e-05, "loss": 1.0264, "step": 333 }, { "epoch": 0.79, "learning_rate": 1.6542455711667687e-05, "loss": 1.0084, "step": 334 }, { "epoch": 0.79, "learning_rate": 1.653023824068418e-05, "loss": 1.0175, "step": 335 }, { "epoch": 0.79, "learning_rate": 1.6518020769700674e-05, "loss": 0.8441, "step": 336 }, { "epoch": 0.8, "learning_rate": 1.6505803298717165e-05, "loss": 1.2329, "step": 337 }, { "epoch": 0.8, "learning_rate": 1.649358582773366e-05, "loss": 0.994, "step": 338 }, { "epoch": 0.8, "learning_rate": 1.6481368356750152e-05, "loss": 1.2737, "step": 339 }, { "epoch": 0.8, "learning_rate": 1.6469150885766647e-05, "loss": 1.2804, "step": 340 }, { "epoch": 0.81, "learning_rate": 1.645693341478314e-05, "loss": 0.7583, "step": 341 }, { "epoch": 0.81, "learning_rate": 1.6444715943799634e-05, "loss": 1.0893, "step": 342 }, { "epoch": 0.81, "learning_rate": 1.6432498472816126e-05, "loss": 1.1396, "step": 343 }, { "epoch": 0.81, "learning_rate": 1.6420281001832624e-05, "loss": 0.9067, "step": 344 }, { "epoch": 0.82, "learning_rate": 1.6408063530849116e-05, "loss": 1.6975, "step": 345 }, { "epoch": 0.82, "learning_rate": 1.639584605986561e-05, "loss": 0.7273, "step": 346 }, { "epoch": 0.82, "learning_rate": 1.6383628588882103e-05, "loss": 0.9293, "step": 347 }, { "epoch": 0.82, "learning_rate": 1.6371411117898598e-05, "loss": 0.7945, "step": 348 }, { "epoch": 0.83, "learning_rate": 1.635919364691509e-05, "loss": 0.8376, "step": 349 }, { "epoch": 0.83, "learning_rate": 1.6346976175931585e-05, "loss": 0.7766, "step": 350 }, { "epoch": 0.83, "learning_rate": 1.6334758704948076e-05, "loss": 1.1176, "step": 351 }, { "epoch": 0.83, "learning_rate": 1.632254123396457e-05, "loss": 0.9348, "step": 352 }, { "epoch": 0.83, "learning_rate": 1.6310323762981063e-05, "loss": 1.0182, "step": 353 }, { "epoch": 0.84, "learning_rate": 1.6298106291997558e-05, "loss": 1.0181, "step": 354 }, { "epoch": 0.84, "learning_rate": 1.628588882101405e-05, "loss": 0.7864, "step": 355 }, { "epoch": 0.84, "learning_rate": 1.6273671350030545e-05, "loss": 0.9777, "step": 356 }, { "epoch": 0.84, "learning_rate": 1.6261453879047037e-05, "loss": 0.7503, "step": 357 }, { "epoch": 0.85, "learning_rate": 1.6249236408063532e-05, "loss": 0.8894, "step": 358 }, { "epoch": 0.85, "learning_rate": 1.6237018937080027e-05, "loss": 0.8838, "step": 359 }, { "epoch": 0.85, "learning_rate": 1.622480146609652e-05, "loss": 0.874, "step": 360 }, { "epoch": 0.85, "learning_rate": 1.6212583995113014e-05, "loss": 0.9243, "step": 361 }, { "epoch": 0.86, "learning_rate": 1.620036652412951e-05, "loss": 0.9355, "step": 362 }, { "epoch": 0.86, "learning_rate": 1.6188149053146e-05, "loss": 0.9969, "step": 363 }, { "epoch": 0.86, "learning_rate": 1.6175931582162496e-05, "loss": 0.7542, "step": 364 }, { "epoch": 0.86, "learning_rate": 1.6163714111178987e-05, "loss": 1.7259, "step": 365 }, { "epoch": 0.87, "learning_rate": 1.6151496640195482e-05, "loss": 1.0189, "step": 366 }, { "epoch": 0.87, "learning_rate": 1.6139279169211974e-05, "loss": 0.8796, "step": 367 }, { "epoch": 0.87, "learning_rate": 1.612706169822847e-05, "loss": 1.0183, "step": 368 }, { "epoch": 0.87, "learning_rate": 1.611484422724496e-05, "loss": 0.9069, "step": 369 }, { "epoch": 0.87, "learning_rate": 1.6102626756261456e-05, "loss": 0.7681, "step": 370 }, { "epoch": 0.88, "learning_rate": 1.6090409285277948e-05, "loss": 0.8676, "step": 371 }, { "epoch": 0.88, "learning_rate": 1.6078191814294443e-05, "loss": 1.0638, "step": 372 }, { "epoch": 0.88, "learning_rate": 1.6065974343310935e-05, "loss": 0.8624, "step": 373 }, { "epoch": 0.88, "learning_rate": 1.605375687232743e-05, "loss": 0.8557, "step": 374 }, { "epoch": 0.89, "learning_rate": 1.6041539401343925e-05, "loss": 0.9156, "step": 375 }, { "epoch": 0.89, "learning_rate": 1.6029321930360416e-05, "loss": 0.9213, "step": 376 }, { "epoch": 0.89, "learning_rate": 1.601710445937691e-05, "loss": 0.8925, "step": 377 }, { "epoch": 0.89, "learning_rate": 1.6004886988393403e-05, "loss": 1.0731, "step": 378 }, { "epoch": 0.9, "learning_rate": 1.59926695174099e-05, "loss": 0.9418, "step": 379 }, { "epoch": 0.9, "learning_rate": 1.598045204642639e-05, "loss": 1.1517, "step": 380 }, { "epoch": 0.9, "learning_rate": 1.5968234575442885e-05, "loss": 1.0124, "step": 381 }, { "epoch": 0.9, "learning_rate": 1.5956017104459377e-05, "loss": 1.0369, "step": 382 }, { "epoch": 0.91, "learning_rate": 1.5943799633475872e-05, "loss": 0.9406, "step": 383 }, { "epoch": 0.91, "learning_rate": 1.5931582162492364e-05, "loss": 1.1757, "step": 384 }, { "epoch": 0.91, "learning_rate": 1.591936469150886e-05, "loss": 1.0063, "step": 385 }, { "epoch": 0.91, "learning_rate": 1.590714722052535e-05, "loss": 1.6491, "step": 386 }, { "epoch": 0.92, "learning_rate": 1.5894929749541846e-05, "loss": 1.0071, "step": 387 }, { "epoch": 0.92, "learning_rate": 1.5882712278558337e-05, "loss": 0.926, "step": 388 }, { "epoch": 0.92, "learning_rate": 1.5870494807574836e-05, "loss": 0.91, "step": 389 }, { "epoch": 0.92, "learning_rate": 1.5858277336591327e-05, "loss": 0.9837, "step": 390 }, { "epoch": 0.92, "learning_rate": 1.5846059865607823e-05, "loss": 0.787, "step": 391 }, { "epoch": 0.93, "learning_rate": 1.5833842394624314e-05, "loss": 1.0002, "step": 392 }, { "epoch": 0.93, "learning_rate": 1.582162492364081e-05, "loss": 1.0767, "step": 393 }, { "epoch": 0.93, "learning_rate": 1.58094074526573e-05, "loss": 0.9251, "step": 394 }, { "epoch": 0.93, "learning_rate": 1.5797189981673796e-05, "loss": 1.2487, "step": 395 }, { "epoch": 0.94, "learning_rate": 1.5784972510690288e-05, "loss": 0.9963, "step": 396 }, { "epoch": 0.94, "learning_rate": 1.5772755039706783e-05, "loss": 0.6831, "step": 397 }, { "epoch": 0.94, "learning_rate": 1.5760537568723275e-05, "loss": 0.9607, "step": 398 }, { "epoch": 0.94, "learning_rate": 1.574832009773977e-05, "loss": 0.8419, "step": 399 }, { "epoch": 0.95, "learning_rate": 1.573610262675626e-05, "loss": 1.1296, "step": 400 }, { "epoch": 0.95, "learning_rate": 1.5723885155772757e-05, "loss": 1.1621, "step": 401 }, { "epoch": 0.95, "learning_rate": 1.5711667684789248e-05, "loss": 0.7839, "step": 402 }, { "epoch": 0.95, "learning_rate": 1.5699450213805743e-05, "loss": 0.9428, "step": 403 }, { "epoch": 0.96, "learning_rate": 1.568723274282224e-05, "loss": 0.8488, "step": 404 }, { "epoch": 0.96, "learning_rate": 1.567501527183873e-05, "loss": 0.8306, "step": 405 }, { "epoch": 0.96, "learning_rate": 1.5662797800855225e-05, "loss": 1.0083, "step": 406 }, { "epoch": 0.96, "learning_rate": 1.5650580329871717e-05, "loss": 0.9262, "step": 407 }, { "epoch": 0.96, "learning_rate": 1.5638362858888212e-05, "loss": 0.769, "step": 408 }, { "epoch": 0.97, "learning_rate": 1.5626145387904704e-05, "loss": 0.9818, "step": 409 }, { "epoch": 0.97, "learning_rate": 1.56139279169212e-05, "loss": 0.8393, "step": 410 }, { "epoch": 0.97, "learning_rate": 1.560171044593769e-05, "loss": 0.953, "step": 411 }, { "epoch": 0.97, "learning_rate": 1.5589492974954186e-05, "loss": 0.9442, "step": 412 }, { "epoch": 0.98, "learning_rate": 1.557727550397068e-05, "loss": 0.9067, "step": 413 }, { "epoch": 0.98, "learning_rate": 1.5565058032987173e-05, "loss": 1.0057, "step": 414 }, { "epoch": 0.98, "learning_rate": 1.5552840562003668e-05, "loss": 0.8088, "step": 415 }, { "epoch": 0.98, "learning_rate": 1.554062309102016e-05, "loss": 0.7332, "step": 416 }, { "epoch": 0.99, "learning_rate": 1.5528405620036654e-05, "loss": 1.084, "step": 417 }, { "epoch": 0.99, "learning_rate": 1.5516188149053146e-05, "loss": 0.8262, "step": 418 }, { "epoch": 0.99, "learning_rate": 1.550397067806964e-05, "loss": 0.6978, "step": 419 }, { "epoch": 0.99, "learning_rate": 1.5491753207086136e-05, "loss": 0.8885, "step": 420 }, { "epoch": 1.0, "learning_rate": 1.5479535736102628e-05, "loss": 0.9892, "step": 421 }, { "epoch": 1.0, "learning_rate": 1.5467318265119123e-05, "loss": 1.0634, "step": 422 }, { "epoch": 1.0, "learning_rate": 1.5455100794135615e-05, "loss": 1.2635, "step": 423 }, { "epoch": 1.0, "learning_rate": 1.544288332315211e-05, "loss": 0.8863, "step": 424 }, { "epoch": 1.0, "learning_rate": 1.54306658521686e-05, "loss": 0.8724, "step": 425 }, { "epoch": 1.01, "learning_rate": 1.5418448381185097e-05, "loss": 0.8895, "step": 426 }, { "epoch": 1.01, "learning_rate": 1.540623091020159e-05, "loss": 0.7665, "step": 427 }, { "epoch": 1.01, "learning_rate": 1.5394013439218084e-05, "loss": 0.7186, "step": 428 }, { "epoch": 1.01, "learning_rate": 1.5381795968234575e-05, "loss": 0.7365, "step": 429 }, { "epoch": 1.02, "learning_rate": 1.536957849725107e-05, "loss": 0.9939, "step": 430 }, { "epoch": 1.02, "learning_rate": 1.5357361026267562e-05, "loss": 0.9525, "step": 431 }, { "epoch": 1.02, "learning_rate": 1.5345143555284057e-05, "loss": 0.7501, "step": 432 }, { "epoch": 1.02, "learning_rate": 1.533292608430055e-05, "loss": 1.0303, "step": 433 }, { "epoch": 1.03, "learning_rate": 1.5320708613317047e-05, "loss": 1.0044, "step": 434 }, { "epoch": 1.03, "learning_rate": 1.530849114233354e-05, "loss": 1.0009, "step": 435 }, { "epoch": 1.03, "learning_rate": 1.5296273671350034e-05, "loss": 1.0215, "step": 436 }, { "epoch": 1.03, "learning_rate": 1.5284056200366526e-05, "loss": 1.0151, "step": 437 }, { "epoch": 1.04, "learning_rate": 1.527183872938302e-05, "loss": 0.9823, "step": 438 }, { "epoch": 1.04, "learning_rate": 1.5259621258399513e-05, "loss": 0.862, "step": 439 }, { "epoch": 1.04, "learning_rate": 1.5247403787416006e-05, "loss": 0.8483, "step": 440 }, { "epoch": 1.04, "learning_rate": 1.52351863164325e-05, "loss": 0.947, "step": 441 }, { "epoch": 1.05, "learning_rate": 1.5222968845448993e-05, "loss": 0.8914, "step": 442 }, { "epoch": 1.05, "learning_rate": 1.5210751374465486e-05, "loss": 1.0391, "step": 443 }, { "epoch": 1.05, "learning_rate": 1.519853390348198e-05, "loss": 0.9706, "step": 444 }, { "epoch": 1.05, "learning_rate": 1.5186316432498473e-05, "loss": 0.6686, "step": 445 }, { "epoch": 1.05, "learning_rate": 1.5174098961514966e-05, "loss": 1.177, "step": 446 }, { "epoch": 1.06, "learning_rate": 1.516188149053146e-05, "loss": 0.9745, "step": 447 }, { "epoch": 1.06, "learning_rate": 1.5149664019547953e-05, "loss": 0.8828, "step": 448 }, { "epoch": 1.06, "learning_rate": 1.513744654856445e-05, "loss": 0.5288, "step": 449 }, { "epoch": 1.06, "learning_rate": 1.5125229077580943e-05, "loss": 1.6787, "step": 450 }, { "epoch": 1.07, "learning_rate": 1.5113011606597437e-05, "loss": 0.8921, "step": 451 }, { "epoch": 1.07, "learning_rate": 1.510079413561393e-05, "loss": 1.1841, "step": 452 }, { "epoch": 1.07, "learning_rate": 1.5088576664630424e-05, "loss": 0.9546, "step": 453 }, { "epoch": 1.07, "learning_rate": 1.5076359193646917e-05, "loss": 0.786, "step": 454 }, { "epoch": 1.08, "learning_rate": 1.506414172266341e-05, "loss": 0.9826, "step": 455 }, { "epoch": 1.08, "learning_rate": 1.5051924251679904e-05, "loss": 0.9842, "step": 456 }, { "epoch": 1.08, "learning_rate": 1.5039706780696397e-05, "loss": 0.9016, "step": 457 }, { "epoch": 1.08, "learning_rate": 1.502748930971289e-05, "loss": 0.7724, "step": 458 }, { "epoch": 1.09, "learning_rate": 1.5015271838729384e-05, "loss": 0.6341, "step": 459 }, { "epoch": 1.09, "learning_rate": 1.5003054367745877e-05, "loss": 0.8749, "step": 460 }, { "epoch": 1.09, "learning_rate": 1.4990836896762371e-05, "loss": 0.6795, "step": 461 }, { "epoch": 1.09, "learning_rate": 1.4978619425778864e-05, "loss": 1.0035, "step": 462 }, { "epoch": 1.09, "learning_rate": 1.4966401954795358e-05, "loss": 0.6718, "step": 463 }, { "epoch": 1.1, "learning_rate": 1.4954184483811851e-05, "loss": 0.853, "step": 464 }, { "epoch": 1.1, "learning_rate": 1.4941967012828346e-05, "loss": 0.8484, "step": 465 }, { "epoch": 1.1, "learning_rate": 1.492974954184484e-05, "loss": 0.8504, "step": 466 }, { "epoch": 1.1, "learning_rate": 1.4917532070861333e-05, "loss": 1.0019, "step": 467 }, { "epoch": 1.11, "learning_rate": 1.4905314599877826e-05, "loss": 0.7926, "step": 468 }, { "epoch": 1.11, "learning_rate": 1.489309712889432e-05, "loss": 0.951, "step": 469 }, { "epoch": 1.11, "learning_rate": 1.4880879657910813e-05, "loss": 0.9752, "step": 470 }, { "epoch": 1.11, "learning_rate": 1.4868662186927307e-05, "loss": 0.7827, "step": 471 }, { "epoch": 1.12, "learning_rate": 1.48564447159438e-05, "loss": 1.0879, "step": 472 }, { "epoch": 1.12, "learning_rate": 1.4844227244960293e-05, "loss": 0.985, "step": 473 }, { "epoch": 1.12, "learning_rate": 1.4832009773976788e-05, "loss": 0.662, "step": 474 }, { "epoch": 1.12, "learning_rate": 1.4819792302993282e-05, "loss": 0.7518, "step": 475 }, { "epoch": 1.13, "learning_rate": 1.4807574832009775e-05, "loss": 0.6804, "step": 476 }, { "epoch": 1.13, "learning_rate": 1.4795357361026269e-05, "loss": 0.7282, "step": 477 }, { "epoch": 1.13, "learning_rate": 1.4783139890042762e-05, "loss": 0.8529, "step": 478 }, { "epoch": 1.13, "learning_rate": 1.4770922419059255e-05, "loss": 1.0995, "step": 479 }, { "epoch": 1.14, "learning_rate": 1.475870494807575e-05, "loss": 0.9315, "step": 480 }, { "epoch": 1.14, "learning_rate": 1.4746487477092244e-05, "loss": 0.9777, "step": 481 }, { "epoch": 1.14, "learning_rate": 1.4734270006108737e-05, "loss": 0.8812, "step": 482 }, { "epoch": 1.14, "learning_rate": 1.472205253512523e-05, "loss": 0.7139, "step": 483 }, { "epoch": 1.14, "learning_rate": 1.4709835064141724e-05, "loss": 0.871, "step": 484 }, { "epoch": 1.15, "learning_rate": 1.4697617593158218e-05, "loss": 0.9647, "step": 485 }, { "epoch": 1.15, "learning_rate": 1.4685400122174711e-05, "loss": 0.9651, "step": 486 }, { "epoch": 1.15, "learning_rate": 1.4673182651191204e-05, "loss": 0.821, "step": 487 }, { "epoch": 1.15, "learning_rate": 1.4660965180207698e-05, "loss": 0.6677, "step": 488 }, { "epoch": 1.16, "learning_rate": 1.4648747709224191e-05, "loss": 0.8127, "step": 489 }, { "epoch": 1.16, "learning_rate": 1.4636530238240685e-05, "loss": 0.9843, "step": 490 }, { "epoch": 1.16, "learning_rate": 1.4624312767257178e-05, "loss": 0.958, "step": 491 }, { "epoch": 1.16, "learning_rate": 1.4612095296273671e-05, "loss": 0.9279, "step": 492 }, { "epoch": 1.17, "learning_rate": 1.4599877825290165e-05, "loss": 0.9219, "step": 493 }, { "epoch": 1.17, "learning_rate": 1.4587660354306658e-05, "loss": 0.7247, "step": 494 }, { "epoch": 1.17, "learning_rate": 1.4575442883323155e-05, "loss": 0.7789, "step": 495 }, { "epoch": 1.17, "learning_rate": 1.4563225412339648e-05, "loss": 0.8834, "step": 496 }, { "epoch": 1.18, "learning_rate": 1.4551007941356142e-05, "loss": 1.0376, "step": 497 }, { "epoch": 1.18, "learning_rate": 1.4538790470372635e-05, "loss": 0.6958, "step": 498 }, { "epoch": 1.18, "learning_rate": 1.4526572999389129e-05, "loss": 0.7884, "step": 499 }, { "epoch": 1.18, "learning_rate": 1.4514355528405622e-05, "loss": 0.7412, "step": 500 }, { "epoch": 1.18, "learning_rate": 1.4502138057422115e-05, "loss": 1.04, "step": 501 }, { "epoch": 1.19, "learning_rate": 1.4489920586438609e-05, "loss": 0.9107, "step": 502 }, { "epoch": 1.19, "learning_rate": 1.4477703115455102e-05, "loss": 0.8633, "step": 503 }, { "epoch": 1.19, "learning_rate": 1.4465485644471596e-05, "loss": 0.97, "step": 504 }, { "epoch": 1.19, "learning_rate": 1.4453268173488089e-05, "loss": 0.8615, "step": 505 }, { "epoch": 1.2, "learning_rate": 1.4441050702504582e-05, "loss": 0.846, "step": 506 }, { "epoch": 1.2, "learning_rate": 1.4428833231521076e-05, "loss": 0.7451, "step": 507 }, { "epoch": 1.2, "learning_rate": 1.441661576053757e-05, "loss": 0.6941, "step": 508 }, { "epoch": 1.2, "learning_rate": 1.4404398289554063e-05, "loss": 0.8699, "step": 509 }, { "epoch": 1.21, "learning_rate": 1.4392180818570558e-05, "loss": 0.926, "step": 510 }, { "epoch": 1.21, "learning_rate": 1.4379963347587051e-05, "loss": 0.6859, "step": 511 }, { "epoch": 1.21, "learning_rate": 1.4367745876603545e-05, "loss": 1.0583, "step": 512 }, { "epoch": 1.21, "learning_rate": 1.4355528405620038e-05, "loss": 0.888, "step": 513 }, { "epoch": 1.22, "learning_rate": 1.4343310934636531e-05, "loss": 1.9128, "step": 514 }, { "epoch": 1.22, "learning_rate": 1.4331093463653025e-05, "loss": 0.9135, "step": 515 }, { "epoch": 1.22, "learning_rate": 1.4318875992669518e-05, "loss": 0.9358, "step": 516 }, { "epoch": 1.22, "learning_rate": 1.4306658521686012e-05, "loss": 1.0008, "step": 517 }, { "epoch": 1.22, "learning_rate": 1.4294441050702505e-05, "loss": 1.059, "step": 518 }, { "epoch": 1.23, "learning_rate": 1.4282223579718998e-05, "loss": 0.8735, "step": 519 }, { "epoch": 1.23, "learning_rate": 1.4270006108735492e-05, "loss": 0.9229, "step": 520 }, { "epoch": 1.23, "learning_rate": 1.4257788637751985e-05, "loss": 0.8591, "step": 521 }, { "epoch": 1.23, "learning_rate": 1.4245571166768479e-05, "loss": 0.6778, "step": 522 }, { "epoch": 1.24, "learning_rate": 1.4233353695784972e-05, "loss": 0.9968, "step": 523 }, { "epoch": 1.24, "learning_rate": 1.4221136224801465e-05, "loss": 0.932, "step": 524 }, { "epoch": 1.24, "learning_rate": 1.4208918753817962e-05, "loss": 0.8142, "step": 525 }, { "epoch": 1.24, "learning_rate": 1.4196701282834456e-05, "loss": 0.8707, "step": 526 }, { "epoch": 1.25, "learning_rate": 1.4184483811850949e-05, "loss": 1.3432, "step": 527 }, { "epoch": 1.25, "learning_rate": 1.4172266340867442e-05, "loss": 0.9905, "step": 528 }, { "epoch": 1.25, "learning_rate": 1.4160048869883936e-05, "loss": 0.9144, "step": 529 }, { "epoch": 1.25, "learning_rate": 1.4147831398900429e-05, "loss": 0.9362, "step": 530 }, { "epoch": 1.26, "learning_rate": 1.4135613927916923e-05, "loss": 0.8545, "step": 531 }, { "epoch": 1.26, "learning_rate": 1.4123396456933416e-05, "loss": 0.9268, "step": 532 }, { "epoch": 1.26, "learning_rate": 1.411117898594991e-05, "loss": 0.8372, "step": 533 }, { "epoch": 1.26, "learning_rate": 1.4098961514966403e-05, "loss": 0.7659, "step": 534 }, { "epoch": 1.27, "learning_rate": 1.4086744043982896e-05, "loss": 0.7959, "step": 535 }, { "epoch": 1.27, "learning_rate": 1.407452657299939e-05, "loss": 0.7957, "step": 536 }, { "epoch": 1.27, "learning_rate": 1.4062309102015883e-05, "loss": 0.8758, "step": 537 }, { "epoch": 1.27, "learning_rate": 1.4050091631032376e-05, "loss": 0.8478, "step": 538 }, { "epoch": 1.27, "learning_rate": 1.403787416004887e-05, "loss": 0.9166, "step": 539 }, { "epoch": 1.28, "learning_rate": 1.4025656689065365e-05, "loss": 0.8067, "step": 540 }, { "epoch": 1.28, "learning_rate": 1.4013439218081858e-05, "loss": 0.7642, "step": 541 }, { "epoch": 1.28, "learning_rate": 1.4001221747098352e-05, "loss": 0.925, "step": 542 }, { "epoch": 1.28, "learning_rate": 1.3989004276114847e-05, "loss": 0.9245, "step": 543 }, { "epoch": 1.29, "learning_rate": 1.397678680513134e-05, "loss": 1.3363, "step": 544 }, { "epoch": 1.29, "learning_rate": 1.3964569334147834e-05, "loss": 0.703, "step": 545 }, { "epoch": 1.29, "learning_rate": 1.3952351863164327e-05, "loss": 0.8265, "step": 546 }, { "epoch": 1.29, "learning_rate": 1.394013439218082e-05, "loss": 0.8719, "step": 547 }, { "epoch": 1.3, "learning_rate": 1.3927916921197314e-05, "loss": 0.9639, "step": 548 }, { "epoch": 1.3, "learning_rate": 1.3915699450213807e-05, "loss": 0.9368, "step": 549 }, { "epoch": 1.3, "learning_rate": 1.39034819792303e-05, "loss": 0.8262, "step": 550 }, { "epoch": 1.3, "learning_rate": 1.3891264508246794e-05, "loss": 0.8692, "step": 551 }, { "epoch": 1.31, "learning_rate": 1.3879047037263287e-05, "loss": 0.8015, "step": 552 }, { "epoch": 1.31, "learning_rate": 1.386682956627978e-05, "loss": 0.789, "step": 553 }, { "epoch": 1.31, "learning_rate": 1.3854612095296274e-05, "loss": 0.7983, "step": 554 }, { "epoch": 1.31, "learning_rate": 1.384239462431277e-05, "loss": 0.9553, "step": 555 }, { "epoch": 1.31, "learning_rate": 1.3830177153329263e-05, "loss": 0.9573, "step": 556 }, { "epoch": 1.32, "learning_rate": 1.3817959682345756e-05, "loss": 0.8161, "step": 557 }, { "epoch": 1.32, "learning_rate": 1.380574221136225e-05, "loss": 0.7292, "step": 558 }, { "epoch": 1.32, "learning_rate": 1.3793524740378743e-05, "loss": 0.9365, "step": 559 }, { "epoch": 1.32, "learning_rate": 1.3781307269395236e-05, "loss": 0.9629, "step": 560 }, { "epoch": 1.33, "learning_rate": 1.376908979841173e-05, "loss": 0.823, "step": 561 }, { "epoch": 1.33, "learning_rate": 1.3756872327428223e-05, "loss": 0.8126, "step": 562 }, { "epoch": 1.33, "learning_rate": 1.3744654856444716e-05, "loss": 0.8997, "step": 563 }, { "epoch": 1.33, "learning_rate": 1.373243738546121e-05, "loss": 0.7755, "step": 564 }, { "epoch": 1.34, "learning_rate": 1.3720219914477703e-05, "loss": 0.8907, "step": 565 }, { "epoch": 1.34, "learning_rate": 1.3708002443494197e-05, "loss": 0.9902, "step": 566 }, { "epoch": 1.34, "learning_rate": 1.369578497251069e-05, "loss": 0.7797, "step": 567 }, { "epoch": 1.34, "learning_rate": 1.3683567501527183e-05, "loss": 0.8291, "step": 568 }, { "epoch": 1.35, "learning_rate": 1.3671350030543677e-05, "loss": 0.7132, "step": 569 }, { "epoch": 1.35, "learning_rate": 1.3659132559560174e-05, "loss": 0.8713, "step": 570 }, { "epoch": 1.35, "learning_rate": 1.3646915088576667e-05, "loss": 0.8353, "step": 571 }, { "epoch": 1.35, "learning_rate": 1.363469761759316e-05, "loss": 1.1653, "step": 572 }, { "epoch": 1.35, "learning_rate": 1.3622480146609654e-05, "loss": 0.8543, "step": 573 }, { "epoch": 1.36, "learning_rate": 1.3610262675626147e-05, "loss": 0.7127, "step": 574 }, { "epoch": 1.36, "learning_rate": 1.359804520464264e-05, "loss": 0.7931, "step": 575 }, { "epoch": 1.36, "learning_rate": 1.3585827733659134e-05, "loss": 1.0101, "step": 576 }, { "epoch": 1.36, "learning_rate": 1.3573610262675627e-05, "loss": 0.7975, "step": 577 }, { "epoch": 1.37, "learning_rate": 1.3561392791692121e-05, "loss": 0.8408, "step": 578 }, { "epoch": 1.37, "learning_rate": 1.3549175320708614e-05, "loss": 0.8974, "step": 579 }, { "epoch": 1.37, "learning_rate": 1.3536957849725108e-05, "loss": 0.8791, "step": 580 }, { "epoch": 1.37, "learning_rate": 1.3524740378741601e-05, "loss": 0.654, "step": 581 }, { "epoch": 1.38, "learning_rate": 1.3512522907758094e-05, "loss": 0.9567, "step": 582 }, { "epoch": 1.38, "learning_rate": 1.3500305436774588e-05, "loss": 0.9604, "step": 583 }, { "epoch": 1.38, "learning_rate": 1.3488087965791081e-05, "loss": 0.8445, "step": 584 }, { "epoch": 1.38, "learning_rate": 1.3475870494807576e-05, "loss": 0.9758, "step": 585 }, { "epoch": 1.39, "learning_rate": 1.346365302382407e-05, "loss": 0.8508, "step": 586 }, { "epoch": 1.39, "learning_rate": 1.3451435552840563e-05, "loss": 0.789, "step": 587 }, { "epoch": 1.39, "learning_rate": 1.3439218081857057e-05, "loss": 0.946, "step": 588 }, { "epoch": 1.39, "learning_rate": 1.342700061087355e-05, "loss": 0.7667, "step": 589 }, { "epoch": 1.4, "learning_rate": 1.3414783139890043e-05, "loss": 1.0173, "step": 590 }, { "epoch": 1.4, "learning_rate": 1.3402565668906537e-05, "loss": 0.781, "step": 591 }, { "epoch": 1.4, "learning_rate": 1.339034819792303e-05, "loss": 0.8253, "step": 592 }, { "epoch": 1.4, "learning_rate": 1.3378130726939524e-05, "loss": 0.985, "step": 593 }, { "epoch": 1.4, "learning_rate": 1.3365913255956019e-05, "loss": 1.0426, "step": 594 }, { "epoch": 1.41, "learning_rate": 1.3353695784972512e-05, "loss": 0.7838, "step": 595 }, { "epoch": 1.41, "learning_rate": 1.3341478313989005e-05, "loss": 0.8737, "step": 596 }, { "epoch": 1.41, "learning_rate": 1.3329260843005499e-05, "loss": 0.9306, "step": 597 }, { "epoch": 1.41, "learning_rate": 1.3317043372021992e-05, "loss": 0.7315, "step": 598 }, { "epoch": 1.42, "learning_rate": 1.3304825901038486e-05, "loss": 1.3077, "step": 599 }, { "epoch": 1.42, "learning_rate": 1.329260843005498e-05, "loss": 0.7888, "step": 600 }, { "epoch": 1.42, "learning_rate": 1.3280390959071474e-05, "loss": 0.9452, "step": 601 }, { "epoch": 1.42, "learning_rate": 1.3268173488087968e-05, "loss": 1.0367, "step": 602 }, { "epoch": 1.43, "learning_rate": 1.3255956017104461e-05, "loss": 1.4717, "step": 603 }, { "epoch": 1.43, "learning_rate": 1.3243738546120954e-05, "loss": 1.4204, "step": 604 }, { "epoch": 1.43, "learning_rate": 1.3231521075137448e-05, "loss": 0.7385, "step": 605 }, { "epoch": 1.43, "learning_rate": 1.3219303604153941e-05, "loss": 0.7257, "step": 606 }, { "epoch": 1.44, "learning_rate": 1.3207086133170435e-05, "loss": 0.8938, "step": 607 }, { "epoch": 1.44, "learning_rate": 1.3194868662186928e-05, "loss": 0.6473, "step": 608 }, { "epoch": 1.44, "learning_rate": 1.3182651191203421e-05, "loss": 0.8329, "step": 609 }, { "epoch": 1.44, "learning_rate": 1.3170433720219915e-05, "loss": 0.6972, "step": 610 }, { "epoch": 1.44, "learning_rate": 1.3158216249236408e-05, "loss": 0.894, "step": 611 }, { "epoch": 1.45, "learning_rate": 1.3145998778252902e-05, "loss": 0.751, "step": 612 }, { "epoch": 1.45, "learning_rate": 1.3133781307269395e-05, "loss": 0.9107, "step": 613 }, { "epoch": 1.45, "learning_rate": 1.3121563836285888e-05, "loss": 1.0217, "step": 614 }, { "epoch": 1.45, "learning_rate": 1.3109346365302385e-05, "loss": 0.8694, "step": 615 }, { "epoch": 1.46, "learning_rate": 1.3097128894318879e-05, "loss": 0.7461, "step": 616 }, { "epoch": 1.46, "learning_rate": 1.3084911423335372e-05, "loss": 0.916, "step": 617 }, { "epoch": 1.46, "learning_rate": 1.3072693952351865e-05, "loss": 0.8143, "step": 618 }, { "epoch": 1.46, "learning_rate": 1.3060476481368359e-05, "loss": 0.8181, "step": 619 }, { "epoch": 1.47, "learning_rate": 1.3048259010384852e-05, "loss": 0.9167, "step": 620 }, { "epoch": 1.47, "learning_rate": 1.3036041539401346e-05, "loss": 0.8222, "step": 621 }, { "epoch": 1.47, "learning_rate": 1.3023824068417839e-05, "loss": 0.907, "step": 622 }, { "epoch": 1.47, "learning_rate": 1.3011606597434332e-05, "loss": 1.0519, "step": 623 }, { "epoch": 1.48, "learning_rate": 1.2999389126450826e-05, "loss": 0.8482, "step": 624 }, { "epoch": 1.48, "learning_rate": 1.298717165546732e-05, "loss": 0.9613, "step": 625 }, { "epoch": 1.48, "learning_rate": 1.2974954184483813e-05, "loss": 0.749, "step": 626 }, { "epoch": 1.48, "learning_rate": 1.2962736713500306e-05, "loss": 0.8903, "step": 627 }, { "epoch": 1.48, "learning_rate": 1.29505192425168e-05, "loss": 0.7888, "step": 628 }, { "epoch": 1.49, "learning_rate": 1.2938301771533293e-05, "loss": 0.7947, "step": 629 }, { "epoch": 1.49, "learning_rate": 1.2926084300549788e-05, "loss": 0.7678, "step": 630 }, { "epoch": 1.49, "learning_rate": 1.2913866829566281e-05, "loss": 1.0733, "step": 631 }, { "epoch": 1.49, "learning_rate": 1.2901649358582775e-05, "loss": 1.0331, "step": 632 }, { "epoch": 1.5, "learning_rate": 1.2889431887599268e-05, "loss": 0.7706, "step": 633 }, { "epoch": 1.5, "learning_rate": 1.2877214416615762e-05, "loss": 0.9589, "step": 634 }, { "epoch": 1.5, "learning_rate": 1.2864996945632255e-05, "loss": 0.7827, "step": 635 }, { "epoch": 1.5, "learning_rate": 1.2852779474648748e-05, "loss": 0.86, "step": 636 }, { "epoch": 1.51, "learning_rate": 1.2840562003665242e-05, "loss": 0.8908, "step": 637 }, { "epoch": 1.51, "learning_rate": 1.2828344532681735e-05, "loss": 0.9065, "step": 638 }, { "epoch": 1.51, "learning_rate": 1.2816127061698229e-05, "loss": 0.847, "step": 639 }, { "epoch": 1.51, "learning_rate": 1.2803909590714722e-05, "loss": 1.051, "step": 640 }, { "epoch": 1.52, "learning_rate": 1.2791692119731215e-05, "loss": 1.0868, "step": 641 }, { "epoch": 1.52, "learning_rate": 1.2779474648747709e-05, "loss": 1.0014, "step": 642 }, { "epoch": 1.52, "learning_rate": 1.2767257177764202e-05, "loss": 0.8227, "step": 643 }, { "epoch": 1.52, "learning_rate": 1.2755039706780696e-05, "loss": 0.88, "step": 644 }, { "epoch": 1.53, "learning_rate": 1.2742822235797192e-05, "loss": 0.7747, "step": 645 }, { "epoch": 1.53, "learning_rate": 1.2730604764813686e-05, "loss": 0.7716, "step": 646 }, { "epoch": 1.53, "learning_rate": 1.2718387293830179e-05, "loss": 0.8671, "step": 647 }, { "epoch": 1.53, "learning_rate": 1.2706169822846673e-05, "loss": 0.7988, "step": 648 }, { "epoch": 1.53, "learning_rate": 1.2693952351863166e-05, "loss": 0.7907, "step": 649 }, { "epoch": 1.54, "learning_rate": 1.268173488087966e-05, "loss": 0.7658, "step": 650 }, { "epoch": 1.54, "learning_rate": 1.2669517409896153e-05, "loss": 0.9084, "step": 651 }, { "epoch": 1.54, "learning_rate": 1.2657299938912646e-05, "loss": 0.7117, "step": 652 }, { "epoch": 1.54, "learning_rate": 1.264508246792914e-05, "loss": 0.7998, "step": 653 }, { "epoch": 1.55, "learning_rate": 1.2632864996945633e-05, "loss": 0.7648, "step": 654 }, { "epoch": 1.55, "learning_rate": 1.2620647525962126e-05, "loss": 0.7891, "step": 655 }, { "epoch": 1.55, "learning_rate": 1.260843005497862e-05, "loss": 0.7822, "step": 656 }, { "epoch": 1.55, "learning_rate": 1.2596212583995113e-05, "loss": 0.9386, "step": 657 }, { "epoch": 1.56, "learning_rate": 1.2583995113011607e-05, "loss": 1.0717, "step": 658 }, { "epoch": 1.56, "learning_rate": 1.25717776420281e-05, "loss": 0.883, "step": 659 }, { "epoch": 1.56, "learning_rate": 1.2559560171044595e-05, "loss": 0.8948, "step": 660 }, { "epoch": 1.56, "learning_rate": 1.2547342700061088e-05, "loss": 0.8952, "step": 661 }, { "epoch": 1.57, "learning_rate": 1.2535125229077582e-05, "loss": 0.8745, "step": 662 }, { "epoch": 1.57, "learning_rate": 1.2522907758094075e-05, "loss": 1.0238, "step": 663 }, { "epoch": 1.57, "learning_rate": 1.251069028711057e-05, "loss": 0.835, "step": 664 }, { "epoch": 1.57, "learning_rate": 1.2498472816127064e-05, "loss": 0.9643, "step": 665 }, { "epoch": 1.57, "learning_rate": 1.2486255345143557e-05, "loss": 0.8413, "step": 666 }, { "epoch": 1.58, "learning_rate": 1.247403787416005e-05, "loss": 0.8589, "step": 667 }, { "epoch": 1.58, "learning_rate": 1.2461820403176544e-05, "loss": 0.9187, "step": 668 }, { "epoch": 1.58, "learning_rate": 1.2449602932193037e-05, "loss": 0.6343, "step": 669 }, { "epoch": 1.58, "learning_rate": 1.243738546120953e-05, "loss": 0.7666, "step": 670 }, { "epoch": 1.59, "learning_rate": 1.2425167990226024e-05, "loss": 0.7723, "step": 671 }, { "epoch": 1.59, "learning_rate": 1.2412950519242518e-05, "loss": 0.7481, "step": 672 }, { "epoch": 1.59, "learning_rate": 1.2400733048259011e-05, "loss": 1.0699, "step": 673 }, { "epoch": 1.59, "learning_rate": 1.2388515577275504e-05, "loss": 0.9871, "step": 674 }, { "epoch": 1.6, "learning_rate": 1.2376298106292e-05, "loss": 0.7944, "step": 675 }, { "epoch": 1.6, "learning_rate": 1.2364080635308493e-05, "loss": 0.8405, "step": 676 }, { "epoch": 1.6, "learning_rate": 1.2351863164324986e-05, "loss": 1.097, "step": 677 }, { "epoch": 1.6, "learning_rate": 1.233964569334148e-05, "loss": 0.8661, "step": 678 }, { "epoch": 1.61, "learning_rate": 1.2327428222357973e-05, "loss": 0.9293, "step": 679 }, { "epoch": 1.61, "learning_rate": 1.2315210751374466e-05, "loss": 0.8302, "step": 680 }, { "epoch": 1.61, "learning_rate": 1.230299328039096e-05, "loss": 0.8256, "step": 681 }, { "epoch": 1.61, "learning_rate": 1.2290775809407453e-05, "loss": 0.9635, "step": 682 }, { "epoch": 1.62, "learning_rate": 1.2278558338423947e-05, "loss": 0.9612, "step": 683 }, { "epoch": 1.62, "learning_rate": 1.226634086744044e-05, "loss": 0.6661, "step": 684 }, { "epoch": 1.62, "learning_rate": 1.2254123396456933e-05, "loss": 0.8132, "step": 685 }, { "epoch": 1.62, "learning_rate": 1.2241905925473427e-05, "loss": 0.8554, "step": 686 }, { "epoch": 1.62, "learning_rate": 1.222968845448992e-05, "loss": 0.9417, "step": 687 }, { "epoch": 1.63, "learning_rate": 1.2217470983506414e-05, "loss": 0.7557, "step": 688 }, { "epoch": 1.63, "learning_rate": 1.2205253512522907e-05, "loss": 0.7313, "step": 689 }, { "epoch": 1.63, "learning_rate": 1.2193036041539404e-05, "loss": 1.0282, "step": 690 }, { "epoch": 1.63, "learning_rate": 1.2180818570555897e-05, "loss": 0.8616, "step": 691 }, { "epoch": 1.64, "learning_rate": 1.216860109957239e-05, "loss": 1.0365, "step": 692 }, { "epoch": 1.64, "learning_rate": 1.2156383628588884e-05, "loss": 0.6458, "step": 693 }, { "epoch": 1.64, "learning_rate": 1.2144166157605377e-05, "loss": 0.8812, "step": 694 }, { "epoch": 1.64, "learning_rate": 1.2131948686621871e-05, "loss": 0.9863, "step": 695 }, { "epoch": 1.65, "learning_rate": 1.2119731215638364e-05, "loss": 0.9672, "step": 696 }, { "epoch": 1.65, "learning_rate": 1.2107513744654858e-05, "loss": 0.7714, "step": 697 }, { "epoch": 1.65, "learning_rate": 1.2095296273671351e-05, "loss": 1.8343, "step": 698 }, { "epoch": 1.65, "learning_rate": 1.2083078802687844e-05, "loss": 1.1402, "step": 699 }, { "epoch": 1.66, "learning_rate": 1.2070861331704338e-05, "loss": 0.7542, "step": 700 }, { "epoch": 1.66, "learning_rate": 1.2058643860720831e-05, "loss": 0.9874, "step": 701 }, { "epoch": 1.66, "learning_rate": 1.2046426389737325e-05, "loss": 0.9331, "step": 702 }, { "epoch": 1.66, "learning_rate": 1.2034208918753818e-05, "loss": 0.9548, "step": 703 }, { "epoch": 1.66, "learning_rate": 1.2021991447770312e-05, "loss": 0.7545, "step": 704 }, { "epoch": 1.67, "learning_rate": 1.2009773976786807e-05, "loss": 1.3738, "step": 705 }, { "epoch": 1.67, "learning_rate": 1.19975565058033e-05, "loss": 0.9293, "step": 706 }, { "epoch": 1.67, "learning_rate": 1.1985339034819793e-05, "loss": 0.7363, "step": 707 }, { "epoch": 1.67, "learning_rate": 1.1973121563836287e-05, "loss": 0.9742, "step": 708 }, { "epoch": 1.68, "learning_rate": 1.196090409285278e-05, "loss": 0.9045, "step": 709 }, { "epoch": 1.68, "learning_rate": 1.1948686621869274e-05, "loss": 1.3182, "step": 710 }, { "epoch": 1.68, "learning_rate": 1.1936469150885767e-05, "loss": 0.9714, "step": 711 }, { "epoch": 1.68, "learning_rate": 1.192425167990226e-05, "loss": 0.9094, "step": 712 }, { "epoch": 1.69, "learning_rate": 1.1912034208918754e-05, "loss": 1.4162, "step": 713 }, { "epoch": 1.69, "learning_rate": 1.1899816737935247e-05, "loss": 0.8216, "step": 714 }, { "epoch": 1.69, "learning_rate": 1.1887599266951742e-05, "loss": 0.8592, "step": 715 }, { "epoch": 1.69, "learning_rate": 1.1875381795968236e-05, "loss": 0.8486, "step": 716 }, { "epoch": 1.7, "learning_rate": 1.1863164324984729e-05, "loss": 0.7479, "step": 717 }, { "epoch": 1.7, "learning_rate": 1.1850946854001223e-05, "loss": 0.9526, "step": 718 }, { "epoch": 1.7, "learning_rate": 1.1838729383017716e-05, "loss": 1.3393, "step": 719 }, { "epoch": 1.7, "learning_rate": 1.1826511912034211e-05, "loss": 0.7805, "step": 720 }, { "epoch": 1.7, "learning_rate": 1.1814294441050704e-05, "loss": 0.8528, "step": 721 }, { "epoch": 1.71, "learning_rate": 1.1802076970067198e-05, "loss": 1.4923, "step": 722 }, { "epoch": 1.71, "learning_rate": 1.1789859499083691e-05, "loss": 0.9028, "step": 723 }, { "epoch": 1.71, "learning_rate": 1.1777642028100185e-05, "loss": 0.8582, "step": 724 }, { "epoch": 1.71, "learning_rate": 1.1765424557116678e-05, "loss": 0.7968, "step": 725 }, { "epoch": 1.72, "learning_rate": 1.1753207086133171e-05, "loss": 0.8106, "step": 726 }, { "epoch": 1.72, "learning_rate": 1.1740989615149665e-05, "loss": 1.3575, "step": 727 }, { "epoch": 1.72, "learning_rate": 1.1728772144166158e-05, "loss": 0.7235, "step": 728 }, { "epoch": 1.72, "learning_rate": 1.1716554673182652e-05, "loss": 0.8314, "step": 729 }, { "epoch": 1.73, "learning_rate": 1.1704337202199145e-05, "loss": 1.0177, "step": 730 }, { "epoch": 1.73, "learning_rate": 1.1692119731215638e-05, "loss": 0.8382, "step": 731 }, { "epoch": 1.73, "learning_rate": 1.1679902260232132e-05, "loss": 0.95, "step": 732 }, { "epoch": 1.73, "learning_rate": 1.1667684789248625e-05, "loss": 0.6696, "step": 733 }, { "epoch": 1.74, "learning_rate": 1.1655467318265119e-05, "loss": 1.0264, "step": 734 }, { "epoch": 1.74, "learning_rate": 1.1643249847281615e-05, "loss": 1.1022, "step": 735 }, { "epoch": 1.74, "learning_rate": 1.1631032376298109e-05, "loss": 0.9275, "step": 736 }, { "epoch": 1.74, "learning_rate": 1.1618814905314602e-05, "loss": 0.7903, "step": 737 }, { "epoch": 1.75, "learning_rate": 1.1606597434331096e-05, "loss": 0.9102, "step": 738 }, { "epoch": 1.75, "learning_rate": 1.1594379963347589e-05, "loss": 0.8238, "step": 739 }, { "epoch": 1.75, "learning_rate": 1.1582162492364082e-05, "loss": 0.8609, "step": 740 }, { "epoch": 1.75, "learning_rate": 1.1569945021380576e-05, "loss": 0.8274, "step": 741 }, { "epoch": 1.75, "learning_rate": 1.155772755039707e-05, "loss": 0.9712, "step": 742 }, { "epoch": 1.76, "learning_rate": 1.1545510079413563e-05, "loss": 0.9417, "step": 743 }, { "epoch": 1.76, "learning_rate": 1.1533292608430056e-05, "loss": 0.8531, "step": 744 }, { "epoch": 1.76, "learning_rate": 1.152107513744655e-05, "loss": 0.8248, "step": 745 }, { "epoch": 1.76, "learning_rate": 1.1508857666463043e-05, "loss": 0.9558, "step": 746 }, { "epoch": 1.77, "learning_rate": 1.1496640195479536e-05, "loss": 1.0065, "step": 747 }, { "epoch": 1.77, "learning_rate": 1.148442272449603e-05, "loss": 1.0631, "step": 748 }, { "epoch": 1.77, "learning_rate": 1.1472205253512523e-05, "loss": 0.8718, "step": 749 }, { "epoch": 1.77, "learning_rate": 1.1459987782529018e-05, "loss": 0.9382, "step": 750 }, { "epoch": 1.78, "learning_rate": 1.1447770311545512e-05, "loss": 0.9873, "step": 751 }, { "epoch": 1.78, "learning_rate": 1.1435552840562005e-05, "loss": 0.7277, "step": 752 }, { "epoch": 1.78, "learning_rate": 1.1423335369578498e-05, "loss": 0.7512, "step": 753 }, { "epoch": 1.78, "learning_rate": 1.1411117898594992e-05, "loss": 0.8206, "step": 754 }, { "epoch": 1.79, "learning_rate": 1.1398900427611485e-05, "loss": 0.8375, "step": 755 }, { "epoch": 1.79, "learning_rate": 1.1386682956627979e-05, "loss": 0.9206, "step": 756 }, { "epoch": 1.79, "learning_rate": 1.1374465485644472e-05, "loss": 0.869, "step": 757 }, { "epoch": 1.79, "learning_rate": 1.1362248014660965e-05, "loss": 0.8234, "step": 758 }, { "epoch": 1.79, "learning_rate": 1.1350030543677459e-05, "loss": 0.7956, "step": 759 }, { "epoch": 1.8, "learning_rate": 1.1337813072693952e-05, "loss": 0.7696, "step": 760 }, { "epoch": 1.8, "learning_rate": 1.1325595601710446e-05, "loss": 0.9689, "step": 761 }, { "epoch": 1.8, "learning_rate": 1.1313378130726939e-05, "loss": 0.8306, "step": 762 }, { "epoch": 1.8, "learning_rate": 1.1301160659743432e-05, "loss": 0.7371, "step": 763 }, { "epoch": 1.81, "learning_rate": 1.1288943188759926e-05, "loss": 0.8887, "step": 764 }, { "epoch": 1.81, "learning_rate": 1.1276725717776423e-05, "loss": 0.7669, "step": 765 }, { "epoch": 1.81, "learning_rate": 1.1264508246792916e-05, "loss": 1.0305, "step": 766 }, { "epoch": 1.81, "learning_rate": 1.125229077580941e-05, "loss": 0.5774, "step": 767 }, { "epoch": 1.82, "learning_rate": 1.1240073304825903e-05, "loss": 0.9091, "step": 768 }, { "epoch": 1.82, "learning_rate": 1.1227855833842396e-05, "loss": 1.001, "step": 769 }, { "epoch": 1.82, "learning_rate": 1.121563836285889e-05, "loss": 0.9184, "step": 770 }, { "epoch": 1.82, "learning_rate": 1.1203420891875383e-05, "loss": 0.8154, "step": 771 }, { "epoch": 1.83, "learning_rate": 1.1191203420891876e-05, "loss": 0.9085, "step": 772 }, { "epoch": 1.83, "learning_rate": 1.117898594990837e-05, "loss": 0.892, "step": 773 }, { "epoch": 1.83, "learning_rate": 1.1166768478924863e-05, "loss": 0.7947, "step": 774 }, { "epoch": 1.83, "learning_rate": 1.1154551007941357e-05, "loss": 0.8033, "step": 775 }, { "epoch": 1.83, "learning_rate": 1.114233353695785e-05, "loss": 1.4454, "step": 776 }, { "epoch": 1.84, "learning_rate": 1.1130116065974343e-05, "loss": 1.1096, "step": 777 }, { "epoch": 1.84, "learning_rate": 1.1117898594990837e-05, "loss": 0.8875, "step": 778 }, { "epoch": 1.84, "learning_rate": 1.110568112400733e-05, "loss": 0.7709, "step": 779 }, { "epoch": 1.84, "learning_rate": 1.1093463653023825e-05, "loss": 0.8432, "step": 780 }, { "epoch": 1.85, "learning_rate": 1.1081246182040319e-05, "loss": 0.887, "step": 781 }, { "epoch": 1.85, "learning_rate": 1.1069028711056812e-05, "loss": 0.9143, "step": 782 }, { "epoch": 1.85, "learning_rate": 1.1056811240073305e-05, "loss": 0.8532, "step": 783 }, { "epoch": 1.85, "learning_rate": 1.10445937690898e-05, "loss": 0.7214, "step": 784 }, { "epoch": 1.86, "learning_rate": 1.1032376298106294e-05, "loss": 0.8941, "step": 785 }, { "epoch": 1.86, "learning_rate": 1.1020158827122787e-05, "loss": 0.6898, "step": 786 }, { "epoch": 1.86, "learning_rate": 1.100794135613928e-05, "loss": 0.7767, "step": 787 }, { "epoch": 1.86, "learning_rate": 1.0995723885155774e-05, "loss": 1.0031, "step": 788 }, { "epoch": 1.87, "learning_rate": 1.0983506414172268e-05, "loss": 0.7288, "step": 789 }, { "epoch": 1.87, "learning_rate": 1.0971288943188761e-05, "loss": 0.902, "step": 790 }, { "epoch": 1.87, "learning_rate": 1.0959071472205254e-05, "loss": 1.6741, "step": 791 }, { "epoch": 1.87, "learning_rate": 1.0946854001221748e-05, "loss": 0.8963, "step": 792 }, { "epoch": 1.88, "learning_rate": 1.0934636530238241e-05, "loss": 0.8454, "step": 793 }, { "epoch": 1.88, "learning_rate": 1.0922419059254735e-05, "loss": 0.7937, "step": 794 }, { "epoch": 1.88, "learning_rate": 1.091020158827123e-05, "loss": 0.9366, "step": 795 }, { "epoch": 1.88, "learning_rate": 1.0897984117287723e-05, "loss": 0.7642, "step": 796 }, { "epoch": 1.88, "learning_rate": 1.0885766646304216e-05, "loss": 1.0383, "step": 797 }, { "epoch": 1.89, "learning_rate": 1.087354917532071e-05, "loss": 0.8115, "step": 798 }, { "epoch": 1.89, "learning_rate": 1.0861331704337203e-05, "loss": 0.7814, "step": 799 }, { "epoch": 1.89, "learning_rate": 1.0849114233353697e-05, "loss": 0.7422, "step": 800 }, { "epoch": 1.89, "learning_rate": 1.083689676237019e-05, "loss": 0.9207, "step": 801 }, { "epoch": 1.9, "learning_rate": 1.0824679291386684e-05, "loss": 0.978, "step": 802 }, { "epoch": 1.9, "learning_rate": 1.0812461820403177e-05, "loss": 0.8223, "step": 803 }, { "epoch": 1.9, "learning_rate": 1.080024434941967e-05, "loss": 0.7823, "step": 804 }, { "epoch": 1.9, "learning_rate": 1.0788026878436164e-05, "loss": 1.227, "step": 805 }, { "epoch": 1.91, "learning_rate": 1.0775809407452657e-05, "loss": 0.7274, "step": 806 }, { "epoch": 1.91, "learning_rate": 1.076359193646915e-05, "loss": 1.0557, "step": 807 }, { "epoch": 1.91, "learning_rate": 1.0751374465485644e-05, "loss": 0.6012, "step": 808 }, { "epoch": 1.91, "learning_rate": 1.0739156994502137e-05, "loss": 0.7529, "step": 809 }, { "epoch": 1.92, "learning_rate": 1.0726939523518634e-05, "loss": 0.697, "step": 810 }, { "epoch": 1.92, "learning_rate": 1.0714722052535128e-05, "loss": 0.7619, "step": 811 }, { "epoch": 1.92, "learning_rate": 1.0702504581551621e-05, "loss": 0.6546, "step": 812 }, { "epoch": 1.92, "learning_rate": 1.0690287110568114e-05, "loss": 1.0423, "step": 813 }, { "epoch": 1.92, "learning_rate": 1.0678069639584608e-05, "loss": 0.8163, "step": 814 }, { "epoch": 1.93, "learning_rate": 1.0665852168601101e-05, "loss": 0.8361, "step": 815 }, { "epoch": 1.93, "learning_rate": 1.0653634697617595e-05, "loss": 1.0374, "step": 816 }, { "epoch": 1.93, "learning_rate": 1.0641417226634088e-05, "loss": 0.6715, "step": 817 }, { "epoch": 1.93, "learning_rate": 1.0629199755650581e-05, "loss": 0.8869, "step": 818 }, { "epoch": 1.94, "learning_rate": 1.0616982284667075e-05, "loss": 0.7648, "step": 819 }, { "epoch": 1.94, "learning_rate": 1.0604764813683568e-05, "loss": 0.617, "step": 820 }, { "epoch": 1.94, "learning_rate": 1.0592547342700062e-05, "loss": 0.8328, "step": 821 }, { "epoch": 1.94, "learning_rate": 1.0580329871716555e-05, "loss": 0.6538, "step": 822 }, { "epoch": 1.95, "learning_rate": 1.0568112400733048e-05, "loss": 0.7676, "step": 823 }, { "epoch": 1.95, "learning_rate": 1.0555894929749542e-05, "loss": 0.8372, "step": 824 }, { "epoch": 1.95, "learning_rate": 1.0543677458766037e-05, "loss": 1.4951, "step": 825 }, { "epoch": 1.95, "learning_rate": 1.053145998778253e-05, "loss": 0.8325, "step": 826 }, { "epoch": 1.96, "learning_rate": 1.0519242516799024e-05, "loss": 0.7662, "step": 827 }, { "epoch": 1.96, "learning_rate": 1.0507025045815517e-05, "loss": 0.7526, "step": 828 }, { "epoch": 1.96, "learning_rate": 1.049480757483201e-05, "loss": 0.9022, "step": 829 }, { "epoch": 1.96, "learning_rate": 1.0482590103848504e-05, "loss": 0.7836, "step": 830 }, { "epoch": 1.96, "learning_rate": 1.0470372632864997e-05, "loss": 0.8463, "step": 831 }, { "epoch": 1.97, "learning_rate": 1.045815516188149e-05, "loss": 1.0518, "step": 832 }, { "epoch": 1.97, "learning_rate": 1.0445937690897984e-05, "loss": 0.6923, "step": 833 }, { "epoch": 1.97, "learning_rate": 1.0433720219914477e-05, "loss": 1.0485, "step": 834 }, { "epoch": 1.97, "learning_rate": 1.0421502748930973e-05, "loss": 0.9231, "step": 835 }, { "epoch": 1.98, "learning_rate": 1.0409285277947466e-05, "loss": 0.8881, "step": 836 }, { "epoch": 1.98, "learning_rate": 1.039706780696396e-05, "loss": 0.9175, "step": 837 }, { "epoch": 1.98, "learning_rate": 1.0384850335980453e-05, "loss": 0.8728, "step": 838 }, { "epoch": 1.98, "learning_rate": 1.0372632864996946e-05, "loss": 0.6343, "step": 839 }, { "epoch": 1.99, "learning_rate": 1.0360415394013441e-05, "loss": 0.7374, "step": 840 }, { "epoch": 1.99, "learning_rate": 1.0348197923029935e-05, "loss": 1.014, "step": 841 }, { "epoch": 1.99, "learning_rate": 1.0335980452046428e-05, "loss": 0.7877, "step": 842 }, { "epoch": 1.99, "learning_rate": 1.0323762981062921e-05, "loss": 0.8771, "step": 843 }, { "epoch": 2.0, "learning_rate": 1.0311545510079415e-05, "loss": 0.7503, "step": 844 }, { "epoch": 2.0, "learning_rate": 1.0299328039095908e-05, "loss": 0.9533, "step": 845 } ], "logging_steps": 1.0, "max_steps": 1688, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "total_flos": 4.061471742005084e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }