{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 2772, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.4388489208633095e-07, "loss": 2.3547, "step": 1 }, { "epoch": 0.0, "learning_rate": 2.877697841726619e-07, "loss": 2.3576, "step": 2 }, { "epoch": 0.0, "learning_rate": 4.3165467625899287e-07, "loss": 2.1275, "step": 3 }, { "epoch": 0.0, "learning_rate": 5.755395683453238e-07, "loss": 2.4731, "step": 4 }, { "epoch": 0.01, "learning_rate": 7.194244604316547e-07, "loss": 2.4011, "step": 5 }, { "epoch": 0.01, "learning_rate": 8.633093525179857e-07, "loss": 2.3406, "step": 6 }, { "epoch": 0.01, "learning_rate": 1.0071942446043167e-06, "loss": 2.4751, "step": 7 }, { "epoch": 0.01, "learning_rate": 1.1510791366906476e-06, "loss": 2.145, "step": 8 }, { "epoch": 0.01, "learning_rate": 1.2949640287769785e-06, "loss": 2.7092, "step": 9 }, { "epoch": 0.01, "learning_rate": 1.4388489208633094e-06, "loss": 2.3121, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.5827338129496403e-06, "loss": 2.6237, "step": 11 }, { "epoch": 0.01, "learning_rate": 1.7266187050359715e-06, "loss": 2.3887, "step": 12 }, { "epoch": 0.01, "learning_rate": 1.8705035971223024e-06, "loss": 2.3881, "step": 13 }, { "epoch": 0.02, "learning_rate": 2.0143884892086333e-06, "loss": 2.1992, "step": 14 }, { "epoch": 0.02, "learning_rate": 2.158273381294964e-06, "loss": 2.6365, "step": 15 }, { "epoch": 0.02, "learning_rate": 2.302158273381295e-06, "loss": 2.1901, "step": 16 }, { "epoch": 0.02, "learning_rate": 2.4460431654676263e-06, "loss": 2.2422, "step": 17 }, { "epoch": 0.02, "learning_rate": 2.589928057553957e-06, "loss": 2.2557, "step": 18 }, { "epoch": 0.02, "learning_rate": 2.733812949640288e-06, "loss": 2.3875, "step": 19 }, { "epoch": 0.02, "learning_rate": 2.877697841726619e-06, "loss": 2.3422, "step": 20 }, { "epoch": 0.02, "learning_rate": 3.02158273381295e-06, "loss": 2.1384, "step": 21 }, { "epoch": 0.02, "learning_rate": 3.1654676258992807e-06, "loss": 1.8947, "step": 22 }, { "epoch": 0.02, "learning_rate": 3.309352517985612e-06, "loss": 2.0175, "step": 23 }, { "epoch": 0.03, "learning_rate": 3.453237410071943e-06, "loss": 2.0965, "step": 24 }, { "epoch": 0.03, "learning_rate": 3.5971223021582737e-06, "loss": 2.1806, "step": 25 }, { "epoch": 0.03, "learning_rate": 3.741007194244605e-06, "loss": 2.0644, "step": 26 }, { "epoch": 0.03, "learning_rate": 3.884892086330936e-06, "loss": 1.9973, "step": 27 }, { "epoch": 0.03, "learning_rate": 4.028776978417267e-06, "loss": 1.8138, "step": 28 }, { "epoch": 0.03, "learning_rate": 4.172661870503597e-06, "loss": 2.0755, "step": 29 }, { "epoch": 0.03, "learning_rate": 4.316546762589928e-06, "loss": 1.8519, "step": 30 }, { "epoch": 0.03, "learning_rate": 4.46043165467626e-06, "loss": 2.2144, "step": 31 }, { "epoch": 0.03, "learning_rate": 4.60431654676259e-06, "loss": 1.9688, "step": 32 }, { "epoch": 0.04, "learning_rate": 4.748201438848921e-06, "loss": 2.0193, "step": 33 }, { "epoch": 0.04, "learning_rate": 4.892086330935253e-06, "loss": 1.8964, "step": 34 }, { "epoch": 0.04, "learning_rate": 5.035971223021583e-06, "loss": 2.0533, "step": 35 }, { "epoch": 0.04, "learning_rate": 5.179856115107914e-06, "loss": 2.1172, "step": 36 }, { "epoch": 0.04, "learning_rate": 5.3237410071942456e-06, "loss": 2.1439, "step": 37 }, { "epoch": 0.04, "learning_rate": 5.467625899280576e-06, "loss": 1.8953, "step": 38 }, { "epoch": 0.04, "learning_rate": 5.611510791366906e-06, "loss": 2.0619, "step": 39 }, { "epoch": 0.04, "learning_rate": 5.755395683453238e-06, "loss": 2.0246, "step": 40 }, { "epoch": 0.04, "learning_rate": 5.899280575539568e-06, "loss": 1.9229, "step": 41 }, { "epoch": 0.05, "learning_rate": 6.0431654676259e-06, "loss": 1.9145, "step": 42 }, { "epoch": 0.05, "learning_rate": 6.1870503597122315e-06, "loss": 2.159, "step": 43 }, { "epoch": 0.05, "learning_rate": 6.330935251798561e-06, "loss": 1.91, "step": 44 }, { "epoch": 0.05, "learning_rate": 6.474820143884892e-06, "loss": 1.9358, "step": 45 }, { "epoch": 0.05, "learning_rate": 6.618705035971224e-06, "loss": 1.9698, "step": 46 }, { "epoch": 0.05, "learning_rate": 6.762589928057554e-06, "loss": 1.673, "step": 47 }, { "epoch": 0.05, "learning_rate": 6.906474820143886e-06, "loss": 1.9999, "step": 48 }, { "epoch": 0.05, "learning_rate": 7.050359712230216e-06, "loss": 2.0436, "step": 49 }, { "epoch": 0.05, "learning_rate": 7.194244604316547e-06, "loss": 2.0187, "step": 50 }, { "epoch": 0.06, "learning_rate": 7.338129496402878e-06, "loss": 1.8008, "step": 51 }, { "epoch": 0.06, "learning_rate": 7.48201438848921e-06, "loss": 2.1516, "step": 52 }, { "epoch": 0.06, "learning_rate": 7.62589928057554e-06, "loss": 1.8388, "step": 53 }, { "epoch": 0.06, "learning_rate": 7.769784172661872e-06, "loss": 1.9828, "step": 54 }, { "epoch": 0.06, "learning_rate": 7.913669064748202e-06, "loss": 1.7816, "step": 55 }, { "epoch": 0.06, "learning_rate": 8.057553956834533e-06, "loss": 1.8797, "step": 56 }, { "epoch": 0.06, "learning_rate": 8.201438848920865e-06, "loss": 1.8773, "step": 57 }, { "epoch": 0.06, "learning_rate": 8.345323741007195e-06, "loss": 1.8915, "step": 58 }, { "epoch": 0.06, "learning_rate": 8.489208633093526e-06, "loss": 1.8354, "step": 59 }, { "epoch": 0.06, "learning_rate": 8.633093525179856e-06, "loss": 1.9031, "step": 60 }, { "epoch": 0.07, "learning_rate": 8.776978417266188e-06, "loss": 1.8754, "step": 61 }, { "epoch": 0.07, "learning_rate": 8.92086330935252e-06, "loss": 1.772, "step": 62 }, { "epoch": 0.07, "learning_rate": 9.064748201438849e-06, "loss": 1.8173, "step": 63 }, { "epoch": 0.07, "learning_rate": 9.20863309352518e-06, "loss": 1.8111, "step": 64 }, { "epoch": 0.07, "learning_rate": 9.35251798561151e-06, "loss": 1.8438, "step": 65 }, { "epoch": 0.07, "learning_rate": 9.496402877697842e-06, "loss": 1.5675, "step": 66 }, { "epoch": 0.07, "learning_rate": 9.640287769784174e-06, "loss": 1.9983, "step": 67 }, { "epoch": 0.07, "learning_rate": 9.784172661870505e-06, "loss": 1.8764, "step": 68 }, { "epoch": 0.07, "learning_rate": 9.928057553956835e-06, "loss": 1.9815, "step": 69 }, { "epoch": 0.08, "learning_rate": 1.0071942446043167e-05, "loss": 1.9521, "step": 70 }, { "epoch": 0.08, "learning_rate": 1.0215827338129498e-05, "loss": 1.7376, "step": 71 }, { "epoch": 0.08, "learning_rate": 1.0359712230215828e-05, "loss": 1.9935, "step": 72 }, { "epoch": 0.08, "learning_rate": 1.0503597122302158e-05, "loss": 2.1314, "step": 73 }, { "epoch": 0.08, "learning_rate": 1.0647482014388491e-05, "loss": 1.8567, "step": 74 }, { "epoch": 0.08, "learning_rate": 1.0791366906474821e-05, "loss": 2.1048, "step": 75 }, { "epoch": 0.08, "learning_rate": 1.0935251798561153e-05, "loss": 2.0353, "step": 76 }, { "epoch": 0.08, "learning_rate": 1.1079136690647482e-05, "loss": 1.8824, "step": 77 }, { "epoch": 0.08, "learning_rate": 1.1223021582733812e-05, "loss": 2.0055, "step": 78 }, { "epoch": 0.09, "learning_rate": 1.1366906474820146e-05, "loss": 1.8171, "step": 79 }, { "epoch": 0.09, "learning_rate": 1.1510791366906475e-05, "loss": 1.9234, "step": 80 }, { "epoch": 0.09, "learning_rate": 1.1654676258992807e-05, "loss": 1.8208, "step": 81 }, { "epoch": 0.09, "learning_rate": 1.1798561151079137e-05, "loss": 1.7002, "step": 82 }, { "epoch": 0.09, "learning_rate": 1.1942446043165468e-05, "loss": 1.907, "step": 83 }, { "epoch": 0.09, "learning_rate": 1.20863309352518e-05, "loss": 1.9767, "step": 84 }, { "epoch": 0.09, "learning_rate": 1.223021582733813e-05, "loss": 1.7917, "step": 85 }, { "epoch": 0.09, "learning_rate": 1.2374100719424463e-05, "loss": 1.6337, "step": 86 }, { "epoch": 0.09, "learning_rate": 1.2517985611510793e-05, "loss": 2.0116, "step": 87 }, { "epoch": 0.1, "learning_rate": 1.2661870503597123e-05, "loss": 2.0588, "step": 88 }, { "epoch": 0.1, "learning_rate": 1.2805755395683454e-05, "loss": 1.5452, "step": 89 }, { "epoch": 0.1, "learning_rate": 1.2949640287769784e-05, "loss": 1.8182, "step": 90 }, { "epoch": 0.1, "learning_rate": 1.3093525179856117e-05, "loss": 2.0173, "step": 91 }, { "epoch": 0.1, "learning_rate": 1.3237410071942447e-05, "loss": 1.5917, "step": 92 }, { "epoch": 0.1, "learning_rate": 1.3381294964028777e-05, "loss": 1.845, "step": 93 }, { "epoch": 0.1, "learning_rate": 1.3525179856115109e-05, "loss": 1.9642, "step": 94 }, { "epoch": 0.1, "learning_rate": 1.3669064748201439e-05, "loss": 2.0804, "step": 95 }, { "epoch": 0.1, "learning_rate": 1.3812949640287772e-05, "loss": 1.8469, "step": 96 }, { "epoch": 0.1, "learning_rate": 1.3956834532374102e-05, "loss": 2.0403, "step": 97 }, { "epoch": 0.11, "learning_rate": 1.4100719424460432e-05, "loss": 2.1303, "step": 98 }, { "epoch": 0.11, "learning_rate": 1.4244604316546765e-05, "loss": 2.1525, "step": 99 }, { "epoch": 0.11, "learning_rate": 1.4388489208633095e-05, "loss": 1.6616, "step": 100 }, { "epoch": 0.11, "learning_rate": 1.4532374100719426e-05, "loss": 2.0293, "step": 101 }, { "epoch": 0.11, "learning_rate": 1.4676258992805756e-05, "loss": 2.0534, "step": 102 }, { "epoch": 0.11, "learning_rate": 1.4820143884892086e-05, "loss": 2.0292, "step": 103 }, { "epoch": 0.11, "learning_rate": 1.496402877697842e-05, "loss": 1.8536, "step": 104 }, { "epoch": 0.11, "learning_rate": 1.5107913669064749e-05, "loss": 1.8581, "step": 105 }, { "epoch": 0.11, "learning_rate": 1.525179856115108e-05, "loss": 1.6334, "step": 106 }, { "epoch": 0.12, "learning_rate": 1.5395683453237412e-05, "loss": 1.8721, "step": 107 }, { "epoch": 0.12, "learning_rate": 1.5539568345323744e-05, "loss": 1.8919, "step": 108 }, { "epoch": 0.12, "learning_rate": 1.5683453237410072e-05, "loss": 1.7582, "step": 109 }, { "epoch": 0.12, "learning_rate": 1.5827338129496403e-05, "loss": 1.7654, "step": 110 }, { "epoch": 0.12, "learning_rate": 1.5971223021582735e-05, "loss": 1.9529, "step": 111 }, { "epoch": 0.12, "learning_rate": 1.6115107913669067e-05, "loss": 2.0011, "step": 112 }, { "epoch": 0.12, "learning_rate": 1.6258992805755398e-05, "loss": 1.7407, "step": 113 }, { "epoch": 0.12, "learning_rate": 1.640287769784173e-05, "loss": 2.0088, "step": 114 }, { "epoch": 0.12, "learning_rate": 1.6546762589928058e-05, "loss": 2.0356, "step": 115 }, { "epoch": 0.13, "learning_rate": 1.669064748201439e-05, "loss": 1.9327, "step": 116 }, { "epoch": 0.13, "learning_rate": 1.683453237410072e-05, "loss": 2.0276, "step": 117 }, { "epoch": 0.13, "learning_rate": 1.6978417266187053e-05, "loss": 1.7289, "step": 118 }, { "epoch": 0.13, "learning_rate": 1.7122302158273384e-05, "loss": 2.1943, "step": 119 }, { "epoch": 0.13, "learning_rate": 1.7266187050359712e-05, "loss": 1.8819, "step": 120 }, { "epoch": 0.13, "learning_rate": 1.7410071942446044e-05, "loss": 2.142, "step": 121 }, { "epoch": 0.13, "learning_rate": 1.7553956834532375e-05, "loss": 1.9633, "step": 122 }, { "epoch": 0.13, "learning_rate": 1.7697841726618707e-05, "loss": 1.9181, "step": 123 }, { "epoch": 0.13, "learning_rate": 1.784172661870504e-05, "loss": 1.9825, "step": 124 }, { "epoch": 0.14, "learning_rate": 1.7985611510791367e-05, "loss": 1.9971, "step": 125 }, { "epoch": 0.14, "learning_rate": 1.8129496402877698e-05, "loss": 1.9008, "step": 126 }, { "epoch": 0.14, "learning_rate": 1.827338129496403e-05, "loss": 2.1904, "step": 127 }, { "epoch": 0.14, "learning_rate": 1.841726618705036e-05, "loss": 2.0583, "step": 128 }, { "epoch": 0.14, "learning_rate": 1.8561151079136693e-05, "loss": 2.1272, "step": 129 }, { "epoch": 0.14, "learning_rate": 1.870503597122302e-05, "loss": 2.0181, "step": 130 }, { "epoch": 0.14, "learning_rate": 1.8848920863309356e-05, "loss": 1.873, "step": 131 }, { "epoch": 0.14, "learning_rate": 1.8992805755395684e-05, "loss": 1.8865, "step": 132 }, { "epoch": 0.14, "learning_rate": 1.9136690647482016e-05, "loss": 1.7558, "step": 133 }, { "epoch": 0.15, "learning_rate": 1.9280575539568347e-05, "loss": 1.9659, "step": 134 }, { "epoch": 0.15, "learning_rate": 1.9424460431654675e-05, "loss": 1.8793, "step": 135 }, { "epoch": 0.15, "learning_rate": 1.956834532374101e-05, "loss": 1.7461, "step": 136 }, { "epoch": 0.15, "learning_rate": 1.971223021582734e-05, "loss": 1.9957, "step": 137 }, { "epoch": 0.15, "learning_rate": 1.985611510791367e-05, "loss": 1.8534, "step": 138 }, { "epoch": 0.15, "learning_rate": 2e-05, "loss": 1.8629, "step": 139 }, { "epoch": 0.15, "learning_rate": 1.9999992881836162e-05, "loss": 1.8998, "step": 140 }, { "epoch": 0.15, "learning_rate": 1.9999971527354777e-05, "loss": 1.869, "step": 141 }, { "epoch": 0.15, "learning_rate": 1.9999935936586245e-05, "loss": 1.951, "step": 142 }, { "epoch": 0.15, "learning_rate": 1.999988610958124e-05, "loss": 1.9328, "step": 143 }, { "epoch": 0.16, "learning_rate": 1.999982204641069e-05, "loss": 1.6579, "step": 144 }, { "epoch": 0.16, "learning_rate": 1.9999743747165806e-05, "loss": 1.817, "step": 145 }, { "epoch": 0.16, "learning_rate": 1.9999651211958052e-05, "loss": 1.6378, "step": 146 }, { "epoch": 0.16, "learning_rate": 1.9999544440919166e-05, "loss": 1.863, "step": 147 }, { "epoch": 0.16, "learning_rate": 1.9999423434201147e-05, "loss": 1.8736, "step": 148 }, { "epoch": 0.16, "learning_rate": 1.9999288191976264e-05, "loss": 1.9776, "step": 149 }, { "epoch": 0.16, "learning_rate": 1.999913871443706e-05, "loss": 1.9897, "step": 150 }, { "epoch": 0.16, "learning_rate": 1.9998975001796328e-05, "loss": 1.8387, "step": 151 }, { "epoch": 0.16, "learning_rate": 1.999879705428714e-05, "loss": 1.6932, "step": 152 }, { "epoch": 0.17, "learning_rate": 1.9998604872162825e-05, "loss": 2.2017, "step": 153 }, { "epoch": 0.17, "learning_rate": 1.9998398455696983e-05, "loss": 1.8806, "step": 154 }, { "epoch": 0.17, "learning_rate": 1.9998177805183467e-05, "loss": 1.7912, "step": 155 }, { "epoch": 0.17, "learning_rate": 1.999794292093641e-05, "loss": 1.6728, "step": 156 }, { "epoch": 0.17, "learning_rate": 1.99976938032902e-05, "loss": 2.026, "step": 157 }, { "epoch": 0.17, "learning_rate": 1.999743045259949e-05, "loss": 2.1211, "step": 158 }, { "epoch": 0.17, "learning_rate": 1.999715286923919e-05, "loss": 1.8448, "step": 159 }, { "epoch": 0.17, "learning_rate": 1.999686105360448e-05, "loss": 2.0976, "step": 160 }, { "epoch": 0.17, "learning_rate": 1.9996555006110802e-05, "loss": 1.9568, "step": 161 }, { "epoch": 0.18, "learning_rate": 1.9996234727193845e-05, "loss": 1.7825, "step": 162 }, { "epoch": 0.18, "learning_rate": 1.9995900217309575e-05, "loss": 1.7175, "step": 163 }, { "epoch": 0.18, "learning_rate": 1.999555147693421e-05, "loss": 1.6861, "step": 164 }, { "epoch": 0.18, "learning_rate": 1.9995188506564234e-05, "loss": 2.016, "step": 165 }, { "epoch": 0.18, "learning_rate": 1.9994811306716374e-05, "loss": 1.9465, "step": 166 }, { "epoch": 0.18, "learning_rate": 1.9994419877927632e-05, "loss": 2.0717, "step": 167 }, { "epoch": 0.18, "learning_rate": 1.9994014220755255e-05, "loss": 2.1168, "step": 168 }, { "epoch": 0.18, "learning_rate": 1.999359433577675e-05, "loss": 1.8352, "step": 169 }, { "epoch": 0.18, "learning_rate": 1.9993160223589875e-05, "loss": 1.8891, "step": 170 }, { "epoch": 0.19, "learning_rate": 1.9992711884812656e-05, "loss": 1.6747, "step": 171 }, { "epoch": 0.19, "learning_rate": 1.9992249320083355e-05, "loss": 2.0694, "step": 172 }, { "epoch": 0.19, "learning_rate": 1.9991772530060497e-05, "loss": 2.0438, "step": 173 }, { "epoch": 0.19, "learning_rate": 1.999128151542286e-05, "loss": 1.9977, "step": 174 }, { "epoch": 0.19, "learning_rate": 1.9990776276869456e-05, "loss": 1.6287, "step": 175 }, { "epoch": 0.19, "learning_rate": 1.9990256815119572e-05, "loss": 1.8438, "step": 176 }, { "epoch": 0.19, "learning_rate": 1.9989723130912722e-05, "loss": 1.7703, "step": 177 }, { "epoch": 0.19, "learning_rate": 1.9989175225008685e-05, "loss": 1.8473, "step": 178 }, { "epoch": 0.19, "learning_rate": 1.9988613098187476e-05, "loss": 1.8733, "step": 179 }, { "epoch": 0.19, "learning_rate": 1.9988036751249348e-05, "loss": 1.8972, "step": 180 }, { "epoch": 0.2, "learning_rate": 1.9987446185014815e-05, "loss": 2.0464, "step": 181 }, { "epoch": 0.2, "learning_rate": 1.9986841400324625e-05, "loss": 1.7597, "step": 182 }, { "epoch": 0.2, "learning_rate": 1.998622239803977e-05, "loss": 1.8031, "step": 183 }, { "epoch": 0.2, "learning_rate": 1.9985589179041485e-05, "loss": 2.103, "step": 184 }, { "epoch": 0.2, "learning_rate": 1.9984941744231233e-05, "loss": 1.9884, "step": 185 }, { "epoch": 0.2, "learning_rate": 1.998428009453073e-05, "loss": 1.8569, "step": 186 }, { "epoch": 0.2, "learning_rate": 1.998360423088192e-05, "loss": 2.0007, "step": 187 }, { "epoch": 0.2, "learning_rate": 1.9982914154246986e-05, "loss": 1.7048, "step": 188 }, { "epoch": 0.2, "learning_rate": 1.998220986560834e-05, "loss": 2.0295, "step": 189 }, { "epoch": 0.21, "learning_rate": 1.9981491365968635e-05, "loss": 1.6678, "step": 190 }, { "epoch": 0.21, "learning_rate": 1.998075865635075e-05, "loss": 1.801, "step": 191 }, { "epoch": 0.21, "learning_rate": 1.998001173779779e-05, "loss": 1.7313, "step": 192 }, { "epoch": 0.21, "learning_rate": 1.99792506113731e-05, "loss": 1.8665, "step": 193 }, { "epoch": 0.21, "learning_rate": 1.9978475278160236e-05, "loss": 1.8131, "step": 194 }, { "epoch": 0.21, "learning_rate": 1.9977685739262996e-05, "loss": 1.9217, "step": 195 }, { "epoch": 0.21, "learning_rate": 1.9976881995805387e-05, "loss": 1.9435, "step": 196 }, { "epoch": 0.21, "learning_rate": 1.9976064048931648e-05, "loss": 2.0235, "step": 197 }, { "epoch": 0.21, "learning_rate": 1.9975231899806233e-05, "loss": 1.8873, "step": 198 }, { "epoch": 0.22, "learning_rate": 1.997438554961382e-05, "loss": 2.0316, "step": 199 }, { "epoch": 0.22, "learning_rate": 1.9973524999559295e-05, "loss": 1.7718, "step": 200 }, { "epoch": 0.22, "learning_rate": 1.9972650250867773e-05, "loss": 2.0169, "step": 201 }, { "epoch": 0.22, "learning_rate": 1.997176130478457e-05, "loss": 1.9306, "step": 202 }, { "epoch": 0.22, "learning_rate": 1.997085816257522e-05, "loss": 1.9852, "step": 203 }, { "epoch": 0.22, "learning_rate": 1.9969940825525462e-05, "loss": 1.8372, "step": 204 }, { "epoch": 0.22, "learning_rate": 1.9969009294941252e-05, "loss": 1.8715, "step": 205 }, { "epoch": 0.22, "learning_rate": 1.9968063572148748e-05, "loss": 1.863, "step": 206 }, { "epoch": 0.22, "learning_rate": 1.9967103658494308e-05, "loss": 1.8807, "step": 207 }, { "epoch": 0.23, "learning_rate": 1.99661295553445e-05, "loss": 1.9153, "step": 208 }, { "epoch": 0.23, "learning_rate": 1.9965141264086084e-05, "loss": 2.0213, "step": 209 }, { "epoch": 0.23, "learning_rate": 1.996413878612603e-05, "loss": 1.707, "step": 210 }, { "epoch": 0.23, "learning_rate": 1.99631221228915e-05, "loss": 1.8745, "step": 211 }, { "epoch": 0.23, "learning_rate": 1.9962091275829844e-05, "loss": 1.7234, "step": 212 }, { "epoch": 0.23, "learning_rate": 1.9961046246408603e-05, "loss": 1.8484, "step": 213 }, { "epoch": 0.23, "learning_rate": 1.995998703611553e-05, "loss": 1.7754, "step": 214 }, { "epoch": 0.23, "learning_rate": 1.9958913646458546e-05, "loss": 2.0296, "step": 215 }, { "epoch": 0.23, "learning_rate": 1.9957826078965756e-05, "loss": 1.5189, "step": 216 }, { "epoch": 0.23, "learning_rate": 1.9956724335185466e-05, "loss": 1.4644, "step": 217 }, { "epoch": 0.24, "learning_rate": 1.9955608416686147e-05, "loss": 1.9898, "step": 218 }, { "epoch": 0.24, "learning_rate": 1.995447832505647e-05, "loss": 2.1078, "step": 219 }, { "epoch": 0.24, "learning_rate": 1.9953334061905254e-05, "loss": 1.7983, "step": 220 }, { "epoch": 0.24, "learning_rate": 1.995217562886152e-05, "loss": 1.7168, "step": 221 }, { "epoch": 0.24, "learning_rate": 1.995100302757445e-05, "loss": 1.8447, "step": 222 }, { "epoch": 0.24, "learning_rate": 1.9949816259713394e-05, "loss": 1.9374, "step": 223 }, { "epoch": 0.24, "learning_rate": 1.994861532696788e-05, "loss": 2.0859, "step": 224 }, { "epoch": 0.24, "learning_rate": 1.9947400231047584e-05, "loss": 1.8303, "step": 225 }, { "epoch": 0.24, "learning_rate": 1.9946170973682367e-05, "loss": 1.5848, "step": 226 }, { "epoch": 0.25, "learning_rate": 1.9944927556622236e-05, "loss": 1.8136, "step": 227 }, { "epoch": 0.25, "learning_rate": 1.9943669981637367e-05, "loss": 1.7803, "step": 228 }, { "epoch": 0.25, "learning_rate": 1.994239825051807e-05, "loss": 1.9902, "step": 229 }, { "epoch": 0.25, "learning_rate": 1.9941112365074836e-05, "loss": 1.7615, "step": 230 }, { "epoch": 0.25, "learning_rate": 1.993981232713829e-05, "loss": 2.0408, "step": 231 }, { "epoch": 0.25, "learning_rate": 1.9938498138559205e-05, "loss": 1.6415, "step": 232 }, { "epoch": 0.25, "learning_rate": 1.993716980120851e-05, "loss": 1.8852, "step": 233 }, { "epoch": 0.25, "learning_rate": 1.9935827316977266e-05, "loss": 1.9254, "step": 234 }, { "epoch": 0.25, "learning_rate": 1.9934470687776674e-05, "loss": 1.9132, "step": 235 }, { "epoch": 0.26, "learning_rate": 1.993309991553808e-05, "loss": 1.8581, "step": 236 }, { "epoch": 0.26, "learning_rate": 1.993171500221296e-05, "loss": 1.8959, "step": 237 }, { "epoch": 0.26, "learning_rate": 1.993031594977292e-05, "loss": 1.8841, "step": 238 }, { "epoch": 0.26, "learning_rate": 1.99289027602097e-05, "loss": 1.8922, "step": 239 }, { "epoch": 0.26, "learning_rate": 1.9927475435535156e-05, "loss": 1.66, "step": 240 }, { "epoch": 0.26, "learning_rate": 1.9926033977781284e-05, "loss": 2.0959, "step": 241 }, { "epoch": 0.26, "learning_rate": 1.9924578389000185e-05, "loss": 1.8981, "step": 242 }, { "epoch": 0.26, "learning_rate": 1.992310867126408e-05, "loss": 2.079, "step": 243 }, { "epoch": 0.26, "learning_rate": 1.9921624826665316e-05, "loss": 1.896, "step": 244 }, { "epoch": 0.27, "learning_rate": 1.9920126857316334e-05, "loss": 2.0524, "step": 245 }, { "epoch": 0.27, "learning_rate": 1.99186147653497e-05, "loss": 1.7147, "step": 246 }, { "epoch": 0.27, "learning_rate": 1.991708855291807e-05, "loss": 1.8989, "step": 247 }, { "epoch": 0.27, "learning_rate": 1.9915548222194215e-05, "loss": 1.6191, "step": 248 }, { "epoch": 0.27, "learning_rate": 1.9913993775371e-05, "loss": 1.8722, "step": 249 }, { "epoch": 0.27, "learning_rate": 1.9912425214661386e-05, "loss": 1.8121, "step": 250 }, { "epoch": 0.27, "learning_rate": 1.9910842542298425e-05, "loss": 1.8755, "step": 251 }, { "epoch": 0.27, "learning_rate": 1.9909245760535263e-05, "loss": 2.0044, "step": 252 }, { "epoch": 0.27, "learning_rate": 1.990763487164513e-05, "loss": 1.7097, "step": 253 }, { "epoch": 0.27, "learning_rate": 1.9906009877921345e-05, "loss": 2.0125, "step": 254 }, { "epoch": 0.28, "learning_rate": 1.9904370781677294e-05, "loss": 1.9329, "step": 255 }, { "epoch": 0.28, "learning_rate": 1.9902717585246456e-05, "loss": 2.0188, "step": 256 }, { "epoch": 0.28, "learning_rate": 1.990105029098237e-05, "loss": 1.8302, "step": 257 }, { "epoch": 0.28, "learning_rate": 1.9899368901258652e-05, "loss": 1.9421, "step": 258 }, { "epoch": 0.28, "learning_rate": 1.989767341846899e-05, "loss": 1.952, "step": 259 }, { "epoch": 0.28, "learning_rate": 1.989596384502712e-05, "loss": 1.7265, "step": 260 }, { "epoch": 0.28, "learning_rate": 1.9894240183366847e-05, "loss": 2.014, "step": 261 }, { "epoch": 0.28, "learning_rate": 1.989250243594204e-05, "loss": 1.7925, "step": 262 }, { "epoch": 0.28, "learning_rate": 1.9890750605226606e-05, "loss": 1.9085, "step": 263 }, { "epoch": 0.29, "learning_rate": 1.988898469371451e-05, "loss": 1.7916, "step": 264 }, { "epoch": 0.29, "learning_rate": 1.9887204703919764e-05, "loss": 1.5943, "step": 265 }, { "epoch": 0.29, "learning_rate": 1.9885410638376418e-05, "loss": 2.0056, "step": 266 }, { "epoch": 0.29, "learning_rate": 1.9883602499638565e-05, "loss": 1.7821, "step": 267 }, { "epoch": 0.29, "learning_rate": 1.9881780290280327e-05, "loss": 2.0791, "step": 268 }, { "epoch": 0.29, "learning_rate": 1.9879944012895863e-05, "loss": 1.8741, "step": 269 }, { "epoch": 0.29, "learning_rate": 1.9878093670099355e-05, "loss": 1.9063, "step": 270 }, { "epoch": 0.29, "learning_rate": 1.9876229264525012e-05, "loss": 1.9244, "step": 271 }, { "epoch": 0.29, "learning_rate": 1.987435079882707e-05, "loss": 1.9378, "step": 272 }, { "epoch": 0.3, "learning_rate": 1.9872458275679766e-05, "loss": 1.8986, "step": 273 }, { "epoch": 0.3, "learning_rate": 1.987055169777736e-05, "loss": 1.8158, "step": 274 }, { "epoch": 0.3, "learning_rate": 1.986863106783412e-05, "loss": 1.4929, "step": 275 }, { "epoch": 0.3, "learning_rate": 1.9866696388584316e-05, "loss": 1.8164, "step": 276 }, { "epoch": 0.3, "learning_rate": 1.9864747662782226e-05, "loss": 1.6363, "step": 277 }, { "epoch": 0.3, "learning_rate": 1.9862784893202113e-05, "loss": 1.7578, "step": 278 }, { "epoch": 0.3, "learning_rate": 1.9860808082638245e-05, "loss": 1.8658, "step": 279 }, { "epoch": 0.3, "learning_rate": 1.9858817233904872e-05, "loss": 1.6387, "step": 280 }, { "epoch": 0.3, "learning_rate": 1.985681234983623e-05, "loss": 1.7362, "step": 281 }, { "epoch": 0.31, "learning_rate": 1.9854793433286544e-05, "loss": 1.8246, "step": 282 }, { "epoch": 0.31, "learning_rate": 1.9852760487129998e-05, "loss": 1.7408, "step": 283 }, { "epoch": 0.31, "learning_rate": 1.9850713514260772e-05, "loss": 1.8302, "step": 284 }, { "epoch": 0.31, "learning_rate": 1.9848652517593005e-05, "loss": 1.7418, "step": 285 }, { "epoch": 0.31, "learning_rate": 1.9846577500060786e-05, "loss": 1.802, "step": 286 }, { "epoch": 0.31, "learning_rate": 1.9844488464618192e-05, "loss": 1.6638, "step": 287 }, { "epoch": 0.31, "learning_rate": 1.984238541423923e-05, "loss": 1.9796, "step": 288 }, { "epoch": 0.31, "learning_rate": 1.9840268351917883e-05, "loss": 2.012, "step": 289 }, { "epoch": 0.31, "learning_rate": 1.983813728066806e-05, "loss": 1.7997, "step": 290 }, { "epoch": 0.31, "learning_rate": 1.9835992203523636e-05, "loss": 1.7265, "step": 291 }, { "epoch": 0.32, "learning_rate": 1.98338331235384e-05, "loss": 1.9192, "step": 292 }, { "epoch": 0.32, "learning_rate": 1.9831660043786097e-05, "loss": 1.72, "step": 293 }, { "epoch": 0.32, "learning_rate": 1.982947296736039e-05, "loss": 1.984, "step": 294 }, { "epoch": 0.32, "learning_rate": 1.9827271897374883e-05, "loss": 1.6382, "step": 295 }, { "epoch": 0.32, "learning_rate": 1.9825056836963078e-05, "loss": 1.8195, "step": 296 }, { "epoch": 0.32, "learning_rate": 1.9822827789278412e-05, "loss": 1.9058, "step": 297 }, { "epoch": 0.32, "learning_rate": 1.9820584757494234e-05, "loss": 1.8352, "step": 298 }, { "epoch": 0.32, "learning_rate": 1.9818327744803796e-05, "loss": 1.6767, "step": 299 }, { "epoch": 0.32, "learning_rate": 1.9816056754420253e-05, "loss": 1.7036, "step": 300 }, { "epoch": 0.33, "learning_rate": 1.9813771789576663e-05, "loss": 1.9956, "step": 301 }, { "epoch": 0.33, "learning_rate": 1.9811472853525978e-05, "loss": 2.1392, "step": 302 }, { "epoch": 0.33, "learning_rate": 1.980915994954103e-05, "loss": 1.9096, "step": 303 }, { "epoch": 0.33, "learning_rate": 1.980683308091456e-05, "loss": 1.7885, "step": 304 }, { "epoch": 0.33, "learning_rate": 1.9804492250959164e-05, "loss": 1.8036, "step": 305 }, { "epoch": 0.33, "learning_rate": 1.9802137463007327e-05, "loss": 1.7883, "step": 306 }, { "epoch": 0.33, "learning_rate": 1.97997687204114e-05, "loss": 1.9368, "step": 307 }, { "epoch": 0.33, "learning_rate": 1.9797386026543607e-05, "loss": 1.8897, "step": 308 }, { "epoch": 0.33, "learning_rate": 1.9794989384796025e-05, "loss": 1.8347, "step": 309 }, { "epoch": 0.34, "learning_rate": 1.9792578798580597e-05, "loss": 1.9128, "step": 310 }, { "epoch": 0.34, "learning_rate": 1.9790154271329105e-05, "loss": 1.9121, "step": 311 }, { "epoch": 0.34, "learning_rate": 1.978771580649319e-05, "loss": 1.9557, "step": 312 }, { "epoch": 0.34, "learning_rate": 1.978526340754433e-05, "loss": 1.9909, "step": 313 }, { "epoch": 0.34, "learning_rate": 1.978279707797384e-05, "loss": 1.7941, "step": 314 }, { "epoch": 0.34, "learning_rate": 1.978031682129287e-05, "loss": 1.8897, "step": 315 }, { "epoch": 0.34, "learning_rate": 1.977782264103239e-05, "loss": 1.9731, "step": 316 }, { "epoch": 0.34, "learning_rate": 1.97753145407432e-05, "loss": 2.0019, "step": 317 }, { "epoch": 0.34, "learning_rate": 1.9772792523995912e-05, "loss": 2.01, "step": 318 }, { "epoch": 0.35, "learning_rate": 1.9770256594380955e-05, "loss": 1.9104, "step": 319 }, { "epoch": 0.35, "learning_rate": 1.976770675550856e-05, "loss": 1.9926, "step": 320 }, { "epoch": 0.35, "learning_rate": 1.9765143011008758e-05, "loss": 2.1075, "step": 321 }, { "epoch": 0.35, "learning_rate": 1.9762565364531383e-05, "loss": 1.916, "step": 322 }, { "epoch": 0.35, "learning_rate": 1.9759973819746055e-05, "loss": 1.7118, "step": 323 }, { "epoch": 0.35, "learning_rate": 1.9757368380342185e-05, "loss": 2.1362, "step": 324 }, { "epoch": 0.35, "learning_rate": 1.975474905002896e-05, "loss": 1.8019, "step": 325 }, { "epoch": 0.35, "learning_rate": 1.9752115832535343e-05, "loss": 1.8218, "step": 326 }, { "epoch": 0.35, "learning_rate": 1.9749468731610072e-05, "loss": 1.7612, "step": 327 }, { "epoch": 0.35, "learning_rate": 1.9746807751021645e-05, "loss": 1.9373, "step": 328 }, { "epoch": 0.36, "learning_rate": 1.974413289455832e-05, "loss": 1.9847, "step": 329 }, { "epoch": 0.36, "learning_rate": 1.974144416602811e-05, "loss": 1.7258, "step": 330 }, { "epoch": 0.36, "learning_rate": 1.9738741569258782e-05, "loss": 1.7487, "step": 331 }, { "epoch": 0.36, "learning_rate": 1.9736025108097834e-05, "loss": 1.9034, "step": 332 }, { "epoch": 0.36, "learning_rate": 1.9733294786412513e-05, "loss": 1.7912, "step": 333 }, { "epoch": 0.36, "learning_rate": 1.9730550608089795e-05, "loss": 1.8738, "step": 334 }, { "epoch": 0.36, "learning_rate": 1.9727792577036383e-05, "loss": 1.8648, "step": 335 }, { "epoch": 0.36, "learning_rate": 1.9725020697178697e-05, "loss": 1.9759, "step": 336 }, { "epoch": 0.36, "learning_rate": 1.972223497246288e-05, "loss": 1.9795, "step": 337 }, { "epoch": 0.37, "learning_rate": 1.9719435406854775e-05, "loss": 1.9294, "step": 338 }, { "epoch": 0.37, "learning_rate": 1.9716622004339943e-05, "loss": 2.0429, "step": 339 }, { "epoch": 0.37, "learning_rate": 1.971379476892363e-05, "loss": 2.1394, "step": 340 }, { "epoch": 0.37, "learning_rate": 1.9710953704630784e-05, "loss": 1.9411, "step": 341 }, { "epoch": 0.37, "learning_rate": 1.9708098815506035e-05, "loss": 1.8354, "step": 342 }, { "epoch": 0.37, "learning_rate": 1.97052301056137e-05, "loss": 1.9368, "step": 343 }, { "epoch": 0.37, "learning_rate": 1.9702347579037765e-05, "loss": 1.6728, "step": 344 }, { "epoch": 0.37, "learning_rate": 1.969945123988189e-05, "loss": 1.789, "step": 345 }, { "epoch": 0.37, "learning_rate": 1.9696541092269402e-05, "loss": 1.7972, "step": 346 }, { "epoch": 0.38, "learning_rate": 1.9693617140343277e-05, "loss": 2.2036, "step": 347 }, { "epoch": 0.38, "learning_rate": 1.969067938826615e-05, "loss": 1.796, "step": 348 }, { "epoch": 0.38, "learning_rate": 1.9687727840220304e-05, "loss": 1.8355, "step": 349 }, { "epoch": 0.38, "learning_rate": 1.9684762500407662e-05, "loss": 1.8797, "step": 350 }, { "epoch": 0.38, "learning_rate": 1.9681783373049768e-05, "loss": 1.8698, "step": 351 }, { "epoch": 0.38, "learning_rate": 1.9678790462387816e-05, "loss": 1.9311, "step": 352 }, { "epoch": 0.38, "learning_rate": 1.967578377268261e-05, "loss": 1.6739, "step": 353 }, { "epoch": 0.38, "learning_rate": 1.9672763308214566e-05, "loss": 1.9781, "step": 354 }, { "epoch": 0.38, "learning_rate": 1.9669729073283724e-05, "loss": 1.9173, "step": 355 }, { "epoch": 0.39, "learning_rate": 1.9666681072209712e-05, "loss": 1.7903, "step": 356 }, { "epoch": 0.39, "learning_rate": 1.966361930933177e-05, "loss": 1.6572, "step": 357 }, { "epoch": 0.39, "learning_rate": 1.9660543789008724e-05, "loss": 1.893, "step": 358 }, { "epoch": 0.39, "learning_rate": 1.9657454515618984e-05, "loss": 1.6262, "step": 359 }, { "epoch": 0.39, "learning_rate": 1.965435149356054e-05, "loss": 1.8287, "step": 360 }, { "epoch": 0.39, "learning_rate": 1.9651234727250958e-05, "loss": 1.924, "step": 361 }, { "epoch": 0.39, "learning_rate": 1.9648104221127363e-05, "loss": 1.9815, "step": 362 }, { "epoch": 0.39, "learning_rate": 1.9644959979646455e-05, "loss": 1.9731, "step": 363 }, { "epoch": 0.39, "learning_rate": 1.964180200728447e-05, "loss": 1.613, "step": 364 }, { "epoch": 0.4, "learning_rate": 1.9638630308537213e-05, "loss": 1.6979, "step": 365 }, { "epoch": 0.4, "learning_rate": 1.9635444887920006e-05, "loss": 1.7188, "step": 366 }, { "epoch": 0.4, "learning_rate": 1.9632245749967723e-05, "loss": 1.9756, "step": 367 }, { "epoch": 0.4, "learning_rate": 1.9629032899234763e-05, "loss": 2.0186, "step": 368 }, { "epoch": 0.4, "learning_rate": 1.9625806340295047e-05, "loss": 1.952, "step": 369 }, { "epoch": 0.4, "learning_rate": 1.9622566077742e-05, "loss": 1.6013, "step": 370 }, { "epoch": 0.4, "learning_rate": 1.961931211618858e-05, "loss": 1.8473, "step": 371 }, { "epoch": 0.4, "learning_rate": 1.9616044460267224e-05, "loss": 1.9991, "step": 372 }, { "epoch": 0.4, "learning_rate": 1.9612763114629875e-05, "loss": 1.6916, "step": 373 }, { "epoch": 0.4, "learning_rate": 1.960946808394797e-05, "loss": 2.0116, "step": 374 }, { "epoch": 0.41, "learning_rate": 1.9606159372912415e-05, "loss": 2.0294, "step": 375 }, { "epoch": 0.41, "learning_rate": 1.9602836986233603e-05, "loss": 1.8217, "step": 376 }, { "epoch": 0.41, "learning_rate": 1.959950092864139e-05, "loss": 1.8566, "step": 377 }, { "epoch": 0.41, "learning_rate": 1.9596151204885103e-05, "loss": 1.8843, "step": 378 }, { "epoch": 0.41, "learning_rate": 1.9592787819733513e-05, "loss": 1.9598, "step": 379 }, { "epoch": 0.41, "learning_rate": 1.9589410777974852e-05, "loss": 1.9737, "step": 380 }, { "epoch": 0.41, "learning_rate": 1.9586020084416777e-05, "loss": 1.9526, "step": 381 }, { "epoch": 0.41, "learning_rate": 1.9582615743886397e-05, "loss": 1.9623, "step": 382 }, { "epoch": 0.41, "learning_rate": 1.9579197761230243e-05, "loss": 1.9201, "step": 383 }, { "epoch": 0.42, "learning_rate": 1.9575766141314264e-05, "loss": 1.8925, "step": 384 }, { "epoch": 0.42, "learning_rate": 1.957232088902383e-05, "loss": 2.0269, "step": 385 }, { "epoch": 0.42, "learning_rate": 1.9568862009263712e-05, "loss": 1.9761, "step": 386 }, { "epoch": 0.42, "learning_rate": 1.9565389506958086e-05, "loss": 1.7009, "step": 387 }, { "epoch": 0.42, "learning_rate": 1.956190338705052e-05, "loss": 1.8927, "step": 388 }, { "epoch": 0.42, "learning_rate": 1.955840365450397e-05, "loss": 1.7908, "step": 389 }, { "epoch": 0.42, "learning_rate": 1.9554890314300768e-05, "loss": 1.6918, "step": 390 }, { "epoch": 0.42, "learning_rate": 1.955136337144262e-05, "loss": 2.0751, "step": 391 }, { "epoch": 0.42, "learning_rate": 1.9547822830950597e-05, "loss": 1.9847, "step": 392 }, { "epoch": 0.43, "learning_rate": 1.954426869786513e-05, "loss": 1.8185, "step": 393 }, { "epoch": 0.43, "learning_rate": 1.9540700977246e-05, "loss": 1.8964, "step": 394 }, { "epoch": 0.43, "learning_rate": 1.9537119674172332e-05, "loss": 1.7804, "step": 395 }, { "epoch": 0.43, "learning_rate": 1.953352479374258e-05, "loss": 2.0158, "step": 396 }, { "epoch": 0.43, "learning_rate": 1.9529916341074538e-05, "loss": 1.7388, "step": 397 }, { "epoch": 0.43, "learning_rate": 1.9526294321305323e-05, "loss": 1.8575, "step": 398 }, { "epoch": 0.43, "learning_rate": 1.9522658739591348e-05, "loss": 1.8096, "step": 399 }, { "epoch": 0.43, "learning_rate": 1.9519009601108358e-05, "loss": 1.7633, "step": 400 }, { "epoch": 0.43, "learning_rate": 1.951534691105138e-05, "loss": 1.6767, "step": 401 }, { "epoch": 0.44, "learning_rate": 1.9511670674634745e-05, "loss": 1.7279, "step": 402 }, { "epoch": 0.44, "learning_rate": 1.9507980897092055e-05, "loss": 1.7844, "step": 403 }, { "epoch": 0.44, "learning_rate": 1.9504277583676204e-05, "loss": 1.8847, "step": 404 }, { "epoch": 0.44, "learning_rate": 1.950056073965935e-05, "loss": 1.7377, "step": 405 }, { "epoch": 0.44, "learning_rate": 1.9496830370332916e-05, "loss": 1.7804, "step": 406 }, { "epoch": 0.44, "learning_rate": 1.949308648100757e-05, "loss": 1.6626, "step": 407 }, { "epoch": 0.44, "learning_rate": 1.9489329077013244e-05, "loss": 1.8978, "step": 408 }, { "epoch": 0.44, "learning_rate": 1.94855581636991e-05, "loss": 2.005, "step": 409 }, { "epoch": 0.44, "learning_rate": 1.948177374643353e-05, "loss": 1.8455, "step": 410 }, { "epoch": 0.44, "learning_rate": 1.9477975830604158e-05, "loss": 1.7812, "step": 411 }, { "epoch": 0.45, "learning_rate": 1.947416442161782e-05, "loss": 1.7934, "step": 412 }, { "epoch": 0.45, "learning_rate": 1.947033952490056e-05, "loss": 2.0325, "step": 413 }, { "epoch": 0.45, "learning_rate": 1.946650114589763e-05, "loss": 1.7311, "step": 414 }, { "epoch": 0.45, "learning_rate": 1.9462649290073473e-05, "loss": 1.7788, "step": 415 }, { "epoch": 0.45, "learning_rate": 1.9458783962911717e-05, "loss": 2.0093, "step": 416 }, { "epoch": 0.45, "learning_rate": 1.9454905169915164e-05, "loss": 1.8025, "step": 417 }, { "epoch": 0.45, "learning_rate": 1.945101291660579e-05, "loss": 1.7943, "step": 418 }, { "epoch": 0.45, "learning_rate": 1.9447107208524744e-05, "loss": 2.1016, "step": 419 }, { "epoch": 0.45, "learning_rate": 1.944318805123231e-05, "loss": 1.917, "step": 420 }, { "epoch": 0.46, "learning_rate": 1.943925545030794e-05, "loss": 1.8536, "step": 421 }, { "epoch": 0.46, "learning_rate": 1.9435309411350195e-05, "loss": 1.9451, "step": 422 }, { "epoch": 0.46, "learning_rate": 1.9431349939976804e-05, "loss": 1.8264, "step": 423 }, { "epoch": 0.46, "learning_rate": 1.942737704182459e-05, "loss": 1.895, "step": 424 }, { "epoch": 0.46, "learning_rate": 1.9423390722549506e-05, "loss": 2.0386, "step": 425 }, { "epoch": 0.46, "learning_rate": 1.94193909878266e-05, "loss": 1.8459, "step": 426 }, { "epoch": 0.46, "learning_rate": 1.9415377843350032e-05, "loss": 1.6088, "step": 427 }, { "epoch": 0.46, "learning_rate": 1.9411351294833047e-05, "loss": 1.9419, "step": 428 }, { "epoch": 0.46, "learning_rate": 1.9407311348007965e-05, "loss": 1.5992, "step": 429 }, { "epoch": 0.47, "learning_rate": 1.940325800862619e-05, "loss": 1.8278, "step": 430 }, { "epoch": 0.47, "learning_rate": 1.939919128245819e-05, "loss": 1.8029, "step": 431 }, { "epoch": 0.47, "learning_rate": 1.9395111175293485e-05, "loss": 1.861, "step": 432 }, { "epoch": 0.47, "learning_rate": 1.939101769294066e-05, "loss": 1.8148, "step": 433 }, { "epoch": 0.47, "learning_rate": 1.9386910841227315e-05, "loss": 1.7924, "step": 434 }, { "epoch": 0.47, "learning_rate": 1.9382790626000112e-05, "loss": 1.829, "step": 435 }, { "epoch": 0.47, "learning_rate": 1.9378657053124718e-05, "loss": 1.685, "step": 436 }, { "epoch": 0.47, "learning_rate": 1.9374510128485824e-05, "loss": 2.1904, "step": 437 }, { "epoch": 0.47, "learning_rate": 1.9370349857987127e-05, "loss": 1.8356, "step": 438 }, { "epoch": 0.48, "learning_rate": 1.9366176247551327e-05, "loss": 1.9149, "step": 439 }, { "epoch": 0.48, "learning_rate": 1.936198930312011e-05, "loss": 1.6392, "step": 440 }, { "epoch": 0.48, "learning_rate": 1.9357789030654146e-05, "loss": 1.7937, "step": 441 }, { "epoch": 0.48, "learning_rate": 1.9353575436133086e-05, "loss": 1.8761, "step": 442 }, { "epoch": 0.48, "learning_rate": 1.9349348525555537e-05, "loss": 1.9128, "step": 443 }, { "epoch": 0.48, "learning_rate": 1.9345108304939065e-05, "loss": 1.9687, "step": 444 }, { "epoch": 0.48, "learning_rate": 1.9340854780320196e-05, "loss": 1.8992, "step": 445 }, { "epoch": 0.48, "learning_rate": 1.9336587957754382e-05, "loss": 1.8379, "step": 446 }, { "epoch": 0.48, "learning_rate": 1.9332307843316004e-05, "loss": 1.7322, "step": 447 }, { "epoch": 0.48, "learning_rate": 1.9328014443098385e-05, "loss": 1.7952, "step": 448 }, { "epoch": 0.49, "learning_rate": 1.932370776321374e-05, "loss": 1.7826, "step": 449 }, { "epoch": 0.49, "learning_rate": 1.931938780979321e-05, "loss": 1.9815, "step": 450 }, { "epoch": 0.49, "learning_rate": 1.9315054588986815e-05, "loss": 1.5621, "step": 451 }, { "epoch": 0.49, "learning_rate": 1.9310708106963473e-05, "loss": 1.7337, "step": 452 }, { "epoch": 0.49, "learning_rate": 1.930634836991098e-05, "loss": 1.8992, "step": 453 }, { "epoch": 0.49, "learning_rate": 1.9301975384035994e-05, "loss": 1.7118, "step": 454 }, { "epoch": 0.49, "learning_rate": 1.929758915556404e-05, "loss": 1.6617, "step": 455 }, { "epoch": 0.49, "learning_rate": 1.9293189690739512e-05, "loss": 1.9588, "step": 456 }, { "epoch": 0.49, "learning_rate": 1.9288776995825616e-05, "loss": 1.5833, "step": 457 }, { "epoch": 0.5, "learning_rate": 1.9284351077104414e-05, "loss": 1.7649, "step": 458 }, { "epoch": 0.5, "learning_rate": 1.9279911940876793e-05, "loss": 1.714, "step": 459 }, { "epoch": 0.5, "learning_rate": 1.927545959346245e-05, "loss": 1.9171, "step": 460 }, { "epoch": 0.5, "learning_rate": 1.927099404119989e-05, "loss": 1.6816, "step": 461 }, { "epoch": 0.5, "learning_rate": 1.9266515290446422e-05, "loss": 1.6948, "step": 462 }, { "epoch": 0.5, "learning_rate": 1.926202334757814e-05, "loss": 1.9208, "step": 463 }, { "epoch": 0.5, "learning_rate": 1.9257518218989925e-05, "loss": 2.0632, "step": 464 }, { "epoch": 0.5, "learning_rate": 1.9252999911095425e-05, "loss": 1.9065, "step": 465 }, { "epoch": 0.5, "learning_rate": 1.924846843032705e-05, "loss": 1.8936, "step": 466 }, { "epoch": 0.51, "learning_rate": 1.9243923783135963e-05, "loss": 1.9589, "step": 467 }, { "epoch": 0.51, "learning_rate": 1.9239365975992075e-05, "loss": 2.0159, "step": 468 }, { "epoch": 0.51, "learning_rate": 1.923479501538403e-05, "loss": 1.8736, "step": 469 }, { "epoch": 0.51, "learning_rate": 1.9230210907819194e-05, "loss": 1.8036, "step": 470 }, { "epoch": 0.51, "learning_rate": 1.9225613659823653e-05, "loss": 1.8388, "step": 471 }, { "epoch": 0.51, "learning_rate": 1.9221003277942206e-05, "loss": 2.069, "step": 472 }, { "epoch": 0.51, "learning_rate": 1.9216379768738338e-05, "loss": 1.9428, "step": 473 }, { "epoch": 0.51, "learning_rate": 1.921174313879423e-05, "loss": 1.7716, "step": 474 }, { "epoch": 0.51, "learning_rate": 1.9207093394710742e-05, "loss": 1.8555, "step": 475 }, { "epoch": 0.52, "learning_rate": 1.920243054310739e-05, "loss": 2.1768, "step": 476 }, { "epoch": 0.52, "learning_rate": 1.9197754590622385e-05, "loss": 1.8459, "step": 477 }, { "epoch": 0.52, "learning_rate": 1.9193065543912546e-05, "loss": 1.8115, "step": 478 }, { "epoch": 0.52, "learning_rate": 1.9188363409653363e-05, "loss": 1.8124, "step": 479 }, { "epoch": 0.52, "learning_rate": 1.9183648194538946e-05, "loss": 1.7224, "step": 480 }, { "epoch": 0.52, "learning_rate": 1.9178919905282033e-05, "loss": 1.8167, "step": 481 }, { "epoch": 0.52, "learning_rate": 1.9174178548613967e-05, "loss": 1.8557, "step": 482 }, { "epoch": 0.52, "learning_rate": 1.9169424131284698e-05, "loss": 1.7496, "step": 483 }, { "epoch": 0.52, "learning_rate": 1.9164656660062773e-05, "loss": 1.8104, "step": 484 }, { "epoch": 0.52, "learning_rate": 1.9159876141735324e-05, "loss": 1.7362, "step": 485 }, { "epoch": 0.53, "learning_rate": 1.9155082583108048e-05, "loss": 2.0344, "step": 486 }, { "epoch": 0.53, "learning_rate": 1.915027599100521e-05, "loss": 1.7144, "step": 487 }, { "epoch": 0.53, "learning_rate": 1.9145456372269643e-05, "loss": 1.8579, "step": 488 }, { "epoch": 0.53, "learning_rate": 1.91406237337627e-05, "loss": 1.7571, "step": 489 }, { "epoch": 0.53, "learning_rate": 1.9135778082364295e-05, "loss": 1.5567, "step": 490 }, { "epoch": 0.53, "learning_rate": 1.913091942497285e-05, "loss": 1.7423, "step": 491 }, { "epoch": 0.53, "learning_rate": 1.9126047768505307e-05, "loss": 1.9152, "step": 492 }, { "epoch": 0.53, "learning_rate": 1.9121163119897122e-05, "loss": 1.7091, "step": 493 }, { "epoch": 0.53, "learning_rate": 1.9116265486102237e-05, "loss": 1.7242, "step": 494 }, { "epoch": 0.54, "learning_rate": 1.9111354874093087e-05, "loss": 1.9924, "step": 495 }, { "epoch": 0.54, "learning_rate": 1.910643129086058e-05, "loss": 1.8313, "step": 496 }, { "epoch": 0.54, "learning_rate": 1.9101494743414082e-05, "loss": 1.8238, "step": 497 }, { "epoch": 0.54, "learning_rate": 1.9096545238781433e-05, "loss": 1.987, "step": 498 }, { "epoch": 0.54, "learning_rate": 1.9091582784008908e-05, "loss": 1.8133, "step": 499 }, { "epoch": 0.54, "learning_rate": 1.908660738616122e-05, "loss": 1.8589, "step": 500 }, { "epoch": 0.54, "learning_rate": 1.908161905232151e-05, "loss": 1.8214, "step": 501 }, { "epoch": 0.54, "learning_rate": 1.9076617789591326e-05, "loss": 1.7285, "step": 502 }, { "epoch": 0.54, "learning_rate": 1.9071603605090638e-05, "loss": 1.8021, "step": 503 }, { "epoch": 0.55, "learning_rate": 1.9066576505957796e-05, "loss": 1.7359, "step": 504 }, { "epoch": 0.55, "learning_rate": 1.9061536499349553e-05, "loss": 2.1337, "step": 505 }, { "epoch": 0.55, "learning_rate": 1.905648359244102e-05, "loss": 1.787, "step": 506 }, { "epoch": 0.55, "learning_rate": 1.905141779242568e-05, "loss": 1.7364, "step": 507 }, { "epoch": 0.55, "learning_rate": 1.904633910651538e-05, "loss": 1.6271, "step": 508 }, { "epoch": 0.55, "learning_rate": 1.9041247541940294e-05, "loss": 1.9234, "step": 509 }, { "epoch": 0.55, "learning_rate": 1.9036143105948944e-05, "loss": 2.0125, "step": 510 }, { "epoch": 0.55, "learning_rate": 1.9031025805808175e-05, "loss": 2.0037, "step": 511 }, { "epoch": 0.55, "learning_rate": 1.902589564880314e-05, "loss": 1.4813, "step": 512 }, { "epoch": 0.56, "learning_rate": 1.9020752642237302e-05, "loss": 1.8722, "step": 513 }, { "epoch": 0.56, "learning_rate": 1.9015596793432408e-05, "loss": 1.9763, "step": 514 }, { "epoch": 0.56, "learning_rate": 1.9010428109728497e-05, "loss": 1.7076, "step": 515 }, { "epoch": 0.56, "learning_rate": 1.9005246598483876e-05, "loss": 1.7394, "step": 516 }, { "epoch": 0.56, "learning_rate": 1.9000052267075116e-05, "loss": 1.6068, "step": 517 }, { "epoch": 0.56, "learning_rate": 1.8994845122897033e-05, "loss": 1.8676, "step": 518 }, { "epoch": 0.56, "learning_rate": 1.898962517336269e-05, "loss": 1.6328, "step": 519 }, { "epoch": 0.56, "learning_rate": 1.8984392425903384e-05, "loss": 2.1272, "step": 520 }, { "epoch": 0.56, "learning_rate": 1.8979146887968615e-05, "loss": 1.7898, "step": 521 }, { "epoch": 0.56, "learning_rate": 1.897388856702611e-05, "loss": 1.8246, "step": 522 }, { "epoch": 0.57, "learning_rate": 1.8968617470561788e-05, "loss": 1.6391, "step": 523 }, { "epoch": 0.57, "learning_rate": 1.896333360607975e-05, "loss": 1.6649, "step": 524 }, { "epoch": 0.57, "learning_rate": 1.895803698110228e-05, "loss": 1.7837, "step": 525 }, { "epoch": 0.57, "learning_rate": 1.895272760316983e-05, "loss": 1.7271, "step": 526 }, { "epoch": 0.57, "learning_rate": 1.8947405479840998e-05, "loss": 1.6772, "step": 527 }, { "epoch": 0.57, "learning_rate": 1.894207061869254e-05, "loss": 2.2271, "step": 528 }, { "epoch": 0.57, "learning_rate": 1.8936723027319334e-05, "loss": 1.6941, "step": 529 }, { "epoch": 0.57, "learning_rate": 1.8931362713334393e-05, "loss": 1.844, "step": 530 }, { "epoch": 0.57, "learning_rate": 1.8925989684368823e-05, "loss": 1.5838, "step": 531 }, { "epoch": 0.58, "learning_rate": 1.892060394807186e-05, "loss": 2.196, "step": 532 }, { "epoch": 0.58, "learning_rate": 1.8915205512110802e-05, "loss": 1.8994, "step": 533 }, { "epoch": 0.58, "learning_rate": 1.8909794384171048e-05, "loss": 2.097, "step": 534 }, { "epoch": 0.58, "learning_rate": 1.8904370571956053e-05, "loss": 1.8567, "step": 535 }, { "epoch": 0.58, "learning_rate": 1.889893408318733e-05, "loss": 1.7573, "step": 536 }, { "epoch": 0.58, "learning_rate": 1.889348492560445e-05, "loss": 1.8545, "step": 537 }, { "epoch": 0.58, "learning_rate": 1.8888023106965007e-05, "loss": 1.8982, "step": 538 }, { "epoch": 0.58, "learning_rate": 1.8882548635044625e-05, "loss": 1.8322, "step": 539 }, { "epoch": 0.58, "learning_rate": 1.8877061517636948e-05, "loss": 1.7116, "step": 540 }, { "epoch": 0.59, "learning_rate": 1.8871561762553612e-05, "loss": 1.8873, "step": 541 }, { "epoch": 0.59, "learning_rate": 1.8866049377624245e-05, "loss": 1.785, "step": 542 }, { "epoch": 0.59, "learning_rate": 1.8860524370696464e-05, "loss": 1.9869, "step": 543 }, { "epoch": 0.59, "learning_rate": 1.8854986749635847e-05, "loss": 2.0405, "step": 544 }, { "epoch": 0.59, "learning_rate": 1.884943652232593e-05, "loss": 1.7545, "step": 545 }, { "epoch": 0.59, "learning_rate": 1.8843873696668208e-05, "loss": 1.9384, "step": 546 }, { "epoch": 0.59, "learning_rate": 1.8838298280582097e-05, "loss": 1.7382, "step": 547 }, { "epoch": 0.59, "learning_rate": 1.8832710282004936e-05, "loss": 1.9079, "step": 548 }, { "epoch": 0.59, "learning_rate": 1.882710970889199e-05, "loss": 1.7966, "step": 549 }, { "epoch": 0.6, "learning_rate": 1.882149656921642e-05, "loss": 1.7548, "step": 550 }, { "epoch": 0.6, "learning_rate": 1.8815870870969267e-05, "loss": 2.0582, "step": 551 }, { "epoch": 0.6, "learning_rate": 1.8810232622159465e-05, "loss": 1.9251, "step": 552 }, { "epoch": 0.6, "learning_rate": 1.8804581830813812e-05, "loss": 1.7577, "step": 553 }, { "epoch": 0.6, "learning_rate": 1.8798918504976953e-05, "loss": 1.8336, "step": 554 }, { "epoch": 0.6, "learning_rate": 1.8793242652711388e-05, "loss": 2.0418, "step": 555 }, { "epoch": 0.6, "learning_rate": 1.8787554282097452e-05, "loss": 1.7, "step": 556 }, { "epoch": 0.6, "learning_rate": 1.8781853401233285e-05, "loss": 1.9158, "step": 557 }, { "epoch": 0.6, "learning_rate": 1.8776140018234855e-05, "loss": 1.5324, "step": 558 }, { "epoch": 0.6, "learning_rate": 1.8770414141235918e-05, "loss": 1.934, "step": 559 }, { "epoch": 0.61, "learning_rate": 1.8764675778388023e-05, "loss": 1.9987, "step": 560 }, { "epoch": 0.61, "learning_rate": 1.8758924937860487e-05, "loss": 1.7242, "step": 561 }, { "epoch": 0.61, "learning_rate": 1.87531616278404e-05, "loss": 1.7915, "step": 562 }, { "epoch": 0.61, "learning_rate": 1.8747385856532596e-05, "loss": 2.0531, "step": 563 }, { "epoch": 0.61, "learning_rate": 1.874159763215965e-05, "loss": 2.0786, "step": 564 }, { "epoch": 0.61, "learning_rate": 1.8735796962961878e-05, "loss": 1.8106, "step": 565 }, { "epoch": 0.61, "learning_rate": 1.872998385719729e-05, "loss": 1.8355, "step": 566 }, { "epoch": 0.61, "learning_rate": 1.872415832314162e-05, "loss": 1.7494, "step": 567 }, { "epoch": 0.61, "learning_rate": 1.871832036908829e-05, "loss": 1.9507, "step": 568 }, { "epoch": 0.62, "learning_rate": 1.87124700033484e-05, "loss": 1.8078, "step": 569 }, { "epoch": 0.62, "learning_rate": 1.8706607234250723e-05, "loss": 1.8325, "step": 570 }, { "epoch": 0.62, "learning_rate": 1.8700732070141693e-05, "loss": 1.8502, "step": 571 }, { "epoch": 0.62, "learning_rate": 1.8694844519385383e-05, "loss": 1.7116, "step": 572 }, { "epoch": 0.62, "learning_rate": 1.86889445903635e-05, "loss": 1.8984, "step": 573 }, { "epoch": 0.62, "learning_rate": 1.8683032291475382e-05, "loss": 1.8425, "step": 574 }, { "epoch": 0.62, "learning_rate": 1.867710763113797e-05, "loss": 1.7594, "step": 575 }, { "epoch": 0.62, "learning_rate": 1.86711706177858e-05, "loss": 1.6792, "step": 576 }, { "epoch": 0.62, "learning_rate": 1.8665221259871005e-05, "loss": 1.9078, "step": 577 }, { "epoch": 0.63, "learning_rate": 1.865925956586328e-05, "loss": 1.6609, "step": 578 }, { "epoch": 0.63, "learning_rate": 1.8653285544249896e-05, "loss": 1.7867, "step": 579 }, { "epoch": 0.63, "learning_rate": 1.864729920353566e-05, "loss": 1.7511, "step": 580 }, { "epoch": 0.63, "learning_rate": 1.864130055224292e-05, "loss": 1.9401, "step": 581 }, { "epoch": 0.63, "learning_rate": 1.863528959891156e-05, "loss": 2.0443, "step": 582 }, { "epoch": 0.63, "learning_rate": 1.8629266352098964e-05, "loss": 1.8604, "step": 583 }, { "epoch": 0.63, "learning_rate": 1.8623230820380026e-05, "loss": 1.6141, "step": 584 }, { "epoch": 0.63, "learning_rate": 1.861718301234713e-05, "loss": 2.0134, "step": 585 }, { "epoch": 0.63, "learning_rate": 1.861112293661013e-05, "loss": 1.5974, "step": 586 }, { "epoch": 0.64, "learning_rate": 1.8605050601796345e-05, "loss": 1.675, "step": 587 }, { "epoch": 0.64, "learning_rate": 1.8598966016550556e-05, "loss": 1.6918, "step": 588 }, { "epoch": 0.64, "learning_rate": 1.8592869189534974e-05, "loss": 1.7935, "step": 589 }, { "epoch": 0.64, "learning_rate": 1.8586760129429247e-05, "loss": 2.0052, "step": 590 }, { "epoch": 0.64, "learning_rate": 1.8580638844930425e-05, "loss": 1.8375, "step": 591 }, { "epoch": 0.64, "learning_rate": 1.8574505344752977e-05, "loss": 2.0357, "step": 592 }, { "epoch": 0.64, "learning_rate": 1.856835963762875e-05, "loss": 1.9813, "step": 593 }, { "epoch": 0.64, "learning_rate": 1.8562201732306976e-05, "loss": 1.8431, "step": 594 }, { "epoch": 0.64, "learning_rate": 1.8556031637554254e-05, "loss": 1.922, "step": 595 }, { "epoch": 0.65, "learning_rate": 1.8549849362154524e-05, "loss": 1.7421, "step": 596 }, { "epoch": 0.65, "learning_rate": 1.8543654914909083e-05, "loss": 1.8718, "step": 597 }, { "epoch": 0.65, "learning_rate": 1.8537448304636546e-05, "loss": 1.943, "step": 598 }, { "epoch": 0.65, "learning_rate": 1.8531229540172852e-05, "loss": 1.9991, "step": 599 }, { "epoch": 0.65, "learning_rate": 1.852499863037123e-05, "loss": 1.6649, "step": 600 }, { "epoch": 0.65, "learning_rate": 1.8518755584102214e-05, "loss": 1.8183, "step": 601 }, { "epoch": 0.65, "learning_rate": 1.8512500410253604e-05, "loss": 1.7537, "step": 602 }, { "epoch": 0.65, "learning_rate": 1.8506233117730478e-05, "loss": 1.8279, "step": 603 }, { "epoch": 0.65, "learning_rate": 1.8499953715455148e-05, "loss": 1.6765, "step": 604 }, { "epoch": 0.65, "learning_rate": 1.8493662212367184e-05, "loss": 1.9644, "step": 605 }, { "epoch": 0.66, "learning_rate": 1.8487358617423376e-05, "loss": 2.0155, "step": 606 }, { "epoch": 0.66, "learning_rate": 1.8481042939597727e-05, "loss": 1.7584, "step": 607 }, { "epoch": 0.66, "learning_rate": 1.8474715187881444e-05, "loss": 1.6792, "step": 608 }, { "epoch": 0.66, "learning_rate": 1.846837537128292e-05, "loss": 1.8875, "step": 609 }, { "epoch": 0.66, "learning_rate": 1.8462023498827728e-05, "loss": 1.7411, "step": 610 }, { "epoch": 0.66, "learning_rate": 1.84556595795586e-05, "loss": 1.4576, "step": 611 }, { "epoch": 0.66, "learning_rate": 1.8449283622535416e-05, "loss": 1.7889, "step": 612 }, { "epoch": 0.66, "learning_rate": 1.8442895636835205e-05, "loss": 1.7446, "step": 613 }, { "epoch": 0.66, "learning_rate": 1.843649563155211e-05, "loss": 2.0262, "step": 614 }, { "epoch": 0.67, "learning_rate": 1.8430083615797386e-05, "loss": 1.7133, "step": 615 }, { "epoch": 0.67, "learning_rate": 1.842365959869939e-05, "loss": 2.1312, "step": 616 }, { "epoch": 0.67, "learning_rate": 1.8417223589403567e-05, "loss": 2.0981, "step": 617 }, { "epoch": 0.67, "learning_rate": 1.8410775597072418e-05, "loss": 1.6955, "step": 618 }, { "epoch": 0.67, "learning_rate": 1.8404315630885535e-05, "loss": 1.5846, "step": 619 }, { "epoch": 0.67, "learning_rate": 1.8397843700039523e-05, "loss": 1.6552, "step": 620 }, { "epoch": 0.67, "learning_rate": 1.839135981374804e-05, "loss": 1.7879, "step": 621 }, { "epoch": 0.67, "learning_rate": 1.838486398124176e-05, "loss": 1.7106, "step": 622 }, { "epoch": 0.67, "learning_rate": 1.8378356211768364e-05, "loss": 1.8767, "step": 623 }, { "epoch": 0.68, "learning_rate": 1.837183651459252e-05, "loss": 2.015, "step": 624 }, { "epoch": 0.68, "learning_rate": 1.8365304898995887e-05, "loss": 1.5223, "step": 625 }, { "epoch": 0.68, "learning_rate": 1.8358761374277088e-05, "loss": 2.0143, "step": 626 }, { "epoch": 0.68, "learning_rate": 1.8352205949751695e-05, "loss": 1.725, "step": 627 }, { "epoch": 0.68, "learning_rate": 1.8345638634752227e-05, "loss": 1.841, "step": 628 }, { "epoch": 0.68, "learning_rate": 1.8339059438628134e-05, "loss": 1.7485, "step": 629 }, { "epoch": 0.68, "learning_rate": 1.8332468370745766e-05, "loss": 1.5277, "step": 630 }, { "epoch": 0.68, "learning_rate": 1.832586544048839e-05, "loss": 1.6906, "step": 631 }, { "epoch": 0.68, "learning_rate": 1.831925065725615e-05, "loss": 1.7929, "step": 632 }, { "epoch": 0.69, "learning_rate": 1.831262403046607e-05, "loss": 1.6509, "step": 633 }, { "epoch": 0.69, "learning_rate": 1.8305985569552034e-05, "loss": 2.0475, "step": 634 }, { "epoch": 0.69, "learning_rate": 1.829933528396477e-05, "loss": 1.9633, "step": 635 }, { "epoch": 0.69, "learning_rate": 1.8292673183171845e-05, "loss": 1.8058, "step": 636 }, { "epoch": 0.69, "learning_rate": 1.8285999276657642e-05, "loss": 1.7511, "step": 637 }, { "epoch": 0.69, "learning_rate": 1.8279313573923354e-05, "loss": 1.9296, "step": 638 }, { "epoch": 0.69, "learning_rate": 1.8272616084486968e-05, "loss": 1.8956, "step": 639 }, { "epoch": 0.69, "learning_rate": 1.8265906817883244e-05, "loss": 1.8134, "step": 640 }, { "epoch": 0.69, "learning_rate": 1.825918578366372e-05, "loss": 2.0214, "step": 641 }, { "epoch": 0.69, "learning_rate": 1.8252452991396676e-05, "loss": 1.7222, "step": 642 }, { "epoch": 0.7, "learning_rate": 1.824570845066714e-05, "loss": 1.8207, "step": 643 }, { "epoch": 0.7, "learning_rate": 1.8238952171076862e-05, "loss": 1.6518, "step": 644 }, { "epoch": 0.7, "learning_rate": 1.8232184162244297e-05, "loss": 1.5583, "step": 645 }, { "epoch": 0.7, "learning_rate": 1.8225404433804605e-05, "loss": 2.127, "step": 646 }, { "epoch": 0.7, "learning_rate": 1.821861299540963e-05, "loss": 1.561, "step": 647 }, { "epoch": 0.7, "learning_rate": 1.8211809856727892e-05, "loss": 1.9207, "step": 648 }, { "epoch": 0.7, "learning_rate": 1.8204995027444554e-05, "loss": 1.9223, "step": 649 }, { "epoch": 0.7, "learning_rate": 1.819816851726144e-05, "loss": 1.8233, "step": 650 }, { "epoch": 0.7, "learning_rate": 1.8191330335896985e-05, "loss": 1.4765, "step": 651 }, { "epoch": 0.71, "learning_rate": 1.8184480493086248e-05, "loss": 1.6904, "step": 652 }, { "epoch": 0.71, "learning_rate": 1.8177618998580897e-05, "loss": 1.9904, "step": 653 }, { "epoch": 0.71, "learning_rate": 1.8170745862149174e-05, "loss": 1.8617, "step": 654 }, { "epoch": 0.71, "learning_rate": 1.8163861093575905e-05, "loss": 2.1474, "step": 655 }, { "epoch": 0.71, "learning_rate": 1.815696470266247e-05, "loss": 1.967, "step": 656 }, { "epoch": 0.71, "learning_rate": 1.8150056699226793e-05, "loss": 1.7864, "step": 657 }, { "epoch": 0.71, "learning_rate": 1.8143137093103345e-05, "loss": 1.8498, "step": 658 }, { "epoch": 0.71, "learning_rate": 1.8136205894143098e-05, "loss": 1.8173, "step": 659 }, { "epoch": 0.71, "learning_rate": 1.8129263112213527e-05, "loss": 1.9093, "step": 660 }, { "epoch": 0.72, "learning_rate": 1.8122308757198614e-05, "loss": 1.4915, "step": 661 }, { "epoch": 0.72, "learning_rate": 1.8115342838998807e-05, "loss": 1.8303, "step": 662 }, { "epoch": 0.72, "learning_rate": 1.8108365367531003e-05, "loss": 1.7912, "step": 663 }, { "epoch": 0.72, "learning_rate": 1.8101376352728572e-05, "loss": 1.7086, "step": 664 }, { "epoch": 0.72, "learning_rate": 1.80943758045413e-05, "loss": 2.0016, "step": 665 }, { "epoch": 0.72, "learning_rate": 1.8087363732935398e-05, "loss": 1.7456, "step": 666 }, { "epoch": 0.72, "learning_rate": 1.8080340147893477e-05, "loss": 1.7095, "step": 667 }, { "epoch": 0.72, "learning_rate": 1.807330505941455e-05, "loss": 1.8166, "step": 668 }, { "epoch": 0.72, "learning_rate": 1.8066258477513992e-05, "loss": 1.9571, "step": 669 }, { "epoch": 0.73, "learning_rate": 1.805920041222355e-05, "loss": 1.857, "step": 670 }, { "epoch": 0.73, "learning_rate": 1.805213087359132e-05, "loss": 1.8744, "step": 671 }, { "epoch": 0.73, "learning_rate": 1.8045049871681726e-05, "loss": 1.8435, "step": 672 }, { "epoch": 0.73, "learning_rate": 1.8037957416575514e-05, "loss": 2.0229, "step": 673 }, { "epoch": 0.73, "learning_rate": 1.803085351836974e-05, "loss": 1.8561, "step": 674 }, { "epoch": 0.73, "learning_rate": 1.802373818717774e-05, "loss": 1.5955, "step": 675 }, { "epoch": 0.73, "learning_rate": 1.8016611433129135e-05, "loss": 1.6131, "step": 676 }, { "epoch": 0.73, "learning_rate": 1.8009473266369806e-05, "loss": 1.8665, "step": 677 }, { "epoch": 0.73, "learning_rate": 1.8002323697061883e-05, "loss": 1.9577, "step": 678 }, { "epoch": 0.73, "learning_rate": 1.7995162735383725e-05, "loss": 1.6646, "step": 679 }, { "epoch": 0.74, "learning_rate": 1.798799039152991e-05, "loss": 1.7852, "step": 680 }, { "epoch": 0.74, "learning_rate": 1.7980806675711225e-05, "loss": 1.904, "step": 681 }, { "epoch": 0.74, "learning_rate": 1.7973611598154644e-05, "loss": 1.7585, "step": 682 }, { "epoch": 0.74, "learning_rate": 1.7966405169103313e-05, "loss": 1.8896, "step": 683 }, { "epoch": 0.74, "learning_rate": 1.795918739881654e-05, "loss": 1.7932, "step": 684 }, { "epoch": 0.74, "learning_rate": 1.7951958297569775e-05, "loss": 1.887, "step": 685 }, { "epoch": 0.74, "learning_rate": 1.7944717875654615e-05, "loss": 1.7784, "step": 686 }, { "epoch": 0.74, "learning_rate": 1.7937466143378754e-05, "loss": 1.598, "step": 687 }, { "epoch": 0.74, "learning_rate": 1.7930203111065997e-05, "loss": 1.8335, "step": 688 }, { "epoch": 0.75, "learning_rate": 1.7922928789056233e-05, "loss": 1.7218, "step": 689 }, { "epoch": 0.75, "learning_rate": 1.7915643187705428e-05, "loss": 1.8661, "step": 690 }, { "epoch": 0.75, "learning_rate": 1.7908346317385602e-05, "loss": 2.0096, "step": 691 }, { "epoch": 0.75, "learning_rate": 1.7901038188484818e-05, "loss": 1.9253, "step": 692 }, { "epoch": 0.75, "learning_rate": 1.789371881140717e-05, "loss": 1.9858, "step": 693 }, { "epoch": 0.75, "learning_rate": 1.7886388196572758e-05, "loss": 1.6172, "step": 694 }, { "epoch": 0.75, "learning_rate": 1.787904635441769e-05, "loss": 2.1303, "step": 695 }, { "epoch": 0.75, "learning_rate": 1.787169329539405e-05, "loss": 1.9885, "step": 696 }, { "epoch": 0.75, "learning_rate": 1.7864329029969903e-05, "loss": 1.8695, "step": 697 }, { "epoch": 0.76, "learning_rate": 1.7856953568629243e-05, "loss": 1.816, "step": 698 }, { "epoch": 0.76, "learning_rate": 1.784956692187203e-05, "loss": 1.7575, "step": 699 }, { "epoch": 0.76, "learning_rate": 1.7842169100214137e-05, "loss": 1.9882, "step": 700 }, { "epoch": 0.76, "learning_rate": 1.7834760114187334e-05, "loss": 1.8572, "step": 701 }, { "epoch": 0.76, "learning_rate": 1.782733997433931e-05, "loss": 2.1264, "step": 702 }, { "epoch": 0.76, "learning_rate": 1.781990869123361e-05, "loss": 1.7489, "step": 703 }, { "epoch": 0.76, "learning_rate": 1.7812466275449656e-05, "loss": 1.9035, "step": 704 }, { "epoch": 0.76, "learning_rate": 1.7805012737582713e-05, "loss": 2.0032, "step": 705 }, { "epoch": 0.76, "learning_rate": 1.779754808824388e-05, "loss": 1.7685, "step": 706 }, { "epoch": 0.77, "learning_rate": 1.7790072338060086e-05, "loss": 2.076, "step": 707 }, { "epoch": 0.77, "learning_rate": 1.7782585497674043e-05, "loss": 2.145, "step": 708 }, { "epoch": 0.77, "learning_rate": 1.7775087577744266e-05, "loss": 1.8889, "step": 709 }, { "epoch": 0.77, "learning_rate": 1.776757858894504e-05, "loss": 1.7354, "step": 710 }, { "epoch": 0.77, "learning_rate": 1.7760058541966406e-05, "loss": 1.8177, "step": 711 }, { "epoch": 0.77, "learning_rate": 1.7752527447514154e-05, "loss": 1.8914, "step": 712 }, { "epoch": 0.77, "learning_rate": 1.7744985316309793e-05, "loss": 1.5651, "step": 713 }, { "epoch": 0.77, "learning_rate": 1.773743215909055e-05, "loss": 1.8058, "step": 714 }, { "epoch": 0.77, "learning_rate": 1.7729867986609338e-05, "loss": 2.0572, "step": 715 }, { "epoch": 0.77, "learning_rate": 1.772229280963478e-05, "loss": 1.8813, "step": 716 }, { "epoch": 0.78, "learning_rate": 1.7714706638951126e-05, "loss": 1.8782, "step": 717 }, { "epoch": 0.78, "learning_rate": 1.770710948535831e-05, "loss": 1.9337, "step": 718 }, { "epoch": 0.78, "learning_rate": 1.769950135967188e-05, "loss": 1.9506, "step": 719 }, { "epoch": 0.78, "learning_rate": 1.7691882272723023e-05, "loss": 1.8569, "step": 720 }, { "epoch": 0.78, "learning_rate": 1.7684252235358514e-05, "loss": 2.0687, "step": 721 }, { "epoch": 0.78, "learning_rate": 1.7676611258440726e-05, "loss": 2.0012, "step": 722 }, { "epoch": 0.78, "learning_rate": 1.76689593528476e-05, "loss": 1.6913, "step": 723 }, { "epoch": 0.78, "learning_rate": 1.766129652947265e-05, "loss": 1.7295, "step": 724 }, { "epoch": 0.78, "learning_rate": 1.7653622799224914e-05, "loss": 1.4796, "step": 725 }, { "epoch": 0.79, "learning_rate": 1.7645938173028964e-05, "loss": 1.8595, "step": 726 }, { "epoch": 0.79, "learning_rate": 1.7638242661824892e-05, "loss": 1.7979, "step": 727 }, { "epoch": 0.79, "learning_rate": 1.7630536276568277e-05, "loss": 2.0496, "step": 728 }, { "epoch": 0.79, "learning_rate": 1.762281902823018e-05, "loss": 1.7305, "step": 729 }, { "epoch": 0.79, "learning_rate": 1.7615090927797135e-05, "loss": 1.7799, "step": 730 }, { "epoch": 0.79, "learning_rate": 1.760735198627111e-05, "loss": 1.8931, "step": 731 }, { "epoch": 0.79, "learning_rate": 1.7599602214669522e-05, "loss": 1.9162, "step": 732 }, { "epoch": 0.79, "learning_rate": 1.75918416240252e-05, "loss": 1.8491, "step": 733 }, { "epoch": 0.79, "learning_rate": 1.7584070225386368e-05, "loss": 1.9094, "step": 734 }, { "epoch": 0.8, "learning_rate": 1.7576288029816654e-05, "loss": 1.6776, "step": 735 }, { "epoch": 0.8, "learning_rate": 1.756849504839504e-05, "loss": 1.7776, "step": 736 }, { "epoch": 0.8, "learning_rate": 1.7560691292215872e-05, "loss": 1.9614, "step": 737 }, { "epoch": 0.8, "learning_rate": 1.7552876772388833e-05, "loss": 1.9188, "step": 738 }, { "epoch": 0.8, "learning_rate": 1.7545051500038926e-05, "loss": 1.6998, "step": 739 }, { "epoch": 0.8, "learning_rate": 1.753721548630647e-05, "loss": 1.7554, "step": 740 }, { "epoch": 0.8, "learning_rate": 1.7529368742347066e-05, "loss": 1.7072, "step": 741 }, { "epoch": 0.8, "learning_rate": 1.75215112793316e-05, "loss": 1.6595, "step": 742 }, { "epoch": 0.8, "learning_rate": 1.7513643108446213e-05, "loss": 1.6982, "step": 743 }, { "epoch": 0.81, "learning_rate": 1.750576424089229e-05, "loss": 1.5064, "step": 744 }, { "epoch": 0.81, "learning_rate": 1.7497874687886447e-05, "loss": 1.7888, "step": 745 }, { "epoch": 0.81, "learning_rate": 1.7489974460660507e-05, "loss": 1.6298, "step": 746 }, { "epoch": 0.81, "learning_rate": 1.7482063570461493e-05, "loss": 1.9298, "step": 747 }, { "epoch": 0.81, "learning_rate": 1.747414202855161e-05, "loss": 1.8131, "step": 748 }, { "epoch": 0.81, "learning_rate": 1.7466209846208225e-05, "loss": 1.7347, "step": 749 }, { "epoch": 0.81, "learning_rate": 1.7458267034723846e-05, "loss": 1.7985, "step": 750 }, { "epoch": 0.81, "learning_rate": 1.745031360540613e-05, "loss": 1.6756, "step": 751 }, { "epoch": 0.81, "learning_rate": 1.744234956957783e-05, "loss": 1.5744, "step": 752 }, { "epoch": 0.81, "learning_rate": 1.743437493857681e-05, "loss": 2.2723, "step": 753 }, { "epoch": 0.82, "learning_rate": 1.7426389723756026e-05, "loss": 1.8948, "step": 754 }, { "epoch": 0.82, "learning_rate": 1.741839393648348e-05, "loss": 2.0171, "step": 755 }, { "epoch": 0.82, "learning_rate": 1.741038758814224e-05, "loss": 2.0134, "step": 756 }, { "epoch": 0.82, "learning_rate": 1.7402370690130406e-05, "loss": 2.0325, "step": 757 }, { "epoch": 0.82, "learning_rate": 1.73943432538611e-05, "loss": 1.7718, "step": 758 }, { "epoch": 0.82, "learning_rate": 1.7386305290762437e-05, "loss": 1.9268, "step": 759 }, { "epoch": 0.82, "learning_rate": 1.737825681227753e-05, "loss": 2.1387, "step": 760 }, { "epoch": 0.82, "learning_rate": 1.7370197829864454e-05, "loss": 1.7365, "step": 761 }, { "epoch": 0.82, "learning_rate": 1.7362128354996242e-05, "loss": 1.876, "step": 762 }, { "epoch": 0.83, "learning_rate": 1.7354048399160866e-05, "loss": 1.9904, "step": 763 }, { "epoch": 0.83, "learning_rate": 1.7345957973861205e-05, "loss": 2.1336, "step": 764 }, { "epoch": 0.83, "learning_rate": 1.7337857090615068e-05, "loss": 1.5848, "step": 765 }, { "epoch": 0.83, "learning_rate": 1.7329745760955122e-05, "loss": 1.6669, "step": 766 }, { "epoch": 0.83, "learning_rate": 1.732162399642894e-05, "loss": 1.73, "step": 767 }, { "epoch": 0.83, "learning_rate": 1.7313491808598914e-05, "loss": 1.9249, "step": 768 }, { "epoch": 0.83, "learning_rate": 1.7305349209042303e-05, "loss": 1.6414, "step": 769 }, { "epoch": 0.83, "learning_rate": 1.729719620935118e-05, "loss": 1.7314, "step": 770 }, { "epoch": 0.83, "learning_rate": 1.728903282113242e-05, "loss": 1.7717, "step": 771 }, { "epoch": 0.84, "learning_rate": 1.7280859056007682e-05, "loss": 1.8558, "step": 772 }, { "epoch": 0.84, "learning_rate": 1.7272674925613424e-05, "loss": 2.0347, "step": 773 }, { "epoch": 0.84, "learning_rate": 1.7264480441600823e-05, "loss": 1.7946, "step": 774 }, { "epoch": 0.84, "learning_rate": 1.7256275615635826e-05, "loss": 1.5202, "step": 775 }, { "epoch": 0.84, "learning_rate": 1.7248060459399092e-05, "loss": 1.5847, "step": 776 }, { "epoch": 0.84, "learning_rate": 1.7239834984585982e-05, "loss": 1.7596, "step": 777 }, { "epoch": 0.84, "learning_rate": 1.7231599202906553e-05, "loss": 1.5023, "step": 778 }, { "epoch": 0.84, "learning_rate": 1.7223353126085536e-05, "loss": 1.6659, "step": 779 }, { "epoch": 0.84, "learning_rate": 1.7215096765862315e-05, "loss": 1.8749, "step": 780 }, { "epoch": 0.85, "learning_rate": 1.720683013399091e-05, "loss": 1.806, "step": 781 }, { "epoch": 0.85, "learning_rate": 1.7198553242239978e-05, "loss": 1.7427, "step": 782 }, { "epoch": 0.85, "learning_rate": 1.7190266102392765e-05, "loss": 1.7963, "step": 783 }, { "epoch": 0.85, "learning_rate": 1.718196872624712e-05, "loss": 1.8034, "step": 784 }, { "epoch": 0.85, "learning_rate": 1.717366112561546e-05, "loss": 1.6718, "step": 785 }, { "epoch": 0.85, "learning_rate": 1.7165343312324755e-05, "loss": 1.7927, "step": 786 }, { "epoch": 0.85, "learning_rate": 1.7157015298216516e-05, "loss": 1.7335, "step": 787 }, { "epoch": 0.85, "learning_rate": 1.714867709514678e-05, "loss": 1.7174, "step": 788 }, { "epoch": 0.85, "learning_rate": 1.714032871498608e-05, "loss": 1.9039, "step": 789 }, { "epoch": 0.85, "learning_rate": 1.713197016961945e-05, "loss": 1.874, "step": 790 }, { "epoch": 0.86, "learning_rate": 1.7123601470946388e-05, "loss": 1.7786, "step": 791 }, { "epoch": 0.86, "learning_rate": 1.7115222630880844e-05, "loss": 1.8011, "step": 792 }, { "epoch": 0.86, "learning_rate": 1.7106833661351213e-05, "loss": 2.0506, "step": 793 }, { "epoch": 0.86, "learning_rate": 1.7098434574300307e-05, "loss": 2.0258, "step": 794 }, { "epoch": 0.86, "learning_rate": 1.7090025381685337e-05, "loss": 1.628, "step": 795 }, { "epoch": 0.86, "learning_rate": 1.708160609547791e-05, "loss": 1.7226, "step": 796 }, { "epoch": 0.86, "learning_rate": 1.7073176727663994e-05, "loss": 2.1619, "step": 797 }, { "epoch": 0.86, "learning_rate": 1.706473729024392e-05, "loss": 1.6363, "step": 798 }, { "epoch": 0.86, "learning_rate": 1.7056287795232338e-05, "loss": 1.4952, "step": 799 }, { "epoch": 0.87, "learning_rate": 1.7047828254658233e-05, "loss": 1.7071, "step": 800 }, { "epoch": 0.87, "learning_rate": 1.703935868056488e-05, "loss": 1.9266, "step": 801 }, { "epoch": 0.87, "learning_rate": 1.703087908500985e-05, "loss": 1.6527, "step": 802 }, { "epoch": 0.87, "learning_rate": 1.702238948006496e-05, "loss": 1.8177, "step": 803 }, { "epoch": 0.87, "learning_rate": 1.70138898778163e-05, "loss": 1.8119, "step": 804 }, { "epoch": 0.87, "learning_rate": 1.7005380290364182e-05, "loss": 1.887, "step": 805 }, { "epoch": 0.87, "learning_rate": 1.6996860729823127e-05, "loss": 1.5231, "step": 806 }, { "epoch": 0.87, "learning_rate": 1.6988331208321868e-05, "loss": 1.8765, "step": 807 }, { "epoch": 0.87, "learning_rate": 1.6979791738003305e-05, "loss": 2.113, "step": 808 }, { "epoch": 0.88, "learning_rate": 1.697124233102451e-05, "loss": 1.7483, "step": 809 }, { "epoch": 0.88, "learning_rate": 1.69626829995567e-05, "loss": 1.7028, "step": 810 }, { "epoch": 0.88, "learning_rate": 1.695411375578522e-05, "loss": 1.8967, "step": 811 }, { "epoch": 0.88, "learning_rate": 1.6945534611909525e-05, "loss": 1.8347, "step": 812 }, { "epoch": 0.88, "learning_rate": 1.6936945580143166e-05, "loss": 1.8635, "step": 813 }, { "epoch": 0.88, "learning_rate": 1.6928346672713768e-05, "loss": 1.8776, "step": 814 }, { "epoch": 0.88, "learning_rate": 1.6919737901863024e-05, "loss": 1.6324, "step": 815 }, { "epoch": 0.88, "learning_rate": 1.6911119279846655e-05, "loss": 1.6854, "step": 816 }, { "epoch": 0.88, "learning_rate": 1.6902490818934417e-05, "loss": 1.8578, "step": 817 }, { "epoch": 0.89, "learning_rate": 1.6893852531410066e-05, "loss": 1.6869, "step": 818 }, { "epoch": 0.89, "learning_rate": 1.6885204429571356e-05, "loss": 1.7316, "step": 819 }, { "epoch": 0.89, "learning_rate": 1.6876546525730005e-05, "loss": 1.85, "step": 820 }, { "epoch": 0.89, "learning_rate": 1.686787883221169e-05, "loss": 1.6942, "step": 821 }, { "epoch": 0.89, "learning_rate": 1.6859201361356025e-05, "loss": 1.7333, "step": 822 }, { "epoch": 0.89, "learning_rate": 1.685051412551654e-05, "loss": 1.4995, "step": 823 }, { "epoch": 0.89, "learning_rate": 1.6841817137060666e-05, "loss": 1.7027, "step": 824 }, { "epoch": 0.89, "learning_rate": 1.683311040836973e-05, "loss": 2.003, "step": 825 }, { "epoch": 0.89, "learning_rate": 1.68243939518389e-05, "loss": 1.7645, "step": 826 }, { "epoch": 0.9, "learning_rate": 1.6815667779877226e-05, "loss": 1.6719, "step": 827 }, { "epoch": 0.9, "learning_rate": 1.6806931904907562e-05, "loss": 1.8548, "step": 828 }, { "epoch": 0.9, "learning_rate": 1.6798186339366593e-05, "loss": 1.7971, "step": 829 }, { "epoch": 0.9, "learning_rate": 1.678943109570479e-05, "loss": 1.8877, "step": 830 }, { "epoch": 0.9, "learning_rate": 1.67806661863864e-05, "loss": 1.7383, "step": 831 }, { "epoch": 0.9, "learning_rate": 1.677189162388944e-05, "loss": 1.7564, "step": 832 }, { "epoch": 0.9, "learning_rate": 1.676310742070566e-05, "loss": 1.6716, "step": 833 }, { "epoch": 0.9, "learning_rate": 1.6754313589340546e-05, "loss": 1.6561, "step": 834 }, { "epoch": 0.9, "learning_rate": 1.674551014231328e-05, "loss": 2.1709, "step": 835 }, { "epoch": 0.9, "learning_rate": 1.673669709215674e-05, "loss": 1.5265, "step": 836 }, { "epoch": 0.91, "learning_rate": 1.6727874451417473e-05, "loss": 1.8503, "step": 837 }, { "epoch": 0.91, "learning_rate": 1.6719042232655677e-05, "loss": 1.8054, "step": 838 }, { "epoch": 0.91, "learning_rate": 1.671020044844519e-05, "loss": 1.8034, "step": 839 }, { "epoch": 0.91, "learning_rate": 1.6701349111373465e-05, "loss": 1.5963, "step": 840 }, { "epoch": 0.91, "learning_rate": 1.6692488234041556e-05, "loss": 1.9236, "step": 841 }, { "epoch": 0.91, "learning_rate": 1.66836178290641e-05, "loss": 1.5941, "step": 842 }, { "epoch": 0.91, "learning_rate": 1.667473790906929e-05, "loss": 1.8286, "step": 843 }, { "epoch": 0.91, "learning_rate": 1.666584848669888e-05, "loss": 1.7725, "step": 844 }, { "epoch": 0.91, "learning_rate": 1.6656949574608138e-05, "loss": 1.6444, "step": 845 }, { "epoch": 0.92, "learning_rate": 1.6648041185465846e-05, "loss": 1.8188, "step": 846 }, { "epoch": 0.92, "learning_rate": 1.6639123331954276e-05, "loss": 1.7729, "step": 847 }, { "epoch": 0.92, "learning_rate": 1.6630196026769187e-05, "loss": 1.6913, "step": 848 }, { "epoch": 0.92, "learning_rate": 1.662125928261977e-05, "loss": 1.7507, "step": 849 }, { "epoch": 0.92, "learning_rate": 1.661231311222868e-05, "loss": 1.6705, "step": 850 }, { "epoch": 0.92, "learning_rate": 1.6603357528331966e-05, "loss": 1.8839, "step": 851 }, { "epoch": 0.92, "learning_rate": 1.6594392543679098e-05, "loss": 1.7405, "step": 852 }, { "epoch": 0.92, "learning_rate": 1.6585418171032925e-05, "loss": 1.8388, "step": 853 }, { "epoch": 0.92, "learning_rate": 1.6576434423169647e-05, "loss": 1.4925, "step": 854 }, { "epoch": 0.93, "learning_rate": 1.6567441312878828e-05, "loss": 1.899, "step": 855 }, { "epoch": 0.93, "learning_rate": 1.6558438852963358e-05, "loss": 2.1859, "step": 856 }, { "epoch": 0.93, "learning_rate": 1.6549427056239432e-05, "loss": 1.7347, "step": 857 }, { "epoch": 0.93, "learning_rate": 1.6540405935536532e-05, "loss": 1.8964, "step": 858 }, { "epoch": 0.93, "learning_rate": 1.6531375503697433e-05, "loss": 1.9026, "step": 859 }, { "epoch": 0.93, "learning_rate": 1.6522335773578143e-05, "loss": 1.7895, "step": 860 }, { "epoch": 0.93, "learning_rate": 1.6513286758047923e-05, "loss": 2.135, "step": 861 }, { "epoch": 0.93, "learning_rate": 1.6504228469989248e-05, "loss": 1.789, "step": 862 }, { "epoch": 0.93, "learning_rate": 1.6495160922297793e-05, "loss": 2.0962, "step": 863 }, { "epoch": 0.94, "learning_rate": 1.6486084127882416e-05, "loss": 1.8792, "step": 864 }, { "epoch": 0.94, "learning_rate": 1.647699809966514e-05, "loss": 1.6231, "step": 865 }, { "epoch": 0.94, "learning_rate": 1.646790285058113e-05, "loss": 1.5529, "step": 866 }, { "epoch": 0.94, "learning_rate": 1.6458798393578684e-05, "loss": 1.8931, "step": 867 }, { "epoch": 0.94, "learning_rate": 1.64496847416192e-05, "loss": 1.5178, "step": 868 }, { "epoch": 0.94, "learning_rate": 1.644056190767718e-05, "loss": 1.791, "step": 869 }, { "epoch": 0.94, "learning_rate": 1.6431429904740183e-05, "loss": 1.6673, "step": 870 }, { "epoch": 0.94, "learning_rate": 1.6422288745808828e-05, "loss": 1.6321, "step": 871 }, { "epoch": 0.94, "learning_rate": 1.641313844389677e-05, "loss": 1.8554, "step": 872 }, { "epoch": 0.94, "learning_rate": 1.6403979012030677e-05, "loss": 1.6654, "step": 873 }, { "epoch": 0.95, "learning_rate": 1.6394810463250218e-05, "loss": 1.7586, "step": 874 }, { "epoch": 0.95, "learning_rate": 1.6385632810608035e-05, "loss": 1.8075, "step": 875 }, { "epoch": 0.95, "learning_rate": 1.6376446067169744e-05, "loss": 1.7, "step": 876 }, { "epoch": 0.95, "learning_rate": 1.6367250246013887e-05, "loss": 1.8388, "step": 877 }, { "epoch": 0.95, "learning_rate": 1.6358045360231936e-05, "loss": 1.562, "step": 878 }, { "epoch": 0.95, "learning_rate": 1.6348831422928277e-05, "loss": 1.9606, "step": 879 }, { "epoch": 0.95, "learning_rate": 1.6339608447220163e-05, "loss": 1.9655, "step": 880 }, { "epoch": 0.95, "learning_rate": 1.6330376446237726e-05, "loss": 2.0021, "step": 881 }, { "epoch": 0.95, "learning_rate": 1.6321135433123946e-05, "loss": 1.9258, "step": 882 }, { "epoch": 0.96, "learning_rate": 1.6311885421034638e-05, "loss": 1.7109, "step": 883 }, { "epoch": 0.96, "learning_rate": 1.6302626423138412e-05, "loss": 2.0604, "step": 884 }, { "epoch": 0.96, "learning_rate": 1.629335845261669e-05, "loss": 1.7187, "step": 885 }, { "epoch": 0.96, "learning_rate": 1.6284081522663652e-05, "loss": 1.8782, "step": 886 }, { "epoch": 0.96, "learning_rate": 1.6274795646486244e-05, "loss": 1.8045, "step": 887 }, { "epoch": 0.96, "learning_rate": 1.626550083730414e-05, "loss": 2.041, "step": 888 }, { "epoch": 0.96, "learning_rate": 1.6256197108349734e-05, "loss": 1.8206, "step": 889 }, { "epoch": 0.96, "learning_rate": 1.6246884472868128e-05, "loss": 1.7373, "step": 890 }, { "epoch": 0.96, "learning_rate": 1.6237562944117087e-05, "loss": 1.6757, "step": 891 }, { "epoch": 0.97, "learning_rate": 1.6228232535367045e-05, "loss": 1.9031, "step": 892 }, { "epoch": 0.97, "learning_rate": 1.621889325990108e-05, "loss": 1.626, "step": 893 }, { "epoch": 0.97, "learning_rate": 1.6209545131014893e-05, "loss": 1.5336, "step": 894 }, { "epoch": 0.97, "learning_rate": 1.6200188162016778e-05, "loss": 1.8612, "step": 895 }, { "epoch": 0.97, "learning_rate": 1.619082236622763e-05, "loss": 1.8652, "step": 896 }, { "epoch": 0.97, "learning_rate": 1.6181447756980903e-05, "loss": 1.9462, "step": 897 }, { "epoch": 0.97, "learning_rate": 1.6172064347622595e-05, "loss": 1.7591, "step": 898 }, { "epoch": 0.97, "learning_rate": 1.616267215151124e-05, "loss": 2.0353, "step": 899 }, { "epoch": 0.97, "learning_rate": 1.6153271182017864e-05, "loss": 1.7371, "step": 900 }, { "epoch": 0.98, "learning_rate": 1.614386145252601e-05, "loss": 1.5204, "step": 901 }, { "epoch": 0.98, "learning_rate": 1.613444297643167e-05, "loss": 1.6634, "step": 902 }, { "epoch": 0.98, "learning_rate": 1.6125015767143293e-05, "loss": 1.9809, "step": 903 }, { "epoch": 0.98, "learning_rate": 1.611557983808177e-05, "loss": 1.9466, "step": 904 }, { "epoch": 0.98, "learning_rate": 1.610613520268039e-05, "loss": 1.8158, "step": 905 }, { "epoch": 0.98, "learning_rate": 1.609668187438485e-05, "loss": 1.6599, "step": 906 }, { "epoch": 0.98, "learning_rate": 1.608721986665322e-05, "loss": 1.6313, "step": 907 }, { "epoch": 0.98, "learning_rate": 1.6077749192955922e-05, "loss": 2.0018, "step": 908 }, { "epoch": 0.98, "learning_rate": 1.6068269866775722e-05, "loss": 1.5188, "step": 909 }, { "epoch": 0.98, "learning_rate": 1.6058781901607687e-05, "loss": 1.901, "step": 910 }, { "epoch": 0.99, "learning_rate": 1.604928531095921e-05, "loss": 1.7248, "step": 911 }, { "epoch": 0.99, "learning_rate": 1.6039780108349938e-05, "loss": 1.9463, "step": 912 }, { "epoch": 0.99, "learning_rate": 1.603026630731179e-05, "loss": 1.9392, "step": 913 }, { "epoch": 0.99, "learning_rate": 1.6020743921388937e-05, "loss": 1.7979, "step": 914 }, { "epoch": 0.99, "learning_rate": 1.6011212964137742e-05, "loss": 1.9517, "step": 915 }, { "epoch": 0.99, "learning_rate": 1.60016734491268e-05, "loss": 1.5702, "step": 916 }, { "epoch": 0.99, "learning_rate": 1.5992125389936873e-05, "loss": 1.775, "step": 917 }, { "epoch": 0.99, "learning_rate": 1.598256880016089e-05, "loss": 1.8394, "step": 918 }, { "epoch": 0.99, "learning_rate": 1.5973003693403928e-05, "loss": 1.7364, "step": 919 }, { "epoch": 1.0, "learning_rate": 1.5963430083283185e-05, "loss": 2.0016, "step": 920 }, { "epoch": 1.0, "learning_rate": 1.5953847983427967e-05, "loss": 1.7369, "step": 921 }, { "epoch": 1.0, "learning_rate": 1.5944257407479665e-05, "loss": 2.1433, "step": 922 }, { "epoch": 1.0, "learning_rate": 1.5934658369091734e-05, "loss": 1.8075, "step": 923 }, { "epoch": 1.0, "learning_rate": 1.5925050881929682e-05, "loss": 1.7377, "step": 924 }, { "epoch": 1.0, "learning_rate": 1.5915434959671046e-05, "loss": 1.6292, "step": 925 }, { "epoch": 1.0, "learning_rate": 1.590581061600536e-05, "loss": 1.5613, "step": 926 }, { "epoch": 1.0, "learning_rate": 1.5896177864634166e-05, "loss": 1.7109, "step": 927 }, { "epoch": 1.0, "learning_rate": 1.5886536719270956e-05, "loss": 1.4451, "step": 928 }, { "epoch": 1.01, "learning_rate": 1.5876887193641183e-05, "loss": 1.7095, "step": 929 }, { "epoch": 1.01, "learning_rate": 1.5867229301482227e-05, "loss": 1.8886, "step": 930 }, { "epoch": 1.01, "learning_rate": 1.585756305654338e-05, "loss": 1.395, "step": 931 }, { "epoch": 1.01, "learning_rate": 1.5847888472585826e-05, "loss": 1.7254, "step": 932 }, { "epoch": 1.01, "learning_rate": 1.583820556338262e-05, "loss": 1.704, "step": 933 }, { "epoch": 1.01, "learning_rate": 1.582851434271867e-05, "loss": 1.7093, "step": 934 }, { "epoch": 1.01, "learning_rate": 1.5818814824390713e-05, "loss": 1.7495, "step": 935 }, { "epoch": 1.01, "learning_rate": 1.58091070222073e-05, "loss": 1.9524, "step": 936 }, { "epoch": 1.01, "learning_rate": 1.5799390949988775e-05, "loss": 1.7421, "step": 937 }, { "epoch": 1.02, "learning_rate": 1.578966662156726e-05, "loss": 1.6707, "step": 938 }, { "epoch": 1.02, "learning_rate": 1.5779934050786633e-05, "loss": 1.5381, "step": 939 }, { "epoch": 1.02, "learning_rate": 1.577019325150249e-05, "loss": 1.4886, "step": 940 }, { "epoch": 1.02, "learning_rate": 1.5760444237582157e-05, "loss": 1.7248, "step": 941 }, { "epoch": 1.02, "learning_rate": 1.5750687022904654e-05, "loss": 1.4004, "step": 942 }, { "epoch": 1.02, "learning_rate": 1.5740921621360664e-05, "loss": 1.7162, "step": 943 }, { "epoch": 1.02, "learning_rate": 1.5731148046852537e-05, "loss": 1.6786, "step": 944 }, { "epoch": 1.02, "learning_rate": 1.572136631329425e-05, "loss": 1.442, "step": 945 }, { "epoch": 1.02, "learning_rate": 1.5711576434611404e-05, "loss": 1.5372, "step": 946 }, { "epoch": 1.02, "learning_rate": 1.5701778424741188e-05, "loss": 1.4451, "step": 947 }, { "epoch": 1.03, "learning_rate": 1.5691972297632373e-05, "loss": 1.5666, "step": 948 }, { "epoch": 1.03, "learning_rate": 1.568215806724528e-05, "loss": 1.7347, "step": 949 }, { "epoch": 1.03, "learning_rate": 1.5672335747551772e-05, "loss": 1.4896, "step": 950 }, { "epoch": 1.03, "learning_rate": 1.566250535253522e-05, "loss": 1.57, "step": 951 }, { "epoch": 1.03, "learning_rate": 1.5652666896190498e-05, "loss": 1.6703, "step": 952 }, { "epoch": 1.03, "learning_rate": 1.5642820392523962e-05, "loss": 1.4305, "step": 953 }, { "epoch": 1.03, "learning_rate": 1.563296585555341e-05, "loss": 1.6613, "step": 954 }, { "epoch": 1.03, "learning_rate": 1.5623103299308082e-05, "loss": 1.9669, "step": 955 }, { "epoch": 1.03, "learning_rate": 1.5613232737828644e-05, "loss": 1.7696, "step": 956 }, { "epoch": 1.04, "learning_rate": 1.560335418516714e-05, "loss": 1.6591, "step": 957 }, { "epoch": 1.04, "learning_rate": 1.5593467655387012e-05, "loss": 1.5471, "step": 958 }, { "epoch": 1.04, "learning_rate": 1.5583573162563045e-05, "loss": 1.3704, "step": 959 }, { "epoch": 1.04, "learning_rate": 1.5573670720781363e-05, "loss": 1.6398, "step": 960 }, { "epoch": 1.04, "learning_rate": 1.55637603441394e-05, "loss": 1.7751, "step": 961 }, { "epoch": 1.04, "learning_rate": 1.5553842046745903e-05, "loss": 1.4721, "step": 962 }, { "epoch": 1.04, "learning_rate": 1.554391584272088e-05, "loss": 1.5168, "step": 963 }, { "epoch": 1.04, "learning_rate": 1.5533981746195598e-05, "loss": 1.4701, "step": 964 }, { "epoch": 1.04, "learning_rate": 1.5524039771312565e-05, "loss": 1.5164, "step": 965 }, { "epoch": 1.05, "learning_rate": 1.5514089932225506e-05, "loss": 1.6643, "step": 966 }, { "epoch": 1.05, "learning_rate": 1.5504132243099327e-05, "loss": 1.5087, "step": 967 }, { "epoch": 1.05, "learning_rate": 1.5494166718110137e-05, "loss": 1.6638, "step": 968 }, { "epoch": 1.05, "learning_rate": 1.5484193371445168e-05, "loss": 1.4278, "step": 969 }, { "epoch": 1.05, "learning_rate": 1.5474212217302814e-05, "loss": 1.5084, "step": 970 }, { "epoch": 1.05, "learning_rate": 1.5464223269892564e-05, "loss": 1.4425, "step": 971 }, { "epoch": 1.05, "learning_rate": 1.545422654343502e-05, "loss": 1.7087, "step": 972 }, { "epoch": 1.05, "learning_rate": 1.544422205216184e-05, "loss": 1.4237, "step": 973 }, { "epoch": 1.05, "learning_rate": 1.543420981031576e-05, "loss": 1.7358, "step": 974 }, { "epoch": 1.06, "learning_rate": 1.5424189832150518e-05, "loss": 1.8258, "step": 975 }, { "epoch": 1.06, "learning_rate": 1.5414162131930896e-05, "loss": 1.7415, "step": 976 }, { "epoch": 1.06, "learning_rate": 1.5404126723932648e-05, "loss": 1.8253, "step": 977 }, { "epoch": 1.06, "learning_rate": 1.5394083622442516e-05, "loss": 1.4271, "step": 978 }, { "epoch": 1.06, "learning_rate": 1.5384032841758186e-05, "loss": 1.6272, "step": 979 }, { "epoch": 1.06, "learning_rate": 1.537397439618828e-05, "loss": 1.6815, "step": 980 }, { "epoch": 1.06, "learning_rate": 1.536390830005233e-05, "loss": 1.6434, "step": 981 }, { "epoch": 1.06, "learning_rate": 1.5353834567680758e-05, "loss": 1.6524, "step": 982 }, { "epoch": 1.06, "learning_rate": 1.5343753213414862e-05, "loss": 1.6558, "step": 983 }, { "epoch": 1.06, "learning_rate": 1.5333664251606787e-05, "loss": 1.5875, "step": 984 }, { "epoch": 1.07, "learning_rate": 1.5323567696619513e-05, "loss": 1.527, "step": 985 }, { "epoch": 1.07, "learning_rate": 1.531346356282682e-05, "loss": 1.5793, "step": 986 }, { "epoch": 1.07, "learning_rate": 1.530335186461329e-05, "loss": 1.6894, "step": 987 }, { "epoch": 1.07, "learning_rate": 1.5293232616374267e-05, "loss": 1.687, "step": 988 }, { "epoch": 1.07, "learning_rate": 1.5283105832515842e-05, "loss": 1.7821, "step": 989 }, { "epoch": 1.07, "learning_rate": 1.5272971527454838e-05, "loss": 1.8022, "step": 990 }, { "epoch": 1.07, "learning_rate": 1.5262829715618782e-05, "loss": 1.586, "step": 991 }, { "epoch": 1.07, "learning_rate": 1.5252680411445892e-05, "loss": 1.6101, "step": 992 }, { "epoch": 1.07, "learning_rate": 1.5242523629385048e-05, "loss": 1.6305, "step": 993 }, { "epoch": 1.08, "learning_rate": 1.5232359383895779e-05, "loss": 1.7602, "step": 994 }, { "epoch": 1.08, "learning_rate": 1.5222187689448235e-05, "loss": 1.3457, "step": 995 }, { "epoch": 1.08, "learning_rate": 1.521200856052318e-05, "loss": 1.6397, "step": 996 }, { "epoch": 1.08, "learning_rate": 1.520182201161195e-05, "loss": 1.5046, "step": 997 }, { "epoch": 1.08, "learning_rate": 1.5191628057216452e-05, "loss": 1.6075, "step": 998 }, { "epoch": 1.08, "learning_rate": 1.5181426711849133e-05, "loss": 1.5093, "step": 999 }, { "epoch": 1.08, "learning_rate": 1.517121799003296e-05, "loss": 1.7927, "step": 1000 }, { "epoch": 1.08, "learning_rate": 1.5161001906301407e-05, "loss": 1.7316, "step": 1001 }, { "epoch": 1.08, "learning_rate": 1.5150778475198427e-05, "loss": 1.7274, "step": 1002 }, { "epoch": 1.09, "learning_rate": 1.5140547711278428e-05, "loss": 1.2897, "step": 1003 }, { "epoch": 1.09, "learning_rate": 1.5130309629106264e-05, "loss": 1.5808, "step": 1004 }, { "epoch": 1.09, "learning_rate": 1.51200642432572e-05, "loss": 1.7323, "step": 1005 }, { "epoch": 1.09, "learning_rate": 1.5109811568316906e-05, "loss": 1.6818, "step": 1006 }, { "epoch": 1.09, "learning_rate": 1.5099551618881426e-05, "loss": 1.4173, "step": 1007 }, { "epoch": 1.09, "learning_rate": 1.5089284409557158e-05, "loss": 1.7259, "step": 1008 }, { "epoch": 1.09, "learning_rate": 1.5079009954960842e-05, "loss": 1.5218, "step": 1009 }, { "epoch": 1.09, "learning_rate": 1.5068728269719524e-05, "loss": 1.6281, "step": 1010 }, { "epoch": 1.09, "learning_rate": 1.505843936847055e-05, "loss": 1.7548, "step": 1011 }, { "epoch": 1.1, "learning_rate": 1.5048143265861536e-05, "loss": 1.524, "step": 1012 }, { "epoch": 1.1, "learning_rate": 1.5037839976550352e-05, "loss": 1.7357, "step": 1013 }, { "epoch": 1.1, "learning_rate": 1.5027529515205097e-05, "loss": 1.7994, "step": 1014 }, { "epoch": 1.1, "learning_rate": 1.5017211896504082e-05, "loss": 1.8074, "step": 1015 }, { "epoch": 1.1, "learning_rate": 1.500688713513581e-05, "loss": 1.5332, "step": 1016 }, { "epoch": 1.1, "learning_rate": 1.4996555245798944e-05, "loss": 1.673, "step": 1017 }, { "epoch": 1.1, "learning_rate": 1.4986216243202307e-05, "loss": 1.6054, "step": 1018 }, { "epoch": 1.1, "learning_rate": 1.4975870142064838e-05, "loss": 1.8379, "step": 1019 }, { "epoch": 1.1, "learning_rate": 1.4965516957115585e-05, "loss": 1.7481, "step": 1020 }, { "epoch": 1.1, "learning_rate": 1.495515670309368e-05, "loss": 1.7256, "step": 1021 }, { "epoch": 1.11, "learning_rate": 1.4944789394748322e-05, "loss": 1.5541, "step": 1022 }, { "epoch": 1.11, "learning_rate": 1.4934415046838755e-05, "loss": 1.7022, "step": 1023 }, { "epoch": 1.11, "learning_rate": 1.4924033674134236e-05, "loss": 1.6277, "step": 1024 }, { "epoch": 1.11, "learning_rate": 1.4913645291414027e-05, "loss": 1.8026, "step": 1025 }, { "epoch": 1.11, "learning_rate": 1.490324991346737e-05, "loss": 1.548, "step": 1026 }, { "epoch": 1.11, "learning_rate": 1.4892847555093468e-05, "loss": 1.7814, "step": 1027 }, { "epoch": 1.11, "learning_rate": 1.4882438231101456e-05, "loss": 1.3886, "step": 1028 }, { "epoch": 1.11, "learning_rate": 1.4872021956310394e-05, "loss": 1.503, "step": 1029 }, { "epoch": 1.11, "learning_rate": 1.4861598745549228e-05, "loss": 1.5944, "step": 1030 }, { "epoch": 1.12, "learning_rate": 1.4851168613656785e-05, "loss": 1.5893, "step": 1031 }, { "epoch": 1.12, "learning_rate": 1.484073157548174e-05, "loss": 1.4621, "step": 1032 }, { "epoch": 1.12, "learning_rate": 1.4830287645882602e-05, "loss": 1.6625, "step": 1033 }, { "epoch": 1.12, "learning_rate": 1.481983683972769e-05, "loss": 1.6391, "step": 1034 }, { "epoch": 1.12, "learning_rate": 1.4809379171895122e-05, "loss": 1.7311, "step": 1035 }, { "epoch": 1.12, "learning_rate": 1.4798914657272771e-05, "loss": 1.424, "step": 1036 }, { "epoch": 1.12, "learning_rate": 1.4788443310758263e-05, "loss": 1.5855, "step": 1037 }, { "epoch": 1.12, "learning_rate": 1.477796514725895e-05, "loss": 1.5577, "step": 1038 }, { "epoch": 1.12, "learning_rate": 1.4767480181691888e-05, "loss": 1.469, "step": 1039 }, { "epoch": 1.13, "learning_rate": 1.475698842898382e-05, "loss": 2.0158, "step": 1040 }, { "epoch": 1.13, "learning_rate": 1.4746489904071148e-05, "loss": 1.7775, "step": 1041 }, { "epoch": 1.13, "learning_rate": 1.4735984621899917e-05, "loss": 1.5493, "step": 1042 }, { "epoch": 1.13, "learning_rate": 1.472547259742579e-05, "loss": 1.7657, "step": 1043 }, { "epoch": 1.13, "learning_rate": 1.4714953845614028e-05, "loss": 1.7562, "step": 1044 }, { "epoch": 1.13, "learning_rate": 1.4704428381439471e-05, "loss": 1.4397, "step": 1045 }, { "epoch": 1.13, "learning_rate": 1.4693896219886518e-05, "loss": 1.5493, "step": 1046 }, { "epoch": 1.13, "learning_rate": 1.4683357375949099e-05, "loss": 1.7458, "step": 1047 }, { "epoch": 1.13, "learning_rate": 1.467281186463065e-05, "loss": 1.7092, "step": 1048 }, { "epoch": 1.14, "learning_rate": 1.4662259700944117e-05, "loss": 1.7902, "step": 1049 }, { "epoch": 1.14, "learning_rate": 1.46517008999119e-05, "loss": 1.6822, "step": 1050 }, { "epoch": 1.14, "learning_rate": 1.4641135476565853e-05, "loss": 1.4869, "step": 1051 }, { "epoch": 1.14, "learning_rate": 1.4630563445947265e-05, "loss": 1.2409, "step": 1052 }, { "epoch": 1.14, "learning_rate": 1.4619984823106821e-05, "loss": 1.6679, "step": 1053 }, { "epoch": 1.14, "learning_rate": 1.4609399623104594e-05, "loss": 1.4442, "step": 1054 }, { "epoch": 1.14, "learning_rate": 1.4598807861010023e-05, "loss": 1.7156, "step": 1055 }, { "epoch": 1.14, "learning_rate": 1.4588209551901886e-05, "loss": 1.8078, "step": 1056 }, { "epoch": 1.14, "learning_rate": 1.4577604710868288e-05, "loss": 1.7926, "step": 1057 }, { "epoch": 1.15, "learning_rate": 1.4566993353006622e-05, "loss": 1.2587, "step": 1058 }, { "epoch": 1.15, "learning_rate": 1.4556375493423572e-05, "loss": 1.9043, "step": 1059 }, { "epoch": 1.15, "learning_rate": 1.4545751147235063e-05, "loss": 1.6593, "step": 1060 }, { "epoch": 1.15, "learning_rate": 1.4535120329566268e-05, "loss": 1.9199, "step": 1061 }, { "epoch": 1.15, "learning_rate": 1.4524483055551561e-05, "loss": 1.39, "step": 1062 }, { "epoch": 1.15, "learning_rate": 1.4513839340334523e-05, "loss": 1.5897, "step": 1063 }, { "epoch": 1.15, "learning_rate": 1.4503189199067891e-05, "loss": 1.695, "step": 1064 }, { "epoch": 1.15, "learning_rate": 1.4492532646913553e-05, "loss": 1.5598, "step": 1065 }, { "epoch": 1.15, "learning_rate": 1.448186969904253e-05, "loss": 1.6194, "step": 1066 }, { "epoch": 1.15, "learning_rate": 1.447120037063494e-05, "loss": 1.4702, "step": 1067 }, { "epoch": 1.16, "learning_rate": 1.446052467687999e-05, "loss": 1.673, "step": 1068 }, { "epoch": 1.16, "learning_rate": 1.4449842632975948e-05, "loss": 1.6351, "step": 1069 }, { "epoch": 1.16, "learning_rate": 1.4439154254130123e-05, "loss": 1.6767, "step": 1070 }, { "epoch": 1.16, "learning_rate": 1.4428459555558841e-05, "loss": 1.518, "step": 1071 }, { "epoch": 1.16, "learning_rate": 1.4417758552487424e-05, "loss": 1.6418, "step": 1072 }, { "epoch": 1.16, "learning_rate": 1.4407051260150167e-05, "loss": 1.4267, "step": 1073 }, { "epoch": 1.16, "learning_rate": 1.4396337693790329e-05, "loss": 1.392, "step": 1074 }, { "epoch": 1.16, "learning_rate": 1.4385617868660094e-05, "loss": 1.6811, "step": 1075 }, { "epoch": 1.16, "learning_rate": 1.4374891800020549e-05, "loss": 1.3297, "step": 1076 }, { "epoch": 1.17, "learning_rate": 1.4364159503141684e-05, "loss": 1.9199, "step": 1077 }, { "epoch": 1.17, "learning_rate": 1.4353420993302346e-05, "loss": 1.5111, "step": 1078 }, { "epoch": 1.17, "learning_rate": 1.4342676285790224e-05, "loss": 1.3439, "step": 1079 }, { "epoch": 1.17, "learning_rate": 1.4331925395901847e-05, "loss": 1.7453, "step": 1080 }, { "epoch": 1.17, "learning_rate": 1.4321168338942528e-05, "loss": 1.867, "step": 1081 }, { "epoch": 1.17, "learning_rate": 1.4310405130226365e-05, "loss": 1.537, "step": 1082 }, { "epoch": 1.17, "learning_rate": 1.4299635785076214e-05, "loss": 1.4933, "step": 1083 }, { "epoch": 1.17, "learning_rate": 1.4288860318823673e-05, "loss": 1.7592, "step": 1084 }, { "epoch": 1.17, "learning_rate": 1.4278078746809039e-05, "loss": 1.4784, "step": 1085 }, { "epoch": 1.18, "learning_rate": 1.4267291084381322e-05, "loss": 1.5672, "step": 1086 }, { "epoch": 1.18, "learning_rate": 1.4256497346898186e-05, "loss": 1.5483, "step": 1087 }, { "epoch": 1.18, "learning_rate": 1.4245697549725951e-05, "loss": 1.753, "step": 1088 }, { "epoch": 1.18, "learning_rate": 1.4234891708239562e-05, "loss": 1.6669, "step": 1089 }, { "epoch": 1.18, "learning_rate": 1.4224079837822566e-05, "loss": 1.6799, "step": 1090 }, { "epoch": 1.18, "learning_rate": 1.4213261953867099e-05, "loss": 1.629, "step": 1091 }, { "epoch": 1.18, "learning_rate": 1.4202438071773856e-05, "loss": 1.4544, "step": 1092 }, { "epoch": 1.18, "learning_rate": 1.4191608206952069e-05, "loss": 1.7615, "step": 1093 }, { "epoch": 1.18, "learning_rate": 1.4180772374819489e-05, "loss": 1.5733, "step": 1094 }, { "epoch": 1.19, "learning_rate": 1.416993059080236e-05, "loss": 1.4325, "step": 1095 }, { "epoch": 1.19, "learning_rate": 1.4159082870335402e-05, "loss": 1.5521, "step": 1096 }, { "epoch": 1.19, "learning_rate": 1.4148229228861782e-05, "loss": 1.6471, "step": 1097 }, { "epoch": 1.19, "learning_rate": 1.4137369681833106e-05, "loss": 1.5429, "step": 1098 }, { "epoch": 1.19, "learning_rate": 1.4126504244709377e-05, "loss": 1.7703, "step": 1099 }, { "epoch": 1.19, "learning_rate": 1.4115632932958992e-05, "loss": 1.5304, "step": 1100 }, { "epoch": 1.19, "learning_rate": 1.4104755762058701e-05, "loss": 1.6547, "step": 1101 }, { "epoch": 1.19, "learning_rate": 1.40938727474936e-05, "loss": 1.7368, "step": 1102 }, { "epoch": 1.19, "learning_rate": 1.408298390475711e-05, "loss": 1.4461, "step": 1103 }, { "epoch": 1.19, "learning_rate": 1.4072089249350942e-05, "loss": 1.7066, "step": 1104 }, { "epoch": 1.2, "learning_rate": 1.4061188796785085e-05, "loss": 1.745, "step": 1105 }, { "epoch": 1.2, "learning_rate": 1.4050282562577782e-05, "loss": 1.6486, "step": 1106 }, { "epoch": 1.2, "learning_rate": 1.4039370562255501e-05, "loss": 1.4831, "step": 1107 }, { "epoch": 1.2, "learning_rate": 1.4028452811352926e-05, "loss": 1.54, "step": 1108 }, { "epoch": 1.2, "learning_rate": 1.4017529325412926e-05, "loss": 1.6582, "step": 1109 }, { "epoch": 1.2, "learning_rate": 1.400660011998653e-05, "loss": 1.7603, "step": 1110 }, { "epoch": 1.2, "learning_rate": 1.3995665210632918e-05, "loss": 1.6216, "step": 1111 }, { "epoch": 1.2, "learning_rate": 1.398472461291938e-05, "loss": 1.6027, "step": 1112 }, { "epoch": 1.2, "learning_rate": 1.3973778342421314e-05, "loss": 1.6572, "step": 1113 }, { "epoch": 1.21, "learning_rate": 1.3962826414722185e-05, "loss": 1.4402, "step": 1114 }, { "epoch": 1.21, "learning_rate": 1.395186884541352e-05, "loss": 1.6595, "step": 1115 }, { "epoch": 1.21, "learning_rate": 1.3940905650094874e-05, "loss": 1.5473, "step": 1116 }, { "epoch": 1.21, "learning_rate": 1.3929936844373806e-05, "loss": 1.653, "step": 1117 }, { "epoch": 1.21, "learning_rate": 1.391896244386587e-05, "loss": 1.4939, "step": 1118 }, { "epoch": 1.21, "learning_rate": 1.3907982464194584e-05, "loss": 1.4917, "step": 1119 }, { "epoch": 1.21, "learning_rate": 1.3896996920991409e-05, "loss": 1.3911, "step": 1120 }, { "epoch": 1.21, "learning_rate": 1.3886005829895717e-05, "loss": 1.6069, "step": 1121 }, { "epoch": 1.21, "learning_rate": 1.387500920655479e-05, "loss": 1.683, "step": 1122 }, { "epoch": 1.22, "learning_rate": 1.3864007066623782e-05, "loss": 1.9023, "step": 1123 }, { "epoch": 1.22, "learning_rate": 1.3852999425765697e-05, "loss": 1.5886, "step": 1124 }, { "epoch": 1.22, "learning_rate": 1.384198629965137e-05, "loss": 1.5902, "step": 1125 }, { "epoch": 1.22, "learning_rate": 1.3830967703959458e-05, "loss": 1.7075, "step": 1126 }, { "epoch": 1.22, "learning_rate": 1.3819943654376393e-05, "loss": 1.8132, "step": 1127 }, { "epoch": 1.22, "learning_rate": 1.3808914166596367e-05, "loss": 1.7699, "step": 1128 }, { "epoch": 1.22, "learning_rate": 1.3797879256321323e-05, "loss": 1.8824, "step": 1129 }, { "epoch": 1.22, "learning_rate": 1.378683893926092e-05, "loss": 1.7651, "step": 1130 }, { "epoch": 1.22, "learning_rate": 1.3775793231132515e-05, "loss": 1.7531, "step": 1131 }, { "epoch": 1.23, "learning_rate": 1.3764742147661143e-05, "loss": 1.5328, "step": 1132 }, { "epoch": 1.23, "learning_rate": 1.3753685704579489e-05, "loss": 1.5323, "step": 1133 }, { "epoch": 1.23, "learning_rate": 1.3742623917627864e-05, "loss": 1.6862, "step": 1134 }, { "epoch": 1.23, "learning_rate": 1.373155680255419e-05, "loss": 1.6841, "step": 1135 }, { "epoch": 1.23, "learning_rate": 1.3720484375113978e-05, "loss": 1.5371, "step": 1136 }, { "epoch": 1.23, "learning_rate": 1.3709406651070299e-05, "loss": 1.6985, "step": 1137 }, { "epoch": 1.23, "learning_rate": 1.3698323646193758e-05, "loss": 1.6268, "step": 1138 }, { "epoch": 1.23, "learning_rate": 1.3687235376262492e-05, "loss": 1.5333, "step": 1139 }, { "epoch": 1.23, "learning_rate": 1.3676141857062117e-05, "loss": 1.7055, "step": 1140 }, { "epoch": 1.23, "learning_rate": 1.366504310438574e-05, "loss": 1.4433, "step": 1141 }, { "epoch": 1.24, "learning_rate": 1.3653939134033897e-05, "loss": 1.8203, "step": 1142 }, { "epoch": 1.24, "learning_rate": 1.3642829961814577e-05, "loss": 1.6585, "step": 1143 }, { "epoch": 1.24, "learning_rate": 1.3631715603543153e-05, "loss": 1.4763, "step": 1144 }, { "epoch": 1.24, "learning_rate": 1.362059607504239e-05, "loss": 1.8157, "step": 1145 }, { "epoch": 1.24, "learning_rate": 1.3609471392142419e-05, "loss": 1.5029, "step": 1146 }, { "epoch": 1.24, "learning_rate": 1.3598341570680698e-05, "loss": 1.5438, "step": 1147 }, { "epoch": 1.24, "learning_rate": 1.3587206626502004e-05, "loss": 1.4743, "step": 1148 }, { "epoch": 1.24, "learning_rate": 1.3576066575458415e-05, "loss": 1.8572, "step": 1149 }, { "epoch": 1.24, "learning_rate": 1.3564921433409268e-05, "loss": 1.5078, "step": 1150 }, { "epoch": 1.25, "learning_rate": 1.3553771216221155e-05, "loss": 1.2459, "step": 1151 }, { "epoch": 1.25, "learning_rate": 1.3542615939767882e-05, "loss": 1.7684, "step": 1152 }, { "epoch": 1.25, "learning_rate": 1.3531455619930481e-05, "loss": 1.4721, "step": 1153 }, { "epoch": 1.25, "learning_rate": 1.3520290272597135e-05, "loss": 1.4634, "step": 1154 }, { "epoch": 1.25, "learning_rate": 1.3509119913663206e-05, "loss": 1.5922, "step": 1155 }, { "epoch": 1.25, "learning_rate": 1.3497944559031185e-05, "loss": 1.58, "step": 1156 }, { "epoch": 1.25, "learning_rate": 1.3486764224610667e-05, "loss": 1.8095, "step": 1157 }, { "epoch": 1.25, "learning_rate": 1.3475578926318343e-05, "loss": 1.6782, "step": 1158 }, { "epoch": 1.25, "learning_rate": 1.3464388680077973e-05, "loss": 1.4844, "step": 1159 }, { "epoch": 1.26, "learning_rate": 1.345319350182036e-05, "loss": 1.3102, "step": 1160 }, { "epoch": 1.26, "learning_rate": 1.3441993407483321e-05, "loss": 1.719, "step": 1161 }, { "epoch": 1.26, "learning_rate": 1.343078841301168e-05, "loss": 1.6825, "step": 1162 }, { "epoch": 1.26, "learning_rate": 1.3419578534357236e-05, "loss": 1.7969, "step": 1163 }, { "epoch": 1.26, "learning_rate": 1.3408363787478736e-05, "loss": 1.7454, "step": 1164 }, { "epoch": 1.26, "learning_rate": 1.3397144188341865e-05, "loss": 1.7881, "step": 1165 }, { "epoch": 1.26, "learning_rate": 1.3385919752919208e-05, "loss": 1.8708, "step": 1166 }, { "epoch": 1.26, "learning_rate": 1.3374690497190244e-05, "loss": 1.6696, "step": 1167 }, { "epoch": 1.26, "learning_rate": 1.3363456437141305e-05, "loss": 1.6845, "step": 1168 }, { "epoch": 1.27, "learning_rate": 1.335221758876557e-05, "loss": 1.7927, "step": 1169 }, { "epoch": 1.27, "learning_rate": 1.334097396806303e-05, "loss": 1.5664, "step": 1170 }, { "epoch": 1.27, "learning_rate": 1.332972559104047e-05, "loss": 1.7425, "step": 1171 }, { "epoch": 1.27, "learning_rate": 1.3318472473711453e-05, "loss": 1.5114, "step": 1172 }, { "epoch": 1.27, "learning_rate": 1.3307214632096282e-05, "loss": 1.4314, "step": 1173 }, { "epoch": 1.27, "learning_rate": 1.329595208222199e-05, "loss": 1.7526, "step": 1174 }, { "epoch": 1.27, "learning_rate": 1.3284684840122313e-05, "loss": 1.693, "step": 1175 }, { "epoch": 1.27, "learning_rate": 1.3273412921837663e-05, "loss": 1.6599, "step": 1176 }, { "epoch": 1.27, "learning_rate": 1.3262136343415117e-05, "loss": 1.7349, "step": 1177 }, { "epoch": 1.27, "learning_rate": 1.3250855120908379e-05, "loss": 1.6321, "step": 1178 }, { "epoch": 1.28, "learning_rate": 1.3239569270377768e-05, "loss": 1.6261, "step": 1179 }, { "epoch": 1.28, "learning_rate": 1.3228278807890184e-05, "loss": 1.5685, "step": 1180 }, { "epoch": 1.28, "learning_rate": 1.3216983749519112e-05, "loss": 1.6259, "step": 1181 }, { "epoch": 1.28, "learning_rate": 1.3205684111344557e-05, "loss": 1.7537, "step": 1182 }, { "epoch": 1.28, "learning_rate": 1.319437990945306e-05, "loss": 1.6813, "step": 1183 }, { "epoch": 1.28, "learning_rate": 1.3183071159937649e-05, "loss": 1.5848, "step": 1184 }, { "epoch": 1.28, "learning_rate": 1.3171757878897831e-05, "loss": 1.9219, "step": 1185 }, { "epoch": 1.28, "learning_rate": 1.3160440082439565e-05, "loss": 1.6489, "step": 1186 }, { "epoch": 1.28, "learning_rate": 1.3149117786675239e-05, "loss": 1.7062, "step": 1187 }, { "epoch": 1.29, "learning_rate": 1.3137791007723636e-05, "loss": 1.5602, "step": 1188 }, { "epoch": 1.29, "learning_rate": 1.3126459761709943e-05, "loss": 1.7758, "step": 1189 }, { "epoch": 1.29, "learning_rate": 1.311512406476568e-05, "loss": 1.4631, "step": 1190 }, { "epoch": 1.29, "learning_rate": 1.3103783933028726e-05, "loss": 1.5428, "step": 1191 }, { "epoch": 1.29, "learning_rate": 1.309243938264326e-05, "loss": 1.7362, "step": 1192 }, { "epoch": 1.29, "learning_rate": 1.3081090429759755e-05, "loss": 1.6797, "step": 1193 }, { "epoch": 1.29, "learning_rate": 1.3069737090534951e-05, "loss": 1.748, "step": 1194 }, { "epoch": 1.29, "learning_rate": 1.305837938113184e-05, "loss": 1.6353, "step": 1195 }, { "epoch": 1.29, "learning_rate": 1.3047017317719623e-05, "loss": 1.533, "step": 1196 }, { "epoch": 1.3, "learning_rate": 1.303565091647371e-05, "loss": 1.6425, "step": 1197 }, { "epoch": 1.3, "learning_rate": 1.302428019357568e-05, "loss": 1.7006, "step": 1198 }, { "epoch": 1.3, "learning_rate": 1.3012905165213265e-05, "loss": 1.7035, "step": 1199 }, { "epoch": 1.3, "learning_rate": 1.300152584758033e-05, "loss": 1.4196, "step": 1200 }, { "epoch": 1.3, "learning_rate": 1.2990142256876845e-05, "loss": 1.6645, "step": 1201 }, { "epoch": 1.3, "learning_rate": 1.2978754409308864e-05, "loss": 1.687, "step": 1202 }, { "epoch": 1.3, "learning_rate": 1.29673623210885e-05, "loss": 1.5081, "step": 1203 }, { "epoch": 1.3, "learning_rate": 1.2955966008433898e-05, "loss": 1.7733, "step": 1204 }, { "epoch": 1.3, "learning_rate": 1.2944565487569224e-05, "loss": 1.5592, "step": 1205 }, { "epoch": 1.31, "learning_rate": 1.293316077472464e-05, "loss": 1.541, "step": 1206 }, { "epoch": 1.31, "learning_rate": 1.292175188613626e-05, "loss": 1.5144, "step": 1207 }, { "epoch": 1.31, "learning_rate": 1.2910338838046154e-05, "loss": 1.4817, "step": 1208 }, { "epoch": 1.31, "learning_rate": 1.2898921646702317e-05, "loss": 1.5223, "step": 1209 }, { "epoch": 1.31, "learning_rate": 1.2887500328358627e-05, "loss": 1.6485, "step": 1210 }, { "epoch": 1.31, "learning_rate": 1.2876074899274855e-05, "loss": 1.6818, "step": 1211 }, { "epoch": 1.31, "learning_rate": 1.2864645375716614e-05, "loss": 1.2953, "step": 1212 }, { "epoch": 1.31, "learning_rate": 1.2853211773955347e-05, "loss": 1.4401, "step": 1213 }, { "epoch": 1.31, "learning_rate": 1.2841774110268304e-05, "loss": 1.5226, "step": 1214 }, { "epoch": 1.31, "learning_rate": 1.2830332400938518e-05, "loss": 1.7461, "step": 1215 }, { "epoch": 1.32, "learning_rate": 1.2818886662254782e-05, "loss": 1.659, "step": 1216 }, { "epoch": 1.32, "learning_rate": 1.2807436910511626e-05, "loss": 1.6772, "step": 1217 }, { "epoch": 1.32, "learning_rate": 1.279598316200929e-05, "loss": 1.7157, "step": 1218 }, { "epoch": 1.32, "learning_rate": 1.2784525433053707e-05, "loss": 1.5594, "step": 1219 }, { "epoch": 1.32, "learning_rate": 1.2773063739956473e-05, "loss": 1.4783, "step": 1220 }, { "epoch": 1.32, "learning_rate": 1.276159809903483e-05, "loss": 1.5933, "step": 1221 }, { "epoch": 1.32, "learning_rate": 1.2750128526611642e-05, "loss": 1.5352, "step": 1222 }, { "epoch": 1.32, "learning_rate": 1.2738655039015368e-05, "loss": 1.5875, "step": 1223 }, { "epoch": 1.32, "learning_rate": 1.2727177652580044e-05, "loss": 1.6181, "step": 1224 }, { "epoch": 1.33, "learning_rate": 1.2715696383645247e-05, "loss": 1.3225, "step": 1225 }, { "epoch": 1.33, "learning_rate": 1.2704211248556089e-05, "loss": 1.7181, "step": 1226 }, { "epoch": 1.33, "learning_rate": 1.2692722263663186e-05, "loss": 1.8566, "step": 1227 }, { "epoch": 1.33, "learning_rate": 1.2681229445322633e-05, "loss": 1.712, "step": 1228 }, { "epoch": 1.33, "learning_rate": 1.2669732809895986e-05, "loss": 1.7477, "step": 1229 }, { "epoch": 1.33, "learning_rate": 1.265823237375023e-05, "loss": 1.6415, "step": 1230 }, { "epoch": 1.33, "learning_rate": 1.2646728153257761e-05, "loss": 1.6903, "step": 1231 }, { "epoch": 1.33, "learning_rate": 1.2635220164796364e-05, "loss": 1.6767, "step": 1232 }, { "epoch": 1.33, "learning_rate": 1.262370842474919e-05, "loss": 1.557, "step": 1233 }, { "epoch": 1.34, "learning_rate": 1.261219294950473e-05, "loss": 1.5915, "step": 1234 }, { "epoch": 1.34, "learning_rate": 1.2600673755456789e-05, "loss": 1.7127, "step": 1235 }, { "epoch": 1.34, "learning_rate": 1.2589150859004473e-05, "loss": 1.4802, "step": 1236 }, { "epoch": 1.34, "learning_rate": 1.2577624276552155e-05, "loss": 1.6932, "step": 1237 }, { "epoch": 1.34, "learning_rate": 1.2566094024509452e-05, "loss": 1.5752, "step": 1238 }, { "epoch": 1.34, "learning_rate": 1.2554560119291206e-05, "loss": 1.5841, "step": 1239 }, { "epoch": 1.34, "learning_rate": 1.2543022577317472e-05, "loss": 1.739, "step": 1240 }, { "epoch": 1.34, "learning_rate": 1.2531481415013464e-05, "loss": 1.7102, "step": 1241 }, { "epoch": 1.34, "learning_rate": 1.2519936648809564e-05, "loss": 1.7832, "step": 1242 }, { "epoch": 1.35, "learning_rate": 1.2508388295141276e-05, "loss": 1.8557, "step": 1243 }, { "epoch": 1.35, "learning_rate": 1.249683637044922e-05, "loss": 1.3856, "step": 1244 }, { "epoch": 1.35, "learning_rate": 1.2485280891179086e-05, "loss": 1.6422, "step": 1245 }, { "epoch": 1.35, "learning_rate": 1.247372187378164e-05, "loss": 2.1887, "step": 1246 }, { "epoch": 1.35, "learning_rate": 1.2462159334712676e-05, "loss": 1.9006, "step": 1247 }, { "epoch": 1.35, "learning_rate": 1.2450593290433005e-05, "loss": 1.7601, "step": 1248 }, { "epoch": 1.35, "learning_rate": 1.243902375740842e-05, "loss": 1.734, "step": 1249 }, { "epoch": 1.35, "learning_rate": 1.2427450752109696e-05, "loss": 1.4335, "step": 1250 }, { "epoch": 1.35, "learning_rate": 1.2415874291012538e-05, "loss": 1.809, "step": 1251 }, { "epoch": 1.35, "learning_rate": 1.2404294390597576e-05, "loss": 1.406, "step": 1252 }, { "epoch": 1.36, "learning_rate": 1.2392711067350337e-05, "loss": 1.5234, "step": 1253 }, { "epoch": 1.36, "learning_rate": 1.2381124337761216e-05, "loss": 1.7456, "step": 1254 }, { "epoch": 1.36, "learning_rate": 1.2369534218325465e-05, "loss": 1.8913, "step": 1255 }, { "epoch": 1.36, "learning_rate": 1.2357940725543156e-05, "loss": 1.6218, "step": 1256 }, { "epoch": 1.36, "learning_rate": 1.2346343875919163e-05, "loss": 1.6535, "step": 1257 }, { "epoch": 1.36, "learning_rate": 1.2334743685963146e-05, "loss": 1.4896, "step": 1258 }, { "epoch": 1.36, "learning_rate": 1.2323140172189515e-05, "loss": 1.3186, "step": 1259 }, { "epoch": 1.36, "learning_rate": 1.2311533351117406e-05, "loss": 1.5025, "step": 1260 }, { "epoch": 1.36, "learning_rate": 1.2299923239270675e-05, "loss": 1.5083, "step": 1261 }, { "epoch": 1.37, "learning_rate": 1.2288309853177855e-05, "loss": 1.4921, "step": 1262 }, { "epoch": 1.37, "learning_rate": 1.227669320937215e-05, "loss": 1.65, "step": 1263 }, { "epoch": 1.37, "learning_rate": 1.2265073324391388e-05, "loss": 1.6036, "step": 1264 }, { "epoch": 1.37, "learning_rate": 1.225345021477802e-05, "loss": 1.4732, "step": 1265 }, { "epoch": 1.37, "learning_rate": 1.2241823897079084e-05, "loss": 1.6494, "step": 1266 }, { "epoch": 1.37, "learning_rate": 1.2230194387846189e-05, "loss": 1.7075, "step": 1267 }, { "epoch": 1.37, "learning_rate": 1.2218561703635484e-05, "loss": 1.7934, "step": 1268 }, { "epoch": 1.37, "learning_rate": 1.2206925861007639e-05, "loss": 1.818, "step": 1269 }, { "epoch": 1.37, "learning_rate": 1.2195286876527824e-05, "loss": 1.7087, "step": 1270 }, { "epoch": 1.38, "learning_rate": 1.2183644766765674e-05, "loss": 1.5402, "step": 1271 }, { "epoch": 1.38, "learning_rate": 1.2171999548295284e-05, "loss": 1.6554, "step": 1272 }, { "epoch": 1.38, "learning_rate": 1.2160351237695162e-05, "loss": 1.5047, "step": 1273 }, { "epoch": 1.38, "learning_rate": 1.214869985154823e-05, "loss": 1.5085, "step": 1274 }, { "epoch": 1.38, "learning_rate": 1.213704540644178e-05, "loss": 1.5607, "step": 1275 }, { "epoch": 1.38, "learning_rate": 1.2125387918967461e-05, "loss": 1.4395, "step": 1276 }, { "epoch": 1.38, "learning_rate": 1.2113727405721261e-05, "loss": 1.5419, "step": 1277 }, { "epoch": 1.38, "learning_rate": 1.2102063883303462e-05, "loss": 1.6719, "step": 1278 }, { "epoch": 1.38, "learning_rate": 1.2090397368318634e-05, "loss": 1.4981, "step": 1279 }, { "epoch": 1.39, "learning_rate": 1.207872787737562e-05, "loss": 1.7986, "step": 1280 }, { "epoch": 1.39, "learning_rate": 1.2067055427087482e-05, "loss": 1.6052, "step": 1281 }, { "epoch": 1.39, "learning_rate": 1.2055380034071504e-05, "loss": 1.4215, "step": 1282 }, { "epoch": 1.39, "learning_rate": 1.204370171494916e-05, "loss": 1.5639, "step": 1283 }, { "epoch": 1.39, "learning_rate": 1.2032020486346085e-05, "loss": 1.6687, "step": 1284 }, { "epoch": 1.39, "learning_rate": 1.2020336364892062e-05, "loss": 1.7765, "step": 1285 }, { "epoch": 1.39, "learning_rate": 1.2008649367220988e-05, "loss": 1.7461, "step": 1286 }, { "epoch": 1.39, "learning_rate": 1.1996959509970857e-05, "loss": 1.542, "step": 1287 }, { "epoch": 1.39, "learning_rate": 1.198526680978373e-05, "loss": 1.7398, "step": 1288 }, { "epoch": 1.4, "learning_rate": 1.197357128330572e-05, "loss": 1.5469, "step": 1289 }, { "epoch": 1.4, "learning_rate": 1.1961872947186958e-05, "loss": 1.5236, "step": 1290 }, { "epoch": 1.4, "learning_rate": 1.1950171818081583e-05, "loss": 1.5148, "step": 1291 }, { "epoch": 1.4, "learning_rate": 1.1938467912647708e-05, "loss": 1.7556, "step": 1292 }, { "epoch": 1.4, "learning_rate": 1.1926761247547392e-05, "loss": 1.5149, "step": 1293 }, { "epoch": 1.4, "learning_rate": 1.1915051839446627e-05, "loss": 1.6909, "step": 1294 }, { "epoch": 1.4, "learning_rate": 1.190333970501531e-05, "loss": 1.7696, "step": 1295 }, { "epoch": 1.4, "learning_rate": 1.1891624860927222e-05, "loss": 1.7218, "step": 1296 }, { "epoch": 1.4, "learning_rate": 1.1879907323859997e-05, "loss": 1.5859, "step": 1297 }, { "epoch": 1.4, "learning_rate": 1.1868187110495104e-05, "loss": 1.8859, "step": 1298 }, { "epoch": 1.41, "learning_rate": 1.1856464237517826e-05, "loss": 1.793, "step": 1299 }, { "epoch": 1.41, "learning_rate": 1.1844738721617228e-05, "loss": 1.657, "step": 1300 }, { "epoch": 1.41, "learning_rate": 1.1833010579486135e-05, "loss": 1.6519, "step": 1301 }, { "epoch": 1.41, "learning_rate": 1.1821279827821118e-05, "loss": 1.5418, "step": 1302 }, { "epoch": 1.41, "learning_rate": 1.1809546483322458e-05, "loss": 1.7052, "step": 1303 }, { "epoch": 1.41, "learning_rate": 1.1797810562694127e-05, "loss": 1.5585, "step": 1304 }, { "epoch": 1.41, "learning_rate": 1.1786072082643774e-05, "loss": 1.7492, "step": 1305 }, { "epoch": 1.41, "learning_rate": 1.1774331059882676e-05, "loss": 1.5275, "step": 1306 }, { "epoch": 1.41, "learning_rate": 1.1762587511125738e-05, "loss": 1.7395, "step": 1307 }, { "epoch": 1.42, "learning_rate": 1.1750841453091464e-05, "loss": 1.4839, "step": 1308 }, { "epoch": 1.42, "learning_rate": 1.1739092902501927e-05, "loss": 1.5222, "step": 1309 }, { "epoch": 1.42, "learning_rate": 1.1727341876082748e-05, "loss": 1.5207, "step": 1310 }, { "epoch": 1.42, "learning_rate": 1.1715588390563069e-05, "loss": 1.2958, "step": 1311 }, { "epoch": 1.42, "learning_rate": 1.1703832462675544e-05, "loss": 1.8622, "step": 1312 }, { "epoch": 1.42, "learning_rate": 1.1692074109156292e-05, "loss": 1.8553, "step": 1313 }, { "epoch": 1.42, "learning_rate": 1.1680313346744897e-05, "loss": 1.7046, "step": 1314 }, { "epoch": 1.42, "learning_rate": 1.1668550192184358e-05, "loss": 1.5634, "step": 1315 }, { "epoch": 1.42, "learning_rate": 1.165678466222109e-05, "loss": 1.4167, "step": 1316 }, { "epoch": 1.43, "learning_rate": 1.164501677360489e-05, "loss": 1.4836, "step": 1317 }, { "epoch": 1.43, "learning_rate": 1.1633246543088903e-05, "loss": 1.4215, "step": 1318 }, { "epoch": 1.43, "learning_rate": 1.162147398742962e-05, "loss": 1.4499, "step": 1319 }, { "epoch": 1.43, "learning_rate": 1.160969912338684e-05, "loss": 1.6927, "step": 1320 }, { "epoch": 1.43, "learning_rate": 1.1597921967723638e-05, "loss": 1.6764, "step": 1321 }, { "epoch": 1.43, "learning_rate": 1.1586142537206365e-05, "loss": 1.3818, "step": 1322 }, { "epoch": 1.43, "learning_rate": 1.15743608486046e-05, "loss": 1.5537, "step": 1323 }, { "epoch": 1.43, "learning_rate": 1.1562576918691141e-05, "loss": 1.7134, "step": 1324 }, { "epoch": 1.43, "learning_rate": 1.1550790764241979e-05, "loss": 1.2854, "step": 1325 }, { "epoch": 1.44, "learning_rate": 1.153900240203627e-05, "loss": 1.4848, "step": 1326 }, { "epoch": 1.44, "learning_rate": 1.152721184885631e-05, "loss": 1.448, "step": 1327 }, { "epoch": 1.44, "learning_rate": 1.1515419121487522e-05, "loss": 1.7475, "step": 1328 }, { "epoch": 1.44, "learning_rate": 1.150362423671841e-05, "loss": 1.4458, "step": 1329 }, { "epoch": 1.44, "learning_rate": 1.1491827211340568e-05, "loss": 1.3791, "step": 1330 }, { "epoch": 1.44, "learning_rate": 1.1480028062148622e-05, "loss": 1.6132, "step": 1331 }, { "epoch": 1.44, "learning_rate": 1.1468226805940227e-05, "loss": 1.4072, "step": 1332 }, { "epoch": 1.44, "learning_rate": 1.1456423459516047e-05, "loss": 1.5475, "step": 1333 }, { "epoch": 1.44, "learning_rate": 1.1444618039679702e-05, "loss": 1.5749, "step": 1334 }, { "epoch": 1.44, "learning_rate": 1.143281056323778e-05, "loss": 1.7015, "step": 1335 }, { "epoch": 1.45, "learning_rate": 1.1421001046999787e-05, "loss": 1.4643, "step": 1336 }, { "epoch": 1.45, "learning_rate": 1.1409189507778143e-05, "loss": 1.5289, "step": 1337 }, { "epoch": 1.45, "learning_rate": 1.1397375962388137e-05, "loss": 1.7965, "step": 1338 }, { "epoch": 1.45, "learning_rate": 1.1385560427647923e-05, "loss": 1.617, "step": 1339 }, { "epoch": 1.45, "learning_rate": 1.1373742920378483e-05, "loss": 1.5381, "step": 1340 }, { "epoch": 1.45, "learning_rate": 1.1361923457403607e-05, "loss": 1.6884, "step": 1341 }, { "epoch": 1.45, "learning_rate": 1.1350102055549868e-05, "loss": 1.4769, "step": 1342 }, { "epoch": 1.45, "learning_rate": 1.1338278731646603e-05, "loss": 1.6484, "step": 1343 }, { "epoch": 1.45, "learning_rate": 1.1326453502525886e-05, "loss": 1.5862, "step": 1344 }, { "epoch": 1.46, "learning_rate": 1.1314626385022493e-05, "loss": 1.4017, "step": 1345 }, { "epoch": 1.46, "learning_rate": 1.1302797395973906e-05, "loss": 1.5226, "step": 1346 }, { "epoch": 1.46, "learning_rate": 1.1290966552220253e-05, "loss": 1.5853, "step": 1347 }, { "epoch": 1.46, "learning_rate": 1.1279133870604313e-05, "loss": 1.6298, "step": 1348 }, { "epoch": 1.46, "learning_rate": 1.1267299367971482e-05, "loss": 1.7236, "step": 1349 }, { "epoch": 1.46, "learning_rate": 1.1255463061169744e-05, "loss": 1.5666, "step": 1350 }, { "epoch": 1.46, "learning_rate": 1.1243624967049653e-05, "loss": 1.6452, "step": 1351 }, { "epoch": 1.46, "learning_rate": 1.1231785102464307e-05, "loss": 1.5262, "step": 1352 }, { "epoch": 1.46, "learning_rate": 1.1219943484269329e-05, "loss": 1.8981, "step": 1353 }, { "epoch": 1.47, "learning_rate": 1.1208100129322827e-05, "loss": 1.8015, "step": 1354 }, { "epoch": 1.47, "learning_rate": 1.1196255054485396e-05, "loss": 1.7094, "step": 1355 }, { "epoch": 1.47, "learning_rate": 1.1184408276620074e-05, "loss": 1.8474, "step": 1356 }, { "epoch": 1.47, "learning_rate": 1.1172559812592316e-05, "loss": 1.4666, "step": 1357 }, { "epoch": 1.47, "learning_rate": 1.1160709679269986e-05, "loss": 1.5856, "step": 1358 }, { "epoch": 1.47, "learning_rate": 1.114885789352332e-05, "loss": 1.7492, "step": 1359 }, { "epoch": 1.47, "learning_rate": 1.1137004472224915e-05, "loss": 1.6024, "step": 1360 }, { "epoch": 1.47, "learning_rate": 1.1125149432249687e-05, "loss": 1.6389, "step": 1361 }, { "epoch": 1.47, "learning_rate": 1.1113292790474857e-05, "loss": 1.4293, "step": 1362 }, { "epoch": 1.48, "learning_rate": 1.1101434563779929e-05, "loss": 1.8504, "step": 1363 }, { "epoch": 1.48, "learning_rate": 1.1089574769046665e-05, "loss": 1.6104, "step": 1364 }, { "epoch": 1.48, "learning_rate": 1.1077713423159056e-05, "loss": 1.5039, "step": 1365 }, { "epoch": 1.48, "learning_rate": 1.1065850543003302e-05, "loss": 1.5566, "step": 1366 }, { "epoch": 1.48, "learning_rate": 1.1053986145467795e-05, "loss": 1.4938, "step": 1367 }, { "epoch": 1.48, "learning_rate": 1.104212024744307e-05, "loss": 1.6359, "step": 1368 }, { "epoch": 1.48, "learning_rate": 1.1030252865821814e-05, "loss": 1.5906, "step": 1369 }, { "epoch": 1.48, "learning_rate": 1.1018384017498817e-05, "loss": 1.6035, "step": 1370 }, { "epoch": 1.48, "learning_rate": 1.1006513719370963e-05, "loss": 1.7358, "step": 1371 }, { "epoch": 1.48, "learning_rate": 1.09946419883372e-05, "loss": 1.7031, "step": 1372 }, { "epoch": 1.49, "learning_rate": 1.0982768841298505e-05, "loss": 1.6675, "step": 1373 }, { "epoch": 1.49, "learning_rate": 1.0970894295157887e-05, "loss": 1.7351, "step": 1374 }, { "epoch": 1.49, "learning_rate": 1.0959018366820337e-05, "loss": 1.5233, "step": 1375 }, { "epoch": 1.49, "learning_rate": 1.0947141073192813e-05, "loss": 1.3807, "step": 1376 }, { "epoch": 1.49, "learning_rate": 1.0935262431184224e-05, "loss": 1.6437, "step": 1377 }, { "epoch": 1.49, "learning_rate": 1.092338245770539e-05, "loss": 1.5475, "step": 1378 }, { "epoch": 1.49, "learning_rate": 1.0911501169669032e-05, "loss": 1.7788, "step": 1379 }, { "epoch": 1.49, "learning_rate": 1.089961858398974e-05, "loss": 1.6773, "step": 1380 }, { "epoch": 1.49, "learning_rate": 1.0887734717583953e-05, "loss": 1.4647, "step": 1381 }, { "epoch": 1.5, "learning_rate": 1.0875849587369931e-05, "loss": 1.4252, "step": 1382 }, { "epoch": 1.5, "learning_rate": 1.086396321026774e-05, "loss": 1.5415, "step": 1383 }, { "epoch": 1.5, "learning_rate": 1.0852075603199213e-05, "loss": 1.4866, "step": 1384 }, { "epoch": 1.5, "learning_rate": 1.0840186783087936e-05, "loss": 1.3765, "step": 1385 }, { "epoch": 1.5, "learning_rate": 1.0828296766859224e-05, "loss": 1.6217, "step": 1386 }, { "epoch": 1.5, "learning_rate": 1.0816405571440089e-05, "loss": 1.6645, "step": 1387 }, { "epoch": 1.5, "learning_rate": 1.0804513213759234e-05, "loss": 1.3922, "step": 1388 }, { "epoch": 1.5, "learning_rate": 1.0792619710747004e-05, "loss": 1.5749, "step": 1389 }, { "epoch": 1.5, "learning_rate": 1.0780725079335383e-05, "loss": 1.5204, "step": 1390 }, { "epoch": 1.51, "learning_rate": 1.0768829336457958e-05, "loss": 1.2803, "step": 1391 }, { "epoch": 1.51, "learning_rate": 1.0756932499049894e-05, "loss": 1.6384, "step": 1392 }, { "epoch": 1.51, "learning_rate": 1.074503458404792e-05, "loss": 1.4258, "step": 1393 }, { "epoch": 1.51, "learning_rate": 1.07331356083903e-05, "loss": 1.4932, "step": 1394 }, { "epoch": 1.51, "learning_rate": 1.0721235589016805e-05, "loss": 1.5761, "step": 1395 }, { "epoch": 1.51, "learning_rate": 1.0709334542868692e-05, "loss": 1.8186, "step": 1396 }, { "epoch": 1.51, "learning_rate": 1.0697432486888681e-05, "loss": 1.5917, "step": 1397 }, { "epoch": 1.51, "learning_rate": 1.068552943802093e-05, "loss": 1.7973, "step": 1398 }, { "epoch": 1.51, "learning_rate": 1.0673625413211002e-05, "loss": 1.5501, "step": 1399 }, { "epoch": 1.52, "learning_rate": 1.0661720429405866e-05, "loss": 1.5497, "step": 1400 }, { "epoch": 1.52, "learning_rate": 1.0649814503553844e-05, "loss": 1.4764, "step": 1401 }, { "epoch": 1.52, "learning_rate": 1.06379076526046e-05, "loss": 1.4711, "step": 1402 }, { "epoch": 1.52, "learning_rate": 1.0625999893509122e-05, "loss": 1.713, "step": 1403 }, { "epoch": 1.52, "learning_rate": 1.0614091243219682e-05, "loss": 1.5149, "step": 1404 }, { "epoch": 1.52, "learning_rate": 1.0602181718689822e-05, "loss": 1.4376, "step": 1405 }, { "epoch": 1.52, "learning_rate": 1.0590271336874339e-05, "loss": 1.6151, "step": 1406 }, { "epoch": 1.52, "learning_rate": 1.0578360114729236e-05, "loss": 1.4859, "step": 1407 }, { "epoch": 1.52, "learning_rate": 1.0566448069211723e-05, "loss": 1.3853, "step": 1408 }, { "epoch": 1.52, "learning_rate": 1.0554535217280178e-05, "loss": 1.7395, "step": 1409 }, { "epoch": 1.53, "learning_rate": 1.0542621575894125e-05, "loss": 1.4653, "step": 1410 }, { "epoch": 1.53, "learning_rate": 1.0530707162014218e-05, "loss": 1.6122, "step": 1411 }, { "epoch": 1.53, "learning_rate": 1.0518791992602204e-05, "loss": 1.6813, "step": 1412 }, { "epoch": 1.53, "learning_rate": 1.0506876084620911e-05, "loss": 1.5162, "step": 1413 }, { "epoch": 1.53, "learning_rate": 1.0494959455034215e-05, "loss": 1.5368, "step": 1414 }, { "epoch": 1.53, "learning_rate": 1.0483042120807016e-05, "loss": 1.6324, "step": 1415 }, { "epoch": 1.53, "learning_rate": 1.0471124098905228e-05, "loss": 1.4531, "step": 1416 }, { "epoch": 1.53, "learning_rate": 1.0459205406295737e-05, "loss": 1.5531, "step": 1417 }, { "epoch": 1.53, "learning_rate": 1.044728605994638e-05, "loss": 1.5739, "step": 1418 }, { "epoch": 1.54, "learning_rate": 1.0435366076825933e-05, "loss": 1.7043, "step": 1419 }, { "epoch": 1.54, "learning_rate": 1.0423445473904072e-05, "loss": 1.6949, "step": 1420 }, { "epoch": 1.54, "learning_rate": 1.0411524268151359e-05, "loss": 1.5442, "step": 1421 }, { "epoch": 1.54, "learning_rate": 1.039960247653921e-05, "loss": 1.642, "step": 1422 }, { "epoch": 1.54, "learning_rate": 1.0387680116039884e-05, "loss": 1.4562, "step": 1423 }, { "epoch": 1.54, "learning_rate": 1.0375757203626445e-05, "loss": 1.5614, "step": 1424 }, { "epoch": 1.54, "learning_rate": 1.0363833756272734e-05, "loss": 1.5004, "step": 1425 }, { "epoch": 1.54, "learning_rate": 1.0351909790953367e-05, "loss": 1.6549, "step": 1426 }, { "epoch": 1.54, "learning_rate": 1.0339985324643688e-05, "loss": 1.6174, "step": 1427 }, { "epoch": 1.55, "learning_rate": 1.0328060374319759e-05, "loss": 1.65, "step": 1428 }, { "epoch": 1.55, "learning_rate": 1.0316134956958334e-05, "loss": 1.7216, "step": 1429 }, { "epoch": 1.55, "learning_rate": 1.0304209089536827e-05, "loss": 1.6205, "step": 1430 }, { "epoch": 1.55, "learning_rate": 1.0292282789033292e-05, "loss": 1.6978, "step": 1431 }, { "epoch": 1.55, "learning_rate": 1.0280356072426398e-05, "loss": 1.6674, "step": 1432 }, { "epoch": 1.55, "learning_rate": 1.0268428956695413e-05, "loss": 1.5286, "step": 1433 }, { "epoch": 1.55, "learning_rate": 1.0256501458820172e-05, "loss": 1.6045, "step": 1434 }, { "epoch": 1.55, "learning_rate": 1.0244573595781045e-05, "loss": 1.5994, "step": 1435 }, { "epoch": 1.55, "learning_rate": 1.0232645384558935e-05, "loss": 1.6663, "step": 1436 }, { "epoch": 1.56, "learning_rate": 1.0220716842135234e-05, "loss": 1.4722, "step": 1437 }, { "epoch": 1.56, "learning_rate": 1.0208787985491801e-05, "loss": 1.5185, "step": 1438 }, { "epoch": 1.56, "learning_rate": 1.0196858831610951e-05, "loss": 1.4893, "step": 1439 }, { "epoch": 1.56, "learning_rate": 1.018492939747542e-05, "loss": 1.5822, "step": 1440 }, { "epoch": 1.56, "learning_rate": 1.0172999700068338e-05, "loss": 1.4596, "step": 1441 }, { "epoch": 1.56, "learning_rate": 1.0161069756373212e-05, "loss": 1.6666, "step": 1442 }, { "epoch": 1.56, "learning_rate": 1.0149139583373906e-05, "loss": 1.6678, "step": 1443 }, { "epoch": 1.56, "learning_rate": 1.01372091980546e-05, "loss": 1.535, "step": 1444 }, { "epoch": 1.56, "learning_rate": 1.0125278617399784e-05, "loss": 1.491, "step": 1445 }, { "epoch": 1.56, "learning_rate": 1.0113347858394223e-05, "loss": 1.7168, "step": 1446 }, { "epoch": 1.57, "learning_rate": 1.0101416938022936e-05, "loss": 1.6208, "step": 1447 }, { "epoch": 1.57, "learning_rate": 1.0089485873271176e-05, "loss": 1.3264, "step": 1448 }, { "epoch": 1.57, "learning_rate": 1.0077554681124391e-05, "loss": 1.5177, "step": 1449 }, { "epoch": 1.57, "learning_rate": 1.0065623378568223e-05, "loss": 1.8232, "step": 1450 }, { "epoch": 1.57, "learning_rate": 1.005369198258846e-05, "loss": 1.5726, "step": 1451 }, { "epoch": 1.57, "learning_rate": 1.0041760510171032e-05, "loss": 1.8903, "step": 1452 }, { "epoch": 1.57, "learning_rate": 1.0029828978301977e-05, "loss": 1.6815, "step": 1453 }, { "epoch": 1.57, "learning_rate": 1.0017897403967408e-05, "loss": 1.6521, "step": 1454 }, { "epoch": 1.57, "learning_rate": 1.0005965804153508e-05, "loss": 1.4628, "step": 1455 }, { "epoch": 1.58, "learning_rate": 9.994034195846495e-06, "loss": 1.4936, "step": 1456 }, { "epoch": 1.58, "learning_rate": 9.982102596032597e-06, "loss": 1.6062, "step": 1457 }, { "epoch": 1.58, "learning_rate": 9.970171021698027e-06, "loss": 1.5715, "step": 1458 }, { "epoch": 1.58, "learning_rate": 9.958239489828968e-06, "loss": 1.3657, "step": 1459 }, { "epoch": 1.58, "learning_rate": 9.946308017411543e-06, "loss": 1.6497, "step": 1460 }, { "epoch": 1.58, "learning_rate": 9.93437662143178e-06, "loss": 1.6405, "step": 1461 }, { "epoch": 1.58, "learning_rate": 9.922445318875612e-06, "loss": 1.3632, "step": 1462 }, { "epoch": 1.58, "learning_rate": 9.910514126728827e-06, "loss": 1.4618, "step": 1463 }, { "epoch": 1.58, "learning_rate": 9.898583061977068e-06, "loss": 1.7279, "step": 1464 }, { "epoch": 1.59, "learning_rate": 9.88665214160578e-06, "loss": 1.5738, "step": 1465 }, { "epoch": 1.59, "learning_rate": 9.874721382600218e-06, "loss": 1.8553, "step": 1466 }, { "epoch": 1.59, "learning_rate": 9.862790801945403e-06, "loss": 1.4768, "step": 1467 }, { "epoch": 1.59, "learning_rate": 9.850860416626096e-06, "loss": 1.5988, "step": 1468 }, { "epoch": 1.59, "learning_rate": 9.838930243626791e-06, "loss": 1.6183, "step": 1469 }, { "epoch": 1.59, "learning_rate": 9.827000299931666e-06, "loss": 1.5499, "step": 1470 }, { "epoch": 1.59, "learning_rate": 9.815070602524586e-06, "loss": 1.5744, "step": 1471 }, { "epoch": 1.59, "learning_rate": 9.80314116838905e-06, "loss": 1.4634, "step": 1472 }, { "epoch": 1.59, "learning_rate": 9.7912120145082e-06, "loss": 1.4936, "step": 1473 }, { "epoch": 1.6, "learning_rate": 9.77928315786477e-06, "loss": 1.7769, "step": 1474 }, { "epoch": 1.6, "learning_rate": 9.767354615441066e-06, "loss": 1.626, "step": 1475 }, { "epoch": 1.6, "learning_rate": 9.755426404218958e-06, "loss": 1.6154, "step": 1476 }, { "epoch": 1.6, "learning_rate": 9.743498541179832e-06, "loss": 2.0258, "step": 1477 }, { "epoch": 1.6, "learning_rate": 9.731571043304589e-06, "loss": 1.7951, "step": 1478 }, { "epoch": 1.6, "learning_rate": 9.719643927573604e-06, "loss": 1.3674, "step": 1479 }, { "epoch": 1.6, "learning_rate": 9.70771721096671e-06, "loss": 1.7213, "step": 1480 }, { "epoch": 1.6, "learning_rate": 9.695790910463176e-06, "loss": 1.8304, "step": 1481 }, { "epoch": 1.6, "learning_rate": 9.683865043041664e-06, "loss": 1.5519, "step": 1482 }, { "epoch": 1.6, "learning_rate": 9.671939625680243e-06, "loss": 1.6132, "step": 1483 }, { "epoch": 1.61, "learning_rate": 9.660014675356315e-06, "loss": 1.5612, "step": 1484 }, { "epoch": 1.61, "learning_rate": 9.648090209046638e-06, "loss": 1.5789, "step": 1485 }, { "epoch": 1.61, "learning_rate": 9.636166243727269e-06, "loss": 1.5749, "step": 1486 }, { "epoch": 1.61, "learning_rate": 9.624242796373557e-06, "loss": 1.641, "step": 1487 }, { "epoch": 1.61, "learning_rate": 9.612319883960117e-06, "loss": 1.5902, "step": 1488 }, { "epoch": 1.61, "learning_rate": 9.60039752346079e-06, "loss": 1.7535, "step": 1489 }, { "epoch": 1.61, "learning_rate": 9.588475731848645e-06, "loss": 1.5461, "step": 1490 }, { "epoch": 1.61, "learning_rate": 9.576554526095931e-06, "loss": 1.8523, "step": 1491 }, { "epoch": 1.61, "learning_rate": 9.564633923174072e-06, "loss": 1.739, "step": 1492 }, { "epoch": 1.62, "learning_rate": 9.552713940053622e-06, "loss": 1.4078, "step": 1493 }, { "epoch": 1.62, "learning_rate": 9.540794593704265e-06, "loss": 1.4988, "step": 1494 }, { "epoch": 1.62, "learning_rate": 9.528875901094775e-06, "loss": 1.371, "step": 1495 }, { "epoch": 1.62, "learning_rate": 9.516957879192987e-06, "loss": 1.742, "step": 1496 }, { "epoch": 1.62, "learning_rate": 9.50504054496579e-06, "loss": 1.8256, "step": 1497 }, { "epoch": 1.62, "learning_rate": 9.49312391537909e-06, "loss": 1.2345, "step": 1498 }, { "epoch": 1.62, "learning_rate": 9.4812080073978e-06, "loss": 1.4356, "step": 1499 }, { "epoch": 1.62, "learning_rate": 9.469292837985786e-06, "loss": 1.4514, "step": 1500 }, { "epoch": 1.62, "learning_rate": 9.457378424105875e-06, "loss": 1.6653, "step": 1501 }, { "epoch": 1.63, "learning_rate": 9.445464782719824e-06, "loss": 1.4308, "step": 1502 }, { "epoch": 1.63, "learning_rate": 9.433551930788278e-06, "loss": 1.5736, "step": 1503 }, { "epoch": 1.63, "learning_rate": 9.421639885270769e-06, "loss": 1.6293, "step": 1504 }, { "epoch": 1.63, "learning_rate": 9.409728663125664e-06, "loss": 1.529, "step": 1505 }, { "epoch": 1.63, "learning_rate": 9.397818281310182e-06, "loss": 1.4393, "step": 1506 }, { "epoch": 1.63, "learning_rate": 9.385908756780323e-06, "loss": 1.6974, "step": 1507 }, { "epoch": 1.63, "learning_rate": 9.374000106490882e-06, "loss": 1.33, "step": 1508 }, { "epoch": 1.63, "learning_rate": 9.362092347395401e-06, "loss": 1.3257, "step": 1509 }, { "epoch": 1.63, "learning_rate": 9.350185496446157e-06, "loss": 1.6084, "step": 1510 }, { "epoch": 1.64, "learning_rate": 9.338279570594137e-06, "loss": 1.5949, "step": 1511 }, { "epoch": 1.64, "learning_rate": 9.326374586789e-06, "loss": 1.4288, "step": 1512 }, { "epoch": 1.64, "learning_rate": 9.314470561979076e-06, "loss": 1.6462, "step": 1513 }, { "epoch": 1.64, "learning_rate": 9.302567513111322e-06, "loss": 1.6326, "step": 1514 }, { "epoch": 1.64, "learning_rate": 9.290665457131308e-06, "loss": 1.5415, "step": 1515 }, { "epoch": 1.64, "learning_rate": 9.278764410983198e-06, "loss": 1.5942, "step": 1516 }, { "epoch": 1.64, "learning_rate": 9.266864391609702e-06, "loss": 1.489, "step": 1517 }, { "epoch": 1.64, "learning_rate": 9.254965415952083e-06, "loss": 1.6327, "step": 1518 }, { "epoch": 1.64, "learning_rate": 9.243067500950109e-06, "loss": 1.6492, "step": 1519 }, { "epoch": 1.65, "learning_rate": 9.231170663542048e-06, "loss": 1.6279, "step": 1520 }, { "epoch": 1.65, "learning_rate": 9.219274920664619e-06, "loss": 1.6027, "step": 1521 }, { "epoch": 1.65, "learning_rate": 9.207380289252996e-06, "loss": 1.8429, "step": 1522 }, { "epoch": 1.65, "learning_rate": 9.195486786240771e-06, "loss": 1.5398, "step": 1523 }, { "epoch": 1.65, "learning_rate": 9.183594428559913e-06, "loss": 1.6267, "step": 1524 }, { "epoch": 1.65, "learning_rate": 9.171703233140781e-06, "loss": 1.7189, "step": 1525 }, { "epoch": 1.65, "learning_rate": 9.159813216912067e-06, "loss": 1.758, "step": 1526 }, { "epoch": 1.65, "learning_rate": 9.147924396800792e-06, "loss": 1.5452, "step": 1527 }, { "epoch": 1.65, "learning_rate": 9.136036789732261e-06, "loss": 1.5664, "step": 1528 }, { "epoch": 1.65, "learning_rate": 9.124150412630069e-06, "loss": 1.761, "step": 1529 }, { "epoch": 1.66, "learning_rate": 9.11226528241605e-06, "loss": 1.4514, "step": 1530 }, { "epoch": 1.66, "learning_rate": 9.100381416010262e-06, "loss": 1.4175, "step": 1531 }, { "epoch": 1.66, "learning_rate": 9.088498830330974e-06, "loss": 1.7212, "step": 1532 }, { "epoch": 1.66, "learning_rate": 9.076617542294613e-06, "loss": 1.5689, "step": 1533 }, { "epoch": 1.66, "learning_rate": 9.064737568815783e-06, "loss": 1.6351, "step": 1534 }, { "epoch": 1.66, "learning_rate": 9.052858926807189e-06, "loss": 1.569, "step": 1535 }, { "epoch": 1.66, "learning_rate": 9.040981633179666e-06, "loss": 1.5106, "step": 1536 }, { "epoch": 1.66, "learning_rate": 9.029105704842114e-06, "loss": 1.3715, "step": 1537 }, { "epoch": 1.66, "learning_rate": 9.017231158701494e-06, "loss": 1.5932, "step": 1538 }, { "epoch": 1.67, "learning_rate": 9.005358011662805e-06, "loss": 1.754, "step": 1539 }, { "epoch": 1.67, "learning_rate": 8.993486280629039e-06, "loss": 1.7342, "step": 1540 }, { "epoch": 1.67, "learning_rate": 8.981615982501186e-06, "loss": 1.8086, "step": 1541 }, { "epoch": 1.67, "learning_rate": 8.96974713417819e-06, "loss": 1.4102, "step": 1542 }, { "epoch": 1.67, "learning_rate": 8.95787975255693e-06, "loss": 1.6458, "step": 1543 }, { "epoch": 1.67, "learning_rate": 8.94601385453221e-06, "loss": 1.451, "step": 1544 }, { "epoch": 1.67, "learning_rate": 8.934149456996696e-06, "loss": 1.5115, "step": 1545 }, { "epoch": 1.67, "learning_rate": 8.922286576840948e-06, "loss": 1.7639, "step": 1546 }, { "epoch": 1.67, "learning_rate": 8.910425230953339e-06, "loss": 1.4725, "step": 1547 }, { "epoch": 1.68, "learning_rate": 8.898565436220076e-06, "loss": 1.5253, "step": 1548 }, { "epoch": 1.68, "learning_rate": 8.886707209525148e-06, "loss": 1.666, "step": 1549 }, { "epoch": 1.68, "learning_rate": 8.874850567750315e-06, "loss": 1.2424, "step": 1550 }, { "epoch": 1.68, "learning_rate": 8.862995527775089e-06, "loss": 1.428, "step": 1551 }, { "epoch": 1.68, "learning_rate": 8.85114210647668e-06, "loss": 1.2813, "step": 1552 }, { "epoch": 1.68, "learning_rate": 8.839290320730018e-06, "loss": 1.4706, "step": 1553 }, { "epoch": 1.68, "learning_rate": 8.827440187407687e-06, "loss": 1.7242, "step": 1554 }, { "epoch": 1.68, "learning_rate": 8.815591723379931e-06, "loss": 1.5579, "step": 1555 }, { "epoch": 1.68, "learning_rate": 8.803744945514606e-06, "loss": 1.551, "step": 1556 }, { "epoch": 1.69, "learning_rate": 8.791899870677174e-06, "loss": 1.331, "step": 1557 }, { "epoch": 1.69, "learning_rate": 8.780056515730674e-06, "loss": 1.7178, "step": 1558 }, { "epoch": 1.69, "learning_rate": 8.768214897535693e-06, "loss": 1.5405, "step": 1559 }, { "epoch": 1.69, "learning_rate": 8.75637503295035e-06, "loss": 1.6926, "step": 1560 }, { "epoch": 1.69, "learning_rate": 8.744536938830257e-06, "loss": 1.6215, "step": 1561 }, { "epoch": 1.69, "learning_rate": 8.732700632028523e-06, "loss": 1.4487, "step": 1562 }, { "epoch": 1.69, "learning_rate": 8.720866129395689e-06, "loss": 1.4904, "step": 1563 }, { "epoch": 1.69, "learning_rate": 8.709033447779749e-06, "loss": 1.3806, "step": 1564 }, { "epoch": 1.69, "learning_rate": 8.697202604026099e-06, "loss": 1.5946, "step": 1565 }, { "epoch": 1.69, "learning_rate": 8.685373614977507e-06, "loss": 1.7393, "step": 1566 }, { "epoch": 1.7, "learning_rate": 8.673546497474119e-06, "loss": 1.6652, "step": 1567 }, { "epoch": 1.7, "learning_rate": 8.661721268353398e-06, "loss": 1.6665, "step": 1568 }, { "epoch": 1.7, "learning_rate": 8.649897944450134e-06, "loss": 1.4041, "step": 1569 }, { "epoch": 1.7, "learning_rate": 8.638076542596396e-06, "loss": 1.8183, "step": 1570 }, { "epoch": 1.7, "learning_rate": 8.626257079621517e-06, "loss": 1.4689, "step": 1571 }, { "epoch": 1.7, "learning_rate": 8.61443957235208e-06, "loss": 1.4563, "step": 1572 }, { "epoch": 1.7, "learning_rate": 8.602624037611865e-06, "loss": 1.8297, "step": 1573 }, { "epoch": 1.7, "learning_rate": 8.590810492221862e-06, "loss": 1.4407, "step": 1574 }, { "epoch": 1.7, "learning_rate": 8.578998953000218e-06, "loss": 1.5484, "step": 1575 }, { "epoch": 1.71, "learning_rate": 8.567189436762227e-06, "loss": 1.57, "step": 1576 }, { "epoch": 1.71, "learning_rate": 8.555381960320301e-06, "loss": 1.5272, "step": 1577 }, { "epoch": 1.71, "learning_rate": 8.543576540483955e-06, "loss": 1.7057, "step": 1578 }, { "epoch": 1.71, "learning_rate": 8.531773194059774e-06, "loss": 1.5611, "step": 1579 }, { "epoch": 1.71, "learning_rate": 8.51997193785138e-06, "loss": 1.3945, "step": 1580 }, { "epoch": 1.71, "learning_rate": 8.508172788659436e-06, "loss": 1.7305, "step": 1581 }, { "epoch": 1.71, "learning_rate": 8.496375763281591e-06, "loss": 1.6835, "step": 1582 }, { "epoch": 1.71, "learning_rate": 8.484580878512485e-06, "loss": 1.5178, "step": 1583 }, { "epoch": 1.71, "learning_rate": 8.472788151143692e-06, "loss": 1.6548, "step": 1584 }, { "epoch": 1.72, "learning_rate": 8.460997597963732e-06, "loss": 1.6223, "step": 1585 }, { "epoch": 1.72, "learning_rate": 8.449209235758024e-06, "loss": 1.441, "step": 1586 }, { "epoch": 1.72, "learning_rate": 8.437423081308862e-06, "loss": 1.6071, "step": 1587 }, { "epoch": 1.72, "learning_rate": 8.425639151395406e-06, "loss": 1.6885, "step": 1588 }, { "epoch": 1.72, "learning_rate": 8.413857462793637e-06, "loss": 1.6291, "step": 1589 }, { "epoch": 1.72, "learning_rate": 8.402078032276367e-06, "loss": 1.4595, "step": 1590 }, { "epoch": 1.72, "learning_rate": 8.390300876613164e-06, "loss": 1.7373, "step": 1591 }, { "epoch": 1.72, "learning_rate": 8.378526012570381e-06, "loss": 1.5518, "step": 1592 }, { "epoch": 1.72, "learning_rate": 8.366753456911099e-06, "loss": 1.7596, "step": 1593 }, { "epoch": 1.73, "learning_rate": 8.354983226395114e-06, "loss": 1.4498, "step": 1594 }, { "epoch": 1.73, "learning_rate": 8.343215337778915e-06, "loss": 1.4796, "step": 1595 }, { "epoch": 1.73, "learning_rate": 8.331449807815646e-06, "loss": 1.5796, "step": 1596 }, { "epoch": 1.73, "learning_rate": 8.31968665325511e-06, "loss": 1.438, "step": 1597 }, { "epoch": 1.73, "learning_rate": 8.307925890843711e-06, "loss": 1.5338, "step": 1598 }, { "epoch": 1.73, "learning_rate": 8.296167537324459e-06, "loss": 1.576, "step": 1599 }, { "epoch": 1.73, "learning_rate": 8.284411609436933e-06, "loss": 1.6412, "step": 1600 }, { "epoch": 1.73, "learning_rate": 8.272658123917256e-06, "loss": 1.297, "step": 1601 }, { "epoch": 1.73, "learning_rate": 8.260907097498078e-06, "loss": 1.865, "step": 1602 }, { "epoch": 1.73, "learning_rate": 8.24915854690854e-06, "loss": 1.5555, "step": 1603 }, { "epoch": 1.74, "learning_rate": 8.237412488874266e-06, "loss": 1.6192, "step": 1604 }, { "epoch": 1.74, "learning_rate": 8.225668940117327e-06, "loss": 1.5302, "step": 1605 }, { "epoch": 1.74, "learning_rate": 8.213927917356228e-06, "loss": 1.6969, "step": 1606 }, { "epoch": 1.74, "learning_rate": 8.202189437305874e-06, "loss": 1.8516, "step": 1607 }, { "epoch": 1.74, "learning_rate": 8.190453516677545e-06, "loss": 1.568, "step": 1608 }, { "epoch": 1.74, "learning_rate": 8.178720172178886e-06, "loss": 1.7605, "step": 1609 }, { "epoch": 1.74, "learning_rate": 8.166989420513867e-06, "loss": 1.2721, "step": 1610 }, { "epoch": 1.74, "learning_rate": 8.155261278382773e-06, "loss": 1.5424, "step": 1611 }, { "epoch": 1.74, "learning_rate": 8.143535762482176e-06, "loss": 1.534, "step": 1612 }, { "epoch": 1.75, "learning_rate": 8.131812889504895e-06, "loss": 1.3295, "step": 1613 }, { "epoch": 1.75, "learning_rate": 8.120092676140007e-06, "loss": 1.4227, "step": 1614 }, { "epoch": 1.75, "learning_rate": 8.108375139072781e-06, "loss": 1.6211, "step": 1615 }, { "epoch": 1.75, "learning_rate": 8.096660294984693e-06, "loss": 1.6758, "step": 1616 }, { "epoch": 1.75, "learning_rate": 8.084948160553375e-06, "loss": 1.7365, "step": 1617 }, { "epoch": 1.75, "learning_rate": 8.07323875245261e-06, "loss": 1.7047, "step": 1618 }, { "epoch": 1.75, "learning_rate": 8.061532087352295e-06, "loss": 1.3846, "step": 1619 }, { "epoch": 1.75, "learning_rate": 8.049828181918417e-06, "loss": 1.5463, "step": 1620 }, { "epoch": 1.75, "learning_rate": 8.038127052813044e-06, "loss": 1.5289, "step": 1621 }, { "epoch": 1.76, "learning_rate": 8.026428716694284e-06, "loss": 1.6241, "step": 1622 }, { "epoch": 1.76, "learning_rate": 8.014733190216275e-06, "loss": 1.7027, "step": 1623 }, { "epoch": 1.76, "learning_rate": 8.003040490029145e-06, "loss": 1.5098, "step": 1624 }, { "epoch": 1.76, "learning_rate": 7.991350632779012e-06, "loss": 1.5048, "step": 1625 }, { "epoch": 1.76, "learning_rate": 7.979663635107941e-06, "loss": 1.521, "step": 1626 }, { "epoch": 1.76, "learning_rate": 7.967979513653917e-06, "loss": 1.6509, "step": 1627 }, { "epoch": 1.76, "learning_rate": 7.956298285050842e-06, "loss": 1.6593, "step": 1628 }, { "epoch": 1.76, "learning_rate": 7.944619965928498e-06, "loss": 1.6461, "step": 1629 }, { "epoch": 1.76, "learning_rate": 7.932944572912523e-06, "loss": 1.6019, "step": 1630 }, { "epoch": 1.77, "learning_rate": 7.921272122624384e-06, "loss": 1.7219, "step": 1631 }, { "epoch": 1.77, "learning_rate": 7.909602631681366e-06, "loss": 1.4021, "step": 1632 }, { "epoch": 1.77, "learning_rate": 7.897936116696542e-06, "loss": 1.5456, "step": 1633 }, { "epoch": 1.77, "learning_rate": 7.88627259427874e-06, "loss": 1.5395, "step": 1634 }, { "epoch": 1.77, "learning_rate": 7.87461208103254e-06, "loss": 1.465, "step": 1635 }, { "epoch": 1.77, "learning_rate": 7.862954593558223e-06, "loss": 1.6116, "step": 1636 }, { "epoch": 1.77, "learning_rate": 7.851300148451774e-06, "loss": 1.7174, "step": 1637 }, { "epoch": 1.77, "learning_rate": 7.839648762304841e-06, "loss": 1.5312, "step": 1638 }, { "epoch": 1.77, "learning_rate": 7.828000451704717e-06, "loss": 1.6013, "step": 1639 }, { "epoch": 1.77, "learning_rate": 7.816355233234327e-06, "loss": 1.5673, "step": 1640 }, { "epoch": 1.78, "learning_rate": 7.804713123472178e-06, "loss": 1.6745, "step": 1641 }, { "epoch": 1.78, "learning_rate": 7.793074138992364e-06, "loss": 1.7497, "step": 1642 }, { "epoch": 1.78, "learning_rate": 7.78143829636452e-06, "loss": 1.6192, "step": 1643 }, { "epoch": 1.78, "learning_rate": 7.769805612153816e-06, "loss": 1.6124, "step": 1644 }, { "epoch": 1.78, "learning_rate": 7.758176102920918e-06, "loss": 1.3206, "step": 1645 }, { "epoch": 1.78, "learning_rate": 7.746549785221982e-06, "loss": 1.6474, "step": 1646 }, { "epoch": 1.78, "learning_rate": 7.734926675608616e-06, "loss": 1.4634, "step": 1647 }, { "epoch": 1.78, "learning_rate": 7.723306790627852e-06, "loss": 1.6819, "step": 1648 }, { "epoch": 1.78, "learning_rate": 7.711690146822147e-06, "loss": 1.6936, "step": 1649 }, { "epoch": 1.79, "learning_rate": 7.700076760729328e-06, "loss": 1.8941, "step": 1650 }, { "epoch": 1.79, "learning_rate": 7.6884666488826e-06, "loss": 1.381, "step": 1651 }, { "epoch": 1.79, "learning_rate": 7.67685982781049e-06, "loss": 1.534, "step": 1652 }, { "epoch": 1.79, "learning_rate": 7.665256314036856e-06, "loss": 1.6982, "step": 1653 }, { "epoch": 1.79, "learning_rate": 7.653656124080839e-06, "loss": 1.5934, "step": 1654 }, { "epoch": 1.79, "learning_rate": 7.642059274456848e-06, "loss": 1.7371, "step": 1655 }, { "epoch": 1.79, "learning_rate": 7.630465781674538e-06, "loss": 1.4087, "step": 1656 }, { "epoch": 1.79, "learning_rate": 7.618875662238786e-06, "loss": 1.612, "step": 1657 }, { "epoch": 1.79, "learning_rate": 7.607288932649669e-06, "loss": 1.7681, "step": 1658 }, { "epoch": 1.8, "learning_rate": 7.595705609402427e-06, "loss": 1.7609, "step": 1659 }, { "epoch": 1.8, "learning_rate": 7.584125708987464e-06, "loss": 1.6934, "step": 1660 }, { "epoch": 1.8, "learning_rate": 7.5725492478903065e-06, "loss": 1.4336, "step": 1661 }, { "epoch": 1.8, "learning_rate": 7.56097624259158e-06, "loss": 1.712, "step": 1662 }, { "epoch": 1.8, "learning_rate": 7.549406709567001e-06, "loss": 1.6162, "step": 1663 }, { "epoch": 1.8, "learning_rate": 7.537840665287327e-06, "loss": 1.5464, "step": 1664 }, { "epoch": 1.8, "learning_rate": 7.526278126218365e-06, "loss": 1.6153, "step": 1665 }, { "epoch": 1.8, "learning_rate": 7.514719108820918e-06, "loss": 1.408, "step": 1666 }, { "epoch": 1.8, "learning_rate": 7.5031636295507845e-06, "loss": 1.5711, "step": 1667 }, { "epoch": 1.81, "learning_rate": 7.491611704858727e-06, "loss": 1.4098, "step": 1668 }, { "epoch": 1.81, "learning_rate": 7.480063351190439e-06, "loss": 1.3499, "step": 1669 }, { "epoch": 1.81, "learning_rate": 7.46851858498654e-06, "loss": 1.4203, "step": 1670 }, { "epoch": 1.81, "learning_rate": 7.456977422682532e-06, "loss": 1.4782, "step": 1671 }, { "epoch": 1.81, "learning_rate": 7.445439880708796e-06, "loss": 1.5853, "step": 1672 }, { "epoch": 1.81, "learning_rate": 7.433905975490552e-06, "loss": 1.5873, "step": 1673 }, { "epoch": 1.81, "learning_rate": 7.422375723447846e-06, "loss": 1.7037, "step": 1674 }, { "epoch": 1.81, "learning_rate": 7.410849140995528e-06, "loss": 1.5418, "step": 1675 }, { "epoch": 1.81, "learning_rate": 7.399326244543211e-06, "loss": 1.5074, "step": 1676 }, { "epoch": 1.81, "learning_rate": 7.387807050495275e-06, "loss": 1.7242, "step": 1677 }, { "epoch": 1.82, "learning_rate": 7.376291575250812e-06, "loss": 1.4759, "step": 1678 }, { "epoch": 1.82, "learning_rate": 7.3647798352036394e-06, "loss": 1.3884, "step": 1679 }, { "epoch": 1.82, "learning_rate": 7.3532718467422415e-06, "loss": 1.5181, "step": 1680 }, { "epoch": 1.82, "learning_rate": 7.34176762624977e-06, "loss": 1.7005, "step": 1681 }, { "epoch": 1.82, "learning_rate": 7.330267190104015e-06, "loss": 1.431, "step": 1682 }, { "epoch": 1.82, "learning_rate": 7.318770554677366e-06, "loss": 1.8979, "step": 1683 }, { "epoch": 1.82, "learning_rate": 7.307277736336815e-06, "loss": 1.5267, "step": 1684 }, { "epoch": 1.82, "learning_rate": 7.295788751443913e-06, "loss": 1.5903, "step": 1685 }, { "epoch": 1.82, "learning_rate": 7.284303616354759e-06, "loss": 1.253, "step": 1686 }, { "epoch": 1.83, "learning_rate": 7.27282234741996e-06, "loss": 1.2221, "step": 1687 }, { "epoch": 1.83, "learning_rate": 7.261344960984631e-06, "loss": 1.7133, "step": 1688 }, { "epoch": 1.83, "learning_rate": 7.249871473388361e-06, "loss": 1.5898, "step": 1689 }, { "epoch": 1.83, "learning_rate": 7.238401900965172e-06, "loss": 1.8137, "step": 1690 }, { "epoch": 1.83, "learning_rate": 7.226936260043531e-06, "loss": 1.3861, "step": 1691 }, { "epoch": 1.83, "learning_rate": 7.215474566946296e-06, "loss": 1.7047, "step": 1692 }, { "epoch": 1.83, "learning_rate": 7.204016837990715e-06, "loss": 1.4074, "step": 1693 }, { "epoch": 1.83, "learning_rate": 7.192563089488377e-06, "loss": 1.4883, "step": 1694 }, { "epoch": 1.83, "learning_rate": 7.181113337745218e-06, "loss": 1.5436, "step": 1695 }, { "epoch": 1.84, "learning_rate": 7.169667599061484e-06, "loss": 1.3418, "step": 1696 }, { "epoch": 1.84, "learning_rate": 7.158225889731698e-06, "loss": 1.5738, "step": 1697 }, { "epoch": 1.84, "learning_rate": 7.146788226044658e-06, "loss": 1.6208, "step": 1698 }, { "epoch": 1.84, "learning_rate": 7.1353546242833885e-06, "loss": 1.2694, "step": 1699 }, { "epoch": 1.84, "learning_rate": 7.12392510072515e-06, "loss": 1.5973, "step": 1700 }, { "epoch": 1.84, "learning_rate": 7.112499671641375e-06, "loss": 1.6895, "step": 1701 }, { "epoch": 1.84, "learning_rate": 7.1010783532976866e-06, "loss": 1.3201, "step": 1702 }, { "epoch": 1.84, "learning_rate": 7.089661161953847e-06, "loss": 1.6054, "step": 1703 }, { "epoch": 1.84, "learning_rate": 7.078248113863741e-06, "loss": 1.6929, "step": 1704 }, { "epoch": 1.85, "learning_rate": 7.066839225275366e-06, "loss": 1.347, "step": 1705 }, { "epoch": 1.85, "learning_rate": 7.0554345124307765e-06, "loss": 1.6863, "step": 1706 }, { "epoch": 1.85, "learning_rate": 7.044033991566106e-06, "loss": 1.659, "step": 1707 }, { "epoch": 1.85, "learning_rate": 7.032637678911504e-06, "loss": 1.6838, "step": 1708 }, { "epoch": 1.85, "learning_rate": 7.021245590691136e-06, "loss": 1.4575, "step": 1709 }, { "epoch": 1.85, "learning_rate": 7.009857743123156e-06, "loss": 1.5452, "step": 1710 }, { "epoch": 1.85, "learning_rate": 6.998474152419672e-06, "loss": 1.5975, "step": 1711 }, { "epoch": 1.85, "learning_rate": 6.987094834786739e-06, "loss": 1.6389, "step": 1712 }, { "epoch": 1.85, "learning_rate": 6.975719806424324e-06, "loss": 1.7488, "step": 1713 }, { "epoch": 1.85, "learning_rate": 6.964349083526295e-06, "loss": 1.6343, "step": 1714 }, { "epoch": 1.86, "learning_rate": 6.95298268228038e-06, "loss": 1.8227, "step": 1715 }, { "epoch": 1.86, "learning_rate": 6.941620618868161e-06, "loss": 1.8224, "step": 1716 }, { "epoch": 1.86, "learning_rate": 6.930262909465051e-06, "loss": 1.1914, "step": 1717 }, { "epoch": 1.86, "learning_rate": 6.918909570240249e-06, "loss": 1.7999, "step": 1718 }, { "epoch": 1.86, "learning_rate": 6.9075606173567435e-06, "loss": 1.3275, "step": 1719 }, { "epoch": 1.86, "learning_rate": 6.8962160669712755e-06, "loss": 1.6261, "step": 1720 }, { "epoch": 1.86, "learning_rate": 6.884875935234323e-06, "loss": 1.3259, "step": 1721 }, { "epoch": 1.86, "learning_rate": 6.8735402382900605e-06, "loss": 1.5051, "step": 1722 }, { "epoch": 1.86, "learning_rate": 6.862208992276362e-06, "loss": 1.447, "step": 1723 }, { "epoch": 1.87, "learning_rate": 6.850882213324766e-06, "loss": 1.863, "step": 1724 }, { "epoch": 1.87, "learning_rate": 6.839559917560437e-06, "loss": 1.5845, "step": 1725 }, { "epoch": 1.87, "learning_rate": 6.828242121102173e-06, "loss": 1.5892, "step": 1726 }, { "epoch": 1.87, "learning_rate": 6.8169288400623555e-06, "loss": 1.5102, "step": 1727 }, { "epoch": 1.87, "learning_rate": 6.805620090546947e-06, "loss": 1.3536, "step": 1728 }, { "epoch": 1.87, "learning_rate": 6.794315888655446e-06, "loss": 1.7994, "step": 1729 }, { "epoch": 1.87, "learning_rate": 6.783016250480891e-06, "loss": 1.5345, "step": 1730 }, { "epoch": 1.87, "learning_rate": 6.771721192109817e-06, "loss": 1.7666, "step": 1731 }, { "epoch": 1.87, "learning_rate": 6.760430729622236e-06, "loss": 1.5554, "step": 1732 }, { "epoch": 1.88, "learning_rate": 6.749144879091626e-06, "loss": 1.6225, "step": 1733 }, { "epoch": 1.88, "learning_rate": 6.737863656584886e-06, "loss": 1.8757, "step": 1734 }, { "epoch": 1.88, "learning_rate": 6.72658707816234e-06, "loss": 1.6269, "step": 1735 }, { "epoch": 1.88, "learning_rate": 6.715315159877691e-06, "loss": 1.7373, "step": 1736 }, { "epoch": 1.88, "learning_rate": 6.704047917778011e-06, "loss": 1.5422, "step": 1737 }, { "epoch": 1.88, "learning_rate": 6.692785367903723e-06, "loss": 1.4293, "step": 1738 }, { "epoch": 1.88, "learning_rate": 6.681527526288549e-06, "loss": 1.6787, "step": 1739 }, { "epoch": 1.88, "learning_rate": 6.670274408959535e-06, "loss": 1.5579, "step": 1740 }, { "epoch": 1.88, "learning_rate": 6.6590260319369745e-06, "loss": 1.5948, "step": 1741 }, { "epoch": 1.89, "learning_rate": 6.647782411234436e-06, "loss": 1.419, "step": 1742 }, { "epoch": 1.89, "learning_rate": 6.636543562858697e-06, "loss": 1.7773, "step": 1743 }, { "epoch": 1.89, "learning_rate": 6.625309502809758e-06, "loss": 1.464, "step": 1744 }, { "epoch": 1.89, "learning_rate": 6.614080247080794e-06, "loss": 1.4038, "step": 1745 }, { "epoch": 1.89, "learning_rate": 6.602855811658138e-06, "loss": 1.5296, "step": 1746 }, { "epoch": 1.89, "learning_rate": 6.591636212521266e-06, "loss": 1.5991, "step": 1747 }, { "epoch": 1.89, "learning_rate": 6.580421465642767e-06, "loss": 1.5339, "step": 1748 }, { "epoch": 1.89, "learning_rate": 6.569211586988324e-06, "loss": 1.6199, "step": 1749 }, { "epoch": 1.89, "learning_rate": 6.558006592516683e-06, "loss": 1.3909, "step": 1750 }, { "epoch": 1.9, "learning_rate": 6.546806498179643e-06, "loss": 1.5212, "step": 1751 }, { "epoch": 1.9, "learning_rate": 6.535611319922031e-06, "loss": 1.6971, "step": 1752 }, { "epoch": 1.9, "learning_rate": 6.524421073681659e-06, "loss": 1.411, "step": 1753 }, { "epoch": 1.9, "learning_rate": 6.513235775389338e-06, "loss": 1.5912, "step": 1754 }, { "epoch": 1.9, "learning_rate": 6.502055440968819e-06, "loss": 1.4404, "step": 1755 }, { "epoch": 1.9, "learning_rate": 6.490880086336797e-06, "loss": 1.4707, "step": 1756 }, { "epoch": 1.9, "learning_rate": 6.479709727402868e-06, "loss": 1.4696, "step": 1757 }, { "epoch": 1.9, "learning_rate": 6.468544380069523e-06, "loss": 1.7753, "step": 1758 }, { "epoch": 1.9, "learning_rate": 6.457384060232119e-06, "loss": 1.6027, "step": 1759 }, { "epoch": 1.9, "learning_rate": 6.4462287837788495e-06, "loss": 1.5656, "step": 1760 }, { "epoch": 1.91, "learning_rate": 6.435078566590738e-06, "loss": 1.4584, "step": 1761 }, { "epoch": 1.91, "learning_rate": 6.423933424541588e-06, "loss": 1.3464, "step": 1762 }, { "epoch": 1.91, "learning_rate": 6.412793373497999e-06, "loss": 1.6881, "step": 1763 }, { "epoch": 1.91, "learning_rate": 6.401658429319305e-06, "loss": 1.6128, "step": 1764 }, { "epoch": 1.91, "learning_rate": 6.390528607857582e-06, "loss": 1.818, "step": 1765 }, { "epoch": 1.91, "learning_rate": 6.379403924957612e-06, "loss": 1.859, "step": 1766 }, { "epoch": 1.91, "learning_rate": 6.36828439645685e-06, "loss": 1.5989, "step": 1767 }, { "epoch": 1.91, "learning_rate": 6.357170038185428e-06, "loss": 1.6319, "step": 1768 }, { "epoch": 1.91, "learning_rate": 6.346060865966104e-06, "loss": 1.4426, "step": 1769 }, { "epoch": 1.92, "learning_rate": 6.3349568956142636e-06, "loss": 1.8158, "step": 1770 }, { "epoch": 1.92, "learning_rate": 6.3238581429378846e-06, "loss": 1.7956, "step": 1771 }, { "epoch": 1.92, "learning_rate": 6.312764623737511e-06, "loss": 1.5887, "step": 1772 }, { "epoch": 1.92, "learning_rate": 6.301676353806245e-06, "loss": 1.4651, "step": 1773 }, { "epoch": 1.92, "learning_rate": 6.2905933489297055e-06, "loss": 1.4191, "step": 1774 }, { "epoch": 1.92, "learning_rate": 6.279515624886024e-06, "loss": 1.4645, "step": 1775 }, { "epoch": 1.92, "learning_rate": 6.26844319744581e-06, "loss": 1.6481, "step": 1776 }, { "epoch": 1.92, "learning_rate": 6.257376082372138e-06, "loss": 1.5659, "step": 1777 }, { "epoch": 1.92, "learning_rate": 6.246314295420515e-06, "loss": 1.3886, "step": 1778 }, { "epoch": 1.93, "learning_rate": 6.235257852338857e-06, "loss": 1.4936, "step": 1779 }, { "epoch": 1.93, "learning_rate": 6.2242067688674876e-06, "loss": 1.5716, "step": 1780 }, { "epoch": 1.93, "learning_rate": 6.2131610607390835e-06, "loss": 1.5199, "step": 1781 }, { "epoch": 1.93, "learning_rate": 6.202120743678682e-06, "loss": 1.5851, "step": 1782 }, { "epoch": 1.93, "learning_rate": 6.191085833403636e-06, "loss": 1.567, "step": 1783 }, { "epoch": 1.93, "learning_rate": 6.180056345623608e-06, "loss": 1.8849, "step": 1784 }, { "epoch": 1.93, "learning_rate": 6.169032296040542e-06, "loss": 1.4365, "step": 1785 }, { "epoch": 1.93, "learning_rate": 6.158013700348628e-06, "loss": 1.6608, "step": 1786 }, { "epoch": 1.93, "learning_rate": 6.1470005742343075e-06, "loss": 1.5282, "step": 1787 }, { "epoch": 1.94, "learning_rate": 6.1359929333762206e-06, "loss": 1.8344, "step": 1788 }, { "epoch": 1.94, "learning_rate": 6.124990793445214e-06, "loss": 1.7696, "step": 1789 }, { "epoch": 1.94, "learning_rate": 6.113994170104285e-06, "loss": 1.4385, "step": 1790 }, { "epoch": 1.94, "learning_rate": 6.103003079008593e-06, "loss": 1.5002, "step": 1791 }, { "epoch": 1.94, "learning_rate": 6.0920175358054166e-06, "loss": 1.7646, "step": 1792 }, { "epoch": 1.94, "learning_rate": 6.08103755613413e-06, "loss": 1.5009, "step": 1793 }, { "epoch": 1.94, "learning_rate": 6.070063155626197e-06, "loss": 1.6243, "step": 1794 }, { "epoch": 1.94, "learning_rate": 6.059094349905128e-06, "loss": 1.5057, "step": 1795 }, { "epoch": 1.94, "learning_rate": 6.048131154586483e-06, "loss": 1.3089, "step": 1796 }, { "epoch": 1.94, "learning_rate": 6.037173585277816e-06, "loss": 1.7252, "step": 1797 }, { "epoch": 1.95, "learning_rate": 6.026221657578688e-06, "loss": 1.8852, "step": 1798 }, { "epoch": 1.95, "learning_rate": 6.015275387080621e-06, "loss": 1.7413, "step": 1799 }, { "epoch": 1.95, "learning_rate": 6.004334789367083e-06, "loss": 1.6771, "step": 1800 }, { "epoch": 1.95, "learning_rate": 5.9933998800134726e-06, "loss": 1.5189, "step": 1801 }, { "epoch": 1.95, "learning_rate": 5.982470674587078e-06, "loss": 1.631, "step": 1802 }, { "epoch": 1.95, "learning_rate": 5.971547188647078e-06, "loss": 1.4991, "step": 1803 }, { "epoch": 1.95, "learning_rate": 5.9606294377445006e-06, "loss": 1.5662, "step": 1804 }, { "epoch": 1.95, "learning_rate": 5.949717437422222e-06, "loss": 1.6073, "step": 1805 }, { "epoch": 1.95, "learning_rate": 5.938811203214918e-06, "loss": 1.8055, "step": 1806 }, { "epoch": 1.96, "learning_rate": 5.92791075064906e-06, "loss": 1.7905, "step": 1807 }, { "epoch": 1.96, "learning_rate": 5.917016095242893e-06, "loss": 1.5288, "step": 1808 }, { "epoch": 1.96, "learning_rate": 5.9061272525064015e-06, "loss": 1.9134, "step": 1809 }, { "epoch": 1.96, "learning_rate": 5.8952442379413045e-06, "loss": 1.551, "step": 1810 }, { "epoch": 1.96, "learning_rate": 5.88436706704101e-06, "loss": 1.7406, "step": 1811 }, { "epoch": 1.96, "learning_rate": 5.873495755290621e-06, "loss": 1.7211, "step": 1812 }, { "epoch": 1.96, "learning_rate": 5.862630318166896e-06, "loss": 1.9039, "step": 1813 }, { "epoch": 1.96, "learning_rate": 5.851770771138218e-06, "loss": 1.1796, "step": 1814 }, { "epoch": 1.96, "learning_rate": 5.840917129664602e-06, "loss": 1.676, "step": 1815 }, { "epoch": 1.97, "learning_rate": 5.830069409197645e-06, "loss": 1.4525, "step": 1816 }, { "epoch": 1.97, "learning_rate": 5.819227625180517e-06, "loss": 1.6511, "step": 1817 }, { "epoch": 1.97, "learning_rate": 5.8083917930479365e-06, "loss": 1.8143, "step": 1818 }, { "epoch": 1.97, "learning_rate": 5.797561928226145e-06, "loss": 1.331, "step": 1819 }, { "epoch": 1.97, "learning_rate": 5.786738046132902e-06, "loss": 1.7415, "step": 1820 }, { "epoch": 1.97, "learning_rate": 5.775920162177437e-06, "loss": 1.6579, "step": 1821 }, { "epoch": 1.97, "learning_rate": 5.765108291760443e-06, "loss": 1.4423, "step": 1822 }, { "epoch": 1.97, "learning_rate": 5.754302450274053e-06, "loss": 1.343, "step": 1823 }, { "epoch": 1.97, "learning_rate": 5.743502653101818e-06, "loss": 1.4121, "step": 1824 }, { "epoch": 1.98, "learning_rate": 5.732708915618679e-06, "loss": 1.5275, "step": 1825 }, { "epoch": 1.98, "learning_rate": 5.72192125319096e-06, "loss": 1.8203, "step": 1826 }, { "epoch": 1.98, "learning_rate": 5.711139681176332e-06, "loss": 1.4243, "step": 1827 }, { "epoch": 1.98, "learning_rate": 5.700364214923788e-06, "loss": 1.4667, "step": 1828 }, { "epoch": 1.98, "learning_rate": 5.689594869773639e-06, "loss": 1.7091, "step": 1829 }, { "epoch": 1.98, "learning_rate": 5.678831661057473e-06, "loss": 1.4341, "step": 1830 }, { "epoch": 1.98, "learning_rate": 5.668074604098157e-06, "loss": 1.7594, "step": 1831 }, { "epoch": 1.98, "learning_rate": 5.657323714209775e-06, "loss": 1.7115, "step": 1832 }, { "epoch": 1.98, "learning_rate": 5.646579006697659e-06, "loss": 1.6253, "step": 1833 }, { "epoch": 1.98, "learning_rate": 5.63584049685832e-06, "loss": 2.0217, "step": 1834 }, { "epoch": 1.99, "learning_rate": 5.625108199979454e-06, "loss": 1.6056, "step": 1835 }, { "epoch": 1.99, "learning_rate": 5.614382131339911e-06, "loss": 1.8108, "step": 1836 }, { "epoch": 1.99, "learning_rate": 5.603662306209671e-06, "loss": 1.7316, "step": 1837 }, { "epoch": 1.99, "learning_rate": 5.592948739849838e-06, "loss": 1.6777, "step": 1838 }, { "epoch": 1.99, "learning_rate": 5.58224144751258e-06, "loss": 1.7195, "step": 1839 }, { "epoch": 1.99, "learning_rate": 5.5715404444411615e-06, "loss": 1.2523, "step": 1840 }, { "epoch": 1.99, "learning_rate": 5.560845745869879e-06, "loss": 1.5651, "step": 1841 }, { "epoch": 1.99, "learning_rate": 5.55015736702405e-06, "loss": 1.7505, "step": 1842 }, { "epoch": 1.99, "learning_rate": 5.539475323120014e-06, "loss": 1.5079, "step": 1843 }, { "epoch": 2.0, "learning_rate": 5.528799629365062e-06, "loss": 1.4314, "step": 1844 }, { "epoch": 2.0, "learning_rate": 5.518130300957476e-06, "loss": 1.6132, "step": 1845 }, { "epoch": 2.0, "learning_rate": 5.507467353086449e-06, "loss": 1.3751, "step": 1846 }, { "epoch": 2.0, "learning_rate": 5.496810800932113e-06, "loss": 1.6673, "step": 1847 }, { "epoch": 2.0, "learning_rate": 5.4861606596654805e-06, "loss": 1.2954, "step": 1848 }, { "epoch": 2.0, "learning_rate": 5.475516944448437e-06, "loss": 1.4659, "step": 1849 }, { "epoch": 2.0, "learning_rate": 5.464879670433738e-06, "loss": 1.3197, "step": 1850 }, { "epoch": 2.0, "learning_rate": 5.4542488527649385e-06, "loss": 1.5441, "step": 1851 }, { "epoch": 2.0, "learning_rate": 5.443624506576433e-06, "loss": 1.3866, "step": 1852 }, { "epoch": 2.01, "learning_rate": 5.433006646993379e-06, "loss": 1.5287, "step": 1853 }, { "epoch": 2.01, "learning_rate": 5.422395289131712e-06, "loss": 1.4827, "step": 1854 }, { "epoch": 2.01, "learning_rate": 5.411790448098117e-06, "loss": 1.4118, "step": 1855 }, { "epoch": 2.01, "learning_rate": 5.401192138989979e-06, "loss": 1.399, "step": 1856 }, { "epoch": 2.01, "learning_rate": 5.390600376895413e-06, "loss": 1.5438, "step": 1857 }, { "epoch": 2.01, "learning_rate": 5.380015176893183e-06, "loss": 1.5333, "step": 1858 }, { "epoch": 2.01, "learning_rate": 5.369436554052738e-06, "loss": 1.2251, "step": 1859 }, { "epoch": 2.01, "learning_rate": 5.358864523434148e-06, "loss": 1.6184, "step": 1860 }, { "epoch": 2.01, "learning_rate": 5.348299100088101e-06, "loss": 1.3506, "step": 1861 }, { "epoch": 2.02, "learning_rate": 5.3377402990558876e-06, "loss": 1.478, "step": 1862 }, { "epoch": 2.02, "learning_rate": 5.32718813536935e-06, "loss": 1.1999, "step": 1863 }, { "epoch": 2.02, "learning_rate": 5.316642624050905e-06, "loss": 1.5515, "step": 1864 }, { "epoch": 2.02, "learning_rate": 5.306103780113484e-06, "loss": 1.4353, "step": 1865 }, { "epoch": 2.02, "learning_rate": 5.295571618560531e-06, "loss": 1.7013, "step": 1866 }, { "epoch": 2.02, "learning_rate": 5.285046154385976e-06, "loss": 1.6689, "step": 1867 }, { "epoch": 2.02, "learning_rate": 5.274527402574212e-06, "loss": 1.5817, "step": 1868 }, { "epoch": 2.02, "learning_rate": 5.264015378100088e-06, "loss": 1.3292, "step": 1869 }, { "epoch": 2.02, "learning_rate": 5.2535100959288534e-06, "loss": 1.6756, "step": 1870 }, { "epoch": 2.02, "learning_rate": 5.243011571016181e-06, "loss": 1.7555, "step": 1871 }, { "epoch": 2.03, "learning_rate": 5.232519818308115e-06, "loss": 1.4063, "step": 1872 }, { "epoch": 2.03, "learning_rate": 5.222034852741054e-06, "loss": 1.3522, "step": 1873 }, { "epoch": 2.03, "learning_rate": 5.211556689241742e-06, "loss": 1.4141, "step": 1874 }, { "epoch": 2.03, "learning_rate": 5.20108534272723e-06, "loss": 1.3918, "step": 1875 }, { "epoch": 2.03, "learning_rate": 5.190620828104878e-06, "loss": 1.4967, "step": 1876 }, { "epoch": 2.03, "learning_rate": 5.180163160272309e-06, "loss": 1.2588, "step": 1877 }, { "epoch": 2.03, "learning_rate": 5.1697123541174025e-06, "loss": 1.5597, "step": 1878 }, { "epoch": 2.03, "learning_rate": 5.159268424518264e-06, "loss": 1.2347, "step": 1879 }, { "epoch": 2.03, "learning_rate": 5.14883138634322e-06, "loss": 1.5946, "step": 1880 }, { "epoch": 2.04, "learning_rate": 5.138401254450773e-06, "loss": 1.51, "step": 1881 }, { "epoch": 2.04, "learning_rate": 5.127978043689607e-06, "loss": 1.4591, "step": 1882 }, { "epoch": 2.04, "learning_rate": 5.1175617688985446e-06, "loss": 1.5665, "step": 1883 }, { "epoch": 2.04, "learning_rate": 5.107152444906536e-06, "loss": 1.2853, "step": 1884 }, { "epoch": 2.04, "learning_rate": 5.0967500865326334e-06, "loss": 1.6581, "step": 1885 }, { "epoch": 2.04, "learning_rate": 5.086354708585979e-06, "loss": 1.6065, "step": 1886 }, { "epoch": 2.04, "learning_rate": 5.07596632586577e-06, "loss": 1.4056, "step": 1887 }, { "epoch": 2.04, "learning_rate": 5.0655849531612465e-06, "loss": 1.4406, "step": 1888 }, { "epoch": 2.04, "learning_rate": 5.055210605251678e-06, "loss": 1.2587, "step": 1889 }, { "epoch": 2.05, "learning_rate": 5.0448432969063235e-06, "loss": 1.4093, "step": 1890 }, { "epoch": 2.05, "learning_rate": 5.034483042884419e-06, "loss": 1.601, "step": 1891 }, { "epoch": 2.05, "learning_rate": 5.024129857935167e-06, "loss": 1.3906, "step": 1892 }, { "epoch": 2.05, "learning_rate": 5.0137837567976926e-06, "loss": 1.2058, "step": 1893 }, { "epoch": 2.05, "learning_rate": 5.003444754201059e-06, "loss": 1.372, "step": 1894 }, { "epoch": 2.05, "learning_rate": 4.993112864864191e-06, "loss": 1.65, "step": 1895 }, { "epoch": 2.05, "learning_rate": 4.982788103495918e-06, "loss": 1.2757, "step": 1896 }, { "epoch": 2.05, "learning_rate": 4.972470484794906e-06, "loss": 1.3368, "step": 1897 }, { "epoch": 2.05, "learning_rate": 4.962160023449652e-06, "loss": 1.4221, "step": 1898 }, { "epoch": 2.06, "learning_rate": 4.951856734138469e-06, "loss": 1.3768, "step": 1899 }, { "epoch": 2.06, "learning_rate": 4.941560631529452e-06, "loss": 1.1427, "step": 1900 }, { "epoch": 2.06, "learning_rate": 4.931271730280482e-06, "loss": 1.3772, "step": 1901 }, { "epoch": 2.06, "learning_rate": 4.92099004503916e-06, "loss": 1.3651, "step": 1902 }, { "epoch": 2.06, "learning_rate": 4.9107155904428435e-06, "loss": 1.3227, "step": 1903 }, { "epoch": 2.06, "learning_rate": 4.900448381118578e-06, "loss": 1.3926, "step": 1904 }, { "epoch": 2.06, "learning_rate": 4.890188431683095e-06, "loss": 1.4526, "step": 1905 }, { "epoch": 2.06, "learning_rate": 4.879935756742805e-06, "loss": 1.2755, "step": 1906 }, { "epoch": 2.06, "learning_rate": 4.869690370893739e-06, "loss": 1.4351, "step": 1907 }, { "epoch": 2.06, "learning_rate": 4.8594522887215776e-06, "loss": 1.5472, "step": 1908 }, { "epoch": 2.07, "learning_rate": 4.849221524801576e-06, "loss": 1.5711, "step": 1909 }, { "epoch": 2.07, "learning_rate": 4.838998093698596e-06, "loss": 1.3841, "step": 1910 }, { "epoch": 2.07, "learning_rate": 4.828782009967044e-06, "loss": 1.2828, "step": 1911 }, { "epoch": 2.07, "learning_rate": 4.81857328815087e-06, "loss": 1.5097, "step": 1912 }, { "epoch": 2.07, "learning_rate": 4.808371942783554e-06, "loss": 1.1843, "step": 1913 }, { "epoch": 2.07, "learning_rate": 4.798177988388052e-06, "loss": 1.3296, "step": 1914 }, { "epoch": 2.07, "learning_rate": 4.7879914394768225e-06, "loss": 1.4215, "step": 1915 }, { "epoch": 2.07, "learning_rate": 4.777812310551767e-06, "loss": 1.4029, "step": 1916 }, { "epoch": 2.07, "learning_rate": 4.767640616104222e-06, "loss": 1.6383, "step": 1917 }, { "epoch": 2.08, "learning_rate": 4.757476370614957e-06, "loss": 1.2297, "step": 1918 }, { "epoch": 2.08, "learning_rate": 4.74731958855411e-06, "loss": 1.2133, "step": 1919 }, { "epoch": 2.08, "learning_rate": 4.737170284381224e-06, "loss": 1.3092, "step": 1920 }, { "epoch": 2.08, "learning_rate": 4.727028472545165e-06, "loss": 1.4945, "step": 1921 }, { "epoch": 2.08, "learning_rate": 4.716894167484161e-06, "loss": 1.5155, "step": 1922 }, { "epoch": 2.08, "learning_rate": 4.706767383625737e-06, "loss": 1.4136, "step": 1923 }, { "epoch": 2.08, "learning_rate": 4.6966481353867085e-06, "loss": 1.7102, "step": 1924 }, { "epoch": 2.08, "learning_rate": 4.6865364371731825e-06, "loss": 1.2092, "step": 1925 }, { "epoch": 2.08, "learning_rate": 4.6764323033804895e-06, "loss": 1.5983, "step": 1926 }, { "epoch": 2.09, "learning_rate": 4.666335748393214e-06, "loss": 1.3997, "step": 1927 }, { "epoch": 2.09, "learning_rate": 4.65624678658514e-06, "loss": 1.6628, "step": 1928 }, { "epoch": 2.09, "learning_rate": 4.646165432319242e-06, "loss": 1.5539, "step": 1929 }, { "epoch": 2.09, "learning_rate": 4.636091699947675e-06, "loss": 1.3694, "step": 1930 }, { "epoch": 2.09, "learning_rate": 4.62602560381172e-06, "loss": 1.4263, "step": 1931 }, { "epoch": 2.09, "learning_rate": 4.615967158241814e-06, "loss": 1.5073, "step": 1932 }, { "epoch": 2.09, "learning_rate": 4.6059163775574856e-06, "loss": 1.4607, "step": 1933 }, { "epoch": 2.09, "learning_rate": 4.595873276067354e-06, "loss": 1.3914, "step": 1934 }, { "epoch": 2.09, "learning_rate": 4.5858378680691085e-06, "loss": 1.6595, "step": 1935 }, { "epoch": 2.1, "learning_rate": 4.575810167849481e-06, "loss": 1.4194, "step": 1936 }, { "epoch": 2.1, "learning_rate": 4.565790189684247e-06, "loss": 1.5239, "step": 1937 }, { "epoch": 2.1, "learning_rate": 4.555777947838158e-06, "loss": 1.4987, "step": 1938 }, { "epoch": 2.1, "learning_rate": 4.545773456564983e-06, "loss": 1.5604, "step": 1939 }, { "epoch": 2.1, "learning_rate": 4.535776730107438e-06, "loss": 1.4635, "step": 1940 }, { "epoch": 2.1, "learning_rate": 4.525787782697191e-06, "loss": 1.3534, "step": 1941 }, { "epoch": 2.1, "learning_rate": 4.515806628554835e-06, "loss": 1.3225, "step": 1942 }, { "epoch": 2.1, "learning_rate": 4.505833281889866e-06, "loss": 1.2723, "step": 1943 }, { "epoch": 2.1, "learning_rate": 4.495867756900671e-06, "loss": 1.3468, "step": 1944 }, { "epoch": 2.1, "learning_rate": 4.485910067774498e-06, "loss": 1.0302, "step": 1945 }, { "epoch": 2.11, "learning_rate": 4.475960228687437e-06, "loss": 1.5602, "step": 1946 }, { "epoch": 2.11, "learning_rate": 4.466018253804405e-06, "loss": 1.3989, "step": 1947 }, { "epoch": 2.11, "learning_rate": 4.456084157279125e-06, "loss": 1.2241, "step": 1948 }, { "epoch": 2.11, "learning_rate": 4.446157953254101e-06, "loss": 1.2904, "step": 1949 }, { "epoch": 2.11, "learning_rate": 4.436239655860599e-06, "loss": 1.3973, "step": 1950 }, { "epoch": 2.11, "learning_rate": 4.42632927921864e-06, "loss": 1.4488, "step": 1951 }, { "epoch": 2.11, "learning_rate": 4.416426837436956e-06, "loss": 1.5007, "step": 1952 }, { "epoch": 2.11, "learning_rate": 4.406532344612988e-06, "loss": 1.4288, "step": 1953 }, { "epoch": 2.11, "learning_rate": 4.39664581483286e-06, "loss": 1.358, "step": 1954 }, { "epoch": 2.12, "learning_rate": 4.386767262171362e-06, "loss": 1.7036, "step": 1955 }, { "epoch": 2.12, "learning_rate": 4.376896700691919e-06, "loss": 1.2204, "step": 1956 }, { "epoch": 2.12, "learning_rate": 4.367034144446594e-06, "loss": 1.4717, "step": 1957 }, { "epoch": 2.12, "learning_rate": 4.357179607476042e-06, "loss": 1.4448, "step": 1958 }, { "epoch": 2.12, "learning_rate": 4.347333103809504e-06, "loss": 1.296, "step": 1959 }, { "epoch": 2.12, "learning_rate": 4.337494647464785e-06, "loss": 1.3612, "step": 1960 }, { "epoch": 2.12, "learning_rate": 4.327664252448235e-06, "loss": 1.4104, "step": 1961 }, { "epoch": 2.12, "learning_rate": 4.317841932754725e-06, "loss": 1.4945, "step": 1962 }, { "epoch": 2.12, "learning_rate": 4.308027702367628e-06, "loss": 1.4625, "step": 1963 }, { "epoch": 2.13, "learning_rate": 4.298221575258814e-06, "loss": 1.2922, "step": 1964 }, { "epoch": 2.13, "learning_rate": 4.288423565388599e-06, "loss": 1.5656, "step": 1965 }, { "epoch": 2.13, "learning_rate": 4.278633686705753e-06, "loss": 1.5695, "step": 1966 }, { "epoch": 2.13, "learning_rate": 4.268851953147468e-06, "loss": 1.5205, "step": 1967 }, { "epoch": 2.13, "learning_rate": 4.259078378639338e-06, "loss": 1.6002, "step": 1968 }, { "epoch": 2.13, "learning_rate": 4.249312977095352e-06, "loss": 1.5762, "step": 1969 }, { "epoch": 2.13, "learning_rate": 4.239555762417843e-06, "loss": 1.2551, "step": 1970 }, { "epoch": 2.13, "learning_rate": 4.229806748497512e-06, "loss": 1.4463, "step": 1971 }, { "epoch": 2.13, "learning_rate": 4.2200659492133715e-06, "loss": 1.5712, "step": 1972 }, { "epoch": 2.14, "learning_rate": 4.2103333784327406e-06, "loss": 1.4268, "step": 1973 }, { "epoch": 2.14, "learning_rate": 4.200609050011229e-06, "loss": 1.4475, "step": 1974 }, { "epoch": 2.14, "learning_rate": 4.190892977792704e-06, "loss": 1.4122, "step": 1975 }, { "epoch": 2.14, "learning_rate": 4.1811851756092945e-06, "loss": 1.6737, "step": 1976 }, { "epoch": 2.14, "learning_rate": 4.171485657281333e-06, "loss": 1.2418, "step": 1977 }, { "epoch": 2.14, "learning_rate": 4.161794436617381e-06, "loss": 1.6695, "step": 1978 }, { "epoch": 2.14, "learning_rate": 4.152111527414177e-06, "loss": 1.7606, "step": 1979 }, { "epoch": 2.14, "learning_rate": 4.14243694345662e-06, "loss": 1.2592, "step": 1980 }, { "epoch": 2.14, "learning_rate": 4.1327706985177775e-06, "loss": 1.2693, "step": 1981 }, { "epoch": 2.15, "learning_rate": 4.123112806358819e-06, "loss": 1.6064, "step": 1982 }, { "epoch": 2.15, "learning_rate": 4.113463280729047e-06, "loss": 1.4412, "step": 1983 }, { "epoch": 2.15, "learning_rate": 4.103822135365837e-06, "loss": 1.4213, "step": 1984 }, { "epoch": 2.15, "learning_rate": 4.094189383994638e-06, "loss": 1.8249, "step": 1985 }, { "epoch": 2.15, "learning_rate": 4.08456504032896e-06, "loss": 1.531, "step": 1986 }, { "epoch": 2.15, "learning_rate": 4.074949118070319e-06, "loss": 1.1268, "step": 1987 }, { "epoch": 2.15, "learning_rate": 4.065341630908273e-06, "loss": 1.2828, "step": 1988 }, { "epoch": 2.15, "learning_rate": 4.0557425925203394e-06, "loss": 1.4168, "step": 1989 }, { "epoch": 2.15, "learning_rate": 4.046152016572037e-06, "loss": 1.2655, "step": 1990 }, { "epoch": 2.15, "learning_rate": 4.03656991671682e-06, "loss": 1.3362, "step": 1991 }, { "epoch": 2.16, "learning_rate": 4.026996306596073e-06, "loss": 1.5129, "step": 1992 }, { "epoch": 2.16, "learning_rate": 4.017431199839115e-06, "loss": 1.2201, "step": 1993 }, { "epoch": 2.16, "learning_rate": 4.00787461006313e-06, "loss": 1.1742, "step": 1994 }, { "epoch": 2.16, "learning_rate": 3.998326550873203e-06, "loss": 1.5935, "step": 1995 }, { "epoch": 2.16, "learning_rate": 3.9887870358622595e-06, "loss": 1.4226, "step": 1996 }, { "epoch": 2.16, "learning_rate": 3.979256078611068e-06, "loss": 1.1554, "step": 1997 }, { "epoch": 2.16, "learning_rate": 3.96973369268821e-06, "loss": 1.4834, "step": 1998 }, { "epoch": 2.16, "learning_rate": 3.960219891650063e-06, "loss": 1.3716, "step": 1999 }, { "epoch": 2.16, "learning_rate": 3.950714689040797e-06, "loss": 1.4048, "step": 2000 }, { "epoch": 2.17, "learning_rate": 3.941218098392313e-06, "loss": 1.539, "step": 2001 }, { "epoch": 2.17, "learning_rate": 3.931730133224283e-06, "loss": 1.5156, "step": 2002 }, { "epoch": 2.17, "learning_rate": 3.92225080704408e-06, "loss": 1.4501, "step": 2003 }, { "epoch": 2.17, "learning_rate": 3.912780133346783e-06, "loss": 1.3051, "step": 2004 }, { "epoch": 2.17, "learning_rate": 3.903318125615153e-06, "loss": 1.4459, "step": 2005 }, { "epoch": 2.17, "learning_rate": 3.893864797319612e-06, "loss": 1.4032, "step": 2006 }, { "epoch": 2.17, "learning_rate": 3.884420161918234e-06, "loss": 1.481, "step": 2007 }, { "epoch": 2.17, "learning_rate": 3.874984232856709e-06, "loss": 1.3008, "step": 2008 }, { "epoch": 2.17, "learning_rate": 3.865557023568334e-06, "loss": 1.4493, "step": 2009 }, { "epoch": 2.18, "learning_rate": 3.856138547473993e-06, "loss": 1.5349, "step": 2010 }, { "epoch": 2.18, "learning_rate": 3.846728817982137e-06, "loss": 1.3654, "step": 2011 }, { "epoch": 2.18, "learning_rate": 3.837327848488767e-06, "loss": 1.4167, "step": 2012 }, { "epoch": 2.18, "learning_rate": 3.827935652377404e-06, "loss": 1.3823, "step": 2013 }, { "epoch": 2.18, "learning_rate": 3.818552243019098e-06, "loss": 1.3817, "step": 2014 }, { "epoch": 2.18, "learning_rate": 3.8091776337723708e-06, "loss": 1.4042, "step": 2015 }, { "epoch": 2.18, "learning_rate": 3.7998118379832236e-06, "loss": 1.7141, "step": 2016 }, { "epoch": 2.18, "learning_rate": 3.790454868985113e-06, "loss": 1.3342, "step": 2017 }, { "epoch": 2.18, "learning_rate": 3.7811067400989233e-06, "loss": 1.3006, "step": 2018 }, { "epoch": 2.19, "learning_rate": 3.771767464632956e-06, "loss": 1.562, "step": 2019 }, { "epoch": 2.19, "learning_rate": 3.762437055882916e-06, "loss": 1.4655, "step": 2020 }, { "epoch": 2.19, "learning_rate": 3.7531155271318744e-06, "loss": 1.4776, "step": 2021 }, { "epoch": 2.19, "learning_rate": 3.7438028916502656e-06, "loss": 1.5002, "step": 2022 }, { "epoch": 2.19, "learning_rate": 3.734499162695864e-06, "loss": 1.4091, "step": 2023 }, { "epoch": 2.19, "learning_rate": 3.7252043535137606e-06, "loss": 1.3672, "step": 2024 }, { "epoch": 2.19, "learning_rate": 3.715918477336352e-06, "loss": 1.5356, "step": 2025 }, { "epoch": 2.19, "learning_rate": 3.706641547383312e-06, "loss": 1.3502, "step": 2026 }, { "epoch": 2.19, "learning_rate": 3.6973735768615894e-06, "loss": 1.1353, "step": 2027 }, { "epoch": 2.19, "learning_rate": 3.688114578965366e-06, "loss": 1.4528, "step": 2028 }, { "epoch": 2.2, "learning_rate": 3.678864566876055e-06, "loss": 1.2521, "step": 2029 }, { "epoch": 2.2, "learning_rate": 3.669623553762278e-06, "loss": 1.4918, "step": 2030 }, { "epoch": 2.2, "learning_rate": 3.6603915527798382e-06, "loss": 1.5781, "step": 2031 }, { "epoch": 2.2, "learning_rate": 3.6511685770717286e-06, "loss": 1.5873, "step": 2032 }, { "epoch": 2.2, "learning_rate": 3.6419546397680627e-06, "loss": 1.395, "step": 2033 }, { "epoch": 2.2, "learning_rate": 3.6327497539861144e-06, "loss": 1.3087, "step": 2034 }, { "epoch": 2.2, "learning_rate": 3.6235539328302584e-06, "loss": 1.6652, "step": 2035 }, { "epoch": 2.2, "learning_rate": 3.614367189391964e-06, "loss": 1.4231, "step": 2036 }, { "epoch": 2.2, "learning_rate": 3.605189536749788e-06, "loss": 1.4437, "step": 2037 }, { "epoch": 2.21, "learning_rate": 3.5960209879693263e-06, "loss": 1.2058, "step": 2038 }, { "epoch": 2.21, "learning_rate": 3.586861556103237e-06, "loss": 1.5371, "step": 2039 }, { "epoch": 2.21, "learning_rate": 3.577711254191176e-06, "loss": 1.2755, "step": 2040 }, { "epoch": 2.21, "learning_rate": 3.568570095259821e-06, "loss": 1.2946, "step": 2041 }, { "epoch": 2.21, "learning_rate": 3.5594380923228244e-06, "loss": 1.0936, "step": 2042 }, { "epoch": 2.21, "learning_rate": 3.5503152583807987e-06, "loss": 1.3753, "step": 2043 }, { "epoch": 2.21, "learning_rate": 3.5412016064213216e-06, "loss": 1.3818, "step": 2044 }, { "epoch": 2.21, "learning_rate": 3.5320971494188715e-06, "loss": 1.5873, "step": 2045 }, { "epoch": 2.21, "learning_rate": 3.5230019003348627e-06, "loss": 1.4284, "step": 2046 }, { "epoch": 2.22, "learning_rate": 3.513915872117586e-06, "loss": 1.2154, "step": 2047 }, { "epoch": 2.22, "learning_rate": 3.504839077702207e-06, "loss": 1.2334, "step": 2048 }, { "epoch": 2.22, "learning_rate": 3.495771530010755e-06, "loss": 1.46, "step": 2049 }, { "epoch": 2.22, "learning_rate": 3.486713241952078e-06, "loss": 1.5319, "step": 2050 }, { "epoch": 2.22, "learning_rate": 3.477664226421862e-06, "loss": 1.1816, "step": 2051 }, { "epoch": 2.22, "learning_rate": 3.4686244963025704e-06, "loss": 1.4446, "step": 2052 }, { "epoch": 2.22, "learning_rate": 3.4595940644634684e-06, "loss": 1.2212, "step": 2053 }, { "epoch": 2.22, "learning_rate": 3.4505729437605727e-06, "loss": 1.4979, "step": 2054 }, { "epoch": 2.22, "learning_rate": 3.441561147036642e-06, "loss": 1.2802, "step": 2055 }, { "epoch": 2.23, "learning_rate": 3.4325586871211745e-06, "loss": 1.2958, "step": 2056 }, { "epoch": 2.23, "learning_rate": 3.4235655768303542e-06, "loss": 1.1839, "step": 2057 }, { "epoch": 2.23, "learning_rate": 3.4145818289670796e-06, "loss": 1.3612, "step": 2058 }, { "epoch": 2.23, "learning_rate": 3.405607456320903e-06, "loss": 1.3424, "step": 2059 }, { "epoch": 2.23, "learning_rate": 3.396642471668037e-06, "loss": 1.3505, "step": 2060 }, { "epoch": 2.23, "learning_rate": 3.3876868877713253e-06, "loss": 1.3737, "step": 2061 }, { "epoch": 2.23, "learning_rate": 3.378740717380229e-06, "loss": 1.2234, "step": 2062 }, { "epoch": 2.23, "learning_rate": 3.3698039732308197e-06, "loss": 1.416, "step": 2063 }, { "epoch": 2.23, "learning_rate": 3.360876668045725e-06, "loss": 1.3574, "step": 2064 }, { "epoch": 2.23, "learning_rate": 3.3519588145341586e-06, "loss": 1.3652, "step": 2065 }, { "epoch": 2.24, "learning_rate": 3.343050425391866e-06, "loss": 1.1773, "step": 2066 }, { "epoch": 2.24, "learning_rate": 3.3341515133011236e-06, "loss": 1.2722, "step": 2067 }, { "epoch": 2.24, "learning_rate": 3.3252620909307123e-06, "loss": 1.3004, "step": 2068 }, { "epoch": 2.24, "learning_rate": 3.316382170935901e-06, "loss": 1.3741, "step": 2069 }, { "epoch": 2.24, "learning_rate": 3.3075117659584444e-06, "loss": 1.4394, "step": 2070 }, { "epoch": 2.24, "learning_rate": 3.2986508886265367e-06, "loss": 1.459, "step": 2071 }, { "epoch": 2.24, "learning_rate": 3.289799551554812e-06, "loss": 1.365, "step": 2072 }, { "epoch": 2.24, "learning_rate": 3.280957767344326e-06, "loss": 1.2966, "step": 2073 }, { "epoch": 2.24, "learning_rate": 3.272125548582531e-06, "loss": 1.2573, "step": 2074 }, { "epoch": 2.25, "learning_rate": 3.263302907843263e-06, "loss": 1.5322, "step": 2075 }, { "epoch": 2.25, "learning_rate": 3.2544898576867212e-06, "loss": 1.3625, "step": 2076 }, { "epoch": 2.25, "learning_rate": 3.2456864106594564e-06, "loss": 1.4287, "step": 2077 }, { "epoch": 2.25, "learning_rate": 3.236892579294343e-06, "loss": 1.4564, "step": 2078 }, { "epoch": 2.25, "learning_rate": 3.2281083761105657e-06, "loss": 1.5942, "step": 2079 }, { "epoch": 2.25, "learning_rate": 3.2193338136136055e-06, "loss": 1.7454, "step": 2080 }, { "epoch": 2.25, "learning_rate": 3.210568904295214e-06, "loss": 1.5231, "step": 2081 }, { "epoch": 2.25, "learning_rate": 3.201813660633407e-06, "loss": 1.0115, "step": 2082 }, { "epoch": 2.25, "learning_rate": 3.1930680950924375e-06, "loss": 1.3776, "step": 2083 }, { "epoch": 2.26, "learning_rate": 3.1843322201227755e-06, "loss": 1.4914, "step": 2084 }, { "epoch": 2.26, "learning_rate": 3.1756060481611006e-06, "loss": 1.4918, "step": 2085 }, { "epoch": 2.26, "learning_rate": 3.1668895916302765e-06, "loss": 1.46, "step": 2086 }, { "epoch": 2.26, "learning_rate": 3.158182862939334e-06, "loss": 1.3948, "step": 2087 }, { "epoch": 2.26, "learning_rate": 3.149485874483462e-06, "loss": 1.1684, "step": 2088 }, { "epoch": 2.26, "learning_rate": 3.140798638643977e-06, "loss": 1.3323, "step": 2089 }, { "epoch": 2.26, "learning_rate": 3.1321211677883122e-06, "loss": 1.6293, "step": 2090 }, { "epoch": 2.26, "learning_rate": 3.1234534742699984e-06, "loss": 1.3293, "step": 2091 }, { "epoch": 2.26, "learning_rate": 3.1147955704286483e-06, "loss": 1.2917, "step": 2092 }, { "epoch": 2.27, "learning_rate": 3.1061474685899386e-06, "loss": 1.5275, "step": 2093 }, { "epoch": 2.27, "learning_rate": 3.0975091810655868e-06, "loss": 1.4996, "step": 2094 }, { "epoch": 2.27, "learning_rate": 3.088880720153348e-06, "loss": 1.4054, "step": 2095 }, { "epoch": 2.27, "learning_rate": 3.080262098136979e-06, "loss": 1.2308, "step": 2096 }, { "epoch": 2.27, "learning_rate": 3.0716533272862327e-06, "loss": 1.5492, "step": 2097 }, { "epoch": 2.27, "learning_rate": 3.063054419856837e-06, "loss": 1.3727, "step": 2098 }, { "epoch": 2.27, "learning_rate": 3.054465388090475e-06, "loss": 1.4997, "step": 2099 }, { "epoch": 2.27, "learning_rate": 3.0458862442147843e-06, "loss": 1.4322, "step": 2100 }, { "epoch": 2.27, "learning_rate": 3.037317000443302e-06, "loss": 1.3417, "step": 2101 }, { "epoch": 2.27, "learning_rate": 3.0287576689754926e-06, "loss": 1.2467, "step": 2102 }, { "epoch": 2.28, "learning_rate": 3.0202082619966987e-06, "loss": 1.6833, "step": 2103 }, { "epoch": 2.28, "learning_rate": 3.0116687916781374e-06, "loss": 1.4901, "step": 2104 }, { "epoch": 2.28, "learning_rate": 3.003139270176877e-06, "loss": 1.6554, "step": 2105 }, { "epoch": 2.28, "learning_rate": 2.9946197096358197e-06, "loss": 1.2817, "step": 2106 }, { "epoch": 2.28, "learning_rate": 2.986110122183703e-06, "loss": 1.4481, "step": 2107 }, { "epoch": 2.28, "learning_rate": 2.977610519935041e-06, "loss": 1.3326, "step": 2108 }, { "epoch": 2.28, "learning_rate": 2.9691209149901543e-06, "loss": 1.3675, "step": 2109 }, { "epoch": 2.28, "learning_rate": 2.9606413194351214e-06, "loss": 1.2567, "step": 2110 }, { "epoch": 2.28, "learning_rate": 2.9521717453417665e-06, "loss": 1.5993, "step": 2111 }, { "epoch": 2.29, "learning_rate": 2.9437122047676648e-06, "loss": 1.5903, "step": 2112 }, { "epoch": 2.29, "learning_rate": 2.9352627097560826e-06, "loss": 1.2208, "step": 2113 }, { "epoch": 2.29, "learning_rate": 2.926823272336009e-06, "loss": 1.2143, "step": 2114 }, { "epoch": 2.29, "learning_rate": 2.9183939045220923e-06, "loss": 1.3815, "step": 2115 }, { "epoch": 2.29, "learning_rate": 2.909974618314665e-06, "loss": 1.332, "step": 2116 }, { "epoch": 2.29, "learning_rate": 2.9015654256996972e-06, "loss": 1.5051, "step": 2117 }, { "epoch": 2.29, "learning_rate": 2.8931663386487863e-06, "loss": 1.1522, "step": 2118 }, { "epoch": 2.29, "learning_rate": 2.8847773691191596e-06, "loss": 1.3759, "step": 2119 }, { "epoch": 2.29, "learning_rate": 2.8763985290536135e-06, "loss": 1.4331, "step": 2120 }, { "epoch": 2.3, "learning_rate": 2.868029830380551e-06, "loss": 1.4589, "step": 2121 }, { "epoch": 2.3, "learning_rate": 2.859671285013922e-06, "loss": 1.4327, "step": 2122 }, { "epoch": 2.3, "learning_rate": 2.851322904853222e-06, "loss": 1.323, "step": 2123 }, { "epoch": 2.3, "learning_rate": 2.8429847017834877e-06, "loss": 1.4878, "step": 2124 }, { "epoch": 2.3, "learning_rate": 2.834656687675247e-06, "loss": 1.5697, "step": 2125 }, { "epoch": 2.3, "learning_rate": 2.826338874384541e-06, "loss": 1.4001, "step": 2126 }, { "epoch": 2.3, "learning_rate": 2.8180312737528804e-06, "loss": 1.0184, "step": 2127 }, { "epoch": 2.3, "learning_rate": 2.8097338976072362e-06, "loss": 1.3859, "step": 2128 }, { "epoch": 2.3, "learning_rate": 2.801446757760026e-06, "loss": 1.3821, "step": 2129 }, { "epoch": 2.31, "learning_rate": 2.79316986600909e-06, "loss": 1.5042, "step": 2130 }, { "epoch": 2.31, "learning_rate": 2.7849032341376914e-06, "loss": 1.4607, "step": 2131 }, { "epoch": 2.31, "learning_rate": 2.776646873914466e-06, "loss": 1.5038, "step": 2132 }, { "epoch": 2.31, "learning_rate": 2.768400797093449e-06, "loss": 1.2624, "step": 2133 }, { "epoch": 2.31, "learning_rate": 2.76016501541402e-06, "loss": 1.3989, "step": 2134 }, { "epoch": 2.31, "learning_rate": 2.7519395406009107e-06, "loss": 1.493, "step": 2135 }, { "epoch": 2.31, "learning_rate": 2.743724384364176e-06, "loss": 1.4691, "step": 2136 }, { "epoch": 2.31, "learning_rate": 2.735519558399177e-06, "loss": 1.7036, "step": 2137 }, { "epoch": 2.31, "learning_rate": 2.727325074386579e-06, "loss": 1.3342, "step": 2138 }, { "epoch": 2.31, "learning_rate": 2.719140943992317e-06, "loss": 1.4529, "step": 2139 }, { "epoch": 2.32, "learning_rate": 2.7109671788675838e-06, "loss": 1.3309, "step": 2140 }, { "epoch": 2.32, "learning_rate": 2.7028037906488223e-06, "loss": 1.227, "step": 2141 }, { "epoch": 2.32, "learning_rate": 2.694650790957698e-06, "loss": 1.4928, "step": 2142 }, { "epoch": 2.32, "learning_rate": 2.6865081914010894e-06, "loss": 1.2924, "step": 2143 }, { "epoch": 2.32, "learning_rate": 2.6783760035710636e-06, "loss": 1.4359, "step": 2144 }, { "epoch": 2.32, "learning_rate": 2.6702542390448773e-06, "loss": 1.3516, "step": 2145 }, { "epoch": 2.32, "learning_rate": 2.662142909384937e-06, "loss": 1.3663, "step": 2146 }, { "epoch": 2.32, "learning_rate": 2.6540420261387966e-06, "loss": 1.6022, "step": 2147 }, { "epoch": 2.32, "learning_rate": 2.6459516008391394e-06, "loss": 1.6489, "step": 2148 }, { "epoch": 2.33, "learning_rate": 2.63787164500376e-06, "loss": 1.3224, "step": 2149 }, { "epoch": 2.33, "learning_rate": 2.629802170135546e-06, "loss": 1.472, "step": 2150 }, { "epoch": 2.33, "learning_rate": 2.621743187722472e-06, "loss": 1.5771, "step": 2151 }, { "epoch": 2.33, "learning_rate": 2.6136947092375655e-06, "loss": 1.1191, "step": 2152 }, { "epoch": 2.33, "learning_rate": 2.605656746138905e-06, "loss": 1.3474, "step": 2153 }, { "epoch": 2.33, "learning_rate": 2.597629309869597e-06, "loss": 1.3218, "step": 2154 }, { "epoch": 2.33, "learning_rate": 2.589612411857765e-06, "loss": 1.6099, "step": 2155 }, { "epoch": 2.33, "learning_rate": 2.5816060635165254e-06, "loss": 1.5729, "step": 2156 }, { "epoch": 2.33, "learning_rate": 2.5736102762439775e-06, "loss": 1.3291, "step": 2157 }, { "epoch": 2.34, "learning_rate": 2.5656250614231893e-06, "loss": 1.2772, "step": 2158 }, { "epoch": 2.34, "learning_rate": 2.5576504304221727e-06, "loss": 1.4413, "step": 2159 }, { "epoch": 2.34, "learning_rate": 2.549686394593874e-06, "loss": 1.3817, "step": 2160 }, { "epoch": 2.34, "learning_rate": 2.541732965276156e-06, "loss": 1.3927, "step": 2161 }, { "epoch": 2.34, "learning_rate": 2.5337901537917776e-06, "loss": 1.3298, "step": 2162 }, { "epoch": 2.34, "learning_rate": 2.5258579714483933e-06, "loss": 1.4091, "step": 2163 }, { "epoch": 2.34, "learning_rate": 2.517936429538508e-06, "loss": 1.4948, "step": 2164 }, { "epoch": 2.34, "learning_rate": 2.5100255393394956e-06, "loss": 1.2598, "step": 2165 }, { "epoch": 2.34, "learning_rate": 2.502125312113556e-06, "loss": 1.4503, "step": 2166 }, { "epoch": 2.35, "learning_rate": 2.4942357591077125e-06, "loss": 1.2476, "step": 2167 }, { "epoch": 2.35, "learning_rate": 2.4863568915537893e-06, "loss": 1.4236, "step": 2168 }, { "epoch": 2.35, "learning_rate": 2.4784887206684007e-06, "loss": 1.6978, "step": 2169 }, { "epoch": 2.35, "learning_rate": 2.470631257652939e-06, "loss": 1.3295, "step": 2170 }, { "epoch": 2.35, "learning_rate": 2.4627845136935336e-06, "loss": 1.4273, "step": 2171 }, { "epoch": 2.35, "learning_rate": 2.4549484999610773e-06, "loss": 1.3365, "step": 2172 }, { "epoch": 2.35, "learning_rate": 2.4471232276111723e-06, "loss": 1.143, "step": 2173 }, { "epoch": 2.35, "learning_rate": 2.4393087077841293e-06, "loss": 1.5348, "step": 2174 }, { "epoch": 2.35, "learning_rate": 2.431504951604964e-06, "loss": 1.6333, "step": 2175 }, { "epoch": 2.35, "learning_rate": 2.423711970183347e-06, "loss": 1.3676, "step": 2176 }, { "epoch": 2.36, "learning_rate": 2.415929774613631e-06, "loss": 1.2428, "step": 2177 }, { "epoch": 2.36, "learning_rate": 2.408158375974804e-06, "loss": 1.3881, "step": 2178 }, { "epoch": 2.36, "learning_rate": 2.4003977853304774e-06, "loss": 1.3635, "step": 2179 }, { "epoch": 2.36, "learning_rate": 2.392648013728893e-06, "loss": 1.4889, "step": 2180 }, { "epoch": 2.36, "learning_rate": 2.384909072202868e-06, "loss": 1.5048, "step": 2181 }, { "epoch": 2.36, "learning_rate": 2.3771809717698233e-06, "loss": 1.4755, "step": 2182 }, { "epoch": 2.36, "learning_rate": 2.3694637234317252e-06, "loss": 1.1849, "step": 2183 }, { "epoch": 2.36, "learning_rate": 2.3617573381751114e-06, "loss": 1.5121, "step": 2184 }, { "epoch": 2.36, "learning_rate": 2.3540618269710393e-06, "loss": 1.3481, "step": 2185 }, { "epoch": 2.37, "learning_rate": 2.3463772007750898e-06, "loss": 1.313, "step": 2186 }, { "epoch": 2.37, "learning_rate": 2.338703470527355e-06, "loss": 1.3664, "step": 2187 }, { "epoch": 2.37, "learning_rate": 2.3310406471523994e-06, "loss": 1.2965, "step": 2188 }, { "epoch": 2.37, "learning_rate": 2.323388741559277e-06, "loss": 1.5522, "step": 2189 }, { "epoch": 2.37, "learning_rate": 2.3157477646414896e-06, "loss": 1.3192, "step": 2190 }, { "epoch": 2.37, "learning_rate": 2.3081177272769806e-06, "loss": 1.2854, "step": 2191 }, { "epoch": 2.37, "learning_rate": 2.3004986403281215e-06, "loss": 1.4903, "step": 2192 }, { "epoch": 2.37, "learning_rate": 2.2928905146416925e-06, "loss": 1.422, "step": 2193 }, { "epoch": 2.37, "learning_rate": 2.285293361048878e-06, "loss": 1.3284, "step": 2194 }, { "epoch": 2.38, "learning_rate": 2.2777071903652248e-06, "loss": 1.5621, "step": 2195 }, { "epoch": 2.38, "learning_rate": 2.2701320133906614e-06, "loss": 1.5547, "step": 2196 }, { "epoch": 2.38, "learning_rate": 2.2625678409094554e-06, "loss": 1.424, "step": 2197 }, { "epoch": 2.38, "learning_rate": 2.2550146836902107e-06, "loss": 1.5292, "step": 2198 }, { "epoch": 2.38, "learning_rate": 2.247472552485849e-06, "loss": 1.5616, "step": 2199 }, { "epoch": 2.38, "learning_rate": 2.239941458033593e-06, "loss": 1.3659, "step": 2200 }, { "epoch": 2.38, "learning_rate": 2.2324214110549613e-06, "loss": 1.6546, "step": 2201 }, { "epoch": 2.38, "learning_rate": 2.2249124222557362e-06, "loss": 1.4161, "step": 2202 }, { "epoch": 2.38, "learning_rate": 2.2174145023259595e-06, "loss": 1.4106, "step": 2203 }, { "epoch": 2.39, "learning_rate": 2.209927661939918e-06, "loss": 1.258, "step": 2204 }, { "epoch": 2.39, "learning_rate": 2.2024519117561203e-06, "loss": 1.5043, "step": 2205 }, { "epoch": 2.39, "learning_rate": 2.1949872624172907e-06, "loss": 1.388, "step": 2206 }, { "epoch": 2.39, "learning_rate": 2.187533724550346e-06, "loss": 1.4317, "step": 2207 }, { "epoch": 2.39, "learning_rate": 2.180091308766391e-06, "loss": 1.29, "step": 2208 }, { "epoch": 2.39, "learning_rate": 2.172660025660692e-06, "loss": 1.3285, "step": 2209 }, { "epoch": 2.39, "learning_rate": 2.1652398858126656e-06, "loss": 1.6029, "step": 2210 }, { "epoch": 2.39, "learning_rate": 2.1578308997858664e-06, "loss": 1.3068, "step": 2211 }, { "epoch": 2.39, "learning_rate": 2.1504330781279703e-06, "loss": 1.6577, "step": 2212 }, { "epoch": 2.4, "learning_rate": 2.1430464313707557e-06, "loss": 1.4977, "step": 2213 }, { "epoch": 2.4, "learning_rate": 2.1356709700300994e-06, "loss": 1.2976, "step": 2214 }, { "epoch": 2.4, "learning_rate": 2.128306704605949e-06, "loss": 1.2786, "step": 2215 }, { "epoch": 2.4, "learning_rate": 2.1209536455823123e-06, "loss": 1.3484, "step": 2216 }, { "epoch": 2.4, "learning_rate": 2.1136118034272444e-06, "loss": 1.1646, "step": 2217 }, { "epoch": 2.4, "learning_rate": 2.106281188592836e-06, "loss": 1.5719, "step": 2218 }, { "epoch": 2.4, "learning_rate": 2.098961811515187e-06, "loss": 1.5823, "step": 2219 }, { "epoch": 2.4, "learning_rate": 2.0916536826144006e-06, "loss": 1.3301, "step": 2220 }, { "epoch": 2.4, "learning_rate": 2.0843568122945747e-06, "loss": 1.5403, "step": 2221 }, { "epoch": 2.4, "learning_rate": 2.077071210943771e-06, "loss": 1.4609, "step": 2222 }, { "epoch": 2.41, "learning_rate": 2.069796888934008e-06, "loss": 1.1364, "step": 2223 }, { "epoch": 2.41, "learning_rate": 2.0625338566212505e-06, "loss": 1.2703, "step": 2224 }, { "epoch": 2.41, "learning_rate": 2.055282124345387e-06, "loss": 1.7914, "step": 2225 }, { "epoch": 2.41, "learning_rate": 2.048041702430228e-06, "loss": 1.0629, "step": 2226 }, { "epoch": 2.41, "learning_rate": 2.040812601183465e-06, "loss": 1.3095, "step": 2227 }, { "epoch": 2.41, "learning_rate": 2.033594830896691e-06, "loss": 1.2865, "step": 2228 }, { "epoch": 2.41, "learning_rate": 2.0263884018453596e-06, "loss": 1.6661, "step": 2229 }, { "epoch": 2.41, "learning_rate": 2.019193324288775e-06, "loss": 1.4554, "step": 2230 }, { "epoch": 2.41, "learning_rate": 2.0120096084700936e-06, "loss": 1.6451, "step": 2231 }, { "epoch": 2.42, "learning_rate": 2.0048372646162774e-06, "loss": 1.3967, "step": 2232 }, { "epoch": 2.42, "learning_rate": 1.9976763029381197e-06, "loss": 1.4399, "step": 2233 }, { "epoch": 2.42, "learning_rate": 1.990526733630196e-06, "loss": 1.5767, "step": 2234 }, { "epoch": 2.42, "learning_rate": 1.9833885668708686e-06, "loss": 1.1332, "step": 2235 }, { "epoch": 2.42, "learning_rate": 1.9762618128222653e-06, "loss": 1.1133, "step": 2236 }, { "epoch": 2.42, "learning_rate": 1.969146481630263e-06, "loss": 1.4982, "step": 2237 }, { "epoch": 2.42, "learning_rate": 1.9620425834244893e-06, "loss": 1.6646, "step": 2238 }, { "epoch": 2.42, "learning_rate": 1.954950128318276e-06, "loss": 1.4872, "step": 2239 }, { "epoch": 2.42, "learning_rate": 1.9478691264086824e-06, "loss": 1.5388, "step": 2240 }, { "epoch": 2.43, "learning_rate": 1.940799587776452e-06, "loss": 1.3563, "step": 2241 }, { "epoch": 2.43, "learning_rate": 1.933741522486009e-06, "loss": 1.2739, "step": 2242 }, { "epoch": 2.43, "learning_rate": 1.926694940585454e-06, "loss": 1.2479, "step": 2243 }, { "epoch": 2.43, "learning_rate": 1.9196598521065212e-06, "loss": 1.4168, "step": 2244 }, { "epoch": 2.43, "learning_rate": 1.912636267064605e-06, "loss": 1.1941, "step": 2245 }, { "epoch": 2.43, "learning_rate": 1.9056241954586997e-06, "loss": 1.5037, "step": 2246 }, { "epoch": 2.43, "learning_rate": 1.8986236472714281e-06, "loss": 1.5659, "step": 2247 }, { "epoch": 2.43, "learning_rate": 1.8916346324689982e-06, "loss": 1.2473, "step": 2248 }, { "epoch": 2.43, "learning_rate": 1.8846571610011965e-06, "loss": 1.3946, "step": 2249 }, { "epoch": 2.44, "learning_rate": 1.8776912428013883e-06, "loss": 1.5602, "step": 2250 }, { "epoch": 2.44, "learning_rate": 1.8707368877864729e-06, "loss": 1.3332, "step": 2251 }, { "epoch": 2.44, "learning_rate": 1.8637941058569065e-06, "loss": 1.6887, "step": 2252 }, { "epoch": 2.44, "learning_rate": 1.856862906896657e-06, "loss": 1.4142, "step": 2253 }, { "epoch": 2.44, "learning_rate": 1.8499433007732037e-06, "loss": 1.2743, "step": 2254 }, { "epoch": 2.44, "learning_rate": 1.8430352973375344e-06, "loss": 1.2524, "step": 2255 }, { "epoch": 2.44, "learning_rate": 1.8361389064240965e-06, "loss": 1.4024, "step": 2256 }, { "epoch": 2.44, "learning_rate": 1.829254137850829e-06, "loss": 1.3586, "step": 2257 }, { "epoch": 2.44, "learning_rate": 1.8223810014191046e-06, "loss": 1.6931, "step": 2258 }, { "epoch": 2.44, "learning_rate": 1.815519506913752e-06, "loss": 1.3345, "step": 2259 }, { "epoch": 2.45, "learning_rate": 1.808669664103019e-06, "loss": 1.5224, "step": 2260 }, { "epoch": 2.45, "learning_rate": 1.8018314827385618e-06, "loss": 1.4597, "step": 2261 }, { "epoch": 2.45, "learning_rate": 1.7950049725554476e-06, "loss": 1.4612, "step": 2262 }, { "epoch": 2.45, "learning_rate": 1.7881901432721104e-06, "loss": 1.3099, "step": 2263 }, { "epoch": 2.45, "learning_rate": 1.7813870045903715e-06, "loss": 1.4219, "step": 2264 }, { "epoch": 2.45, "learning_rate": 1.7745955661953996e-06, "loss": 1.3978, "step": 2265 }, { "epoch": 2.45, "learning_rate": 1.7678158377557097e-06, "loss": 1.4722, "step": 2266 }, { "epoch": 2.45, "learning_rate": 1.7610478289231435e-06, "loss": 1.4937, "step": 2267 }, { "epoch": 2.45, "learning_rate": 1.7542915493328594e-06, "loss": 1.4675, "step": 2268 }, { "epoch": 2.46, "learning_rate": 1.7475470086033264e-06, "loss": 1.5819, "step": 2269 }, { "epoch": 2.46, "learning_rate": 1.7408142163362818e-06, "loss": 1.4146, "step": 2270 }, { "epoch": 2.46, "learning_rate": 1.7340931821167572e-06, "loss": 1.3415, "step": 2271 }, { "epoch": 2.46, "learning_rate": 1.7273839155130355e-06, "loss": 1.5336, "step": 2272 }, { "epoch": 2.46, "learning_rate": 1.7206864260766486e-06, "loss": 1.4796, "step": 2273 }, { "epoch": 2.46, "learning_rate": 1.7140007233423606e-06, "loss": 1.5048, "step": 2274 }, { "epoch": 2.46, "learning_rate": 1.7073268168281564e-06, "loss": 1.5898, "step": 2275 }, { "epoch": 2.46, "learning_rate": 1.7006647160352308e-06, "loss": 1.4885, "step": 2276 }, { "epoch": 2.46, "learning_rate": 1.6940144304479678e-06, "loss": 1.2994, "step": 2277 }, { "epoch": 2.47, "learning_rate": 1.687375969533932e-06, "loss": 1.4332, "step": 2278 }, { "epoch": 2.47, "learning_rate": 1.6807493427438526e-06, "loss": 1.3831, "step": 2279 }, { "epoch": 2.47, "learning_rate": 1.6741345595116133e-06, "loss": 1.4273, "step": 2280 }, { "epoch": 2.47, "learning_rate": 1.6675316292542344e-06, "loss": 1.0097, "step": 2281 }, { "epoch": 2.47, "learning_rate": 1.6609405613718688e-06, "loss": 1.3448, "step": 2282 }, { "epoch": 2.47, "learning_rate": 1.654361365247773e-06, "loss": 1.5362, "step": 2283 }, { "epoch": 2.47, "learning_rate": 1.647794050248307e-06, "loss": 1.3242, "step": 2284 }, { "epoch": 2.47, "learning_rate": 1.6412386257229152e-06, "loss": 1.3754, "step": 2285 }, { "epoch": 2.47, "learning_rate": 1.6346951010041146e-06, "loss": 1.7743, "step": 2286 }, { "epoch": 2.48, "learning_rate": 1.6281634854074823e-06, "loss": 1.568, "step": 2287 }, { "epoch": 2.48, "learning_rate": 1.6216437882316382e-06, "loss": 1.2589, "step": 2288 }, { "epoch": 2.48, "learning_rate": 1.61513601875824e-06, "loss": 1.5652, "step": 2289 }, { "epoch": 2.48, "learning_rate": 1.6086401862519608e-06, "loss": 1.793, "step": 2290 }, { "epoch": 2.48, "learning_rate": 1.6021562999604789e-06, "loss": 1.4683, "step": 2291 }, { "epoch": 2.48, "learning_rate": 1.5956843691144686e-06, "loss": 1.6087, "step": 2292 }, { "epoch": 2.48, "learning_rate": 1.5892244029275805e-06, "loss": 1.3121, "step": 2293 }, { "epoch": 2.48, "learning_rate": 1.5827764105964405e-06, "loss": 1.2332, "step": 2294 }, { "epoch": 2.48, "learning_rate": 1.5763404013006124e-06, "loss": 1.3393, "step": 2295 }, { "epoch": 2.48, "learning_rate": 1.5699163842026166e-06, "loss": 1.4695, "step": 2296 }, { "epoch": 2.49, "learning_rate": 1.5635043684478933e-06, "loss": 1.4767, "step": 2297 }, { "epoch": 2.49, "learning_rate": 1.5571043631647976e-06, "loss": 1.3673, "step": 2298 }, { "epoch": 2.49, "learning_rate": 1.5507163774645862e-06, "loss": 1.454, "step": 2299 }, { "epoch": 2.49, "learning_rate": 1.5443404204414025e-06, "loss": 1.3161, "step": 2300 }, { "epoch": 2.49, "learning_rate": 1.5379765011722758e-06, "loss": 1.6034, "step": 2301 }, { "epoch": 2.49, "learning_rate": 1.531624628717081e-06, "loss": 1.4068, "step": 2302 }, { "epoch": 2.49, "learning_rate": 1.525284812118557e-06, "loss": 1.3525, "step": 2303 }, { "epoch": 2.49, "learning_rate": 1.518957060402274e-06, "loss": 1.4532, "step": 2304 }, { "epoch": 2.49, "learning_rate": 1.512641382576624e-06, "loss": 1.4955, "step": 2305 }, { "epoch": 2.5, "learning_rate": 1.506337787632819e-06, "loss": 1.3729, "step": 2306 }, { "epoch": 2.5, "learning_rate": 1.5000462845448537e-06, "loss": 1.7372, "step": 2307 }, { "epoch": 2.5, "learning_rate": 1.4937668822695294e-06, "loss": 1.6803, "step": 2308 }, { "epoch": 2.5, "learning_rate": 1.4874995897463972e-06, "loss": 1.2643, "step": 2309 }, { "epoch": 2.5, "learning_rate": 1.481244415897789e-06, "loss": 1.6122, "step": 2310 }, { "epoch": 2.5, "learning_rate": 1.4750013696287723e-06, "loss": 1.4969, "step": 2311 }, { "epoch": 2.5, "learning_rate": 1.4687704598271502e-06, "loss": 1.4946, "step": 2312 }, { "epoch": 2.5, "learning_rate": 1.4625516953634567e-06, "loss": 1.4947, "step": 2313 }, { "epoch": 2.5, "learning_rate": 1.456345085090919e-06, "loss": 1.5622, "step": 2314 }, { "epoch": 2.51, "learning_rate": 1.4501506378454787e-06, "loss": 1.0848, "step": 2315 }, { "epoch": 2.51, "learning_rate": 1.44396836244575e-06, "loss": 1.3604, "step": 2316 }, { "epoch": 2.51, "learning_rate": 1.4377982676930235e-06, "loss": 1.521, "step": 2317 }, { "epoch": 2.51, "learning_rate": 1.4316403623712516e-06, "loss": 1.5201, "step": 2318 }, { "epoch": 2.51, "learning_rate": 1.425494655247024e-06, "loss": 1.3448, "step": 2319 }, { "epoch": 2.51, "learning_rate": 1.4193611550695773e-06, "loss": 1.4419, "step": 2320 }, { "epoch": 2.51, "learning_rate": 1.413239870570756e-06, "loss": 1.6954, "step": 2321 }, { "epoch": 2.51, "learning_rate": 1.4071308104650272e-06, "loss": 1.5843, "step": 2322 }, { "epoch": 2.51, "learning_rate": 1.4010339834494468e-06, "loss": 1.7759, "step": 2323 }, { "epoch": 2.52, "learning_rate": 1.3949493982036555e-06, "loss": 1.3358, "step": 2324 }, { "epoch": 2.52, "learning_rate": 1.3888770633898762e-06, "loss": 1.3894, "step": 2325 }, { "epoch": 2.52, "learning_rate": 1.382816987652873e-06, "loss": 1.4051, "step": 2326 }, { "epoch": 2.52, "learning_rate": 1.3767691796199745e-06, "loss": 1.7752, "step": 2327 }, { "epoch": 2.52, "learning_rate": 1.3707336479010381e-06, "loss": 1.4798, "step": 2328 }, { "epoch": 2.52, "learning_rate": 1.3647104010884438e-06, "loss": 1.6275, "step": 2329 }, { "epoch": 2.52, "learning_rate": 1.3586994477570825e-06, "loss": 1.4139, "step": 2330 }, { "epoch": 2.52, "learning_rate": 1.3527007964643436e-06, "loss": 1.5555, "step": 2331 }, { "epoch": 2.52, "learning_rate": 1.3467144557501065e-06, "loss": 1.7837, "step": 2332 }, { "epoch": 2.52, "learning_rate": 1.3407404341367203e-06, "loss": 1.4145, "step": 2333 }, { "epoch": 2.53, "learning_rate": 1.334778740128998e-06, "loss": 1.4779, "step": 2334 }, { "epoch": 2.53, "learning_rate": 1.3288293822142017e-06, "loss": 1.4364, "step": 2335 }, { "epoch": 2.53, "learning_rate": 1.3228923688620342e-06, "loss": 1.4961, "step": 2336 }, { "epoch": 2.53, "learning_rate": 1.3169677085246213e-06, "loss": 1.514, "step": 2337 }, { "epoch": 2.53, "learning_rate": 1.3110554096365003e-06, "loss": 1.5074, "step": 2338 }, { "epoch": 2.53, "learning_rate": 1.3051554806146195e-06, "loss": 1.4463, "step": 2339 }, { "epoch": 2.53, "learning_rate": 1.2992679298583089e-06, "loss": 1.6143, "step": 2340 }, { "epoch": 2.53, "learning_rate": 1.2933927657492774e-06, "loss": 1.3421, "step": 2341 }, { "epoch": 2.53, "learning_rate": 1.287529996651602e-06, "loss": 1.5018, "step": 2342 }, { "epoch": 2.54, "learning_rate": 1.281679630911714e-06, "loss": 1.2337, "step": 2343 }, { "epoch": 2.54, "learning_rate": 1.2758416768583814e-06, "loss": 1.3933, "step": 2344 }, { "epoch": 2.54, "learning_rate": 1.2700161428027124e-06, "loss": 1.1947, "step": 2345 }, { "epoch": 2.54, "learning_rate": 1.2642030370381264e-06, "loss": 1.4681, "step": 2346 }, { "epoch": 2.54, "learning_rate": 1.2584023678403502e-06, "loss": 1.7516, "step": 2347 }, { "epoch": 2.54, "learning_rate": 1.2526141434674076e-06, "loss": 1.3862, "step": 2348 }, { "epoch": 2.54, "learning_rate": 1.2468383721596044e-06, "loss": 1.7484, "step": 2349 }, { "epoch": 2.54, "learning_rate": 1.2410750621395163e-06, "loss": 1.4472, "step": 2350 }, { "epoch": 2.54, "learning_rate": 1.2353242216119798e-06, "loss": 1.3695, "step": 2351 }, { "epoch": 2.55, "learning_rate": 1.229585858764084e-06, "loss": 1.4722, "step": 2352 }, { "epoch": 2.55, "learning_rate": 1.2238599817651486e-06, "loss": 1.3181, "step": 2353 }, { "epoch": 2.55, "learning_rate": 1.2181465987667174e-06, "loss": 1.4107, "step": 2354 }, { "epoch": 2.55, "learning_rate": 1.2124457179025527e-06, "loss": 1.2216, "step": 2355 }, { "epoch": 2.55, "learning_rate": 1.2067573472886108e-06, "loss": 1.526, "step": 2356 }, { "epoch": 2.55, "learning_rate": 1.2010814950230498e-06, "loss": 1.392, "step": 2357 }, { "epoch": 2.55, "learning_rate": 1.195418169186191e-06, "loss": 1.3603, "step": 2358 }, { "epoch": 2.55, "learning_rate": 1.1897673778405372e-06, "loss": 1.3015, "step": 2359 }, { "epoch": 2.55, "learning_rate": 1.1841291290307356e-06, "loss": 1.355, "step": 2360 }, { "epoch": 2.56, "learning_rate": 1.1785034307835853e-06, "loss": 1.5695, "step": 2361 }, { "epoch": 2.56, "learning_rate": 1.172890291108012e-06, "loss": 1.3653, "step": 2362 }, { "epoch": 2.56, "learning_rate": 1.1672897179950648e-06, "loss": 1.6426, "step": 2363 }, { "epoch": 2.56, "learning_rate": 1.161701719417908e-06, "loss": 1.3159, "step": 2364 }, { "epoch": 2.56, "learning_rate": 1.1561263033317926e-06, "loss": 1.5476, "step": 2365 }, { "epoch": 2.56, "learning_rate": 1.1505634776740693e-06, "loss": 1.3, "step": 2366 }, { "epoch": 2.56, "learning_rate": 1.1450132503641564e-06, "loss": 1.3006, "step": 2367 }, { "epoch": 2.56, "learning_rate": 1.1394756293035369e-06, "loss": 1.3904, "step": 2368 }, { "epoch": 2.56, "learning_rate": 1.1339506223757579e-06, "loss": 1.2846, "step": 2369 }, { "epoch": 2.56, "learning_rate": 1.128438237446391e-06, "loss": 1.5087, "step": 2370 }, { "epoch": 2.57, "learning_rate": 1.1229384823630552e-06, "loss": 1.4214, "step": 2371 }, { "epoch": 2.57, "learning_rate": 1.1174513649553742e-06, "loss": 1.2962, "step": 2372 }, { "epoch": 2.57, "learning_rate": 1.111976893034996e-06, "loss": 1.2474, "step": 2373 }, { "epoch": 2.57, "learning_rate": 1.1065150743955544e-06, "loss": 1.4587, "step": 2374 }, { "epoch": 2.57, "learning_rate": 1.1010659168126713e-06, "loss": 1.4724, "step": 2375 }, { "epoch": 2.57, "learning_rate": 1.0956294280439527e-06, "loss": 1.3681, "step": 2376 }, { "epoch": 2.57, "learning_rate": 1.0902056158289542e-06, "loss": 1.5189, "step": 2377 }, { "epoch": 2.57, "learning_rate": 1.084794487889199e-06, "loss": 1.7258, "step": 2378 }, { "epoch": 2.57, "learning_rate": 1.0793960519281433e-06, "loss": 1.4144, "step": 2379 }, { "epoch": 2.58, "learning_rate": 1.0740103156311753e-06, "loss": 1.2554, "step": 2380 }, { "epoch": 2.58, "learning_rate": 1.0686372866656124e-06, "loss": 1.665, "step": 2381 }, { "epoch": 2.58, "learning_rate": 1.063276972680667e-06, "loss": 1.2705, "step": 2382 }, { "epoch": 2.58, "learning_rate": 1.057929381307462e-06, "loss": 1.382, "step": 2383 }, { "epoch": 2.58, "learning_rate": 1.0525945201590037e-06, "loss": 1.3876, "step": 2384 }, { "epoch": 2.58, "learning_rate": 1.0472723968301712e-06, "loss": 1.1645, "step": 2385 }, { "epoch": 2.58, "learning_rate": 1.0419630188977213e-06, "loss": 1.5499, "step": 2386 }, { "epoch": 2.58, "learning_rate": 1.0366663939202515e-06, "loss": 1.3362, "step": 2387 }, { "epoch": 2.58, "learning_rate": 1.031382529438215e-06, "loss": 1.5812, "step": 2388 }, { "epoch": 2.59, "learning_rate": 1.0261114329738897e-06, "loss": 1.2592, "step": 2389 }, { "epoch": 2.59, "learning_rate": 1.0208531120313857e-06, "loss": 1.5211, "step": 2390 }, { "epoch": 2.59, "learning_rate": 1.0156075740966198e-06, "loss": 1.2363, "step": 2391 }, { "epoch": 2.59, "learning_rate": 1.010374826637308e-06, "loss": 1.6139, "step": 2392 }, { "epoch": 2.59, "learning_rate": 1.0051548771029696e-06, "loss": 1.3463, "step": 2393 }, { "epoch": 2.59, "learning_rate": 9.999477329248864e-07, "loss": 1.3339, "step": 2394 }, { "epoch": 2.59, "learning_rate": 9.947534015161254e-07, "loss": 1.374, "step": 2395 }, { "epoch": 2.59, "learning_rate": 9.895718902715057e-07, "loss": 1.0881, "step": 2396 }, { "epoch": 2.59, "learning_rate": 9.844032065675945e-07, "loss": 1.0885, "step": 2397 }, { "epoch": 2.6, "learning_rate": 9.792473577627026e-07, "loss": 1.2909, "step": 2398 }, { "epoch": 2.6, "learning_rate": 9.741043511968605e-07, "loss": 1.6412, "step": 2399 }, { "epoch": 2.6, "learning_rate": 9.689741941918273e-07, "loss": 1.3653, "step": 2400 }, { "epoch": 2.6, "learning_rate": 9.638568940510563e-07, "loss": 1.2859, "step": 2401 }, { "epoch": 2.6, "learning_rate": 9.587524580597086e-07, "loss": 1.4281, "step": 2402 }, { "epoch": 2.6, "learning_rate": 9.536608934846236e-07, "loss": 1.2901, "step": 2403 }, { "epoch": 2.6, "learning_rate": 9.4858220757432e-07, "loss": 1.6859, "step": 2404 }, { "epoch": 2.6, "learning_rate": 9.435164075589832e-07, "loss": 1.6743, "step": 2405 }, { "epoch": 2.6, "learning_rate": 9.384635006504483e-07, "loss": 1.2505, "step": 2406 }, { "epoch": 2.6, "learning_rate": 9.334234940422027e-07, "loss": 1.3083, "step": 2407 }, { "epoch": 2.61, "learning_rate": 9.283963949093655e-07, "loss": 1.3136, "step": 2408 }, { "epoch": 2.61, "learning_rate": 9.233822104086765e-07, "loss": 1.4036, "step": 2409 }, { "epoch": 2.61, "learning_rate": 9.183809476784955e-07, "loss": 1.5002, "step": 2410 }, { "epoch": 2.61, "learning_rate": 9.133926138387827e-07, "loss": 1.5041, "step": 2411 }, { "epoch": 2.61, "learning_rate": 9.084172159910942e-07, "loss": 1.3798, "step": 2412 }, { "epoch": 2.61, "learning_rate": 9.034547612185673e-07, "loss": 1.3899, "step": 2413 }, { "epoch": 2.61, "learning_rate": 8.985052565859198e-07, "loss": 1.549, "step": 2414 }, { "epoch": 2.61, "learning_rate": 8.935687091394251e-07, "loss": 1.3845, "step": 2415 }, { "epoch": 2.61, "learning_rate": 8.886451259069151e-07, "loss": 1.5776, "step": 2416 }, { "epoch": 2.62, "learning_rate": 8.837345138977638e-07, "loss": 1.6024, "step": 2417 }, { "epoch": 2.62, "learning_rate": 8.788368801028801e-07, "loss": 1.3396, "step": 2418 }, { "epoch": 2.62, "learning_rate": 8.739522314946936e-07, "loss": 1.3144, "step": 2419 }, { "epoch": 2.62, "learning_rate": 8.690805750271536e-07, "loss": 1.3015, "step": 2420 }, { "epoch": 2.62, "learning_rate": 8.642219176357081e-07, "loss": 1.369, "step": 2421 }, { "epoch": 2.62, "learning_rate": 8.593762662373018e-07, "loss": 1.195, "step": 2422 }, { "epoch": 2.62, "learning_rate": 8.545436277303609e-07, "loss": 1.522, "step": 2423 }, { "epoch": 2.62, "learning_rate": 8.497240089947901e-07, "loss": 1.5346, "step": 2424 }, { "epoch": 2.62, "learning_rate": 8.449174168919549e-07, "loss": 1.2527, "step": 2425 }, { "epoch": 2.63, "learning_rate": 8.401238582646775e-07, "loss": 1.21, "step": 2426 }, { "epoch": 2.63, "learning_rate": 8.353433399372257e-07, "loss": 1.5814, "step": 2427 }, { "epoch": 2.63, "learning_rate": 8.305758687153032e-07, "loss": 1.2182, "step": 2428 }, { "epoch": 2.63, "learning_rate": 8.258214513860363e-07, "loss": 1.2452, "step": 2429 }, { "epoch": 2.63, "learning_rate": 8.210800947179698e-07, "loss": 1.3678, "step": 2430 }, { "epoch": 2.63, "learning_rate": 8.163518054610531e-07, "loss": 1.4426, "step": 2431 }, { "epoch": 2.63, "learning_rate": 8.116365903466394e-07, "loss": 1.469, "step": 2432 }, { "epoch": 2.63, "learning_rate": 8.069344560874548e-07, "loss": 1.431, "step": 2433 }, { "epoch": 2.63, "learning_rate": 8.022454093776178e-07, "loss": 1.5834, "step": 2434 }, { "epoch": 2.64, "learning_rate": 7.975694568926085e-07, "loss": 1.5362, "step": 2435 }, { "epoch": 2.64, "learning_rate": 7.92906605289262e-07, "loss": 1.5635, "step": 2436 }, { "epoch": 2.64, "learning_rate": 7.882568612057728e-07, "loss": 1.1414, "step": 2437 }, { "epoch": 2.64, "learning_rate": 7.836202312616626e-07, "loss": 1.8295, "step": 2438 }, { "epoch": 2.64, "learning_rate": 7.789967220577965e-07, "loss": 1.2892, "step": 2439 }, { "epoch": 2.64, "learning_rate": 7.743863401763463e-07, "loss": 1.3894, "step": 2440 }, { "epoch": 2.64, "learning_rate": 7.697890921808082e-07, "loss": 1.4946, "step": 2441 }, { "epoch": 2.64, "learning_rate": 7.652049846159726e-07, "loss": 1.5447, "step": 2442 }, { "epoch": 2.64, "learning_rate": 7.606340240079257e-07, "loss": 1.265, "step": 2443 }, { "epoch": 2.65, "learning_rate": 7.56076216864039e-07, "loss": 1.3459, "step": 2444 }, { "epoch": 2.65, "learning_rate": 7.515315696729519e-07, "loss": 1.4567, "step": 2445 }, { "epoch": 2.65, "learning_rate": 7.470000889045758e-07, "loss": 1.2605, "step": 2446 }, { "epoch": 2.65, "learning_rate": 7.424817810100749e-07, "loss": 1.4518, "step": 2447 }, { "epoch": 2.65, "learning_rate": 7.37976652421859e-07, "loss": 1.2771, "step": 2448 }, { "epoch": 2.65, "learning_rate": 7.334847095535813e-07, "loss": 1.2964, "step": 2449 }, { "epoch": 2.65, "learning_rate": 7.290059588001119e-07, "loss": 1.8313, "step": 2450 }, { "epoch": 2.65, "learning_rate": 7.24540406537555e-07, "loss": 1.4013, "step": 2451 }, { "epoch": 2.65, "learning_rate": 7.200880591232084e-07, "loss": 1.4062, "step": 2452 }, { "epoch": 2.65, "learning_rate": 7.156489228955866e-07, "loss": 1.569, "step": 2453 }, { "epoch": 2.66, "learning_rate": 7.112230041743862e-07, "loss": 1.6697, "step": 2454 }, { "epoch": 2.66, "learning_rate": 7.068103092604894e-07, "loss": 1.5741, "step": 2455 }, { "epoch": 2.66, "learning_rate": 7.02410844435959e-07, "loss": 1.3958, "step": 2456 }, { "epoch": 2.66, "learning_rate": 6.980246159640092e-07, "loss": 1.4013, "step": 2457 }, { "epoch": 2.66, "learning_rate": 6.936516300890239e-07, "loss": 1.4053, "step": 2458 }, { "epoch": 2.66, "learning_rate": 6.892918930365289e-07, "loss": 1.4796, "step": 2459 }, { "epoch": 2.66, "learning_rate": 6.84945411013186e-07, "loss": 1.6008, "step": 2460 }, { "epoch": 2.66, "learning_rate": 6.806121902067919e-07, "loss": 1.4403, "step": 2461 }, { "epoch": 2.66, "learning_rate": 6.76292236786259e-07, "loss": 1.3376, "step": 2462 }, { "epoch": 2.67, "learning_rate": 6.719855569016187e-07, "loss": 1.4014, "step": 2463 }, { "epoch": 2.67, "learning_rate": 6.676921566839977e-07, "loss": 1.2418, "step": 2464 }, { "epoch": 2.67, "learning_rate": 6.634120422456225e-07, "loss": 1.3975, "step": 2465 }, { "epoch": 2.67, "learning_rate": 6.591452196798053e-07, "loss": 1.5146, "step": 2466 }, { "epoch": 2.67, "learning_rate": 6.548916950609341e-07, "loss": 1.2588, "step": 2467 }, { "epoch": 2.67, "learning_rate": 6.506514744444658e-07, "loss": 1.678, "step": 2468 }, { "epoch": 2.67, "learning_rate": 6.464245638669154e-07, "loss": 1.5591, "step": 2469 }, { "epoch": 2.67, "learning_rate": 6.422109693458545e-07, "loss": 1.4771, "step": 2470 }, { "epoch": 2.67, "learning_rate": 6.380106968798927e-07, "loss": 1.544, "step": 2471 }, { "epoch": 2.68, "learning_rate": 6.338237524486756e-07, "loss": 1.5229, "step": 2472 }, { "epoch": 2.68, "learning_rate": 6.296501420128753e-07, "loss": 1.2298, "step": 2473 }, { "epoch": 2.68, "learning_rate": 6.254898715141788e-07, "loss": 1.3072, "step": 2474 }, { "epoch": 2.68, "learning_rate": 6.21342946875283e-07, "loss": 1.2121, "step": 2475 }, { "epoch": 2.68, "learning_rate": 6.172093739998897e-07, "loss": 1.452, "step": 2476 }, { "epoch": 2.68, "learning_rate": 6.13089158772685e-07, "loss": 1.2103, "step": 2477 }, { "epoch": 2.68, "learning_rate": 6.089823070593437e-07, "loss": 1.5263, "step": 2478 }, { "epoch": 2.68, "learning_rate": 6.048888247065143e-07, "loss": 1.3849, "step": 2479 }, { "epoch": 2.68, "learning_rate": 6.008087175418131e-07, "loss": 1.3651, "step": 2480 }, { "epoch": 2.69, "learning_rate": 5.967419913738126e-07, "loss": 1.1303, "step": 2481 }, { "epoch": 2.69, "learning_rate": 5.926886519920372e-07, "loss": 1.4894, "step": 2482 }, { "epoch": 2.69, "learning_rate": 5.886487051669554e-07, "loss": 1.5404, "step": 2483 }, { "epoch": 2.69, "learning_rate": 5.846221566499688e-07, "loss": 1.2261, "step": 2484 }, { "epoch": 2.69, "learning_rate": 5.80609012173401e-07, "loss": 1.1054, "step": 2485 }, { "epoch": 2.69, "learning_rate": 5.766092774504983e-07, "loss": 1.3206, "step": 2486 }, { "epoch": 2.69, "learning_rate": 5.726229581754117e-07, "loss": 1.7983, "step": 2487 }, { "epoch": 2.69, "learning_rate": 5.686500600232003e-07, "loss": 1.523, "step": 2488 }, { "epoch": 2.69, "learning_rate": 5.646905886498055e-07, "loss": 1.2592, "step": 2489 }, { "epoch": 2.69, "learning_rate": 5.607445496920661e-07, "loss": 1.637, "step": 2490 }, { "epoch": 2.7, "learning_rate": 5.568119487676904e-07, "loss": 1.462, "step": 2491 }, { "epoch": 2.7, "learning_rate": 5.528927914752579e-07, "loss": 1.4331, "step": 2492 }, { "epoch": 2.7, "learning_rate": 5.489870833942102e-07, "loss": 1.3316, "step": 2493 }, { "epoch": 2.7, "learning_rate": 5.450948300848379e-07, "loss": 1.522, "step": 2494 }, { "epoch": 2.7, "learning_rate": 5.412160370882868e-07, "loss": 1.5579, "step": 2495 }, { "epoch": 2.7, "learning_rate": 5.373507099265274e-07, "loss": 1.5086, "step": 2496 }, { "epoch": 2.7, "learning_rate": 5.3349885410237e-07, "loss": 1.3342, "step": 2497 }, { "epoch": 2.7, "learning_rate": 5.296604750994416e-07, "loss": 1.2813, "step": 2498 }, { "epoch": 2.7, "learning_rate": 5.258355783821822e-07, "loss": 1.0772, "step": 2499 }, { "epoch": 2.71, "learning_rate": 5.22024169395845e-07, "loss": 1.2934, "step": 2500 }, { "epoch": 2.71, "learning_rate": 5.182262535664706e-07, "loss": 1.3594, "step": 2501 }, { "epoch": 2.71, "learning_rate": 5.14441836300903e-07, "loss": 1.4169, "step": 2502 }, { "epoch": 2.71, "learning_rate": 5.106709229867568e-07, "loss": 1.397, "step": 2503 }, { "epoch": 2.71, "learning_rate": 5.069135189924312e-07, "loss": 1.371, "step": 2504 }, { "epoch": 2.71, "learning_rate": 5.031696296670885e-07, "loss": 1.073, "step": 2505 }, { "epoch": 2.71, "learning_rate": 4.994392603406507e-07, "loss": 1.3909, "step": 2506 }, { "epoch": 2.71, "learning_rate": 4.957224163237995e-07, "loss": 1.5981, "step": 2507 }, { "epoch": 2.71, "learning_rate": 4.92019102907948e-07, "loss": 1.2035, "step": 2508 }, { "epoch": 2.72, "learning_rate": 4.883293253652599e-07, "loss": 1.3625, "step": 2509 }, { "epoch": 2.72, "learning_rate": 4.846530889486222e-07, "loss": 1.4266, "step": 2510 }, { "epoch": 2.72, "learning_rate": 4.809903988916431e-07, "loss": 1.4738, "step": 2511 }, { "epoch": 2.72, "learning_rate": 4.773412604086536e-07, "loss": 1.2482, "step": 2512 }, { "epoch": 2.72, "learning_rate": 4.737056786946803e-07, "loss": 1.403, "step": 2513 }, { "epoch": 2.72, "learning_rate": 4.7008365892546314e-07, "loss": 1.1934, "step": 2514 }, { "epoch": 2.72, "learning_rate": 4.6647520625742184e-07, "loss": 1.4173, "step": 2515 }, { "epoch": 2.72, "learning_rate": 4.628803258276715e-07, "loss": 1.4158, "step": 2516 }, { "epoch": 2.72, "learning_rate": 4.592990227540006e-07, "loss": 1.2161, "step": 2517 }, { "epoch": 2.73, "learning_rate": 4.557313021348697e-07, "loss": 1.5801, "step": 2518 }, { "epoch": 2.73, "learning_rate": 4.521771690494048e-07, "loss": 1.6454, "step": 2519 }, { "epoch": 2.73, "learning_rate": 4.486366285573818e-07, "loss": 1.2621, "step": 2520 }, { "epoch": 2.73, "learning_rate": 4.451096856992343e-07, "loss": 1.5782, "step": 2521 }, { "epoch": 2.73, "learning_rate": 4.4159634549603145e-07, "loss": 1.3241, "step": 2522 }, { "epoch": 2.73, "learning_rate": 4.3809661294948124e-07, "loss": 1.5076, "step": 2523 }, { "epoch": 2.73, "learning_rate": 4.3461049304191483e-07, "loss": 1.4416, "step": 2524 }, { "epoch": 2.73, "learning_rate": 4.3113799073628894e-07, "loss": 1.2328, "step": 2525 }, { "epoch": 2.73, "learning_rate": 4.276791109761713e-07, "loss": 1.3776, "step": 2526 }, { "epoch": 2.73, "learning_rate": 4.2423385868573643e-07, "loss": 1.5137, "step": 2527 }, { "epoch": 2.74, "learning_rate": 4.208022387697586e-07, "loss": 1.4593, "step": 2528 }, { "epoch": 2.74, "learning_rate": 4.1738425611360435e-07, "loss": 1.515, "step": 2529 }, { "epoch": 2.74, "learning_rate": 4.139799155832247e-07, "loss": 1.758, "step": 2530 }, { "epoch": 2.74, "learning_rate": 4.105892220251517e-07, "loss": 1.6762, "step": 2531 }, { "epoch": 2.74, "learning_rate": 4.0721218026648633e-07, "loss": 1.5668, "step": 2532 }, { "epoch": 2.74, "learning_rate": 4.038487951148973e-07, "loss": 1.3416, "step": 2533 }, { "epoch": 2.74, "learning_rate": 4.0049907135860986e-07, "loss": 1.146, "step": 2534 }, { "epoch": 2.74, "learning_rate": 3.9716301376639955e-07, "loss": 1.3017, "step": 2535 }, { "epoch": 2.74, "learning_rate": 3.938406270875883e-07, "loss": 1.5709, "step": 2536 }, { "epoch": 2.75, "learning_rate": 3.905319160520349e-07, "loss": 1.3528, "step": 2537 }, { "epoch": 2.75, "learning_rate": 3.872368853701258e-07, "loss": 1.6733, "step": 2538 }, { "epoch": 2.75, "learning_rate": 3.8395553973277876e-07, "loss": 1.1098, "step": 2539 }, { "epoch": 2.75, "learning_rate": 3.806878838114225e-07, "loss": 1.2154, "step": 2540 }, { "epoch": 2.75, "learning_rate": 3.7743392225800036e-07, "loss": 1.3542, "step": 2541 }, { "epoch": 2.75, "learning_rate": 3.741936597049578e-07, "loss": 1.4238, "step": 2542 }, { "epoch": 2.75, "learning_rate": 3.709671007652393e-07, "loss": 1.2855, "step": 2543 }, { "epoch": 2.75, "learning_rate": 3.6775425003227725e-07, "loss": 1.4152, "step": 2544 }, { "epoch": 2.75, "learning_rate": 3.6455511207999504e-07, "loss": 1.1686, "step": 2545 }, { "epoch": 2.76, "learning_rate": 3.6136969146278953e-07, "loss": 1.4802, "step": 2546 }, { "epoch": 2.76, "learning_rate": 3.581979927155288e-07, "loss": 1.4657, "step": 2547 }, { "epoch": 2.76, "learning_rate": 3.550400203535476e-07, "loss": 1.3599, "step": 2548 }, { "epoch": 2.76, "learning_rate": 3.518957788726374e-07, "loss": 1.348, "step": 2549 }, { "epoch": 2.76, "learning_rate": 3.487652727490454e-07, "loss": 1.685, "step": 2550 }, { "epoch": 2.76, "learning_rate": 3.4564850643946214e-07, "loss": 1.4447, "step": 2551 }, { "epoch": 2.76, "learning_rate": 3.425454843810183e-07, "loss": 1.5913, "step": 2552 }, { "epoch": 2.76, "learning_rate": 3.394562109912769e-07, "loss": 1.3624, "step": 2553 }, { "epoch": 2.76, "learning_rate": 3.36380690668231e-07, "loss": 1.2554, "step": 2554 }, { "epoch": 2.77, "learning_rate": 3.333189277902893e-07, "loss": 1.3403, "step": 2555 }, { "epoch": 2.77, "learning_rate": 3.3027092671627957e-07, "loss": 1.3742, "step": 2556 }, { "epoch": 2.77, "learning_rate": 3.2723669178543414e-07, "loss": 1.5359, "step": 2557 }, { "epoch": 2.77, "learning_rate": 3.242162273173921e-07, "loss": 1.4275, "step": 2558 }, { "epoch": 2.77, "learning_rate": 3.2120953761218376e-07, "loss": 1.4927, "step": 2559 }, { "epoch": 2.77, "learning_rate": 3.182166269502307e-07, "loss": 1.6279, "step": 2560 }, { "epoch": 2.77, "learning_rate": 3.152374995923413e-07, "loss": 1.4738, "step": 2561 }, { "epoch": 2.77, "learning_rate": 3.1227215977969407e-07, "loss": 1.6822, "step": 2562 }, { "epoch": 2.77, "learning_rate": 3.093206117338499e-07, "loss": 1.4295, "step": 2563 }, { "epoch": 2.77, "learning_rate": 3.063828596567242e-07, "loss": 1.3922, "step": 2564 }, { "epoch": 2.78, "learning_rate": 3.034589077306005e-07, "loss": 1.5716, "step": 2565 }, { "epoch": 2.78, "learning_rate": 3.005487601181112e-07, "loss": 1.3622, "step": 2566 }, { "epoch": 2.78, "learning_rate": 2.976524209622367e-07, "loss": 1.4171, "step": 2567 }, { "epoch": 2.78, "learning_rate": 2.947698943863031e-07, "loss": 1.5839, "step": 2568 }, { "epoch": 2.78, "learning_rate": 2.9190118449396565e-07, "loss": 1.3866, "step": 2569 }, { "epoch": 2.78, "learning_rate": 2.8904629536921856e-07, "loss": 1.3458, "step": 2570 }, { "epoch": 2.78, "learning_rate": 2.8620523107637186e-07, "loss": 1.652, "step": 2571 }, { "epoch": 2.78, "learning_rate": 2.8337799566005907e-07, "loss": 1.7191, "step": 2572 }, { "epoch": 2.78, "learning_rate": 2.805645931452261e-07, "loss": 1.5986, "step": 2573 }, { "epoch": 2.79, "learning_rate": 2.7776502753712243e-07, "loss": 1.5402, "step": 2574 }, { "epoch": 2.79, "learning_rate": 2.749793028213055e-07, "loss": 1.4443, "step": 2575 }, { "epoch": 2.79, "learning_rate": 2.7220742296361845e-07, "loss": 1.2336, "step": 2576 }, { "epoch": 2.79, "learning_rate": 2.694493919102059e-07, "loss": 1.4248, "step": 2577 }, { "epoch": 2.79, "learning_rate": 2.667052135874881e-07, "loss": 1.492, "step": 2578 }, { "epoch": 2.79, "learning_rate": 2.6397489190216786e-07, "loss": 1.4607, "step": 2579 }, { "epoch": 2.79, "learning_rate": 2.6125843074122246e-07, "loss": 1.3069, "step": 2580 }, { "epoch": 2.79, "learning_rate": 2.5855583397189077e-07, "loss": 1.2833, "step": 2581 }, { "epoch": 2.79, "learning_rate": 2.558671054416839e-07, "loss": 1.4302, "step": 2582 }, { "epoch": 2.8, "learning_rate": 2.531922489783578e-07, "loss": 1.2546, "step": 2583 }, { "epoch": 2.8, "learning_rate": 2.505312683899297e-07, "loss": 1.5225, "step": 2584 }, { "epoch": 2.8, "learning_rate": 2.4788416746465813e-07, "loss": 1.5358, "step": 2585 }, { "epoch": 2.8, "learning_rate": 2.452509499710409e-07, "loss": 1.1396, "step": 2586 }, { "epoch": 2.8, "learning_rate": 2.42631619657816e-07, "loss": 1.3756, "step": 2587 }, { "epoch": 2.8, "learning_rate": 2.4002618025394495e-07, "loss": 1.4283, "step": 2588 }, { "epoch": 2.8, "learning_rate": 2.3743463546861744e-07, "loss": 1.2582, "step": 2589 }, { "epoch": 2.8, "learning_rate": 2.3485698899124333e-07, "loss": 1.4219, "step": 2590 }, { "epoch": 2.8, "learning_rate": 2.3229324449144165e-07, "loss": 1.5266, "step": 2591 }, { "epoch": 2.81, "learning_rate": 2.2974340561904506e-07, "loss": 1.3222, "step": 2592 }, { "epoch": 2.81, "learning_rate": 2.2720747600408655e-07, "loss": 1.581, "step": 2593 }, { "epoch": 2.81, "learning_rate": 2.2468545925680152e-07, "loss": 1.6482, "step": 2594 }, { "epoch": 2.81, "learning_rate": 2.2217735896761128e-07, "loss": 1.3587, "step": 2595 }, { "epoch": 2.81, "learning_rate": 2.1968317870713186e-07, "loss": 1.4174, "step": 2596 }, { "epoch": 2.81, "learning_rate": 2.1720292202616066e-07, "loss": 1.2659, "step": 2597 }, { "epoch": 2.81, "learning_rate": 2.14736592455671e-07, "loss": 1.4968, "step": 2598 }, { "epoch": 2.81, "learning_rate": 2.1228419350681206e-07, "loss": 1.4472, "step": 2599 }, { "epoch": 2.81, "learning_rate": 2.0984572867089658e-07, "loss": 1.3207, "step": 2600 }, { "epoch": 2.81, "learning_rate": 2.0742120141940548e-07, "loss": 1.7412, "step": 2601 }, { "epoch": 2.82, "learning_rate": 2.0501061520397547e-07, "loss": 1.4375, "step": 2602 }, { "epoch": 2.82, "learning_rate": 2.0261397345639366e-07, "loss": 1.5255, "step": 2603 }, { "epoch": 2.82, "learning_rate": 2.0023127958860078e-07, "loss": 1.6913, "step": 2604 }, { "epoch": 2.82, "learning_rate": 1.9786253699267567e-07, "loss": 1.4362, "step": 2605 }, { "epoch": 2.82, "learning_rate": 1.9550774904083747e-07, "loss": 1.3471, "step": 2606 }, { "epoch": 2.82, "learning_rate": 1.9316691908544127e-07, "loss": 1.363, "step": 2607 }, { "epoch": 2.82, "learning_rate": 1.9084005045896803e-07, "loss": 1.6219, "step": 2608 }, { "epoch": 2.82, "learning_rate": 1.8852714647402571e-07, "loss": 1.548, "step": 2609 }, { "epoch": 2.82, "learning_rate": 1.8622821042333927e-07, "loss": 1.2939, "step": 2610 }, { "epoch": 2.83, "learning_rate": 1.8394324557974962e-07, "loss": 1.4058, "step": 2611 }, { "epoch": 2.83, "learning_rate": 1.816722551962069e-07, "loss": 1.7237, "step": 2612 }, { "epoch": 2.83, "learning_rate": 1.7941524250576602e-07, "loss": 1.7418, "step": 2613 }, { "epoch": 2.83, "learning_rate": 1.7717221072158786e-07, "loss": 1.4954, "step": 2614 }, { "epoch": 2.83, "learning_rate": 1.7494316303692472e-07, "loss": 1.3937, "step": 2615 }, { "epoch": 2.83, "learning_rate": 1.727281026251204e-07, "loss": 1.6045, "step": 2616 }, { "epoch": 2.83, "learning_rate": 1.7052703263960912e-07, "loss": 1.2003, "step": 2617 }, { "epoch": 2.83, "learning_rate": 1.683399562139043e-07, "loss": 1.571, "step": 2618 }, { "epoch": 2.83, "learning_rate": 1.6616687646160202e-07, "loss": 1.5339, "step": 2619 }, { "epoch": 2.84, "learning_rate": 1.6400779647636754e-07, "loss": 1.3924, "step": 2620 }, { "epoch": 2.84, "learning_rate": 1.6186271933193997e-07, "loss": 1.2853, "step": 2621 }, { "epoch": 2.84, "learning_rate": 1.5973164808212094e-07, "loss": 1.3534, "step": 2622 }, { "epoch": 2.84, "learning_rate": 1.5761458576077138e-07, "loss": 1.3648, "step": 2623 }, { "epoch": 2.84, "learning_rate": 1.5551153538181374e-07, "loss": 1.5253, "step": 2624 }, { "epoch": 2.84, "learning_rate": 1.5342249993921532e-07, "loss": 1.4273, "step": 2625 }, { "epoch": 2.84, "learning_rate": 1.5134748240700048e-07, "loss": 1.5722, "step": 2626 }, { "epoch": 2.84, "learning_rate": 1.4928648573922732e-07, "loss": 1.4696, "step": 2627 }, { "epoch": 2.84, "learning_rate": 1.4723951287000214e-07, "loss": 1.5012, "step": 2628 }, { "epoch": 2.85, "learning_rate": 1.4520656671346056e-07, "loss": 1.505, "step": 2629 }, { "epoch": 2.85, "learning_rate": 1.4318765016377078e-07, "loss": 1.5803, "step": 2630 }, { "epoch": 2.85, "learning_rate": 1.4118276609513038e-07, "loss": 1.4299, "step": 2631 }, { "epoch": 2.85, "learning_rate": 1.391919173617562e-07, "loss": 1.3572, "step": 2632 }, { "epoch": 2.85, "learning_rate": 1.3721510679788774e-07, "loss": 1.3128, "step": 2633 }, { "epoch": 2.85, "learning_rate": 1.3525233721777498e-07, "loss": 1.2744, "step": 2634 }, { "epoch": 2.85, "learning_rate": 1.3330361141568383e-07, "loss": 1.2885, "step": 2635 }, { "epoch": 2.85, "learning_rate": 1.3136893216588175e-07, "loss": 1.2948, "step": 2636 }, { "epoch": 2.85, "learning_rate": 1.2944830222264115e-07, "loss": 1.5285, "step": 2637 }, { "epoch": 2.85, "learning_rate": 1.2754172432023703e-07, "loss": 1.3903, "step": 2638 }, { "epoch": 2.86, "learning_rate": 1.2564920117293266e-07, "loss": 1.3907, "step": 2639 }, { "epoch": 2.86, "learning_rate": 1.237707354749884e-07, "loss": 1.3469, "step": 2640 }, { "epoch": 2.86, "learning_rate": 1.2190632990064734e-07, "loss": 1.4327, "step": 2641 }, { "epoch": 2.86, "learning_rate": 1.2005598710414067e-07, "loss": 1.1509, "step": 2642 }, { "epoch": 2.86, "learning_rate": 1.1821970971967579e-07, "loss": 1.2846, "step": 2643 }, { "epoch": 2.86, "learning_rate": 1.1639750036143704e-07, "loss": 1.5876, "step": 2644 }, { "epoch": 2.86, "learning_rate": 1.145893616235827e-07, "loss": 1.386, "step": 2645 }, { "epoch": 2.86, "learning_rate": 1.1279529608023698e-07, "loss": 1.4907, "step": 2646 }, { "epoch": 2.86, "learning_rate": 1.1101530628549128e-07, "loss": 1.4283, "step": 2647 }, { "epoch": 2.87, "learning_rate": 1.0924939477339635e-07, "loss": 1.48, "step": 2648 }, { "epoch": 2.87, "learning_rate": 1.074975640579623e-07, "loss": 1.4688, "step": 2649 }, { "epoch": 2.87, "learning_rate": 1.0575981663315416e-07, "loss": 1.3441, "step": 2650 }, { "epoch": 2.87, "learning_rate": 1.04036154972883e-07, "loss": 1.2524, "step": 2651 }, { "epoch": 2.87, "learning_rate": 1.0232658153101261e-07, "loss": 1.5244, "step": 2652 }, { "epoch": 2.87, "learning_rate": 1.0063109874134724e-07, "loss": 1.3212, "step": 2653 }, { "epoch": 2.87, "learning_rate": 9.894970901763057e-08, "loss": 1.514, "step": 2654 }, { "epoch": 2.87, "learning_rate": 9.728241475354561e-08, "loss": 1.1713, "step": 2655 }, { "epoch": 2.87, "learning_rate": 9.562921832270588e-08, "loss": 1.337, "step": 2656 }, { "epoch": 2.88, "learning_rate": 9.399012207865765e-08, "loss": 1.5239, "step": 2657 }, { "epoch": 2.88, "learning_rate": 9.236512835486989e-08, "loss": 1.5626, "step": 2658 }, { "epoch": 2.88, "learning_rate": 9.075423946473871e-08, "loss": 1.5068, "step": 2659 }, { "epoch": 2.88, "learning_rate": 8.915745770157747e-08, "loss": 1.7189, "step": 2660 }, { "epoch": 2.88, "learning_rate": 8.757478533861663e-08, "loss": 1.6673, "step": 2661 }, { "epoch": 2.88, "learning_rate": 8.600622462900165e-08, "loss": 1.3928, "step": 2662 }, { "epoch": 2.88, "learning_rate": 8.445177780578517e-08, "loss": 1.3171, "step": 2663 }, { "epoch": 2.88, "learning_rate": 8.291144708193033e-08, "loss": 1.2642, "step": 2664 }, { "epoch": 2.88, "learning_rate": 8.138523465030191e-08, "loss": 1.3229, "step": 2665 }, { "epoch": 2.89, "learning_rate": 7.98731426836663e-08, "loss": 1.3029, "step": 2666 }, { "epoch": 2.89, "learning_rate": 7.837517333468603e-08, "loss": 1.2242, "step": 2667 }, { "epoch": 2.89, "learning_rate": 7.689132873592076e-08, "loss": 1.6488, "step": 2668 }, { "epoch": 2.89, "learning_rate": 7.542161099981849e-08, "loss": 1.8174, "step": 2669 }, { "epoch": 2.89, "learning_rate": 7.396602221871885e-08, "loss": 1.3012, "step": 2670 }, { "epoch": 2.89, "learning_rate": 7.252456446484534e-08, "loss": 1.409, "step": 2671 }, { "epoch": 2.89, "learning_rate": 7.109723979030536e-08, "loss": 1.4102, "step": 2672 }, { "epoch": 2.89, "learning_rate": 6.968405022708347e-08, "loss": 1.3756, "step": 2673 }, { "epoch": 2.89, "learning_rate": 6.828499778704367e-08, "loss": 1.244, "step": 2674 }, { "epoch": 2.9, "learning_rate": 6.690008446192276e-08, "loss": 1.2074, "step": 2675 }, { "epoch": 2.9, "learning_rate": 6.552931222332803e-08, "loss": 1.4653, "step": 2676 }, { "epoch": 2.9, "learning_rate": 6.417268302273739e-08, "loss": 1.408, "step": 2677 }, { "epoch": 2.9, "learning_rate": 6.283019879149144e-08, "loss": 1.7122, "step": 2678 }, { "epoch": 2.9, "learning_rate": 6.150186144079473e-08, "loss": 1.217, "step": 2679 }, { "epoch": 2.9, "learning_rate": 6.018767286171234e-08, "loss": 1.5713, "step": 2680 }, { "epoch": 2.9, "learning_rate": 5.888763492516436e-08, "loss": 1.4467, "step": 2681 }, { "epoch": 2.9, "learning_rate": 5.760174948193031e-08, "loss": 1.7328, "step": 2682 }, { "epoch": 2.9, "learning_rate": 5.633001836263696e-08, "loss": 1.3985, "step": 2683 }, { "epoch": 2.9, "learning_rate": 5.507244337776274e-08, "loss": 1.6614, "step": 2684 }, { "epoch": 2.91, "learning_rate": 5.382902631763331e-08, "loss": 1.5567, "step": 2685 }, { "epoch": 2.91, "learning_rate": 5.259976895241714e-08, "loss": 1.4546, "step": 2686 }, { "epoch": 2.91, "learning_rate": 5.138467303212546e-08, "loss": 1.6378, "step": 2687 }, { "epoch": 2.91, "learning_rate": 5.018374028660788e-08, "loss": 1.4673, "step": 2688 }, { "epoch": 2.91, "learning_rate": 4.899697242555346e-08, "loss": 1.2791, "step": 2689 }, { "epoch": 2.91, "learning_rate": 4.7824371138481815e-08, "loss": 1.4178, "step": 2690 }, { "epoch": 2.91, "learning_rate": 4.666593809474762e-08, "loss": 1.4014, "step": 2691 }, { "epoch": 2.91, "learning_rate": 4.5521674943534985e-08, "loss": 1.1117, "step": 2692 }, { "epoch": 2.91, "learning_rate": 4.439158331385196e-08, "loss": 1.4885, "step": 2693 }, { "epoch": 2.92, "learning_rate": 4.327566481453715e-08, "loss": 1.2032, "step": 2694 }, { "epoch": 2.92, "learning_rate": 4.2173921034246444e-08, "loss": 1.566, "step": 2695 }, { "epoch": 2.92, "learning_rate": 4.108635354145851e-08, "loss": 1.3328, "step": 2696 }, { "epoch": 2.92, "learning_rate": 4.001296388447151e-08, "loss": 1.3768, "step": 2697 }, { "epoch": 2.92, "learning_rate": 3.8953753591396415e-08, "loss": 1.5365, "step": 2698 }, { "epoch": 2.92, "learning_rate": 3.790872417016034e-08, "loss": 1.4503, "step": 2699 }, { "epoch": 2.92, "learning_rate": 3.68778771085021e-08, "loss": 1.0304, "step": 2700 }, { "epoch": 2.92, "learning_rate": 3.5861213873968904e-08, "loss": 1.6038, "step": 2701 }, { "epoch": 2.92, "learning_rate": 3.48587359139152e-08, "loss": 1.601, "step": 2702 }, { "epoch": 2.93, "learning_rate": 3.38704446555016e-08, "loss": 1.4354, "step": 2703 }, { "epoch": 2.93, "learning_rate": 3.289634150569376e-08, "loss": 1.4827, "step": 2704 }, { "epoch": 2.93, "learning_rate": 3.1936427851253503e-08, "loss": 1.0858, "step": 2705 }, { "epoch": 2.93, "learning_rate": 3.0990705058748794e-08, "loss": 1.3856, "step": 2706 }, { "epoch": 2.93, "learning_rate": 3.0059174474539324e-08, "loss": 1.4821, "step": 2707 }, { "epoch": 2.93, "learning_rate": 2.914183742478427e-08, "loss": 1.5497, "step": 2708 }, { "epoch": 2.93, "learning_rate": 2.8238695215432323e-08, "loss": 1.5207, "step": 2709 }, { "epoch": 2.93, "learning_rate": 2.734974913222943e-08, "loss": 1.5781, "step": 2710 }, { "epoch": 2.93, "learning_rate": 2.64750004407055e-08, "loss": 1.6297, "step": 2711 }, { "epoch": 2.94, "learning_rate": 2.5614450386182155e-08, "loss": 1.5763, "step": 2712 }, { "epoch": 2.94, "learning_rate": 2.4768100193768295e-08, "loss": 1.3805, "step": 2713 }, { "epoch": 2.94, "learning_rate": 2.3935951068353446e-08, "loss": 1.5306, "step": 2714 }, { "epoch": 2.94, "learning_rate": 2.3118004194614406e-08, "loss": 1.4369, "step": 2715 }, { "epoch": 2.94, "learning_rate": 2.2314260737006376e-08, "loss": 1.2034, "step": 2716 }, { "epoch": 2.94, "learning_rate": 2.152472183976406e-08, "loss": 1.5095, "step": 2717 }, { "epoch": 2.94, "learning_rate": 2.074938862690279e-08, "loss": 1.4576, "step": 2718 }, { "epoch": 2.94, "learning_rate": 1.998826220220962e-08, "loss": 1.2408, "step": 2719 }, { "epoch": 2.94, "learning_rate": 1.924134364925112e-08, "loss": 1.4286, "step": 2720 }, { "epoch": 2.94, "learning_rate": 1.850863403136449e-08, "loss": 1.3504, "step": 2721 }, { "epoch": 2.95, "learning_rate": 1.7790134391659775e-08, "loss": 1.3752, "step": 2722 }, { "epoch": 2.95, "learning_rate": 1.708584575301542e-08, "loss": 1.4447, "step": 2723 }, { "epoch": 2.95, "learning_rate": 1.6395769118080495e-08, "loss": 1.2743, "step": 2724 }, { "epoch": 2.95, "learning_rate": 1.571990546927138e-08, "loss": 1.3814, "step": 2725 }, { "epoch": 2.95, "learning_rate": 1.5058255768767295e-08, "loss": 1.3547, "step": 2726 }, { "epoch": 2.95, "learning_rate": 1.441082095851698e-08, "loss": 1.4415, "step": 2727 }, { "epoch": 2.95, "learning_rate": 1.3777601960229814e-08, "loss": 1.4448, "step": 2728 }, { "epoch": 2.95, "learning_rate": 1.3158599675374695e-08, "loss": 1.4281, "step": 2729 }, { "epoch": 2.95, "learning_rate": 1.2553814985186707e-08, "loss": 1.493, "step": 2730 }, { "epoch": 2.96, "learning_rate": 1.19632487506538e-08, "loss": 1.4782, "step": 2731 }, { "epoch": 2.96, "learning_rate": 1.1386901812527883e-08, "loss": 1.348, "step": 2732 }, { "epoch": 2.96, "learning_rate": 1.0824774991314845e-08, "loss": 1.3377, "step": 2733 }, { "epoch": 2.96, "learning_rate": 1.0276869087276764e-08, "loss": 1.4975, "step": 2734 }, { "epoch": 2.96, "learning_rate": 9.74318488042969e-09, "loss": 1.0908, "step": 2735 }, { "epoch": 2.96, "learning_rate": 9.223723130544759e-09, "loss": 1.5807, "step": 2736 }, { "epoch": 2.96, "learning_rate": 8.718484577144859e-09, "loss": 1.3614, "step": 2737 }, { "epoch": 2.96, "learning_rate": 8.227469939503518e-09, "loss": 1.6107, "step": 2738 }, { "epoch": 2.96, "learning_rate": 7.75067991664602e-09, "loss": 1.3086, "step": 2739 }, { "epoch": 2.97, "learning_rate": 7.288115187344957e-09, "loss": 1.4008, "step": 2740 }, { "epoch": 2.97, "learning_rate": 6.839776410124677e-09, "loss": 1.4084, "step": 2741 }, { "epoch": 2.97, "learning_rate": 6.4056642232523945e-09, "loss": 1.5999, "step": 2742 }, { "epoch": 2.97, "learning_rate": 5.985779244747081e-09, "loss": 1.732, "step": 2743 }, { "epoch": 2.97, "learning_rate": 5.5801220723683545e-09, "loss": 1.4423, "step": 2744 }, { "epoch": 2.97, "learning_rate": 5.1886932836253675e-09, "loss": 1.5881, "step": 2745 }, { "epoch": 2.97, "learning_rate": 4.811493435766812e-09, "loss": 1.3558, "step": 2746 }, { "epoch": 2.97, "learning_rate": 4.44852306578869e-09, "loss": 1.3489, "step": 2747 }, { "epoch": 2.97, "learning_rate": 4.099782690425435e-09, "loss": 1.2391, "step": 2748 }, { "epoch": 2.98, "learning_rate": 3.7652728061576824e-09, "loss": 1.8044, "step": 2749 }, { "epoch": 2.98, "learning_rate": 3.444993889202275e-09, "loss": 1.3535, "step": 2750 }, { "epoch": 2.98, "learning_rate": 3.1389463955200373e-09, "loss": 1.3158, "step": 2751 }, { "epoch": 2.98, "learning_rate": 2.8471307608102238e-09, "loss": 1.4599, "step": 2752 }, { "epoch": 2.98, "learning_rate": 2.5695474005116295e-09, "loss": 1.2495, "step": 2753 }, { "epoch": 2.98, "learning_rate": 2.3061967097992575e-09, "loss": 1.6066, "step": 2754 }, { "epoch": 2.98, "learning_rate": 2.057079063589873e-09, "loss": 1.5093, "step": 2755 }, { "epoch": 2.98, "learning_rate": 1.8221948165342285e-09, "loss": 1.4025, "step": 2756 }, { "epoch": 2.98, "learning_rate": 1.6015443030215073e-09, "loss": 1.3788, "step": 2757 }, { "epoch": 2.98, "learning_rate": 1.3951278371759913e-09, "loss": 1.4646, "step": 2758 }, { "epoch": 2.99, "learning_rate": 1.2029457128615029e-09, "loss": 1.353, "step": 2759 }, { "epoch": 2.99, "learning_rate": 1.0249982036725226e-09, "loss": 1.4964, "step": 2760 }, { "epoch": 2.99, "learning_rate": 8.612855629419603e-10, "loss": 1.3455, "step": 2761 }, { "epoch": 2.99, "learning_rate": 7.11808023735605e-10, "loss": 1.7719, "step": 2762 }, { "epoch": 2.99, "learning_rate": 5.765657988554551e-10, "loss": 1.4145, "step": 2763 }, { "epoch": 2.99, "learning_rate": 4.555590808374977e-10, "loss": 1.4484, "step": 2764 }, { "epoch": 2.99, "learning_rate": 3.4878804194948825e-10, "loss": 1.3246, "step": 2765 }, { "epoch": 2.99, "learning_rate": 2.562528341942816e-10, "loss": 1.4381, "step": 2766 }, { "epoch": 2.99, "learning_rate": 1.779535893076112e-10, "loss": 1.2841, "step": 2767 }, { "epoch": 3.0, "learning_rate": 1.1389041876030959e-10, "loss": 1.1594, "step": 2768 }, { "epoch": 3.0, "learning_rate": 6.406341375497782e-11, "loss": 1.278, "step": 2769 }, { "epoch": 3.0, "learning_rate": 2.847264522487514e-11, "loss": 1.2287, "step": 2770 }, { "epoch": 3.0, "learning_rate": 7.118163839470171e-12, "loss": 1.2849, "step": 2771 }, { "epoch": 3.0, "learning_rate": 0.0, "loss": 1.6482, "step": 2772 } ], "logging_steps": 1.0, "max_steps": 2772, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 3.786131489826013e+18, "train_batch_size": 4, "trial_name": null, "trial_params": null }