{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "eval_steps": 500, "global_step": 1630, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.0816326530612243e-07, "loss": 11.0156, "step": 1 }, { "epoch": 0.01, "learning_rate": 8.163265306122449e-07, "loss": 10.5312, "step": 2 }, { "epoch": 0.02, "learning_rate": 1.2244897959183673e-06, "loss": 10.9531, "step": 3 }, { "epoch": 0.02, "learning_rate": 1.6326530612244897e-06, "loss": 10.4062, "step": 4 }, { "epoch": 0.03, "learning_rate": 2.0408163265306125e-06, "loss": 10.5156, "step": 5 }, { "epoch": 0.04, "learning_rate": 2.4489795918367347e-06, "loss": 10.5156, "step": 6 }, { "epoch": 0.04, "learning_rate": 2.8571428571428573e-06, "loss": 9.8281, "step": 7 }, { "epoch": 0.05, "learning_rate": 3.2653061224489794e-06, "loss": 8.9531, "step": 8 }, { "epoch": 0.06, "learning_rate": 3.6734693877551024e-06, "loss": 9.0156, "step": 9 }, { "epoch": 0.06, "learning_rate": 4.081632653061225e-06, "loss": 8.9062, "step": 10 }, { "epoch": 0.07, "learning_rate": 4.489795918367348e-06, "loss": 8.0781, "step": 11 }, { "epoch": 0.07, "learning_rate": 4.897959183673469e-06, "loss": 7.625, "step": 12 }, { "epoch": 0.08, "learning_rate": 5.306122448979593e-06, "loss": 7.7188, "step": 13 }, { "epoch": 0.09, "learning_rate": 5.7142857142857145e-06, "loss": 7.0391, "step": 14 }, { "epoch": 0.09, "learning_rate": 6.122448979591837e-06, "loss": 6.6875, "step": 15 }, { "epoch": 0.1, "learning_rate": 6.530612244897959e-06, "loss": 6.4922, "step": 16 }, { "epoch": 0.1, "learning_rate": 6.938775510204082e-06, "loss": 6.1953, "step": 17 }, { "epoch": 0.11, "learning_rate": 7.346938775510205e-06, "loss": 6.2578, "step": 18 }, { "epoch": 0.12, "learning_rate": 7.755102040816327e-06, "loss": 5.8906, "step": 19 }, { "epoch": 0.12, "learning_rate": 8.16326530612245e-06, "loss": 5.7656, "step": 20 }, { "epoch": 0.13, "learning_rate": 8.571428571428571e-06, "loss": 5.4844, "step": 21 }, { "epoch": 0.13, "learning_rate": 8.979591836734695e-06, "loss": 5.4922, "step": 22 }, { "epoch": 0.14, "learning_rate": 9.387755102040818e-06, "loss": 5.3594, "step": 23 }, { "epoch": 0.15, "learning_rate": 9.795918367346939e-06, "loss": 5.0625, "step": 24 }, { "epoch": 0.15, "learning_rate": 1.0204081632653063e-05, "loss": 4.9219, "step": 25 }, { "epoch": 0.16, "learning_rate": 1.0612244897959186e-05, "loss": 4.9609, "step": 26 }, { "epoch": 0.17, "learning_rate": 1.1020408163265306e-05, "loss": 4.6172, "step": 27 }, { "epoch": 0.17, "learning_rate": 1.1428571428571429e-05, "loss": 4.7578, "step": 28 }, { "epoch": 0.18, "learning_rate": 1.1836734693877552e-05, "loss": 4.4219, "step": 29 }, { "epoch": 0.18, "learning_rate": 1.2244897959183674e-05, "loss": 4.4688, "step": 30 }, { "epoch": 0.19, "learning_rate": 1.2653061224489798e-05, "loss": 4.8281, "step": 31 }, { "epoch": 0.2, "learning_rate": 1.3061224489795918e-05, "loss": 4.6484, "step": 32 }, { "epoch": 0.2, "learning_rate": 1.3469387755102042e-05, "loss": 4.6172, "step": 33 }, { "epoch": 0.21, "learning_rate": 1.3877551020408165e-05, "loss": 4.5938, "step": 34 }, { "epoch": 0.21, "learning_rate": 1.4285714285714287e-05, "loss": 4.5156, "step": 35 }, { "epoch": 0.22, "learning_rate": 1.469387755102041e-05, "loss": 4.5938, "step": 36 }, { "epoch": 0.23, "learning_rate": 1.510204081632653e-05, "loss": 4.2109, "step": 37 }, { "epoch": 0.23, "learning_rate": 1.5510204081632655e-05, "loss": 4.1094, "step": 38 }, { "epoch": 0.24, "learning_rate": 1.5918367346938776e-05, "loss": 4.0742, "step": 39 }, { "epoch": 0.25, "learning_rate": 1.63265306122449e-05, "loss": 4.1406, "step": 40 }, { "epoch": 0.25, "learning_rate": 1.673469387755102e-05, "loss": 3.9453, "step": 41 }, { "epoch": 0.26, "learning_rate": 1.7142857142857142e-05, "loss": 3.7773, "step": 42 }, { "epoch": 0.26, "learning_rate": 1.7551020408163266e-05, "loss": 4.0781, "step": 43 }, { "epoch": 0.27, "learning_rate": 1.795918367346939e-05, "loss": 4.3906, "step": 44 }, { "epoch": 0.28, "learning_rate": 1.836734693877551e-05, "loss": 4.2031, "step": 45 }, { "epoch": 0.28, "learning_rate": 1.8775510204081636e-05, "loss": 4.0469, "step": 46 }, { "epoch": 0.29, "learning_rate": 1.9183673469387756e-05, "loss": 4.3359, "step": 47 }, { "epoch": 0.29, "learning_rate": 1.9591836734693877e-05, "loss": 3.9688, "step": 48 }, { "epoch": 0.3, "learning_rate": 2e-05, "loss": 4.1289, "step": 49 }, { "epoch": 0.31, "learning_rate": 1.9999980257330883e-05, "loss": 4.2734, "step": 50 }, { "epoch": 0.31, "learning_rate": 1.9999921029401478e-05, "loss": 4.2344, "step": 51 }, { "epoch": 0.32, "learning_rate": 1.9999822316445652e-05, "loss": 4.1797, "step": 52 }, { "epoch": 0.33, "learning_rate": 1.9999684118853177e-05, "loss": 4.0859, "step": 53 }, { "epoch": 0.33, "learning_rate": 1.9999506437169723e-05, "loss": 4.082, "step": 54 }, { "epoch": 0.34, "learning_rate": 1.9999289272096886e-05, "loss": 4.1875, "step": 55 }, { "epoch": 0.34, "learning_rate": 1.9999032624492144e-05, "loss": 3.9062, "step": 56 }, { "epoch": 0.35, "learning_rate": 1.999873649536887e-05, "loss": 4.0664, "step": 57 }, { "epoch": 0.36, "learning_rate": 1.9998400885896355e-05, "loss": 3.9922, "step": 58 }, { "epoch": 0.36, "learning_rate": 1.9998025797399753e-05, "loss": 4.0312, "step": 59 }, { "epoch": 0.37, "learning_rate": 1.9997611231360117e-05, "loss": 3.9297, "step": 60 }, { "epoch": 0.37, "learning_rate": 1.9997157189414373e-05, "loss": 4.0781, "step": 61 }, { "epoch": 0.38, "learning_rate": 1.9996663673355326e-05, "loss": 3.8125, "step": 62 }, { "epoch": 0.39, "learning_rate": 1.9996130685131637e-05, "loss": 3.9375, "step": 63 }, { "epoch": 0.39, "learning_rate": 1.999555822684783e-05, "loss": 4.1602, "step": 64 }, { "epoch": 0.4, "learning_rate": 1.9994946300764276e-05, "loss": 3.9336, "step": 65 }, { "epoch": 0.4, "learning_rate": 1.999429490929718e-05, "loss": 4.207, "step": 66 }, { "epoch": 0.41, "learning_rate": 1.999360405501859e-05, "loss": 3.7422, "step": 67 }, { "epoch": 0.42, "learning_rate": 1.9992873740656372e-05, "loss": 3.9414, "step": 68 }, { "epoch": 0.42, "learning_rate": 1.9992103969094182e-05, "loss": 3.8711, "step": 69 }, { "epoch": 0.43, "learning_rate": 1.99912947433715e-05, "loss": 4.25, "step": 70 }, { "epoch": 0.44, "learning_rate": 1.999044606668358e-05, "loss": 3.8203, "step": 71 }, { "epoch": 0.44, "learning_rate": 1.998955794238145e-05, "loss": 3.9688, "step": 72 }, { "epoch": 0.45, "learning_rate": 1.9988630373971896e-05, "loss": 3.9414, "step": 73 }, { "epoch": 0.45, "learning_rate": 1.9987663365117456e-05, "loss": 3.5312, "step": 74 }, { "epoch": 0.46, "learning_rate": 1.99866569196364e-05, "loss": 3.8789, "step": 75 }, { "epoch": 0.47, "learning_rate": 1.9985611041502704e-05, "loss": 3.9062, "step": 76 }, { "epoch": 0.47, "learning_rate": 1.9984525734846056e-05, "loss": 3.75, "step": 77 }, { "epoch": 0.48, "learning_rate": 1.998340100395183e-05, "loss": 3.8984, "step": 78 }, { "epoch": 0.48, "learning_rate": 1.9982236853261067e-05, "loss": 4.0781, "step": 79 }, { "epoch": 0.49, "learning_rate": 1.9981033287370443e-05, "loss": 3.8672, "step": 80 }, { "epoch": 0.5, "learning_rate": 1.9979790311032288e-05, "loss": 3.7461, "step": 81 }, { "epoch": 0.5, "learning_rate": 1.9978507929154534e-05, "loss": 3.6602, "step": 82 }, { "epoch": 0.51, "learning_rate": 1.9977186146800707e-05, "loss": 3.8555, "step": 83 }, { "epoch": 0.52, "learning_rate": 1.9975824969189913e-05, "loss": 3.8086, "step": 84 }, { "epoch": 0.52, "learning_rate": 1.997442440169681e-05, "loss": 3.9727, "step": 85 }, { "epoch": 0.53, "learning_rate": 1.997298444985158e-05, "loss": 3.6172, "step": 86 }, { "epoch": 0.53, "learning_rate": 1.9971505119339923e-05, "loss": 3.8359, "step": 87 }, { "epoch": 0.54, "learning_rate": 1.9969986416003026e-05, "loss": 3.8594, "step": 88 }, { "epoch": 0.55, "learning_rate": 1.9968428345837542e-05, "loss": 3.7227, "step": 89 }, { "epoch": 0.55, "learning_rate": 1.996683091499556e-05, "loss": 4.125, "step": 90 }, { "epoch": 0.56, "learning_rate": 1.9965194129784597e-05, "loss": 3.832, "step": 91 }, { "epoch": 0.56, "learning_rate": 1.9963517996667548e-05, "loss": 4.0, "step": 92 }, { "epoch": 0.57, "learning_rate": 1.9961802522262685e-05, "loss": 4.0703, "step": 93 }, { "epoch": 0.58, "learning_rate": 1.996004771334361e-05, "loss": 3.7461, "step": 94 }, { "epoch": 0.58, "learning_rate": 1.9958253576839256e-05, "loss": 3.9727, "step": 95 }, { "epoch": 0.59, "learning_rate": 1.9956420119833826e-05, "loss": 4.0664, "step": 96 }, { "epoch": 0.6, "learning_rate": 1.9954547349566783e-05, "loss": 4.2539, "step": 97 }, { "epoch": 0.6, "learning_rate": 1.9952635273432835e-05, "loss": 4.0156, "step": 98 }, { "epoch": 0.61, "learning_rate": 1.9950683898981866e-05, "loss": 4.1406, "step": 99 }, { "epoch": 0.61, "learning_rate": 1.994869323391895e-05, "loss": 4.1523, "step": 100 }, { "epoch": 0.62, "learning_rate": 1.9946663286104303e-05, "loss": 3.9023, "step": 101 }, { "epoch": 0.63, "learning_rate": 1.994459406355323e-05, "loss": 3.8086, "step": 102 }, { "epoch": 0.63, "learning_rate": 1.994248557443613e-05, "loss": 4.0391, "step": 103 }, { "epoch": 0.64, "learning_rate": 1.9940337827078448e-05, "loss": 3.9453, "step": 104 }, { "epoch": 0.64, "learning_rate": 1.9938150829960634e-05, "loss": 4.0039, "step": 105 }, { "epoch": 0.65, "learning_rate": 1.993592459171812e-05, "loss": 3.9883, "step": 106 }, { "epoch": 0.66, "learning_rate": 1.9933659121141283e-05, "loss": 3.6758, "step": 107 }, { "epoch": 0.66, "learning_rate": 1.993135442717541e-05, "loss": 3.793, "step": 108 }, { "epoch": 0.67, "learning_rate": 1.9929010518920667e-05, "loss": 3.7383, "step": 109 }, { "epoch": 0.67, "learning_rate": 1.9926627405632048e-05, "loss": 3.7227, "step": 110 }, { "epoch": 0.68, "learning_rate": 1.992420509671936e-05, "loss": 3.9023, "step": 111 }, { "epoch": 0.69, "learning_rate": 1.992174360174717e-05, "loss": 4.0078, "step": 112 }, { "epoch": 0.69, "learning_rate": 1.991924293043478e-05, "loss": 4.2109, "step": 113 }, { "epoch": 0.7, "learning_rate": 1.991670309265617e-05, "loss": 3.7461, "step": 114 }, { "epoch": 0.71, "learning_rate": 1.9914124098439976e-05, "loss": 4.0039, "step": 115 }, { "epoch": 0.71, "learning_rate": 1.9911505957969443e-05, "loss": 3.8867, "step": 116 }, { "epoch": 0.72, "learning_rate": 1.990884868158239e-05, "loss": 3.9883, "step": 117 }, { "epoch": 0.72, "learning_rate": 1.9906152279771162e-05, "loss": 3.8359, "step": 118 }, { "epoch": 0.73, "learning_rate": 1.990341676318259e-05, "loss": 3.6719, "step": 119 }, { "epoch": 0.74, "learning_rate": 1.9900642142617958e-05, "loss": 3.5898, "step": 120 }, { "epoch": 0.74, "learning_rate": 1.9897828429032946e-05, "loss": 3.9922, "step": 121 }, { "epoch": 0.75, "learning_rate": 1.98949756335376e-05, "loss": 3.8711, "step": 122 }, { "epoch": 0.75, "learning_rate": 1.9892083767396274e-05, "loss": 3.6797, "step": 123 }, { "epoch": 0.76, "learning_rate": 1.9889152842027607e-05, "loss": 4.0078, "step": 124 }, { "epoch": 0.77, "learning_rate": 1.9886182869004447e-05, "loss": 3.8164, "step": 125 }, { "epoch": 0.77, "learning_rate": 1.9883173860053845e-05, "loss": 3.6953, "step": 126 }, { "epoch": 0.78, "learning_rate": 1.9880125827056967e-05, "loss": 3.7344, "step": 127 }, { "epoch": 0.79, "learning_rate": 1.9877038782049074e-05, "loss": 3.6562, "step": 128 }, { "epoch": 0.79, "learning_rate": 1.9873912737219468e-05, "loss": 3.5625, "step": 129 }, { "epoch": 0.8, "learning_rate": 1.987074770491145e-05, "loss": 4.0859, "step": 130 }, { "epoch": 0.8, "learning_rate": 1.9867543697622248e-05, "loss": 3.7344, "step": 131 }, { "epoch": 0.81, "learning_rate": 1.9864300728002997e-05, "loss": 3.9453, "step": 132 }, { "epoch": 0.82, "learning_rate": 1.986101880885867e-05, "loss": 4.1211, "step": 133 }, { "epoch": 0.82, "learning_rate": 1.985769795314804e-05, "loss": 3.7344, "step": 134 }, { "epoch": 0.83, "learning_rate": 1.9854338173983615e-05, "loss": 3.6875, "step": 135 }, { "epoch": 0.83, "learning_rate": 1.9850939484631598e-05, "loss": 3.8125, "step": 136 }, { "epoch": 0.84, "learning_rate": 1.9847501898511824e-05, "loss": 3.707, "step": 137 }, { "epoch": 0.85, "learning_rate": 1.9844025429197727e-05, "loss": 4.0781, "step": 138 }, { "epoch": 0.85, "learning_rate": 1.984051009041626e-05, "loss": 3.8281, "step": 139 }, { "epoch": 0.86, "learning_rate": 1.983695589604785e-05, "loss": 4.0391, "step": 140 }, { "epoch": 0.87, "learning_rate": 1.9833362860126364e-05, "loss": 3.6719, "step": 141 }, { "epoch": 0.87, "learning_rate": 1.982973099683902e-05, "loss": 4.0898, "step": 142 }, { "epoch": 0.88, "learning_rate": 1.9826060320526355e-05, "loss": 3.8281, "step": 143 }, { "epoch": 0.88, "learning_rate": 1.982235084568216e-05, "loss": 3.9219, "step": 144 }, { "epoch": 0.89, "learning_rate": 1.9818602586953414e-05, "loss": 3.9961, "step": 145 }, { "epoch": 0.9, "learning_rate": 1.9814815559140258e-05, "loss": 3.8125, "step": 146 }, { "epoch": 0.9, "learning_rate": 1.9810989777195884e-05, "loss": 3.8164, "step": 147 }, { "epoch": 0.91, "learning_rate": 1.9807125256226532e-05, "loss": 4.1094, "step": 148 }, { "epoch": 0.91, "learning_rate": 1.9803222011491385e-05, "loss": 3.4805, "step": 149 }, { "epoch": 0.92, "learning_rate": 1.979928005840255e-05, "loss": 3.7305, "step": 150 }, { "epoch": 0.93, "learning_rate": 1.9795299412524948e-05, "loss": 3.793, "step": 151 }, { "epoch": 0.93, "learning_rate": 1.9791280089576302e-05, "loss": 4.0312, "step": 152 }, { "epoch": 0.94, "learning_rate": 1.978722210542704e-05, "loss": 3.6953, "step": 153 }, { "epoch": 0.94, "learning_rate": 1.9783125476100254e-05, "loss": 3.7891, "step": 154 }, { "epoch": 0.95, "learning_rate": 1.977899021777162e-05, "loss": 3.6523, "step": 155 }, { "epoch": 0.96, "learning_rate": 1.977481634676935e-05, "loss": 3.9414, "step": 156 }, { "epoch": 0.96, "learning_rate": 1.9770603879574108e-05, "loss": 3.9609, "step": 157 }, { "epoch": 0.97, "learning_rate": 1.9766352832818972e-05, "loss": 3.4336, "step": 158 }, { "epoch": 0.98, "learning_rate": 1.9762063223289334e-05, "loss": 3.6484, "step": 159 }, { "epoch": 0.98, "learning_rate": 1.975773506792287e-05, "loss": 3.8281, "step": 160 }, { "epoch": 0.99, "learning_rate": 1.9753368383809445e-05, "loss": 3.7578, "step": 161 }, { "epoch": 0.99, "learning_rate": 1.974896318819106e-05, "loss": 3.8555, "step": 162 }, { "epoch": 1.0, "learning_rate": 1.974451949846177e-05, "loss": 3.7617, "step": 163 }, { "epoch": 1.01, "learning_rate": 1.974003733216765e-05, "loss": 3.5039, "step": 164 }, { "epoch": 1.01, "learning_rate": 1.9735516707006676e-05, "loss": 3.7344, "step": 165 }, { "epoch": 1.02, "learning_rate": 1.973095764082869e-05, "loss": 3.6172, "step": 166 }, { "epoch": 1.02, "learning_rate": 1.972636015163532e-05, "loss": 3.7734, "step": 167 }, { "epoch": 1.03, "learning_rate": 1.9721724257579907e-05, "loss": 3.543, "step": 168 }, { "epoch": 1.04, "learning_rate": 1.9717049976967437e-05, "loss": 3.7031, "step": 169 }, { "epoch": 1.04, "learning_rate": 1.971233732825446e-05, "loss": 3.543, "step": 170 }, { "epoch": 1.05, "learning_rate": 1.9707586330049037e-05, "loss": 3.6836, "step": 171 }, { "epoch": 1.06, "learning_rate": 1.9702797001110642e-05, "loss": 3.2969, "step": 172 }, { "epoch": 1.06, "learning_rate": 1.9697969360350098e-05, "loss": 3.3789, "step": 173 }, { "epoch": 1.07, "learning_rate": 1.969310342682951e-05, "loss": 3.5625, "step": 174 }, { "epoch": 1.07, "learning_rate": 1.9688199219762183e-05, "loss": 3.9297, "step": 175 }, { "epoch": 1.08, "learning_rate": 1.9683256758512544e-05, "loss": 3.6094, "step": 176 }, { "epoch": 1.09, "learning_rate": 1.967827606259607e-05, "loss": 3.5547, "step": 177 }, { "epoch": 1.09, "learning_rate": 1.96732571516792e-05, "loss": 3.5742, "step": 178 }, { "epoch": 1.1, "learning_rate": 1.9668200045579283e-05, "loss": 3.3047, "step": 179 }, { "epoch": 1.1, "learning_rate": 1.9663104764264468e-05, "loss": 3.5117, "step": 180 }, { "epoch": 1.11, "learning_rate": 1.9657971327853644e-05, "loss": 3.4805, "step": 181 }, { "epoch": 1.12, "learning_rate": 1.9652799756616364e-05, "loss": 3.4453, "step": 182 }, { "epoch": 1.12, "learning_rate": 1.964759007097275e-05, "loss": 3.5195, "step": 183 }, { "epoch": 1.13, "learning_rate": 1.964234229149342e-05, "loss": 3.375, "step": 184 }, { "epoch": 1.13, "learning_rate": 1.963705643889941e-05, "loss": 3.4648, "step": 185 }, { "epoch": 1.14, "learning_rate": 1.9631732534062088e-05, "loss": 3.6719, "step": 186 }, { "epoch": 1.15, "learning_rate": 1.962637059800307e-05, "loss": 3.582, "step": 187 }, { "epoch": 1.15, "learning_rate": 1.9620970651894146e-05, "loss": 3.8086, "step": 188 }, { "epoch": 1.16, "learning_rate": 1.9615532717057185e-05, "loss": 3.5234, "step": 189 }, { "epoch": 1.17, "learning_rate": 1.9610056814964053e-05, "loss": 3.6016, "step": 190 }, { "epoch": 1.17, "learning_rate": 1.9604542967236535e-05, "loss": 3.6172, "step": 191 }, { "epoch": 1.18, "learning_rate": 1.9598991195646252e-05, "loss": 3.3477, "step": 192 }, { "epoch": 1.18, "learning_rate": 1.959340152211455e-05, "loss": 3.3125, "step": 193 }, { "epoch": 1.19, "learning_rate": 1.9587773968712458e-05, "loss": 3.7891, "step": 194 }, { "epoch": 1.2, "learning_rate": 1.958210855766055e-05, "loss": 3.8008, "step": 195 }, { "epoch": 1.2, "learning_rate": 1.95764053113289e-05, "loss": 3.5156, "step": 196 }, { "epoch": 1.21, "learning_rate": 1.9570664252236966e-05, "loss": 3.9531, "step": 197 }, { "epoch": 1.21, "learning_rate": 1.956488540305351e-05, "loss": 3.3164, "step": 198 }, { "epoch": 1.22, "learning_rate": 1.9559068786596526e-05, "loss": 3.6797, "step": 199 }, { "epoch": 1.23, "learning_rate": 1.9553214425833108e-05, "loss": 3.4844, "step": 200 }, { "epoch": 1.23, "learning_rate": 1.9547322343879397e-05, "loss": 3.6641, "step": 201 }, { "epoch": 1.24, "learning_rate": 1.954139256400049e-05, "loss": 3.5195, "step": 202 }, { "epoch": 1.25, "learning_rate": 1.9535425109610317e-05, "loss": 3.7773, "step": 203 }, { "epoch": 1.25, "learning_rate": 1.9529420004271568e-05, "loss": 3.3711, "step": 204 }, { "epoch": 1.26, "learning_rate": 1.952337727169561e-05, "loss": 3.8828, "step": 205 }, { "epoch": 1.26, "learning_rate": 1.951729693574238e-05, "loss": 3.5781, "step": 206 }, { "epoch": 1.27, "learning_rate": 1.9511179020420284e-05, "loss": 3.457, "step": 207 }, { "epoch": 1.28, "learning_rate": 1.950502354988612e-05, "loss": 3.5312, "step": 208 }, { "epoch": 1.28, "learning_rate": 1.9498830548444972e-05, "loss": 3.6367, "step": 209 }, { "epoch": 1.29, "learning_rate": 1.9492600040550114e-05, "loss": 3.5625, "step": 210 }, { "epoch": 1.29, "learning_rate": 1.948633205080292e-05, "loss": 3.5703, "step": 211 }, { "epoch": 1.3, "learning_rate": 1.948002660395276e-05, "loss": 3.7461, "step": 212 }, { "epoch": 1.31, "learning_rate": 1.9473683724896898e-05, "loss": 3.7148, "step": 213 }, { "epoch": 1.31, "learning_rate": 1.9467303438680414e-05, "loss": 3.5039, "step": 214 }, { "epoch": 1.32, "learning_rate": 1.946088577049608e-05, "loss": 3.5273, "step": 215 }, { "epoch": 1.33, "learning_rate": 1.9454430745684276e-05, "loss": 3.7188, "step": 216 }, { "epoch": 1.33, "learning_rate": 1.944793838973289e-05, "loss": 3.5586, "step": 217 }, { "epoch": 1.34, "learning_rate": 1.94414087282772e-05, "loss": 3.6602, "step": 218 }, { "epoch": 1.34, "learning_rate": 1.9434841787099804e-05, "loss": 3.3633, "step": 219 }, { "epoch": 1.35, "learning_rate": 1.9428237592130487e-05, "loss": 3.2969, "step": 220 }, { "epoch": 1.36, "learning_rate": 1.9421596169446135e-05, "loss": 3.7031, "step": 221 }, { "epoch": 1.36, "learning_rate": 1.941491754527064e-05, "loss": 3.4375, "step": 222 }, { "epoch": 1.37, "learning_rate": 1.940820174597476e-05, "loss": 3.6016, "step": 223 }, { "epoch": 1.37, "learning_rate": 1.9401448798076064e-05, "loss": 3.6406, "step": 224 }, { "epoch": 1.38, "learning_rate": 1.9394658728238797e-05, "loss": 3.5273, "step": 225 }, { "epoch": 1.39, "learning_rate": 1.9387831563273775e-05, "loss": 3.4336, "step": 226 }, { "epoch": 1.39, "learning_rate": 1.938096733013829e-05, "loss": 3.4141, "step": 227 }, { "epoch": 1.4, "learning_rate": 1.9374066055936004e-05, "loss": 3.6797, "step": 228 }, { "epoch": 1.4, "learning_rate": 1.9367127767916828e-05, "loss": 3.6953, "step": 229 }, { "epoch": 1.41, "learning_rate": 1.9360152493476828e-05, "loss": 3.6797, "step": 230 }, { "epoch": 1.42, "learning_rate": 1.9353140260158108e-05, "loss": 3.5938, "step": 231 }, { "epoch": 1.42, "learning_rate": 1.9346091095648712e-05, "loss": 3.9492, "step": 232 }, { "epoch": 1.43, "learning_rate": 1.93390050277825e-05, "loss": 3.6172, "step": 233 }, { "epoch": 1.44, "learning_rate": 1.9331882084539056e-05, "loss": 3.5977, "step": 234 }, { "epoch": 1.44, "learning_rate": 1.932472229404356e-05, "loss": 3.5703, "step": 235 }, { "epoch": 1.45, "learning_rate": 1.9317525684566686e-05, "loss": 3.4336, "step": 236 }, { "epoch": 1.45, "learning_rate": 1.931029228452449e-05, "loss": 3.5508, "step": 237 }, { "epoch": 1.46, "learning_rate": 1.9303022122478303e-05, "loss": 3.7188, "step": 238 }, { "epoch": 1.47, "learning_rate": 1.9295715227134595e-05, "loss": 3.4766, "step": 239 }, { "epoch": 1.47, "learning_rate": 1.9288371627344894e-05, "loss": 3.6484, "step": 240 }, { "epoch": 1.48, "learning_rate": 1.9280991352105656e-05, "loss": 3.5703, "step": 241 }, { "epoch": 1.48, "learning_rate": 1.9273574430558143e-05, "loss": 3.4336, "step": 242 }, { "epoch": 1.49, "learning_rate": 1.9266120891988326e-05, "loss": 3.5469, "step": 243 }, { "epoch": 1.5, "learning_rate": 1.925863076582674e-05, "loss": 3.2812, "step": 244 }, { "epoch": 1.5, "learning_rate": 1.9251104081648423e-05, "loss": 3.4102, "step": 245 }, { "epoch": 1.51, "learning_rate": 1.9243540869172724e-05, "loss": 3.332, "step": 246 }, { "epoch": 1.52, "learning_rate": 1.9235941158263253e-05, "loss": 3.5039, "step": 247 }, { "epoch": 1.52, "learning_rate": 1.922830497892772e-05, "loss": 3.4883, "step": 248 }, { "epoch": 1.53, "learning_rate": 1.9220632361317843e-05, "loss": 3.5664, "step": 249 }, { "epoch": 1.53, "learning_rate": 1.9212923335729206e-05, "loss": 3.5195, "step": 250 }, { "epoch": 1.54, "learning_rate": 1.920517793260116e-05, "loss": 3.4531, "step": 251 }, { "epoch": 1.55, "learning_rate": 1.9197396182516694e-05, "loss": 3.7734, "step": 252 }, { "epoch": 1.55, "learning_rate": 1.918957811620231e-05, "loss": 3.6953, "step": 253 }, { "epoch": 1.56, "learning_rate": 1.9181723764527902e-05, "loss": 3.6133, "step": 254 }, { "epoch": 1.56, "learning_rate": 1.917383315850665e-05, "loss": 3.5391, "step": 255 }, { "epoch": 1.57, "learning_rate": 1.9165906329294875e-05, "loss": 3.5898, "step": 256 }, { "epoch": 1.58, "learning_rate": 1.9157943308191934e-05, "loss": 3.7188, "step": 257 }, { "epoch": 1.58, "learning_rate": 1.914994412664008e-05, "loss": 3.8125, "step": 258 }, { "epoch": 1.59, "learning_rate": 1.9141908816224356e-05, "loss": 3.875, "step": 259 }, { "epoch": 1.6, "learning_rate": 1.9133837408672456e-05, "loss": 3.4102, "step": 260 }, { "epoch": 1.6, "learning_rate": 1.9125729935854606e-05, "loss": 3.2344, "step": 261 }, { "epoch": 1.61, "learning_rate": 1.9117586429783433e-05, "loss": 3.7656, "step": 262 }, { "epoch": 1.61, "learning_rate": 1.910940692261385e-05, "loss": 3.6992, "step": 263 }, { "epoch": 1.62, "learning_rate": 1.9101191446642917e-05, "loss": 3.4766, "step": 264 }, { "epoch": 1.63, "learning_rate": 1.909294003430972e-05, "loss": 3.1211, "step": 265 }, { "epoch": 1.63, "learning_rate": 1.9084652718195237e-05, "loss": 3.4102, "step": 266 }, { "epoch": 1.64, "learning_rate": 1.907632953102222e-05, "loss": 3.6602, "step": 267 }, { "epoch": 1.64, "learning_rate": 1.906797050565505e-05, "loss": 3.6836, "step": 268 }, { "epoch": 1.65, "learning_rate": 1.9059575675099622e-05, "loss": 3.582, "step": 269 }, { "epoch": 1.66, "learning_rate": 1.9051145072503216e-05, "loss": 3.6172, "step": 270 }, { "epoch": 1.66, "learning_rate": 1.9042678731154337e-05, "loss": 3.457, "step": 271 }, { "epoch": 1.67, "learning_rate": 1.9034176684482638e-05, "loss": 3.3398, "step": 272 }, { "epoch": 1.67, "learning_rate": 1.9025638966058722e-05, "loss": 3.4883, "step": 273 }, { "epoch": 1.68, "learning_rate": 1.901706560959407e-05, "loss": 3.6602, "step": 274 }, { "epoch": 1.69, "learning_rate": 1.900845664894086e-05, "loss": 3.6797, "step": 275 }, { "epoch": 1.69, "learning_rate": 1.8999812118091877e-05, "loss": 3.4766, "step": 276 }, { "epoch": 1.7, "learning_rate": 1.8991132051180332e-05, "loss": 3.3945, "step": 277 }, { "epoch": 1.71, "learning_rate": 1.898241648247977e-05, "loss": 3.2461, "step": 278 }, { "epoch": 1.71, "learning_rate": 1.8973665446403902e-05, "loss": 3.4023, "step": 279 }, { "epoch": 1.72, "learning_rate": 1.8964878977506496e-05, "loss": 3.4492, "step": 280 }, { "epoch": 1.72, "learning_rate": 1.895605711048122e-05, "loss": 3.5, "step": 281 }, { "epoch": 1.73, "learning_rate": 1.8947199880161515e-05, "loss": 3.4531, "step": 282 }, { "epoch": 1.74, "learning_rate": 1.8938307321520453e-05, "loss": 3.6523, "step": 283 }, { "epoch": 1.74, "learning_rate": 1.89293794696706e-05, "loss": 3.6445, "step": 284 }, { "epoch": 1.75, "learning_rate": 1.8920416359863885e-05, "loss": 3.3711, "step": 285 }, { "epoch": 1.75, "learning_rate": 1.8911418027491453e-05, "loss": 3.4414, "step": 286 }, { "epoch": 1.76, "learning_rate": 1.8902384508083518e-05, "loss": 3.2656, "step": 287 }, { "epoch": 1.77, "learning_rate": 1.8893315837309235e-05, "loss": 3.6289, "step": 288 }, { "epoch": 1.77, "learning_rate": 1.8884212050976568e-05, "loss": 3.4023, "step": 289 }, { "epoch": 1.78, "learning_rate": 1.8875073185032116e-05, "loss": 3.6914, "step": 290 }, { "epoch": 1.79, "learning_rate": 1.8865899275561003e-05, "loss": 3.3281, "step": 291 }, { "epoch": 1.79, "learning_rate": 1.885669035878672e-05, "loss": 3.7227, "step": 292 }, { "epoch": 1.8, "learning_rate": 1.8847446471070985e-05, "loss": 3.2891, "step": 293 }, { "epoch": 1.8, "learning_rate": 1.8838167648913606e-05, "loss": 3.4844, "step": 294 }, { "epoch": 1.81, "learning_rate": 1.882885392895232e-05, "loss": 3.7617, "step": 295 }, { "epoch": 1.82, "learning_rate": 1.881950534796267e-05, "loss": 3.3945, "step": 296 }, { "epoch": 1.82, "learning_rate": 1.8810121942857848e-05, "loss": 3.5547, "step": 297 }, { "epoch": 1.83, "learning_rate": 1.8800703750688536e-05, "loss": 3.6484, "step": 298 }, { "epoch": 1.83, "learning_rate": 1.8791250808642792e-05, "loss": 3.668, "step": 299 }, { "epoch": 1.84, "learning_rate": 1.8781763154045873e-05, "loss": 3.5664, "step": 300 }, { "epoch": 1.85, "learning_rate": 1.877224082436011e-05, "loss": 3.2695, "step": 301 }, { "epoch": 1.85, "learning_rate": 1.8762683857184738e-05, "loss": 3.5781, "step": 302 }, { "epoch": 1.86, "learning_rate": 1.8753092290255765e-05, "loss": 3.8359, "step": 303 }, { "epoch": 1.87, "learning_rate": 1.8743466161445823e-05, "loss": 3.3242, "step": 304 }, { "epoch": 1.87, "learning_rate": 1.8733805508764e-05, "loss": 3.3086, "step": 305 }, { "epoch": 1.88, "learning_rate": 1.872411037035572e-05, "loss": 3.4531, "step": 306 }, { "epoch": 1.88, "learning_rate": 1.8714380784502553e-05, "loss": 3.5586, "step": 307 }, { "epoch": 1.89, "learning_rate": 1.870461678962211e-05, "loss": 3.6797, "step": 308 }, { "epoch": 1.9, "learning_rate": 1.869481842426784e-05, "loss": 3.4609, "step": 309 }, { "epoch": 1.9, "learning_rate": 1.8684985727128936e-05, "loss": 3.6289, "step": 310 }, { "epoch": 1.91, "learning_rate": 1.8675118737030123e-05, "loss": 3.4844, "step": 311 }, { "epoch": 1.91, "learning_rate": 1.866521749293155e-05, "loss": 3.7461, "step": 312 }, { "epoch": 1.92, "learning_rate": 1.8655282033928618e-05, "loss": 3.2852, "step": 313 }, { "epoch": 1.93, "learning_rate": 1.8645312399251818e-05, "loss": 3.6875, "step": 314 }, { "epoch": 1.93, "learning_rate": 1.8635308628266586e-05, "loss": 3.2266, "step": 315 }, { "epoch": 1.94, "learning_rate": 1.8625270760473164e-05, "loss": 3.5977, "step": 316 }, { "epoch": 1.94, "learning_rate": 1.8615198835506393e-05, "loss": 3.6133, "step": 317 }, { "epoch": 1.95, "learning_rate": 1.8605092893135626e-05, "loss": 3.6172, "step": 318 }, { "epoch": 1.96, "learning_rate": 1.8594952973264512e-05, "loss": 3.4766, "step": 319 }, { "epoch": 1.96, "learning_rate": 1.8584779115930866e-05, "loss": 3.4766, "step": 320 }, { "epoch": 1.97, "learning_rate": 1.857457136130651e-05, "loss": 3.6875, "step": 321 }, { "epoch": 1.98, "learning_rate": 1.856432974969711e-05, "loss": 3.3359, "step": 322 }, { "epoch": 1.98, "learning_rate": 1.855405432154201e-05, "loss": 3.5, "step": 323 }, { "epoch": 1.99, "learning_rate": 1.8543745117414094e-05, "loss": 3.5547, "step": 324 }, { "epoch": 1.99, "learning_rate": 1.8533402178019596e-05, "loss": 3.1367, "step": 325 }, { "epoch": 2.0, "learning_rate": 1.8523025544197964e-05, "loss": 3.4141, "step": 326 }, { "epoch": 2.01, "learning_rate": 1.8512615256921692e-05, "loss": 3.0078, "step": 327 }, { "epoch": 2.01, "learning_rate": 1.8502171357296144e-05, "loss": 3.0586, "step": 328 }, { "epoch": 2.02, "learning_rate": 1.8491693886559413e-05, "loss": 3.1953, "step": 329 }, { "epoch": 2.02, "learning_rate": 1.848118288608215e-05, "loss": 3.0625, "step": 330 }, { "epoch": 2.03, "learning_rate": 1.8470638397367397e-05, "loss": 3.25, "step": 331 }, { "epoch": 2.04, "learning_rate": 1.846006046205042e-05, "loss": 3.2422, "step": 332 }, { "epoch": 2.04, "learning_rate": 1.8449449121898552e-05, "loss": 2.9258, "step": 333 }, { "epoch": 2.05, "learning_rate": 1.8438804418811038e-05, "loss": 2.9883, "step": 334 }, { "epoch": 2.06, "learning_rate": 1.842812639481884e-05, "loss": 3.3203, "step": 335 }, { "epoch": 2.06, "learning_rate": 1.84174150920845e-05, "loss": 3.0195, "step": 336 }, { "epoch": 2.07, "learning_rate": 1.8406670552901958e-05, "loss": 2.9375, "step": 337 }, { "epoch": 2.07, "learning_rate": 1.839589281969639e-05, "loss": 3.2578, "step": 338 }, { "epoch": 2.08, "learning_rate": 1.8385081935024044e-05, "loss": 3.0469, "step": 339 }, { "epoch": 2.09, "learning_rate": 1.837423794157206e-05, "loss": 3.1367, "step": 340 }, { "epoch": 2.09, "learning_rate": 1.836336088215831e-05, "loss": 3.0234, "step": 341 }, { "epoch": 2.1, "learning_rate": 1.835245079973124e-05, "loss": 2.8242, "step": 342 }, { "epoch": 2.1, "learning_rate": 1.834150773736967e-05, "loss": 2.9414, "step": 343 }, { "epoch": 2.11, "learning_rate": 1.8330531738282656e-05, "loss": 3.0742, "step": 344 }, { "epoch": 2.12, "learning_rate": 1.8319522845809306e-05, "loss": 3.0625, "step": 345 }, { "epoch": 2.12, "learning_rate": 1.8308481103418597e-05, "loss": 2.8828, "step": 346 }, { "epoch": 2.13, "learning_rate": 1.8297406554709228e-05, "loss": 3.1836, "step": 347 }, { "epoch": 2.13, "learning_rate": 1.8286299243409424e-05, "loss": 2.8086, "step": 348 }, { "epoch": 2.14, "learning_rate": 1.8275159213376783e-05, "loss": 2.9258, "step": 349 }, { "epoch": 2.15, "learning_rate": 1.826398650859809e-05, "loss": 3.0977, "step": 350 }, { "epoch": 2.15, "learning_rate": 1.8252781173189148e-05, "loss": 3.3086, "step": 351 }, { "epoch": 2.16, "learning_rate": 1.82415432513946e-05, "loss": 3.0117, "step": 352 }, { "epoch": 2.17, "learning_rate": 1.823027278758776e-05, "loss": 2.957, "step": 353 }, { "epoch": 2.17, "learning_rate": 1.821896982627044e-05, "loss": 3.2617, "step": 354 }, { "epoch": 2.18, "learning_rate": 1.8207634412072765e-05, "loss": 3.1172, "step": 355 }, { "epoch": 2.18, "learning_rate": 1.8196266589753e-05, "loss": 2.8867, "step": 356 }, { "epoch": 2.19, "learning_rate": 1.818486640419737e-05, "loss": 3.2539, "step": 357 }, { "epoch": 2.2, "learning_rate": 1.81734339004199e-05, "loss": 2.8633, "step": 358 }, { "epoch": 2.2, "learning_rate": 1.816196912356222e-05, "loss": 3.1016, "step": 359 }, { "epoch": 2.21, "learning_rate": 1.8150472118893382e-05, "loss": 3.0898, "step": 360 }, { "epoch": 2.21, "learning_rate": 1.8138942931809702e-05, "loss": 2.9453, "step": 361 }, { "epoch": 2.22, "learning_rate": 1.8127381607834563e-05, "loss": 3.2383, "step": 362 }, { "epoch": 2.23, "learning_rate": 1.8115788192618247e-05, "loss": 3.0703, "step": 363 }, { "epoch": 2.23, "learning_rate": 1.8104162731937746e-05, "loss": 3.0977, "step": 364 }, { "epoch": 2.24, "learning_rate": 1.8092505271696582e-05, "loss": 3.2344, "step": 365 }, { "epoch": 2.25, "learning_rate": 1.808081585792463e-05, "loss": 2.7617, "step": 366 }, { "epoch": 2.25, "learning_rate": 1.8069094536777938e-05, "loss": 3.0898, "step": 367 }, { "epoch": 2.26, "learning_rate": 1.805734135453854e-05, "loss": 3.0781, "step": 368 }, { "epoch": 2.26, "learning_rate": 1.8045556357614273e-05, "loss": 3.4922, "step": 369 }, { "epoch": 2.27, "learning_rate": 1.8033739592538598e-05, "loss": 3.1211, "step": 370 }, { "epoch": 2.28, "learning_rate": 1.8021891105970405e-05, "loss": 2.9453, "step": 371 }, { "epoch": 2.28, "learning_rate": 1.8010010944693846e-05, "loss": 3.1016, "step": 372 }, { "epoch": 2.29, "learning_rate": 1.7998099155618147e-05, "loss": 3.0117, "step": 373 }, { "epoch": 2.29, "learning_rate": 1.7986155785777402e-05, "loss": 3.1523, "step": 374 }, { "epoch": 2.3, "learning_rate": 1.7974180882330413e-05, "loss": 3.0352, "step": 375 }, { "epoch": 2.31, "learning_rate": 1.7962174492560492e-05, "loss": 2.8711, "step": 376 }, { "epoch": 2.31, "learning_rate": 1.7950136663875274e-05, "loss": 3.1953, "step": 377 }, { "epoch": 2.32, "learning_rate": 1.7938067443806538e-05, "loss": 3.2188, "step": 378 }, { "epoch": 2.33, "learning_rate": 1.7925966880009998e-05, "loss": 2.8203, "step": 379 }, { "epoch": 2.33, "learning_rate": 1.791383502026515e-05, "loss": 3.1172, "step": 380 }, { "epoch": 2.34, "learning_rate": 1.790167191247504e-05, "loss": 2.9414, "step": 381 }, { "epoch": 2.34, "learning_rate": 1.7889477604666124e-05, "loss": 2.8398, "step": 382 }, { "epoch": 2.35, "learning_rate": 1.787725214498803e-05, "loss": 3.1836, "step": 383 }, { "epoch": 2.36, "learning_rate": 1.78649955817134e-05, "loss": 3.0625, "step": 384 }, { "epoch": 2.36, "learning_rate": 1.785270796323769e-05, "loss": 2.8945, "step": 385 }, { "epoch": 2.37, "learning_rate": 1.784038933807898e-05, "loss": 2.9688, "step": 386 }, { "epoch": 2.37, "learning_rate": 1.7828039754877778e-05, "loss": 3.0352, "step": 387 }, { "epoch": 2.38, "learning_rate": 1.7815659262396825e-05, "loss": 3.0977, "step": 388 }, { "epoch": 2.39, "learning_rate": 1.780324790952092e-05, "loss": 3.1445, "step": 389 }, { "epoch": 2.39, "learning_rate": 1.7790805745256703e-05, "loss": 2.9766, "step": 390 }, { "epoch": 2.4, "learning_rate": 1.7778332818732492e-05, "loss": 3.0547, "step": 391 }, { "epoch": 2.4, "learning_rate": 1.7765829179198048e-05, "loss": 3.1758, "step": 392 }, { "epoch": 2.41, "learning_rate": 1.7753294876024417e-05, "loss": 3.0625, "step": 393 }, { "epoch": 2.42, "learning_rate": 1.7740729958703725e-05, "loss": 2.9297, "step": 394 }, { "epoch": 2.42, "learning_rate": 1.7728134476848965e-05, "loss": 3.0586, "step": 395 }, { "epoch": 2.43, "learning_rate": 1.7715508480193832e-05, "loss": 3.0039, "step": 396 }, { "epoch": 2.44, "learning_rate": 1.7702852018592493e-05, "loss": 2.8086, "step": 397 }, { "epoch": 2.44, "learning_rate": 1.769016514201942e-05, "loss": 2.9336, "step": 398 }, { "epoch": 2.45, "learning_rate": 1.7677447900569166e-05, "loss": 3.4219, "step": 399 }, { "epoch": 2.45, "learning_rate": 1.7664700344456198e-05, "loss": 3.0625, "step": 400 }, { "epoch": 2.46, "learning_rate": 1.765192252401467e-05, "loss": 3.2617, "step": 401 }, { "epoch": 2.47, "learning_rate": 1.7639114489698238e-05, "loss": 3.0977, "step": 402 }, { "epoch": 2.47, "learning_rate": 1.762627629207986e-05, "loss": 3.0703, "step": 403 }, { "epoch": 2.48, "learning_rate": 1.7613407981851586e-05, "loss": 3.0938, "step": 404 }, { "epoch": 2.48, "learning_rate": 1.760050960982439e-05, "loss": 3.3047, "step": 405 }, { "epoch": 2.49, "learning_rate": 1.758758122692791e-05, "loss": 2.8867, "step": 406 }, { "epoch": 2.5, "learning_rate": 1.757462288421032e-05, "loss": 3.2148, "step": 407 }, { "epoch": 2.5, "learning_rate": 1.7561634632838062e-05, "loss": 3.1172, "step": 408 }, { "epoch": 2.51, "learning_rate": 1.7548616524095697e-05, "loss": 2.9141, "step": 409 }, { "epoch": 2.52, "learning_rate": 1.753556860938566e-05, "loss": 3.0938, "step": 410 }, { "epoch": 2.52, "learning_rate": 1.7522490940228086e-05, "loss": 2.8672, "step": 411 }, { "epoch": 2.53, "learning_rate": 1.7509383568260597e-05, "loss": 3.1641, "step": 412 }, { "epoch": 2.53, "learning_rate": 1.749624654523809e-05, "loss": 2.9883, "step": 413 }, { "epoch": 2.54, "learning_rate": 1.7483079923032543e-05, "loss": 3.0898, "step": 414 }, { "epoch": 2.55, "learning_rate": 1.7469883753632817e-05, "loss": 3.0391, "step": 415 }, { "epoch": 2.55, "learning_rate": 1.745665808914443e-05, "loss": 3.1055, "step": 416 }, { "epoch": 2.56, "learning_rate": 1.744340298178936e-05, "loss": 3.0664, "step": 417 }, { "epoch": 2.56, "learning_rate": 1.743011848390585e-05, "loss": 2.8672, "step": 418 }, { "epoch": 2.57, "learning_rate": 1.7416804647948194e-05, "loss": 3.2891, "step": 419 }, { "epoch": 2.58, "learning_rate": 1.740346152648652e-05, "loss": 2.9805, "step": 420 }, { "epoch": 2.58, "learning_rate": 1.7390089172206594e-05, "loss": 2.7305, "step": 421 }, { "epoch": 2.59, "learning_rate": 1.7376687637909607e-05, "loss": 3.0547, "step": 422 }, { "epoch": 2.6, "learning_rate": 1.7363256976511972e-05, "loss": 2.7773, "step": 423 }, { "epoch": 2.6, "learning_rate": 1.7349797241045115e-05, "loss": 3.2188, "step": 424 }, { "epoch": 2.61, "learning_rate": 1.733630848465525e-05, "loss": 3.0156, "step": 425 }, { "epoch": 2.61, "learning_rate": 1.732279076060319e-05, "loss": 3.1328, "step": 426 }, { "epoch": 2.62, "learning_rate": 1.730924412226413e-05, "loss": 3.0664, "step": 427 }, { "epoch": 2.63, "learning_rate": 1.729566862312742e-05, "loss": 2.9102, "step": 428 }, { "epoch": 2.63, "learning_rate": 1.7282064316796387e-05, "loss": 3.0508, "step": 429 }, { "epoch": 2.64, "learning_rate": 1.726843125698809e-05, "loss": 2.8711, "step": 430 }, { "epoch": 2.64, "learning_rate": 1.7254769497533128e-05, "loss": 2.75, "step": 431 }, { "epoch": 2.65, "learning_rate": 1.724107909237542e-05, "loss": 2.8438, "step": 432 }, { "epoch": 2.66, "learning_rate": 1.7227360095571992e-05, "loss": 2.9883, "step": 433 }, { "epoch": 2.66, "learning_rate": 1.721361256129277e-05, "loss": 3.2461, "step": 434 }, { "epoch": 2.67, "learning_rate": 1.719983654382036e-05, "loss": 3.0781, "step": 435 }, { "epoch": 2.67, "learning_rate": 1.7186032097549822e-05, "loss": 3.1523, "step": 436 }, { "epoch": 2.68, "learning_rate": 1.717219927698849e-05, "loss": 2.832, "step": 437 }, { "epoch": 2.69, "learning_rate": 1.7158338136755724e-05, "loss": 3.2617, "step": 438 }, { "epoch": 2.69, "learning_rate": 1.7144448731582698e-05, "loss": 3.0781, "step": 439 }, { "epoch": 2.7, "learning_rate": 1.7130531116312202e-05, "loss": 3.1641, "step": 440 }, { "epoch": 2.71, "learning_rate": 1.7116585345898413e-05, "loss": 3.1484, "step": 441 }, { "epoch": 2.71, "learning_rate": 1.7102611475406676e-05, "loss": 3.2656, "step": 442 }, { "epoch": 2.72, "learning_rate": 1.7088609560013284e-05, "loss": 3.0938, "step": 443 }, { "epoch": 2.72, "learning_rate": 1.7074579655005282e-05, "loss": 2.9648, "step": 444 }, { "epoch": 2.73, "learning_rate": 1.7060521815780225e-05, "loss": 3.1328, "step": 445 }, { "epoch": 2.74, "learning_rate": 1.704643609784596e-05, "loss": 3.1211, "step": 446 }, { "epoch": 2.74, "learning_rate": 1.7032322556820428e-05, "loss": 3.1719, "step": 447 }, { "epoch": 2.75, "learning_rate": 1.7018181248431416e-05, "loss": 2.9883, "step": 448 }, { "epoch": 2.75, "learning_rate": 1.700401222851636e-05, "loss": 3.1172, "step": 449 }, { "epoch": 2.76, "learning_rate": 1.698981555302212e-05, "loss": 2.9531, "step": 450 }, { "epoch": 2.77, "learning_rate": 1.6975591278004747e-05, "loss": 2.9375, "step": 451 }, { "epoch": 2.77, "learning_rate": 1.696133945962927e-05, "loss": 3.1875, "step": 452 }, { "epoch": 2.78, "learning_rate": 1.6947060154169473e-05, "loss": 3.0742, "step": 453 }, { "epoch": 2.79, "learning_rate": 1.6932753418007683e-05, "loss": 3.0977, "step": 454 }, { "epoch": 2.79, "learning_rate": 1.691841930763453e-05, "loss": 2.9531, "step": 455 }, { "epoch": 2.8, "learning_rate": 1.690405787964873e-05, "loss": 2.9609, "step": 456 }, { "epoch": 2.8, "learning_rate": 1.688966919075687e-05, "loss": 2.7578, "step": 457 }, { "epoch": 2.81, "learning_rate": 1.687525329777317e-05, "loss": 2.9961, "step": 458 }, { "epoch": 2.82, "learning_rate": 1.686081025761928e-05, "loss": 3.3203, "step": 459 }, { "epoch": 2.82, "learning_rate": 1.684634012732403e-05, "loss": 2.9258, "step": 460 }, { "epoch": 2.83, "learning_rate": 1.6831842964023212e-05, "loss": 3.1445, "step": 461 }, { "epoch": 2.83, "learning_rate": 1.6817318824959375e-05, "loss": 3.2617, "step": 462 }, { "epoch": 2.84, "learning_rate": 1.680276776748157e-05, "loss": 2.9883, "step": 463 }, { "epoch": 2.85, "learning_rate": 1.6788189849045135e-05, "loss": 2.9219, "step": 464 }, { "epoch": 2.85, "learning_rate": 1.6773585127211478e-05, "loss": 2.8281, "step": 465 }, { "epoch": 2.86, "learning_rate": 1.6758953659647838e-05, "loss": 3.0312, "step": 466 }, { "epoch": 2.87, "learning_rate": 1.6744295504127055e-05, "loss": 3.2461, "step": 467 }, { "epoch": 2.87, "learning_rate": 1.6729610718527357e-05, "loss": 3.1562, "step": 468 }, { "epoch": 2.88, "learning_rate": 1.6714899360832118e-05, "loss": 2.9023, "step": 469 }, { "epoch": 2.88, "learning_rate": 1.6700161489129624e-05, "loss": 3.0898, "step": 470 }, { "epoch": 2.89, "learning_rate": 1.668539716161287e-05, "loss": 2.9414, "step": 471 }, { "epoch": 2.9, "learning_rate": 1.667060643657929e-05, "loss": 2.9844, "step": 472 }, { "epoch": 2.9, "learning_rate": 1.6655789372430572e-05, "loss": 3.0859, "step": 473 }, { "epoch": 2.91, "learning_rate": 1.6640946027672395e-05, "loss": 3.1758, "step": 474 }, { "epoch": 2.91, "learning_rate": 1.66260764609142e-05, "loss": 3.1719, "step": 475 }, { "epoch": 2.92, "learning_rate": 1.6611180730868975e-05, "loss": 3.0508, "step": 476 }, { "epoch": 2.93, "learning_rate": 1.6596258896353027e-05, "loss": 3.1406, "step": 477 }, { "epoch": 2.93, "learning_rate": 1.658131101628571e-05, "loss": 3.1836, "step": 478 }, { "epoch": 2.94, "learning_rate": 1.656633714968924e-05, "loss": 3.0352, "step": 479 }, { "epoch": 2.94, "learning_rate": 1.6551337355688437e-05, "loss": 2.8789, "step": 480 }, { "epoch": 2.95, "learning_rate": 1.653631169351049e-05, "loss": 3.1094, "step": 481 }, { "epoch": 2.96, "learning_rate": 1.6521260222484738e-05, "loss": 3.4102, "step": 482 }, { "epoch": 2.96, "learning_rate": 1.650618300204242e-05, "loss": 3.293, "step": 483 }, { "epoch": 2.97, "learning_rate": 1.6491080091716457e-05, "loss": 2.9922, "step": 484 }, { "epoch": 2.98, "learning_rate": 1.64759515511412e-05, "loss": 3.082, "step": 485 }, { "epoch": 2.98, "learning_rate": 1.6460797440052195e-05, "loss": 2.9297, "step": 486 }, { "epoch": 2.99, "learning_rate": 1.6445617818285974e-05, "loss": 2.8906, "step": 487 }, { "epoch": 2.99, "learning_rate": 1.643041274577978e-05, "loss": 3.0625, "step": 488 }, { "epoch": 3.0, "learning_rate": 1.6415182282571356e-05, "loss": 3.1562, "step": 489 }, { "epoch": 3.01, "learning_rate": 1.6399926488798702e-05, "loss": 2.6367, "step": 490 }, { "epoch": 3.01, "learning_rate": 1.6384645424699835e-05, "loss": 2.207, "step": 491 }, { "epoch": 3.02, "learning_rate": 1.6369339150612557e-05, "loss": 2.4844, "step": 492 }, { "epoch": 3.02, "learning_rate": 1.6354007726974205e-05, "loss": 2.4219, "step": 493 }, { "epoch": 3.03, "learning_rate": 1.6338651214321426e-05, "loss": 2.4531, "step": 494 }, { "epoch": 3.04, "learning_rate": 1.632326967328993e-05, "loss": 2.4961, "step": 495 }, { "epoch": 3.04, "learning_rate": 1.630786316461425e-05, "loss": 2.4219, "step": 496 }, { "epoch": 3.05, "learning_rate": 1.6292431749127507e-05, "loss": 2.5273, "step": 497 }, { "epoch": 3.06, "learning_rate": 1.627697548776117e-05, "loss": 2.4492, "step": 498 }, { "epoch": 3.06, "learning_rate": 1.6261494441544805e-05, "loss": 2.4922, "step": 499 }, { "epoch": 3.07, "learning_rate": 1.624598867160585e-05, "loss": 2.375, "step": 500 }, { "epoch": 3.07, "learning_rate": 1.623045823916936e-05, "loss": 2.6914, "step": 501 }, { "epoch": 3.08, "learning_rate": 1.6214903205557774e-05, "loss": 2.4141, "step": 502 }, { "epoch": 3.09, "learning_rate": 1.619932363219067e-05, "loss": 2.5742, "step": 503 }, { "epoch": 3.09, "learning_rate": 1.6183719580584515e-05, "loss": 2.332, "step": 504 }, { "epoch": 3.1, "learning_rate": 1.6168091112352443e-05, "loss": 2.4727, "step": 505 }, { "epoch": 3.1, "learning_rate": 1.6152438289203982e-05, "loss": 2.5352, "step": 506 }, { "epoch": 3.11, "learning_rate": 1.6136761172944837e-05, "loss": 2.4375, "step": 507 }, { "epoch": 3.12, "learning_rate": 1.612105982547663e-05, "loss": 2.543, "step": 508 }, { "epoch": 3.12, "learning_rate": 1.6105334308796665e-05, "loss": 2.3945, "step": 509 }, { "epoch": 3.13, "learning_rate": 1.6089584684997674e-05, "loss": 2.4531, "step": 510 }, { "epoch": 3.13, "learning_rate": 1.607381101626758e-05, "loss": 2.5781, "step": 511 }, { "epoch": 3.14, "learning_rate": 1.6058013364889247e-05, "loss": 2.2852, "step": 512 }, { "epoch": 3.15, "learning_rate": 1.6042191793240242e-05, "loss": 2.293, "step": 513 }, { "epoch": 3.15, "learning_rate": 1.6026346363792565e-05, "loss": 2.5156, "step": 514 }, { "epoch": 3.16, "learning_rate": 1.6010477139112438e-05, "loss": 2.3711, "step": 515 }, { "epoch": 3.17, "learning_rate": 1.5994584181860028e-05, "loss": 2.2891, "step": 516 }, { "epoch": 3.17, "learning_rate": 1.5978667554789216e-05, "loss": 2.3867, "step": 517 }, { "epoch": 3.18, "learning_rate": 1.596272732074734e-05, "loss": 2.457, "step": 518 }, { "epoch": 3.18, "learning_rate": 1.5946763542674958e-05, "loss": 2.293, "step": 519 }, { "epoch": 3.19, "learning_rate": 1.5930776283605585e-05, "loss": 2.4492, "step": 520 }, { "epoch": 3.2, "learning_rate": 1.5914765606665454e-05, "loss": 2.2383, "step": 521 }, { "epoch": 3.2, "learning_rate": 1.5898731575073262e-05, "loss": 2.3281, "step": 522 }, { "epoch": 3.21, "learning_rate": 1.5882674252139928e-05, "loss": 2.4688, "step": 523 }, { "epoch": 3.21, "learning_rate": 1.5866593701268334e-05, "loss": 2.3125, "step": 524 }, { "epoch": 3.22, "learning_rate": 1.5850489985953076e-05, "loss": 2.3672, "step": 525 }, { "epoch": 3.23, "learning_rate": 1.5834363169780227e-05, "loss": 2.4688, "step": 526 }, { "epoch": 3.23, "learning_rate": 1.5818213316427056e-05, "loss": 2.375, "step": 527 }, { "epoch": 3.24, "learning_rate": 1.5802040489661817e-05, "loss": 2.418, "step": 528 }, { "epoch": 3.25, "learning_rate": 1.578584475334345e-05, "loss": 2.3867, "step": 529 }, { "epoch": 3.25, "learning_rate": 1.5769626171421376e-05, "loss": 2.2852, "step": 530 }, { "epoch": 3.26, "learning_rate": 1.5753384807935214e-05, "loss": 2.5234, "step": 531 }, { "epoch": 3.26, "learning_rate": 1.5737120727014535e-05, "loss": 2.3828, "step": 532 }, { "epoch": 3.27, "learning_rate": 1.572083399287861e-05, "loss": 2.4023, "step": 533 }, { "epoch": 3.28, "learning_rate": 1.570452466983617e-05, "loss": 2.4961, "step": 534 }, { "epoch": 3.28, "learning_rate": 1.5688192822285116e-05, "loss": 2.5234, "step": 535 }, { "epoch": 3.29, "learning_rate": 1.567183851471231e-05, "loss": 2.418, "step": 536 }, { "epoch": 3.29, "learning_rate": 1.565546181169328e-05, "loss": 2.3555, "step": 537 }, { "epoch": 3.3, "learning_rate": 1.5639062777892e-05, "loss": 2.4883, "step": 538 }, { "epoch": 3.31, "learning_rate": 1.5622641478060602e-05, "loss": 2.5586, "step": 539 }, { "epoch": 3.31, "learning_rate": 1.5606197977039154e-05, "loss": 2.3359, "step": 540 }, { "epoch": 3.32, "learning_rate": 1.5589732339755362e-05, "loss": 2.3398, "step": 541 }, { "epoch": 3.33, "learning_rate": 1.5573244631224364e-05, "loss": 2.2969, "step": 542 }, { "epoch": 3.33, "learning_rate": 1.5556734916548432e-05, "loss": 2.375, "step": 543 }, { "epoch": 3.34, "learning_rate": 1.5540203260916728e-05, "loss": 2.3398, "step": 544 }, { "epoch": 3.34, "learning_rate": 1.552364972960506e-05, "loss": 2.3516, "step": 545 }, { "epoch": 3.35, "learning_rate": 1.5507074387975603e-05, "loss": 2.4805, "step": 546 }, { "epoch": 3.36, "learning_rate": 1.5490477301476648e-05, "loss": 2.4766, "step": 547 }, { "epoch": 3.36, "learning_rate": 1.5473858535642365e-05, "loss": 2.4062, "step": 548 }, { "epoch": 3.37, "learning_rate": 1.5457218156092503e-05, "loss": 2.4727, "step": 549 }, { "epoch": 3.37, "learning_rate": 1.5440556228532168e-05, "loss": 2.3672, "step": 550 }, { "epoch": 3.38, "learning_rate": 1.5423872818751544e-05, "loss": 2.5195, "step": 551 }, { "epoch": 3.39, "learning_rate": 1.5407167992625636e-05, "loss": 2.418, "step": 552 }, { "epoch": 3.39, "learning_rate": 1.5390441816114022e-05, "loss": 2.3828, "step": 553 }, { "epoch": 3.4, "learning_rate": 1.5373694355260565e-05, "loss": 2.4336, "step": 554 }, { "epoch": 3.4, "learning_rate": 1.5356925676193192e-05, "loss": 2.3086, "step": 555 }, { "epoch": 3.41, "learning_rate": 1.534013584512359e-05, "loss": 2.25, "step": 556 }, { "epoch": 3.42, "learning_rate": 1.5323324928346984e-05, "loss": 2.3242, "step": 557 }, { "epoch": 3.42, "learning_rate": 1.5306492992241836e-05, "loss": 2.4023, "step": 558 }, { "epoch": 3.43, "learning_rate": 1.5289640103269626e-05, "loss": 2.4531, "step": 559 }, { "epoch": 3.44, "learning_rate": 1.527276632797455e-05, "loss": 2.3945, "step": 560 }, { "epoch": 3.44, "learning_rate": 1.5255871732983284e-05, "loss": 2.4258, "step": 561 }, { "epoch": 3.45, "learning_rate": 1.5238956385004703e-05, "loss": 2.4766, "step": 562 }, { "epoch": 3.45, "learning_rate": 1.5222020350829636e-05, "loss": 2.4141, "step": 563 }, { "epoch": 3.46, "learning_rate": 1.5205063697330582e-05, "loss": 2.3359, "step": 564 }, { "epoch": 3.47, "learning_rate": 1.5188086491461467e-05, "loss": 2.3047, "step": 565 }, { "epoch": 3.47, "learning_rate": 1.5171088800257354e-05, "loss": 2.5508, "step": 566 }, { "epoch": 3.48, "learning_rate": 1.5154070690834211e-05, "loss": 2.0957, "step": 567 }, { "epoch": 3.48, "learning_rate": 1.5137032230388613e-05, "loss": 2.4102, "step": 568 }, { "epoch": 3.49, "learning_rate": 1.5119973486197497e-05, "loss": 2.5352, "step": 569 }, { "epoch": 3.5, "learning_rate": 1.5102894525617892e-05, "loss": 2.25, "step": 570 }, { "epoch": 3.5, "learning_rate": 1.5085795416086655e-05, "loss": 2.3047, "step": 571 }, { "epoch": 3.51, "learning_rate": 1.5068676225120196e-05, "loss": 2.3359, "step": 572 }, { "epoch": 3.52, "learning_rate": 1.5051537020314218e-05, "loss": 2.5508, "step": 573 }, { "epoch": 3.52, "learning_rate": 1.5034377869343453e-05, "loss": 2.6211, "step": 574 }, { "epoch": 3.53, "learning_rate": 1.5017198839961388e-05, "loss": 2.5625, "step": 575 }, { "epoch": 3.53, "learning_rate": 1.5000000000000002e-05, "loss": 2.293, "step": 576 }, { "epoch": 3.54, "learning_rate": 1.4982781417369496e-05, "loss": 2.5078, "step": 577 }, { "epoch": 3.55, "learning_rate": 1.4965543160058028e-05, "loss": 2.3594, "step": 578 }, { "epoch": 3.55, "learning_rate": 1.4948285296131435e-05, "loss": 2.4531, "step": 579 }, { "epoch": 3.56, "learning_rate": 1.4931007893732981e-05, "loss": 2.4961, "step": 580 }, { "epoch": 3.56, "learning_rate": 1.4913711021083071e-05, "loss": 2.3672, "step": 581 }, { "epoch": 3.57, "learning_rate": 1.4896394746478995e-05, "loss": 2.5469, "step": 582 }, { "epoch": 3.58, "learning_rate": 1.4879059138294647e-05, "loss": 2.5703, "step": 583 }, { "epoch": 3.58, "learning_rate": 1.4861704264980264e-05, "loss": 2.5859, "step": 584 }, { "epoch": 3.59, "learning_rate": 1.4844330195062145e-05, "loss": 2.4648, "step": 585 }, { "epoch": 3.6, "learning_rate": 1.4826936997142399e-05, "loss": 2.4883, "step": 586 }, { "epoch": 3.6, "learning_rate": 1.4809524739898651e-05, "loss": 2.2656, "step": 587 }, { "epoch": 3.61, "learning_rate": 1.4792093492083792e-05, "loss": 2.2734, "step": 588 }, { "epoch": 3.61, "learning_rate": 1.4774643322525691e-05, "loss": 2.5156, "step": 589 }, { "epoch": 3.62, "learning_rate": 1.4757174300126935e-05, "loss": 2.6797, "step": 590 }, { "epoch": 3.63, "learning_rate": 1.473968649386455e-05, "loss": 2.3398, "step": 591 }, { "epoch": 3.63, "learning_rate": 1.4722179972789725e-05, "loss": 2.2539, "step": 592 }, { "epoch": 3.64, "learning_rate": 1.4704654806027558e-05, "loss": 2.5781, "step": 593 }, { "epoch": 3.64, "learning_rate": 1.4687111062776758e-05, "loss": 2.5352, "step": 594 }, { "epoch": 3.65, "learning_rate": 1.466954881230939e-05, "loss": 2.5195, "step": 595 }, { "epoch": 3.66, "learning_rate": 1.4651968123970592e-05, "loss": 2.3945, "step": 596 }, { "epoch": 3.66, "learning_rate": 1.4634369067178312e-05, "loss": 2.4922, "step": 597 }, { "epoch": 3.67, "learning_rate": 1.4616751711423016e-05, "loss": 2.4922, "step": 598 }, { "epoch": 3.67, "learning_rate": 1.4599116126267431e-05, "loss": 2.4961, "step": 599 }, { "epoch": 3.68, "learning_rate": 1.4581462381346261e-05, "loss": 2.4922, "step": 600 }, { "epoch": 3.69, "learning_rate": 1.4563790546365914e-05, "loss": 2.5, "step": 601 }, { "epoch": 3.69, "learning_rate": 1.454610069110423e-05, "loss": 2.4219, "step": 602 }, { "epoch": 3.7, "learning_rate": 1.45283928854102e-05, "loss": 2.418, "step": 603 }, { "epoch": 3.71, "learning_rate": 1.4510667199203697e-05, "loss": 2.5488, "step": 604 }, { "epoch": 3.71, "learning_rate": 1.4492923702475183e-05, "loss": 2.5312, "step": 605 }, { "epoch": 3.72, "learning_rate": 1.4475162465285463e-05, "loss": 2.5273, "step": 606 }, { "epoch": 3.72, "learning_rate": 1.4457383557765385e-05, "loss": 2.4141, "step": 607 }, { "epoch": 3.73, "learning_rate": 1.443958705011556e-05, "loss": 2.4453, "step": 608 }, { "epoch": 3.74, "learning_rate": 1.4421773012606104e-05, "loss": 2.293, "step": 609 }, { "epoch": 3.74, "learning_rate": 1.4403941515576344e-05, "loss": 2.4258, "step": 610 }, { "epoch": 3.75, "learning_rate": 1.4386092629434551e-05, "loss": 2.4648, "step": 611 }, { "epoch": 3.75, "learning_rate": 1.4368226424657661e-05, "loss": 2.3438, "step": 612 }, { "epoch": 3.76, "learning_rate": 1.4350342971790979e-05, "loss": 2.2168, "step": 613 }, { "epoch": 3.77, "learning_rate": 1.4332442341447926e-05, "loss": 2.3828, "step": 614 }, { "epoch": 3.77, "learning_rate": 1.4314524604309748e-05, "loss": 2.5117, "step": 615 }, { "epoch": 3.78, "learning_rate": 1.4296589831125234e-05, "loss": 2.4961, "step": 616 }, { "epoch": 3.79, "learning_rate": 1.4278638092710446e-05, "loss": 2.5391, "step": 617 }, { "epoch": 3.79, "learning_rate": 1.4260669459948429e-05, "loss": 2.3828, "step": 618 }, { "epoch": 3.8, "learning_rate": 1.4242684003788934e-05, "loss": 2.4102, "step": 619 }, { "epoch": 3.8, "learning_rate": 1.4224681795248149e-05, "loss": 2.457, "step": 620 }, { "epoch": 3.81, "learning_rate": 1.42066629054084e-05, "loss": 2.5, "step": 621 }, { "epoch": 3.82, "learning_rate": 1.418862740541788e-05, "loss": 2.4102, "step": 622 }, { "epoch": 3.82, "learning_rate": 1.4170575366490376e-05, "loss": 2.1758, "step": 623 }, { "epoch": 3.83, "learning_rate": 1.415250685990497e-05, "loss": 2.6445, "step": 624 }, { "epoch": 3.83, "learning_rate": 1.4134421957005775e-05, "loss": 2.043, "step": 625 }, { "epoch": 3.84, "learning_rate": 1.4116320729201642e-05, "loss": 2.457, "step": 626 }, { "epoch": 3.85, "learning_rate": 1.4098203247965876e-05, "loss": 2.1992, "step": 627 }, { "epoch": 3.85, "learning_rate": 1.4080069584835971e-05, "loss": 2.2891, "step": 628 }, { "epoch": 3.86, "learning_rate": 1.4061919811413305e-05, "loss": 2.2227, "step": 629 }, { "epoch": 3.87, "learning_rate": 1.4043753999362872e-05, "loss": 2.2305, "step": 630 }, { "epoch": 3.87, "learning_rate": 1.4025572220412998e-05, "loss": 2.625, "step": 631 }, { "epoch": 3.88, "learning_rate": 1.400737454635505e-05, "loss": 2.4219, "step": 632 }, { "epoch": 3.88, "learning_rate": 1.398916104904316e-05, "loss": 2.6133, "step": 633 }, { "epoch": 3.89, "learning_rate": 1.3970931800393943e-05, "loss": 2.5625, "step": 634 }, { "epoch": 3.9, "learning_rate": 1.3952686872386195e-05, "loss": 2.4531, "step": 635 }, { "epoch": 3.9, "learning_rate": 1.3934426337060638e-05, "loss": 2.6016, "step": 636 }, { "epoch": 3.91, "learning_rate": 1.391615026651961e-05, "loss": 2.3789, "step": 637 }, { "epoch": 3.91, "learning_rate": 1.3897858732926794e-05, "loss": 2.3281, "step": 638 }, { "epoch": 3.92, "learning_rate": 1.3879551808506932e-05, "loss": 2.2031, "step": 639 }, { "epoch": 3.93, "learning_rate": 1.3861229565545532e-05, "loss": 2.5352, "step": 640 }, { "epoch": 3.93, "learning_rate": 1.384289207638859e-05, "loss": 2.3008, "step": 641 }, { "epoch": 3.94, "learning_rate": 1.3824539413442304e-05, "loss": 2.5352, "step": 642 }, { "epoch": 3.94, "learning_rate": 1.3806171649172782e-05, "loss": 2.4922, "step": 643 }, { "epoch": 3.95, "learning_rate": 1.3787788856105762e-05, "loss": 2.3945, "step": 644 }, { "epoch": 3.96, "learning_rate": 1.3769391106826326e-05, "loss": 2.6016, "step": 645 }, { "epoch": 3.96, "learning_rate": 1.3750978473978611e-05, "loss": 2.4375, "step": 646 }, { "epoch": 3.97, "learning_rate": 1.3732551030265514e-05, "loss": 2.5195, "step": 647 }, { "epoch": 3.98, "learning_rate": 1.371410884844843e-05, "loss": 2.5391, "step": 648 }, { "epoch": 3.98, "learning_rate": 1.3695652001346928e-05, "loss": 2.4102, "step": 649 }, { "epoch": 3.99, "learning_rate": 1.3677180561838501e-05, "loss": 2.4727, "step": 650 }, { "epoch": 3.99, "learning_rate": 1.3658694602858247e-05, "loss": 2.6055, "step": 651 }, { "epoch": 4.0, "learning_rate": 1.36401941973986e-05, "loss": 2.2852, "step": 652 }, { "epoch": 4.01, "learning_rate": 1.362167941850904e-05, "loss": 1.9121, "step": 653 }, { "epoch": 4.01, "learning_rate": 1.3603150339295797e-05, "loss": 2.0977, "step": 654 }, { "epoch": 4.02, "learning_rate": 1.3584607032921566e-05, "loss": 1.9668, "step": 655 }, { "epoch": 4.02, "learning_rate": 1.3566049572605222e-05, "loss": 1.8398, "step": 656 }, { "epoch": 4.03, "learning_rate": 1.3547478031621517e-05, "loss": 1.7559, "step": 657 }, { "epoch": 4.04, "learning_rate": 1.3528892483300821e-05, "loss": 2.0586, "step": 658 }, { "epoch": 4.04, "learning_rate": 1.3510293001028792e-05, "loss": 1.8984, "step": 659 }, { "epoch": 4.05, "learning_rate": 1.3491679658246114e-05, "loss": 1.6895, "step": 660 }, { "epoch": 4.06, "learning_rate": 1.3473052528448203e-05, "loss": 1.7812, "step": 661 }, { "epoch": 4.06, "learning_rate": 1.3454411685184913e-05, "loss": 1.7539, "step": 662 }, { "epoch": 4.07, "learning_rate": 1.3435757202060242e-05, "loss": 1.9492, "step": 663 }, { "epoch": 4.07, "learning_rate": 1.3417089152732049e-05, "loss": 1.7031, "step": 664 }, { "epoch": 4.08, "learning_rate": 1.3398407610911752e-05, "loss": 1.791, "step": 665 }, { "epoch": 4.09, "learning_rate": 1.3379712650364061e-05, "loss": 1.8066, "step": 666 }, { "epoch": 4.09, "learning_rate": 1.3361004344906652e-05, "loss": 1.6992, "step": 667 }, { "epoch": 4.1, "learning_rate": 1.3342282768409904e-05, "loss": 1.8965, "step": 668 }, { "epoch": 4.1, "learning_rate": 1.3323547994796597e-05, "loss": 1.7832, "step": 669 }, { "epoch": 4.11, "learning_rate": 1.330480009804162e-05, "loss": 1.8633, "step": 670 }, { "epoch": 4.12, "learning_rate": 1.3286039152171667e-05, "loss": 1.6055, "step": 671 }, { "epoch": 4.12, "learning_rate": 1.3267265231264982e-05, "loss": 1.8164, "step": 672 }, { "epoch": 4.13, "learning_rate": 1.3248478409451017e-05, "loss": 1.9805, "step": 673 }, { "epoch": 4.13, "learning_rate": 1.3229678760910174e-05, "loss": 1.666, "step": 674 }, { "epoch": 4.14, "learning_rate": 1.3210866359873506e-05, "loss": 1.8867, "step": 675 }, { "epoch": 4.15, "learning_rate": 1.3192041280622409e-05, "loss": 1.9473, "step": 676 }, { "epoch": 4.15, "learning_rate": 1.3173203597488348e-05, "loss": 1.9375, "step": 677 }, { "epoch": 4.16, "learning_rate": 1.3154353384852559e-05, "loss": 1.8145, "step": 678 }, { "epoch": 4.17, "learning_rate": 1.3135490717145726e-05, "loss": 1.7539, "step": 679 }, { "epoch": 4.17, "learning_rate": 1.3116615668847749e-05, "loss": 1.7734, "step": 680 }, { "epoch": 4.18, "learning_rate": 1.3097728314487385e-05, "loss": 1.7656, "step": 681 }, { "epoch": 4.18, "learning_rate": 1.3078828728641994e-05, "loss": 1.8672, "step": 682 }, { "epoch": 4.19, "learning_rate": 1.305991698593723e-05, "loss": 1.7656, "step": 683 }, { "epoch": 4.2, "learning_rate": 1.3040993161046749e-05, "loss": 1.8789, "step": 684 }, { "epoch": 4.2, "learning_rate": 1.3022057328691915e-05, "loss": 1.627, "step": 685 }, { "epoch": 4.21, "learning_rate": 1.3003109563641499e-05, "loss": 1.7695, "step": 686 }, { "epoch": 4.21, "learning_rate": 1.298414994071139e-05, "loss": 1.709, "step": 687 }, { "epoch": 4.22, "learning_rate": 1.2965178534764311e-05, "loss": 1.7383, "step": 688 }, { "epoch": 4.23, "learning_rate": 1.294619542070949e-05, "loss": 1.6523, "step": 689 }, { "epoch": 4.23, "learning_rate": 1.2927200673502399e-05, "loss": 1.8145, "step": 690 }, { "epoch": 4.24, "learning_rate": 1.2908194368144437e-05, "loss": 1.7949, "step": 691 }, { "epoch": 4.25, "learning_rate": 1.288917657968265e-05, "loss": 1.7422, "step": 692 }, { "epoch": 4.25, "learning_rate": 1.287014738320941e-05, "loss": 1.9102, "step": 693 }, { "epoch": 4.26, "learning_rate": 1.285110685386215e-05, "loss": 1.6523, "step": 694 }, { "epoch": 4.26, "learning_rate": 1.283205506682304e-05, "loss": 1.5938, "step": 695 }, { "epoch": 4.27, "learning_rate": 1.2812992097318711e-05, "loss": 1.6797, "step": 696 }, { "epoch": 4.28, "learning_rate": 1.2793918020619937e-05, "loss": 1.8164, "step": 697 }, { "epoch": 4.28, "learning_rate": 1.2774832912041356e-05, "loss": 1.6328, "step": 698 }, { "epoch": 4.29, "learning_rate": 1.2755736846941167e-05, "loss": 1.9219, "step": 699 }, { "epoch": 4.29, "learning_rate": 1.2736629900720832e-05, "loss": 1.8496, "step": 700 }, { "epoch": 4.3, "learning_rate": 1.2717512148824764e-05, "loss": 1.7031, "step": 701 }, { "epoch": 4.31, "learning_rate": 1.2698383666740064e-05, "loss": 1.7266, "step": 702 }, { "epoch": 4.31, "learning_rate": 1.2679244529996182e-05, "loss": 1.9102, "step": 703 }, { "epoch": 4.32, "learning_rate": 1.2660094814164653e-05, "loss": 1.6855, "step": 704 }, { "epoch": 4.33, "learning_rate": 1.2640934594858773e-05, "loss": 1.6641, "step": 705 }, { "epoch": 4.33, "learning_rate": 1.262176394773332e-05, "loss": 1.8672, "step": 706 }, { "epoch": 4.34, "learning_rate": 1.2602582948484243e-05, "loss": 1.7383, "step": 707 }, { "epoch": 4.34, "learning_rate": 1.2583391672848361e-05, "loss": 2.0586, "step": 708 }, { "epoch": 4.35, "learning_rate": 1.256419019660308e-05, "loss": 1.8281, "step": 709 }, { "epoch": 4.36, "learning_rate": 1.2544978595566078e-05, "loss": 1.7207, "step": 710 }, { "epoch": 4.36, "learning_rate": 1.2525756945595006e-05, "loss": 1.6328, "step": 711 }, { "epoch": 4.37, "learning_rate": 1.2506525322587207e-05, "loss": 1.8379, "step": 712 }, { "epoch": 4.37, "learning_rate": 1.2487283802479389e-05, "loss": 1.8828, "step": 713 }, { "epoch": 4.38, "learning_rate": 1.246803246124735e-05, "loss": 1.916, "step": 714 }, { "epoch": 4.39, "learning_rate": 1.2448771374905655e-05, "loss": 1.7852, "step": 715 }, { "epoch": 4.39, "learning_rate": 1.2429500619507362e-05, "loss": 2.0391, "step": 716 }, { "epoch": 4.4, "learning_rate": 1.2410220271143693e-05, "loss": 1.7422, "step": 717 }, { "epoch": 4.4, "learning_rate": 1.2390930405943766e-05, "loss": 1.8672, "step": 718 }, { "epoch": 4.41, "learning_rate": 1.237163110007426e-05, "loss": 1.8457, "step": 719 }, { "epoch": 4.42, "learning_rate": 1.2352322429739134e-05, "loss": 1.7402, "step": 720 }, { "epoch": 4.42, "learning_rate": 1.233300447117933e-05, "loss": 1.6465, "step": 721 }, { "epoch": 4.43, "learning_rate": 1.2313677300672463e-05, "loss": 1.6777, "step": 722 }, { "epoch": 4.44, "learning_rate": 1.2294340994532511e-05, "loss": 1.7656, "step": 723 }, { "epoch": 4.44, "learning_rate": 1.2274995629109545e-05, "loss": 1.8066, "step": 724 }, { "epoch": 4.45, "learning_rate": 1.2255641280789385e-05, "loss": 1.8809, "step": 725 }, { "epoch": 4.45, "learning_rate": 1.2236278025993334e-05, "loss": 1.8223, "step": 726 }, { "epoch": 4.46, "learning_rate": 1.2216905941177854e-05, "loss": 1.7656, "step": 727 }, { "epoch": 4.47, "learning_rate": 1.2197525102834284e-05, "loss": 1.8066, "step": 728 }, { "epoch": 4.47, "learning_rate": 1.2178135587488515e-05, "loss": 1.7207, "step": 729 }, { "epoch": 4.48, "learning_rate": 1.215873747170071e-05, "loss": 1.8535, "step": 730 }, { "epoch": 4.48, "learning_rate": 1.2139330832064975e-05, "loss": 1.7949, "step": 731 }, { "epoch": 4.49, "learning_rate": 1.2119915745209092e-05, "loss": 1.8926, "step": 732 }, { "epoch": 4.5, "learning_rate": 1.2100492287794186e-05, "loss": 1.6777, "step": 733 }, { "epoch": 4.5, "learning_rate": 1.2081060536514432e-05, "loss": 1.7773, "step": 734 }, { "epoch": 4.51, "learning_rate": 1.206162056809676e-05, "loss": 1.6699, "step": 735 }, { "epoch": 4.52, "learning_rate": 1.2042172459300546e-05, "loss": 1.709, "step": 736 }, { "epoch": 4.52, "learning_rate": 1.2022716286917298e-05, "loss": 1.8887, "step": 737 }, { "epoch": 4.53, "learning_rate": 1.2003252127770378e-05, "loss": 1.9219, "step": 738 }, { "epoch": 4.53, "learning_rate": 1.198378005871467e-05, "loss": 1.8535, "step": 739 }, { "epoch": 4.54, "learning_rate": 1.1964300156636304e-05, "loss": 1.7051, "step": 740 }, { "epoch": 4.55, "learning_rate": 1.1944812498452329e-05, "loss": 1.7578, "step": 741 }, { "epoch": 4.55, "learning_rate": 1.192531716111042e-05, "loss": 1.8203, "step": 742 }, { "epoch": 4.56, "learning_rate": 1.1905814221588581e-05, "loss": 1.6016, "step": 743 }, { "epoch": 4.56, "learning_rate": 1.1886303756894828e-05, "loss": 1.543, "step": 744 }, { "epoch": 4.57, "learning_rate": 1.1866785844066884e-05, "loss": 1.8145, "step": 745 }, { "epoch": 4.58, "learning_rate": 1.1847260560171895e-05, "loss": 1.6719, "step": 746 }, { "epoch": 4.58, "learning_rate": 1.18277279823061e-05, "loss": 1.6953, "step": 747 }, { "epoch": 4.59, "learning_rate": 1.1808188187594549e-05, "loss": 1.6406, "step": 748 }, { "epoch": 4.6, "learning_rate": 1.1788641253190779e-05, "loss": 1.7246, "step": 749 }, { "epoch": 4.6, "learning_rate": 1.176908725627652e-05, "loss": 1.6992, "step": 750 }, { "epoch": 4.61, "learning_rate": 1.1749526274061394e-05, "loss": 1.916, "step": 751 }, { "epoch": 4.61, "learning_rate": 1.1729958383782598e-05, "loss": 1.6543, "step": 752 }, { "epoch": 4.62, "learning_rate": 1.1710383662704608e-05, "loss": 1.707, "step": 753 }, { "epoch": 4.63, "learning_rate": 1.1690802188118878e-05, "loss": 1.6953, "step": 754 }, { "epoch": 4.63, "learning_rate": 1.1671214037343515e-05, "loss": 1.6875, "step": 755 }, { "epoch": 4.64, "learning_rate": 1.1651619287723e-05, "loss": 1.7969, "step": 756 }, { "epoch": 4.64, "learning_rate": 1.1632018016627859e-05, "loss": 1.7461, "step": 757 }, { "epoch": 4.65, "learning_rate": 1.1612410301454384e-05, "loss": 1.8887, "step": 758 }, { "epoch": 4.66, "learning_rate": 1.1592796219624292e-05, "loss": 1.9414, "step": 759 }, { "epoch": 4.66, "learning_rate": 1.1573175848584455e-05, "loss": 1.8711, "step": 760 }, { "epoch": 4.67, "learning_rate": 1.1553549265806567e-05, "loss": 1.7246, "step": 761 }, { "epoch": 4.67, "learning_rate": 1.1533916548786856e-05, "loss": 1.8496, "step": 762 }, { "epoch": 4.68, "learning_rate": 1.1514277775045768e-05, "loss": 1.918, "step": 763 }, { "epoch": 4.69, "learning_rate": 1.1494633022127669e-05, "loss": 1.8574, "step": 764 }, { "epoch": 4.69, "learning_rate": 1.1474982367600524e-05, "loss": 1.668, "step": 765 }, { "epoch": 4.7, "learning_rate": 1.1455325889055616e-05, "loss": 1.7031, "step": 766 }, { "epoch": 4.71, "learning_rate": 1.1435663664107204e-05, "loss": 1.7754, "step": 767 }, { "epoch": 4.71, "learning_rate": 1.141599577039226e-05, "loss": 1.7129, "step": 768 }, { "epoch": 4.72, "learning_rate": 1.1396322285570119e-05, "loss": 1.6582, "step": 769 }, { "epoch": 4.72, "learning_rate": 1.1376643287322202e-05, "loss": 1.8672, "step": 770 }, { "epoch": 4.73, "learning_rate": 1.1356958853351705e-05, "loss": 1.8867, "step": 771 }, { "epoch": 4.74, "learning_rate": 1.1337269061383278e-05, "loss": 1.8359, "step": 772 }, { "epoch": 4.74, "learning_rate": 1.1317573989162727e-05, "loss": 1.8535, "step": 773 }, { "epoch": 4.75, "learning_rate": 1.129787371445672e-05, "loss": 1.7793, "step": 774 }, { "epoch": 4.75, "learning_rate": 1.1278168315052445e-05, "loss": 1.834, "step": 775 }, { "epoch": 4.76, "learning_rate": 1.1258457868757352e-05, "loss": 1.8906, "step": 776 }, { "epoch": 4.77, "learning_rate": 1.1238742453398794e-05, "loss": 1.9512, "step": 777 }, { "epoch": 4.77, "learning_rate": 1.1219022146823762e-05, "loss": 1.8047, "step": 778 }, { "epoch": 4.78, "learning_rate": 1.1199297026898547e-05, "loss": 1.627, "step": 779 }, { "epoch": 4.79, "learning_rate": 1.1179567171508463e-05, "loss": 1.8242, "step": 780 }, { "epoch": 4.79, "learning_rate": 1.1159832658557498e-05, "loss": 1.7129, "step": 781 }, { "epoch": 4.8, "learning_rate": 1.1140093565968055e-05, "loss": 1.7012, "step": 782 }, { "epoch": 4.8, "learning_rate": 1.1120349971680605e-05, "loss": 1.8145, "step": 783 }, { "epoch": 4.81, "learning_rate": 1.1100601953653393e-05, "loss": 1.6426, "step": 784 }, { "epoch": 4.82, "learning_rate": 1.1080849589862142e-05, "loss": 1.8574, "step": 785 }, { "epoch": 4.82, "learning_rate": 1.1061092958299727e-05, "loss": 1.752, "step": 786 }, { "epoch": 4.83, "learning_rate": 1.1041332136975874e-05, "loss": 1.9531, "step": 787 }, { "epoch": 4.83, "learning_rate": 1.1021567203916861e-05, "loss": 1.7676, "step": 788 }, { "epoch": 4.84, "learning_rate": 1.1001798237165185e-05, "loss": 1.7656, "step": 789 }, { "epoch": 4.85, "learning_rate": 1.0982025314779287e-05, "loss": 1.9512, "step": 790 }, { "epoch": 4.85, "learning_rate": 1.0962248514833218e-05, "loss": 1.791, "step": 791 }, { "epoch": 4.86, "learning_rate": 1.0942467915416342e-05, "loss": 1.8398, "step": 792 }, { "epoch": 4.87, "learning_rate": 1.092268359463302e-05, "loss": 1.6797, "step": 793 }, { "epoch": 4.87, "learning_rate": 1.090289563060232e-05, "loss": 1.7871, "step": 794 }, { "epoch": 4.88, "learning_rate": 1.088310410145768e-05, "loss": 1.6738, "step": 795 }, { "epoch": 4.88, "learning_rate": 1.086330908534663e-05, "loss": 1.8711, "step": 796 }, { "epoch": 4.89, "learning_rate": 1.0843510660430447e-05, "loss": 1.752, "step": 797 }, { "epoch": 4.9, "learning_rate": 1.0823708904883898e-05, "loss": 1.9297, "step": 798 }, { "epoch": 4.9, "learning_rate": 1.0803903896894877e-05, "loss": 1.9141, "step": 799 }, { "epoch": 4.91, "learning_rate": 1.0784095714664124e-05, "loss": 1.7188, "step": 800 }, { "epoch": 4.91, "learning_rate": 1.0764284436404924e-05, "loss": 1.7441, "step": 801 }, { "epoch": 4.92, "learning_rate": 1.0744470140342775e-05, "loss": 1.7266, "step": 802 }, { "epoch": 4.93, "learning_rate": 1.0724652904715091e-05, "loss": 1.832, "step": 803 }, { "epoch": 4.93, "learning_rate": 1.0704832807770909e-05, "loss": 1.6152, "step": 804 }, { "epoch": 4.94, "learning_rate": 1.0685009927770542e-05, "loss": 1.8281, "step": 805 }, { "epoch": 4.94, "learning_rate": 1.0665184342985306e-05, "loss": 1.7812, "step": 806 }, { "epoch": 4.95, "learning_rate": 1.064535613169719e-05, "loss": 1.875, "step": 807 }, { "epoch": 4.96, "learning_rate": 1.0625525372198564e-05, "loss": 1.748, "step": 808 }, { "epoch": 4.96, "learning_rate": 1.0605692142791846e-05, "loss": 1.7148, "step": 809 }, { "epoch": 4.97, "learning_rate": 1.0585856521789215e-05, "loss": 1.7715, "step": 810 }, { "epoch": 4.98, "learning_rate": 1.056601858751229e-05, "loss": 1.7676, "step": 811 }, { "epoch": 4.98, "learning_rate": 1.0546178418291833e-05, "loss": 1.7852, "step": 812 }, { "epoch": 4.99, "learning_rate": 1.0526336092467414e-05, "loss": 1.9141, "step": 813 }, { "epoch": 4.99, "learning_rate": 1.0506491688387128e-05, "loss": 1.6602, "step": 814 }, { "epoch": 5.0, "learning_rate": 1.0486645284407282e-05, "loss": 1.75, "step": 815 }, { "epoch": 5.01, "learning_rate": 1.0466796958892071e-05, "loss": 1.5469, "step": 816 }, { "epoch": 5.01, "learning_rate": 1.0446946790213275e-05, "loss": 1.2852, "step": 817 }, { "epoch": 5.02, "learning_rate": 1.0427094856749966e-05, "loss": 1.3926, "step": 818 }, { "epoch": 5.02, "learning_rate": 1.0407241236888164e-05, "loss": 1.293, "step": 819 }, { "epoch": 5.03, "learning_rate": 1.0387386009020569e-05, "loss": 1.2559, "step": 820 }, { "epoch": 5.04, "learning_rate": 1.0367529251546208e-05, "loss": 1.3379, "step": 821 }, { "epoch": 5.04, "learning_rate": 1.034767104287017e-05, "loss": 1.3047, "step": 822 }, { "epoch": 5.05, "learning_rate": 1.032781146140326e-05, "loss": 1.3105, "step": 823 }, { "epoch": 5.06, "learning_rate": 1.0307950585561705e-05, "loss": 1.3203, "step": 824 }, { "epoch": 5.06, "learning_rate": 1.0288088493766846e-05, "loss": 1.2461, "step": 825 }, { "epoch": 5.07, "learning_rate": 1.0268225264444829e-05, "loss": 1.3281, "step": 826 }, { "epoch": 5.07, "learning_rate": 1.0248360976026279e-05, "loss": 1.1758, "step": 827 }, { "epoch": 5.08, "learning_rate": 1.0228495706946015e-05, "loss": 1.1465, "step": 828 }, { "epoch": 5.09, "learning_rate": 1.0208629535642726e-05, "loss": 1.1836, "step": 829 }, { "epoch": 5.09, "learning_rate": 1.0188762540558657e-05, "loss": 1.1504, "step": 830 }, { "epoch": 5.1, "learning_rate": 1.0168894800139311e-05, "loss": 1.1641, "step": 831 }, { "epoch": 5.1, "learning_rate": 1.0149026392833137e-05, "loss": 1.1504, "step": 832 }, { "epoch": 5.11, "learning_rate": 1.0129157397091208e-05, "loss": 1.2832, "step": 833 }, { "epoch": 5.12, "learning_rate": 1.010928789136693e-05, "loss": 1.25, "step": 834 }, { "epoch": 5.12, "learning_rate": 1.0089417954115715e-05, "loss": 1.2207, "step": 835 }, { "epoch": 5.13, "learning_rate": 1.0069547663794682e-05, "loss": 1.1855, "step": 836 }, { "epoch": 5.13, "learning_rate": 1.0049677098862347e-05, "loss": 1.1289, "step": 837 }, { "epoch": 5.14, "learning_rate": 1.002980633777831e-05, "loss": 1.1562, "step": 838 }, { "epoch": 5.15, "learning_rate": 1.0009935459002935e-05, "loss": 1.3242, "step": 839 }, { "epoch": 5.15, "learning_rate": 9.990064540997066e-06, "loss": 1.3105, "step": 840 }, { "epoch": 5.16, "learning_rate": 9.970193662221694e-06, "loss": 1.3145, "step": 841 }, { "epoch": 5.17, "learning_rate": 9.950322901137655e-06, "loss": 1.2441, "step": 842 }, { "epoch": 5.17, "learning_rate": 9.93045233620532e-06, "loss": 1.3262, "step": 843 }, { "epoch": 5.18, "learning_rate": 9.910582045884292e-06, "loss": 1.2656, "step": 844 }, { "epoch": 5.18, "learning_rate": 9.890712108633076e-06, "loss": 1.3633, "step": 845 }, { "epoch": 5.19, "learning_rate": 9.870842602908794e-06, "loss": 1.2734, "step": 846 }, { "epoch": 5.2, "learning_rate": 9.850973607166865e-06, "loss": 1.2656, "step": 847 }, { "epoch": 5.2, "learning_rate": 9.83110519986069e-06, "loss": 1.2949, "step": 848 }, { "epoch": 5.21, "learning_rate": 9.811237459441346e-06, "loss": 1.2227, "step": 849 }, { "epoch": 5.21, "learning_rate": 9.791370464357279e-06, "loss": 1.2793, "step": 850 }, { "epoch": 5.22, "learning_rate": 9.771504293053985e-06, "loss": 1.3633, "step": 851 }, { "epoch": 5.23, "learning_rate": 9.751639023973724e-06, "loss": 1.207, "step": 852 }, { "epoch": 5.23, "learning_rate": 9.731774735555174e-06, "loss": 1.252, "step": 853 }, { "epoch": 5.24, "learning_rate": 9.711911506233157e-06, "loss": 1.1992, "step": 854 }, { "epoch": 5.25, "learning_rate": 9.692049414438298e-06, "loss": 1.3516, "step": 855 }, { "epoch": 5.25, "learning_rate": 9.672188538596746e-06, "loss": 1.3574, "step": 856 }, { "epoch": 5.26, "learning_rate": 9.652328957129831e-06, "loss": 1.4062, "step": 857 }, { "epoch": 5.26, "learning_rate": 9.632470748453794e-06, "loss": 1.3223, "step": 858 }, { "epoch": 5.27, "learning_rate": 9.612613990979436e-06, "loss": 1.2207, "step": 859 }, { "epoch": 5.28, "learning_rate": 9.59275876311184e-06, "loss": 1.2441, "step": 860 }, { "epoch": 5.28, "learning_rate": 9.572905143250039e-06, "loss": 1.0586, "step": 861 }, { "epoch": 5.29, "learning_rate": 9.553053209786725e-06, "loss": 1.2148, "step": 862 }, { "epoch": 5.29, "learning_rate": 9.53320304110793e-06, "loss": 1.2402, "step": 863 }, { "epoch": 5.3, "learning_rate": 9.513354715592721e-06, "loss": 1.1338, "step": 864 }, { "epoch": 5.31, "learning_rate": 9.493508311612874e-06, "loss": 1.332, "step": 865 }, { "epoch": 5.31, "learning_rate": 9.473663907532593e-06, "loss": 1.2715, "step": 866 }, { "epoch": 5.32, "learning_rate": 9.453821581708174e-06, "loss": 1.2793, "step": 867 }, { "epoch": 5.33, "learning_rate": 9.433981412487711e-06, "loss": 1.2969, "step": 868 }, { "epoch": 5.33, "learning_rate": 9.414143478210786e-06, "loss": 1.1074, "step": 869 }, { "epoch": 5.34, "learning_rate": 9.394307857208158e-06, "loss": 1.1924, "step": 870 }, { "epoch": 5.34, "learning_rate": 9.374474627801439e-06, "loss": 1.2188, "step": 871 }, { "epoch": 5.35, "learning_rate": 9.354643868302813e-06, "loss": 1.2246, "step": 872 }, { "epoch": 5.36, "learning_rate": 9.334815657014696e-06, "loss": 1.2109, "step": 873 }, { "epoch": 5.36, "learning_rate": 9.314990072229461e-06, "loss": 1.2832, "step": 874 }, { "epoch": 5.37, "learning_rate": 9.295167192229093e-06, "loss": 1.2666, "step": 875 }, { "epoch": 5.37, "learning_rate": 9.27534709528491e-06, "loss": 1.3066, "step": 876 }, { "epoch": 5.38, "learning_rate": 9.25552985965723e-06, "loss": 1.5352, "step": 877 }, { "epoch": 5.39, "learning_rate": 9.235715563595082e-06, "loss": 1.2305, "step": 878 }, { "epoch": 5.39, "learning_rate": 9.215904285335876e-06, "loss": 1.1113, "step": 879 }, { "epoch": 5.4, "learning_rate": 9.196096103105127e-06, "loss": 1.2285, "step": 880 }, { "epoch": 5.4, "learning_rate": 9.176291095116104e-06, "loss": 1.2871, "step": 881 }, { "epoch": 5.41, "learning_rate": 9.156489339569555e-06, "loss": 1.2539, "step": 882 }, { "epoch": 5.42, "learning_rate": 9.136690914653377e-06, "loss": 1.2666, "step": 883 }, { "epoch": 5.42, "learning_rate": 9.11689589854232e-06, "loss": 1.2539, "step": 884 }, { "epoch": 5.43, "learning_rate": 9.097104369397681e-06, "loss": 1.1562, "step": 885 }, { "epoch": 5.44, "learning_rate": 9.07731640536698e-06, "loss": 1.2148, "step": 886 }, { "epoch": 5.44, "learning_rate": 9.057532084583662e-06, "loss": 1.3848, "step": 887 }, { "epoch": 5.45, "learning_rate": 9.037751485166785e-06, "loss": 1.2832, "step": 888 }, { "epoch": 5.45, "learning_rate": 9.017974685220716e-06, "loss": 1.2832, "step": 889 }, { "epoch": 5.46, "learning_rate": 8.998201762834815e-06, "loss": 1.3906, "step": 890 }, { "epoch": 5.47, "learning_rate": 8.97843279608314e-06, "loss": 1.2539, "step": 891 }, { "epoch": 5.47, "learning_rate": 8.958667863024127e-06, "loss": 1.168, "step": 892 }, { "epoch": 5.48, "learning_rate": 8.938907041700275e-06, "loss": 1.3086, "step": 893 }, { "epoch": 5.48, "learning_rate": 8.919150410137862e-06, "loss": 1.2656, "step": 894 }, { "epoch": 5.49, "learning_rate": 8.899398046346608e-06, "loss": 1.209, "step": 895 }, { "epoch": 5.5, "learning_rate": 8.8796500283194e-06, "loss": 1.2852, "step": 896 }, { "epoch": 5.5, "learning_rate": 8.859906434031947e-06, "loss": 1.1504, "step": 897 }, { "epoch": 5.51, "learning_rate": 8.840167341442505e-06, "loss": 1.0957, "step": 898 }, { "epoch": 5.52, "learning_rate": 8.820432828491542e-06, "loss": 1.2148, "step": 899 }, { "epoch": 5.52, "learning_rate": 8.800702973101454e-06, "loss": 1.2832, "step": 900 }, { "epoch": 5.53, "learning_rate": 8.78097785317624e-06, "loss": 1.252, "step": 901 }, { "epoch": 5.53, "learning_rate": 8.761257546601209e-06, "loss": 1.3633, "step": 902 }, { "epoch": 5.54, "learning_rate": 8.741542131242652e-06, "loss": 1.2246, "step": 903 }, { "epoch": 5.55, "learning_rate": 8.721831684947557e-06, "loss": 1.2148, "step": 904 }, { "epoch": 5.55, "learning_rate": 8.702126285543286e-06, "loss": 1.127, "step": 905 }, { "epoch": 5.56, "learning_rate": 8.682426010837274e-06, "loss": 1.25, "step": 906 }, { "epoch": 5.56, "learning_rate": 8.662730938616724e-06, "loss": 1.2031, "step": 907 }, { "epoch": 5.57, "learning_rate": 8.643041146648299e-06, "loss": 1.2246, "step": 908 }, { "epoch": 5.58, "learning_rate": 8.6233567126778e-06, "loss": 1.3438, "step": 909 }, { "epoch": 5.58, "learning_rate": 8.603677714429888e-06, "loss": 1.2852, "step": 910 }, { "epoch": 5.59, "learning_rate": 8.584004229607747e-06, "loss": 1.418, "step": 911 }, { "epoch": 5.6, "learning_rate": 8.564336335892798e-06, "loss": 1.3105, "step": 912 }, { "epoch": 5.6, "learning_rate": 8.54467411094439e-06, "loss": 1.2422, "step": 913 }, { "epoch": 5.61, "learning_rate": 8.52501763239948e-06, "loss": 1.2373, "step": 914 }, { "epoch": 5.61, "learning_rate": 8.505366977872336e-06, "loss": 1.2637, "step": 915 }, { "epoch": 5.62, "learning_rate": 8.485722224954237e-06, "loss": 1.3906, "step": 916 }, { "epoch": 5.63, "learning_rate": 8.466083451213145e-06, "loss": 1.1748, "step": 917 }, { "epoch": 5.63, "learning_rate": 8.446450734193437e-06, "loss": 1.2949, "step": 918 }, { "epoch": 5.64, "learning_rate": 8.426824151415548e-06, "loss": 1.125, "step": 919 }, { "epoch": 5.64, "learning_rate": 8.407203780375711e-06, "loss": 1.2539, "step": 920 }, { "epoch": 5.65, "learning_rate": 8.38758969854562e-06, "loss": 1.2305, "step": 921 }, { "epoch": 5.66, "learning_rate": 8.367981983372143e-06, "loss": 1.1523, "step": 922 }, { "epoch": 5.66, "learning_rate": 8.348380712277002e-06, "loss": 1.2285, "step": 923 }, { "epoch": 5.67, "learning_rate": 8.32878596265649e-06, "loss": 1.3281, "step": 924 }, { "epoch": 5.67, "learning_rate": 8.309197811881128e-06, "loss": 1.3379, "step": 925 }, { "epoch": 5.68, "learning_rate": 8.289616337295396e-06, "loss": 1.2891, "step": 926 }, { "epoch": 5.69, "learning_rate": 8.270041616217407e-06, "loss": 1.2441, "step": 927 }, { "epoch": 5.69, "learning_rate": 8.250473725938608e-06, "loss": 1.3652, "step": 928 }, { "epoch": 5.7, "learning_rate": 8.23091274372348e-06, "loss": 1.1523, "step": 929 }, { "epoch": 5.71, "learning_rate": 8.211358746809225e-06, "loss": 1.2637, "step": 930 }, { "epoch": 5.71, "learning_rate": 8.191811812405453e-06, "loss": 1.3184, "step": 931 }, { "epoch": 5.72, "learning_rate": 8.172272017693903e-06, "loss": 1.2676, "step": 932 }, { "epoch": 5.72, "learning_rate": 8.15273943982811e-06, "loss": 1.1836, "step": 933 }, { "epoch": 5.73, "learning_rate": 8.133214155933118e-06, "loss": 1.1533, "step": 934 }, { "epoch": 5.74, "learning_rate": 8.113696243105175e-06, "loss": 1.1562, "step": 935 }, { "epoch": 5.74, "learning_rate": 8.09418577841142e-06, "loss": 1.3008, "step": 936 }, { "epoch": 5.75, "learning_rate": 8.074682838889581e-06, "loss": 1.3379, "step": 937 }, { "epoch": 5.75, "learning_rate": 8.055187501547674e-06, "loss": 1.2012, "step": 938 }, { "epoch": 5.76, "learning_rate": 8.035699843363696e-06, "loss": 1.1484, "step": 939 }, { "epoch": 5.77, "learning_rate": 8.01621994128533e-06, "loss": 1.293, "step": 940 }, { "epoch": 5.77, "learning_rate": 7.996747872229624e-06, "loss": 1.3223, "step": 941 }, { "epoch": 5.78, "learning_rate": 7.977283713082706e-06, "loss": 1.3105, "step": 942 }, { "epoch": 5.79, "learning_rate": 7.95782754069946e-06, "loss": 1.207, "step": 943 }, { "epoch": 5.79, "learning_rate": 7.938379431903243e-06, "loss": 1.1992, "step": 944 }, { "epoch": 5.8, "learning_rate": 7.91893946348557e-06, "loss": 1.1582, "step": 945 }, { "epoch": 5.8, "learning_rate": 7.899507712205818e-06, "loss": 1.168, "step": 946 }, { "epoch": 5.81, "learning_rate": 7.880084254790911e-06, "loss": 1.3105, "step": 947 }, { "epoch": 5.82, "learning_rate": 7.860669167935028e-06, "loss": 1.2988, "step": 948 }, { "epoch": 5.82, "learning_rate": 7.841262528299296e-06, "loss": 1.1211, "step": 949 }, { "epoch": 5.83, "learning_rate": 7.821864412511485e-06, "loss": 1.2832, "step": 950 }, { "epoch": 5.83, "learning_rate": 7.802474897165716e-06, "loss": 1.0977, "step": 951 }, { "epoch": 5.84, "learning_rate": 7.783094058822147e-06, "loss": 1.0918, "step": 952 }, { "epoch": 5.85, "learning_rate": 7.76372197400667e-06, "loss": 1.2617, "step": 953 }, { "epoch": 5.85, "learning_rate": 7.74435871921062e-06, "loss": 1.2793, "step": 954 }, { "epoch": 5.86, "learning_rate": 7.72500437089046e-06, "loss": 1.2402, "step": 955 }, { "epoch": 5.87, "learning_rate": 7.705659005467489e-06, "loss": 1.2344, "step": 956 }, { "epoch": 5.87, "learning_rate": 7.68632269932754e-06, "loss": 1.2832, "step": 957 }, { "epoch": 5.88, "learning_rate": 7.666995528820673e-06, "loss": 1.2402, "step": 958 }, { "epoch": 5.88, "learning_rate": 7.647677570260868e-06, "loss": 1.3262, "step": 959 }, { "epoch": 5.89, "learning_rate": 7.628368899925744e-06, "loss": 1.2695, "step": 960 }, { "epoch": 5.9, "learning_rate": 7.609069594056234e-06, "loss": 1.2031, "step": 961 }, { "epoch": 5.9, "learning_rate": 7.589779728856307e-06, "loss": 1.1484, "step": 962 }, { "epoch": 5.91, "learning_rate": 7.570499380492641e-06, "loss": 1.3203, "step": 963 }, { "epoch": 5.91, "learning_rate": 7.551228625094349e-06, "loss": 1.2754, "step": 964 }, { "epoch": 5.92, "learning_rate": 7.5319675387526555e-06, "loss": 1.2559, "step": 965 }, { "epoch": 5.93, "learning_rate": 7.512716197520614e-06, "loss": 1.209, "step": 966 }, { "epoch": 5.93, "learning_rate": 7.493474677412795e-06, "loss": 1.1875, "step": 967 }, { "epoch": 5.94, "learning_rate": 7.4742430544049945e-06, "loss": 1.2168, "step": 968 }, { "epoch": 5.94, "learning_rate": 7.4550214044339256e-06, "loss": 1.209, "step": 969 }, { "epoch": 5.95, "learning_rate": 7.435809803396923e-06, "loss": 1.25, "step": 970 }, { "epoch": 5.96, "learning_rate": 7.416608327151642e-06, "loss": 1.1211, "step": 971 }, { "epoch": 5.96, "learning_rate": 7.397417051515758e-06, "loss": 1.1113, "step": 972 }, { "epoch": 5.97, "learning_rate": 7.37823605226668e-06, "loss": 1.2422, "step": 973 }, { "epoch": 5.98, "learning_rate": 7.359065405141228e-06, "loss": 1.2363, "step": 974 }, { "epoch": 5.98, "learning_rate": 7.33990518583535e-06, "loss": 1.1338, "step": 975 }, { "epoch": 5.99, "learning_rate": 7.320755470003822e-06, "loss": 1.0918, "step": 976 }, { "epoch": 5.99, "learning_rate": 7.301616333259942e-06, "loss": 1.3027, "step": 977 }, { "epoch": 6.0, "learning_rate": 7.282487851175237e-06, "loss": 1.0625, "step": 978 }, { "epoch": 6.01, "learning_rate": 7.263370099279173e-06, "loss": 0.792, "step": 979 }, { "epoch": 6.01, "learning_rate": 7.244263153058835e-06, "loss": 0.9102, "step": 980 }, { "epoch": 6.02, "learning_rate": 7.225167087958647e-06, "loss": 0.832, "step": 981 }, { "epoch": 6.02, "learning_rate": 7.2060819793800665e-06, "loss": 0.8662, "step": 982 }, { "epoch": 6.03, "learning_rate": 7.187007902681289e-06, "loss": 0.8164, "step": 983 }, { "epoch": 6.04, "learning_rate": 7.16794493317696e-06, "loss": 0.8496, "step": 984 }, { "epoch": 6.04, "learning_rate": 7.148893146137852e-06, "loss": 0.9854, "step": 985 }, { "epoch": 6.05, "learning_rate": 7.129852616790594e-06, "loss": 0.8486, "step": 986 }, { "epoch": 6.06, "learning_rate": 7.110823420317356e-06, "loss": 0.8359, "step": 987 }, { "epoch": 6.06, "learning_rate": 7.091805631855566e-06, "loss": 0.7695, "step": 988 }, { "epoch": 6.07, "learning_rate": 7.072799326497603e-06, "loss": 0.8828, "step": 989 }, { "epoch": 6.07, "learning_rate": 7.053804579290513e-06, "loss": 0.9307, "step": 990 }, { "epoch": 6.08, "learning_rate": 7.034821465235693e-06, "loss": 0.7568, "step": 991 }, { "epoch": 6.09, "learning_rate": 7.0158500592886115e-06, "loss": 0.8779, "step": 992 }, { "epoch": 6.09, "learning_rate": 6.996890436358505e-06, "loss": 0.9648, "step": 993 }, { "epoch": 6.1, "learning_rate": 6.977942671308087e-06, "loss": 0.7734, "step": 994 }, { "epoch": 6.1, "learning_rate": 6.95900683895325e-06, "loss": 0.8066, "step": 995 }, { "epoch": 6.11, "learning_rate": 6.9400830140627705e-06, "loss": 0.9189, "step": 996 }, { "epoch": 6.12, "learning_rate": 6.921171271358007e-06, "loss": 0.8271, "step": 997 }, { "epoch": 6.12, "learning_rate": 6.902271685512616e-06, "loss": 0.9258, "step": 998 }, { "epoch": 6.13, "learning_rate": 6.883384331152254e-06, "loss": 0.9004, "step": 999 }, { "epoch": 6.13, "learning_rate": 6.864509282854272e-06, "loss": 0.8652, "step": 1000 }, { "epoch": 6.14, "learning_rate": 6.845646615147445e-06, "loss": 0.8779, "step": 1001 }, { "epoch": 6.15, "learning_rate": 6.826796402511653e-06, "loss": 0.8105, "step": 1002 }, { "epoch": 6.15, "learning_rate": 6.8079587193775935e-06, "loss": 0.9023, "step": 1003 }, { "epoch": 6.16, "learning_rate": 6.789133640126498e-06, "loss": 0.8877, "step": 1004 }, { "epoch": 6.17, "learning_rate": 6.770321239089825e-06, "loss": 0.9209, "step": 1005 }, { "epoch": 6.17, "learning_rate": 6.751521590548986e-06, "loss": 0.8389, "step": 1006 }, { "epoch": 6.18, "learning_rate": 6.732734768735021e-06, "loss": 0.8125, "step": 1007 }, { "epoch": 6.18, "learning_rate": 6.713960847828335e-06, "loss": 0.8408, "step": 1008 }, { "epoch": 6.19, "learning_rate": 6.695199901958386e-06, "loss": 0.9258, "step": 1009 }, { "epoch": 6.2, "learning_rate": 6.6764520052034054e-06, "loss": 0.8213, "step": 1010 }, { "epoch": 6.2, "learning_rate": 6.657717231590095e-06, "loss": 0.8838, "step": 1011 }, { "epoch": 6.21, "learning_rate": 6.638995655093351e-06, "loss": 0.667, "step": 1012 }, { "epoch": 6.21, "learning_rate": 6.620287349635942e-06, "loss": 0.9072, "step": 1013 }, { "epoch": 6.22, "learning_rate": 6.601592389088251e-06, "loss": 0.8184, "step": 1014 }, { "epoch": 6.23, "learning_rate": 6.582910847267957e-06, "loss": 0.9688, "step": 1015 }, { "epoch": 6.23, "learning_rate": 6.564242797939759e-06, "loss": 0.7861, "step": 1016 }, { "epoch": 6.24, "learning_rate": 6.545588314815088e-06, "loss": 0.9268, "step": 1017 }, { "epoch": 6.25, "learning_rate": 6.526947471551799e-06, "loss": 0.7949, "step": 1018 }, { "epoch": 6.25, "learning_rate": 6.508320341753889e-06, "loss": 0.8994, "step": 1019 }, { "epoch": 6.26, "learning_rate": 6.489706998971212e-06, "loss": 0.8193, "step": 1020 }, { "epoch": 6.26, "learning_rate": 6.471107516699183e-06, "loss": 0.877, "step": 1021 }, { "epoch": 6.27, "learning_rate": 6.452521968378482e-06, "loss": 0.8525, "step": 1022 }, { "epoch": 6.28, "learning_rate": 6.4339504273947805e-06, "loss": 0.8115, "step": 1023 }, { "epoch": 6.28, "learning_rate": 6.415392967078438e-06, "loss": 0.8262, "step": 1024 }, { "epoch": 6.29, "learning_rate": 6.396849660704205e-06, "loss": 0.9258, "step": 1025 }, { "epoch": 6.29, "learning_rate": 6.378320581490962e-06, "loss": 0.873, "step": 1026 }, { "epoch": 6.3, "learning_rate": 6.3598058026013995e-06, "loss": 0.9082, "step": 1027 }, { "epoch": 6.31, "learning_rate": 6.3413053971417575e-06, "loss": 0.9756, "step": 1028 }, { "epoch": 6.31, "learning_rate": 6.322819438161502e-06, "loss": 0.7363, "step": 1029 }, { "epoch": 6.32, "learning_rate": 6.304347998653074e-06, "loss": 0.835, "step": 1030 }, { "epoch": 6.33, "learning_rate": 6.285891151551573e-06, "loss": 0.8457, "step": 1031 }, { "epoch": 6.33, "learning_rate": 6.267448969734486e-06, "loss": 0.833, "step": 1032 }, { "epoch": 6.34, "learning_rate": 6.24902152602139e-06, "loss": 0.7949, "step": 1033 }, { "epoch": 6.34, "learning_rate": 6.2306088931736766e-06, "loss": 0.9092, "step": 1034 }, { "epoch": 6.35, "learning_rate": 6.21221114389424e-06, "loss": 0.8643, "step": 1035 }, { "epoch": 6.36, "learning_rate": 6.193828350827222e-06, "loss": 0.8809, "step": 1036 }, { "epoch": 6.36, "learning_rate": 6.175460586557701e-06, "loss": 0.8662, "step": 1037 }, { "epoch": 6.37, "learning_rate": 6.157107923611412e-06, "loss": 0.8682, "step": 1038 }, { "epoch": 6.37, "learning_rate": 6.1387704344544684e-06, "loss": 0.8701, "step": 1039 }, { "epoch": 6.38, "learning_rate": 6.120448191493071e-06, "loss": 0.791, "step": 1040 }, { "epoch": 6.39, "learning_rate": 6.102141267073207e-06, "loss": 0.8857, "step": 1041 }, { "epoch": 6.39, "learning_rate": 6.083849733480394e-06, "loss": 0.8623, "step": 1042 }, { "epoch": 6.4, "learning_rate": 6.065573662939367e-06, "loss": 0.8105, "step": 1043 }, { "epoch": 6.4, "learning_rate": 6.047313127613808e-06, "loss": 0.9443, "step": 1044 }, { "epoch": 6.41, "learning_rate": 6.0290681996060605e-06, "loss": 0.7783, "step": 1045 }, { "epoch": 6.42, "learning_rate": 6.010838950956841e-06, "loss": 0.8701, "step": 1046 }, { "epoch": 6.42, "learning_rate": 5.992625453644953e-06, "loss": 0.8672, "step": 1047 }, { "epoch": 6.43, "learning_rate": 5.974427779587004e-06, "loss": 0.8262, "step": 1048 }, { "epoch": 6.44, "learning_rate": 5.9562460006371295e-06, "loss": 0.8818, "step": 1049 }, { "epoch": 6.44, "learning_rate": 5.938080188586699e-06, "loss": 0.7998, "step": 1050 }, { "epoch": 6.45, "learning_rate": 5.919930415164033e-06, "loss": 0.7217, "step": 1051 }, { "epoch": 6.45, "learning_rate": 5.901796752034128e-06, "loss": 0.8486, "step": 1052 }, { "epoch": 6.46, "learning_rate": 5.883679270798363e-06, "loss": 0.7949, "step": 1053 }, { "epoch": 6.47, "learning_rate": 5.865578042994227e-06, "loss": 0.9209, "step": 1054 }, { "epoch": 6.47, "learning_rate": 5.84749314009503e-06, "loss": 0.8779, "step": 1055 }, { "epoch": 6.48, "learning_rate": 5.829424633509627e-06, "loss": 0.9678, "step": 1056 }, { "epoch": 6.48, "learning_rate": 5.8113725945821245e-06, "loss": 0.7764, "step": 1057 }, { "epoch": 6.49, "learning_rate": 5.7933370945916036e-06, "loss": 0.8252, "step": 1058 }, { "epoch": 6.5, "learning_rate": 5.775318204751854e-06, "loss": 0.8438, "step": 1059 }, { "epoch": 6.5, "learning_rate": 5.757315996211066e-06, "loss": 0.7744, "step": 1060 }, { "epoch": 6.51, "learning_rate": 5.7393305400515755e-06, "loss": 0.8027, "step": 1061 }, { "epoch": 6.52, "learning_rate": 5.721361907289556e-06, "loss": 0.834, "step": 1062 }, { "epoch": 6.52, "learning_rate": 5.703410168874768e-06, "loss": 0.8496, "step": 1063 }, { "epoch": 6.53, "learning_rate": 5.685475395690259e-06, "loss": 1.0342, "step": 1064 }, { "epoch": 6.53, "learning_rate": 5.667557658552078e-06, "loss": 0.8789, "step": 1065 }, { "epoch": 6.54, "learning_rate": 5.649657028209024e-06, "loss": 0.7568, "step": 1066 }, { "epoch": 6.55, "learning_rate": 5.631773575342343e-06, "loss": 0.791, "step": 1067 }, { "epoch": 6.55, "learning_rate": 5.61390737056545e-06, "loss": 0.9238, "step": 1068 }, { "epoch": 6.56, "learning_rate": 5.5960584844236565e-06, "loss": 0.7002, "step": 1069 }, { "epoch": 6.56, "learning_rate": 5.5782269873939e-06, "loss": 0.8096, "step": 1070 }, { "epoch": 6.57, "learning_rate": 5.560412949884442e-06, "loss": 0.8545, "step": 1071 }, { "epoch": 6.58, "learning_rate": 5.542616442234618e-06, "loss": 0.8203, "step": 1072 }, { "epoch": 6.58, "learning_rate": 5.52483753471454e-06, "loss": 0.8271, "step": 1073 }, { "epoch": 6.59, "learning_rate": 5.507076297524818e-06, "loss": 0.8428, "step": 1074 }, { "epoch": 6.6, "learning_rate": 5.48933280079631e-06, "loss": 0.8076, "step": 1075 }, { "epoch": 6.6, "learning_rate": 5.471607114589806e-06, "loss": 0.8057, "step": 1076 }, { "epoch": 6.61, "learning_rate": 5.453899308895774e-06, "loss": 0.7715, "step": 1077 }, { "epoch": 6.61, "learning_rate": 5.436209453634087e-06, "loss": 0.7207, "step": 1078 }, { "epoch": 6.62, "learning_rate": 5.418537618653743e-06, "loss": 0.7812, "step": 1079 }, { "epoch": 6.63, "learning_rate": 5.400883873732574e-06, "loss": 0.8213, "step": 1080 }, { "epoch": 6.63, "learning_rate": 5.3832482885769855e-06, "loss": 0.7451, "step": 1081 }, { "epoch": 6.64, "learning_rate": 5.365630932821688e-06, "loss": 0.835, "step": 1082 }, { "epoch": 6.64, "learning_rate": 5.3480318760294084e-06, "loss": 0.8604, "step": 1083 }, { "epoch": 6.65, "learning_rate": 5.330451187690614e-06, "loss": 0.9072, "step": 1084 }, { "epoch": 6.66, "learning_rate": 5.3128889372232436e-06, "loss": 0.8721, "step": 1085 }, { "epoch": 6.66, "learning_rate": 5.295345193972445e-06, "loss": 0.8779, "step": 1086 }, { "epoch": 6.67, "learning_rate": 5.277820027210279e-06, "loss": 0.8916, "step": 1087 }, { "epoch": 6.67, "learning_rate": 5.260313506135452e-06, "loss": 0.8721, "step": 1088 }, { "epoch": 6.68, "learning_rate": 5.242825699873068e-06, "loss": 0.8613, "step": 1089 }, { "epoch": 6.69, "learning_rate": 5.225356677474309e-06, "loss": 0.8379, "step": 1090 }, { "epoch": 6.69, "learning_rate": 5.2079065079162115e-06, "loss": 0.708, "step": 1091 }, { "epoch": 6.7, "learning_rate": 5.190475260101353e-06, "loss": 0.873, "step": 1092 }, { "epoch": 6.71, "learning_rate": 5.1730630028576055e-06, "loss": 0.7119, "step": 1093 }, { "epoch": 6.71, "learning_rate": 5.155669804937855e-06, "loss": 0.8848, "step": 1094 }, { "epoch": 6.72, "learning_rate": 5.138295735019741e-06, "loss": 0.8633, "step": 1095 }, { "epoch": 6.72, "learning_rate": 5.120940861705357e-06, "loss": 0.8203, "step": 1096 }, { "epoch": 6.73, "learning_rate": 5.103605253521007e-06, "loss": 0.8398, "step": 1097 }, { "epoch": 6.74, "learning_rate": 5.086288978916931e-06, "loss": 0.9297, "step": 1098 }, { "epoch": 6.74, "learning_rate": 5.068992106267021e-06, "loss": 0.71, "step": 1099 }, { "epoch": 6.75, "learning_rate": 5.051714703868569e-06, "loss": 0.7275, "step": 1100 }, { "epoch": 6.75, "learning_rate": 5.034456839941979e-06, "loss": 0.8164, "step": 1101 }, { "epoch": 6.76, "learning_rate": 5.017218582630507e-06, "loss": 0.7363, "step": 1102 }, { "epoch": 6.77, "learning_rate": 5.000000000000003e-06, "loss": 0.9561, "step": 1103 }, { "epoch": 6.77, "learning_rate": 4.982801160038614e-06, "loss": 0.834, "step": 1104 }, { "epoch": 6.78, "learning_rate": 4.965622130656551e-06, "loss": 0.8418, "step": 1105 }, { "epoch": 6.79, "learning_rate": 4.948462979685783e-06, "loss": 0.8418, "step": 1106 }, { "epoch": 6.79, "learning_rate": 4.931323774879807e-06, "loss": 0.8584, "step": 1107 }, { "epoch": 6.8, "learning_rate": 4.914204583913349e-06, "loss": 0.8105, "step": 1108 }, { "epoch": 6.8, "learning_rate": 4.897105474382109e-06, "loss": 0.9131, "step": 1109 }, { "epoch": 6.81, "learning_rate": 4.880026513802504e-06, "loss": 0.791, "step": 1110 }, { "epoch": 6.82, "learning_rate": 4.862967769611389e-06, "loss": 0.8828, "step": 1111 }, { "epoch": 6.82, "learning_rate": 4.845929309165793e-06, "loss": 0.8291, "step": 1112 }, { "epoch": 6.83, "learning_rate": 4.828911199742646e-06, "loss": 0.8252, "step": 1113 }, { "epoch": 6.83, "learning_rate": 4.8119135085385375e-06, "loss": 0.7529, "step": 1114 }, { "epoch": 6.84, "learning_rate": 4.794936302669417e-06, "loss": 0.8613, "step": 1115 }, { "epoch": 6.85, "learning_rate": 4.777979649170367e-06, "loss": 0.7803, "step": 1116 }, { "epoch": 6.85, "learning_rate": 4.7610436149953e-06, "loss": 0.9141, "step": 1117 }, { "epoch": 6.86, "learning_rate": 4.744128267016719e-06, "loss": 0.8291, "step": 1118 }, { "epoch": 6.87, "learning_rate": 4.727233672025453e-06, "loss": 0.7451, "step": 1119 }, { "epoch": 6.87, "learning_rate": 4.710359896730379e-06, "loss": 0.8457, "step": 1120 }, { "epoch": 6.88, "learning_rate": 4.693507007758165e-06, "loss": 0.7646, "step": 1121 }, { "epoch": 6.88, "learning_rate": 4.676675071653019e-06, "loss": 0.8506, "step": 1122 }, { "epoch": 6.89, "learning_rate": 4.659864154876411e-06, "loss": 0.7246, "step": 1123 }, { "epoch": 6.9, "learning_rate": 4.643074323806813e-06, "loss": 0.8555, "step": 1124 }, { "epoch": 6.9, "learning_rate": 4.626305644739435e-06, "loss": 0.8125, "step": 1125 }, { "epoch": 6.91, "learning_rate": 4.609558183885979e-06, "loss": 0.8418, "step": 1126 }, { "epoch": 6.91, "learning_rate": 4.592832007374364e-06, "loss": 0.8271, "step": 1127 }, { "epoch": 6.92, "learning_rate": 4.576127181248459e-06, "loss": 0.7979, "step": 1128 }, { "epoch": 6.93, "learning_rate": 4.559443771467833e-06, "loss": 0.8438, "step": 1129 }, { "epoch": 6.93, "learning_rate": 4.542781843907499e-06, "loss": 0.7432, "step": 1130 }, { "epoch": 6.94, "learning_rate": 4.5261414643576396e-06, "loss": 0.7852, "step": 1131 }, { "epoch": 6.94, "learning_rate": 4.509522698523352e-06, "loss": 0.8125, "step": 1132 }, { "epoch": 6.95, "learning_rate": 4.492925612024402e-06, "loss": 0.7588, "step": 1133 }, { "epoch": 6.96, "learning_rate": 4.476350270394942e-06, "loss": 0.751, "step": 1134 }, { "epoch": 6.96, "learning_rate": 4.4597967390832745e-06, "loss": 0.9287, "step": 1135 }, { "epoch": 6.97, "learning_rate": 4.4432650834515735e-06, "loss": 0.7432, "step": 1136 }, { "epoch": 6.98, "learning_rate": 4.426755368775637e-06, "loss": 0.7783, "step": 1137 }, { "epoch": 6.98, "learning_rate": 4.4102676602446375e-06, "loss": 0.8613, "step": 1138 }, { "epoch": 6.99, "learning_rate": 4.3938020229608506e-06, "loss": 0.8584, "step": 1139 }, { "epoch": 6.99, "learning_rate": 4.377358521939401e-06, "loss": 0.8105, "step": 1140 }, { "epoch": 7.0, "learning_rate": 4.360937222108002e-06, "loss": 0.7871, "step": 1141 }, { "epoch": 7.01, "learning_rate": 4.344538188306723e-06, "loss": 0.5469, "step": 1142 }, { "epoch": 7.01, "learning_rate": 4.328161485287693e-06, "loss": 0.6025, "step": 1143 }, { "epoch": 7.02, "learning_rate": 4.3118071777148865e-06, "loss": 0.5752, "step": 1144 }, { "epoch": 7.02, "learning_rate": 4.295475330163832e-06, "loss": 0.6367, "step": 1145 }, { "epoch": 7.03, "learning_rate": 4.279166007121389e-06, "loss": 0.5527, "step": 1146 }, { "epoch": 7.04, "learning_rate": 4.262879272985468e-06, "loss": 0.5439, "step": 1147 }, { "epoch": 7.04, "learning_rate": 4.246615192064787e-06, "loss": 0.5586, "step": 1148 }, { "epoch": 7.05, "learning_rate": 4.230373828578626e-06, "loss": 0.6318, "step": 1149 }, { "epoch": 7.06, "learning_rate": 4.21415524665655e-06, "loss": 0.6299, "step": 1150 }, { "epoch": 7.06, "learning_rate": 4.197959510338187e-06, "loss": 0.583, "step": 1151 }, { "epoch": 7.07, "learning_rate": 4.181786683572946e-06, "loss": 0.626, "step": 1152 }, { "epoch": 7.07, "learning_rate": 4.165636830219776e-06, "loss": 0.5845, "step": 1153 }, { "epoch": 7.08, "learning_rate": 4.149510014046922e-06, "loss": 0.5723, "step": 1154 }, { "epoch": 7.09, "learning_rate": 4.1334062987316695e-06, "loss": 0.5391, "step": 1155 }, { "epoch": 7.09, "learning_rate": 4.117325747860077e-06, "loss": 0.5967, "step": 1156 }, { "epoch": 7.1, "learning_rate": 4.101268424926741e-06, "loss": 0.6357, "step": 1157 }, { "epoch": 7.1, "learning_rate": 4.085234393334551e-06, "loss": 0.5654, "step": 1158 }, { "epoch": 7.11, "learning_rate": 4.069223716394419e-06, "loss": 0.5889, "step": 1159 }, { "epoch": 7.12, "learning_rate": 4.053236457325043e-06, "loss": 0.5615, "step": 1160 }, { "epoch": 7.12, "learning_rate": 4.0372726792526614e-06, "loss": 0.5459, "step": 1161 }, { "epoch": 7.13, "learning_rate": 4.021332445210785e-06, "loss": 0.6182, "step": 1162 }, { "epoch": 7.13, "learning_rate": 4.005415818139975e-06, "loss": 0.6357, "step": 1163 }, { "epoch": 7.14, "learning_rate": 3.989522860887567e-06, "loss": 0.5, "step": 1164 }, { "epoch": 7.15, "learning_rate": 3.973653636207437e-06, "loss": 0.5625, "step": 1165 }, { "epoch": 7.15, "learning_rate": 3.95780820675976e-06, "loss": 0.6074, "step": 1166 }, { "epoch": 7.16, "learning_rate": 3.941986635110754e-06, "loss": 0.6416, "step": 1167 }, { "epoch": 7.17, "learning_rate": 3.9261889837324245e-06, "loss": 0.5239, "step": 1168 }, { "epoch": 7.17, "learning_rate": 3.910415315002328e-06, "loss": 0.5127, "step": 1169 }, { "epoch": 7.18, "learning_rate": 3.89466569120334e-06, "loss": 0.5771, "step": 1170 }, { "epoch": 7.18, "learning_rate": 3.878940174523371e-06, "loss": 0.6367, "step": 1171 }, { "epoch": 7.19, "learning_rate": 3.8632388270551665e-06, "loss": 0.6191, "step": 1172 }, { "epoch": 7.2, "learning_rate": 3.847561710796019e-06, "loss": 0.5928, "step": 1173 }, { "epoch": 7.2, "learning_rate": 3.8319088876475595e-06, "loss": 0.5742, "step": 1174 }, { "epoch": 7.21, "learning_rate": 3.816280419415487e-06, "loss": 0.6201, "step": 1175 }, { "epoch": 7.21, "learning_rate": 3.8006763678093326e-06, "loss": 0.6885, "step": 1176 }, { "epoch": 7.22, "learning_rate": 3.785096794442229e-06, "loss": 0.5742, "step": 1177 }, { "epoch": 7.23, "learning_rate": 3.7695417608306415e-06, "loss": 0.5352, "step": 1178 }, { "epoch": 7.23, "learning_rate": 3.7540113283941536e-06, "loss": 0.6123, "step": 1179 }, { "epoch": 7.24, "learning_rate": 3.7385055584552e-06, "loss": 0.5605, "step": 1180 }, { "epoch": 7.25, "learning_rate": 3.723024512238833e-06, "loss": 0.541, "step": 1181 }, { "epoch": 7.25, "learning_rate": 3.707568250872493e-06, "loss": 0.6328, "step": 1182 }, { "epoch": 7.26, "learning_rate": 3.6921368353857524e-06, "loss": 0.5498, "step": 1183 }, { "epoch": 7.26, "learning_rate": 3.676730326710074e-06, "loss": 0.5938, "step": 1184 }, { "epoch": 7.27, "learning_rate": 3.6613487856785744e-06, "loss": 0.5742, "step": 1185 }, { "epoch": 7.28, "learning_rate": 3.645992273025797e-06, "loss": 0.5493, "step": 1186 }, { "epoch": 7.28, "learning_rate": 3.630660849387444e-06, "loss": 0.5947, "step": 1187 }, { "epoch": 7.29, "learning_rate": 3.6153545753001663e-06, "loss": 0.5522, "step": 1188 }, { "epoch": 7.29, "learning_rate": 3.6000735112012984e-06, "loss": 0.5967, "step": 1189 }, { "epoch": 7.3, "learning_rate": 3.584817717428647e-06, "loss": 0.6006, "step": 1190 }, { "epoch": 7.31, "learning_rate": 3.569587254220225e-06, "loss": 0.5664, "step": 1191 }, { "epoch": 7.31, "learning_rate": 3.5543821817140313e-06, "loss": 0.5898, "step": 1192 }, { "epoch": 7.32, "learning_rate": 3.5392025599478053e-06, "loss": 0.4985, "step": 1193 }, { "epoch": 7.33, "learning_rate": 3.5240484488588012e-06, "loss": 0.5273, "step": 1194 }, { "epoch": 7.33, "learning_rate": 3.5089199082835436e-06, "loss": 0.627, "step": 1195 }, { "epoch": 7.34, "learning_rate": 3.493816997957582e-06, "loss": 0.5479, "step": 1196 }, { "epoch": 7.34, "learning_rate": 3.478739777515264e-06, "loss": 0.5625, "step": 1197 }, { "epoch": 7.35, "learning_rate": 3.463688306489511e-06, "loss": 0.5649, "step": 1198 }, { "epoch": 7.36, "learning_rate": 3.448662644311567e-06, "loss": 0.6064, "step": 1199 }, { "epoch": 7.36, "learning_rate": 3.433662850310763e-06, "loss": 0.6211, "step": 1200 }, { "epoch": 7.37, "learning_rate": 3.418688983714291e-06, "loss": 0.5337, "step": 1201 }, { "epoch": 7.37, "learning_rate": 3.403741103646977e-06, "loss": 0.6035, "step": 1202 }, { "epoch": 7.38, "learning_rate": 3.3888192691310262e-06, "loss": 0.5508, "step": 1203 }, { "epoch": 7.39, "learning_rate": 3.373923539085805e-06, "loss": 0.5215, "step": 1204 }, { "epoch": 7.39, "learning_rate": 3.3590539723276083e-06, "loss": 0.5239, "step": 1205 }, { "epoch": 7.4, "learning_rate": 3.3442106275694295e-06, "loss": 0.5444, "step": 1206 }, { "epoch": 7.4, "learning_rate": 3.329393563420713e-06, "loss": 0.6401, "step": 1207 }, { "epoch": 7.41, "learning_rate": 3.3146028383871363e-06, "loss": 0.5825, "step": 1208 }, { "epoch": 7.42, "learning_rate": 3.2998385108703766e-06, "loss": 0.5347, "step": 1209 }, { "epoch": 7.42, "learning_rate": 3.285100639167883e-06, "loss": 0.5645, "step": 1210 }, { "epoch": 7.43, "learning_rate": 3.2703892814726436e-06, "loss": 0.5459, "step": 1211 }, { "epoch": 7.44, "learning_rate": 3.2557044958729466e-06, "loss": 0.582, "step": 1212 }, { "epoch": 7.44, "learning_rate": 3.2410463403521653e-06, "loss": 0.6035, "step": 1213 }, { "epoch": 7.45, "learning_rate": 3.2264148727885257e-06, "loss": 0.6094, "step": 1214 }, { "epoch": 7.45, "learning_rate": 3.211810150954867e-06, "loss": 0.5801, "step": 1215 }, { "epoch": 7.46, "learning_rate": 3.1972322325184347e-06, "loss": 0.6016, "step": 1216 }, { "epoch": 7.47, "learning_rate": 3.182681175040625e-06, "loss": 0.5352, "step": 1217 }, { "epoch": 7.47, "learning_rate": 3.1681570359767875e-06, "loss": 0.5757, "step": 1218 }, { "epoch": 7.48, "learning_rate": 3.1536598726759747e-06, "loss": 0.5894, "step": 1219 }, { "epoch": 7.48, "learning_rate": 3.1391897423807204e-06, "loss": 0.4736, "step": 1220 }, { "epoch": 7.49, "learning_rate": 3.1247467022268284e-06, "loss": 0.4985, "step": 1221 }, { "epoch": 7.5, "learning_rate": 3.110330809243134e-06, "loss": 0.5459, "step": 1222 }, { "epoch": 7.5, "learning_rate": 3.095942120351276e-06, "loss": 0.4756, "step": 1223 }, { "epoch": 7.51, "learning_rate": 3.081580692365478e-06, "loss": 0.5908, "step": 1224 }, { "epoch": 7.52, "learning_rate": 3.0672465819923215e-06, "loss": 0.583, "step": 1225 }, { "epoch": 7.52, "learning_rate": 3.052939845830528e-06, "loss": 0.5034, "step": 1226 }, { "epoch": 7.53, "learning_rate": 3.0386605403707347e-06, "loss": 0.4697, "step": 1227 }, { "epoch": 7.53, "learning_rate": 3.0244087219952565e-06, "loss": 0.5146, "step": 1228 }, { "epoch": 7.54, "learning_rate": 3.0101844469778797e-06, "loss": 0.5674, "step": 1229 }, { "epoch": 7.55, "learning_rate": 2.9959877714836406e-06, "loss": 0.542, "step": 1230 }, { "epoch": 7.55, "learning_rate": 2.981818751568586e-06, "loss": 0.5669, "step": 1231 }, { "epoch": 7.56, "learning_rate": 2.9676774431795752e-06, "loss": 0.5244, "step": 1232 }, { "epoch": 7.56, "learning_rate": 2.95356390215404e-06, "loss": 0.5679, "step": 1233 }, { "epoch": 7.57, "learning_rate": 2.939478184219777e-06, "loss": 0.4868, "step": 1234 }, { "epoch": 7.58, "learning_rate": 2.9254203449947196e-06, "loss": 0.5498, "step": 1235 }, { "epoch": 7.58, "learning_rate": 2.9113904399867188e-06, "loss": 0.6143, "step": 1236 }, { "epoch": 7.59, "learning_rate": 2.8973885245933287e-06, "loss": 0.6279, "step": 1237 }, { "epoch": 7.6, "learning_rate": 2.8834146541015874e-06, "loss": 0.5552, "step": 1238 }, { "epoch": 7.6, "learning_rate": 2.869468883687798e-06, "loss": 0.5186, "step": 1239 }, { "epoch": 7.61, "learning_rate": 2.855551268417305e-06, "loss": 0.5244, "step": 1240 }, { "epoch": 7.61, "learning_rate": 2.8416618632442785e-06, "loss": 0.5884, "step": 1241 }, { "epoch": 7.62, "learning_rate": 2.827800723011508e-06, "loss": 0.6289, "step": 1242 }, { "epoch": 7.63, "learning_rate": 2.813967902450179e-06, "loss": 0.5732, "step": 1243 }, { "epoch": 7.63, "learning_rate": 2.8001634561796463e-06, "loss": 0.5527, "step": 1244 }, { "epoch": 7.64, "learning_rate": 2.786387438707231e-06, "loss": 0.5835, "step": 1245 }, { "epoch": 7.64, "learning_rate": 2.7726399044280107e-06, "loss": 0.5557, "step": 1246 }, { "epoch": 7.65, "learning_rate": 2.758920907624585e-06, "loss": 0.5322, "step": 1247 }, { "epoch": 7.66, "learning_rate": 2.7452305024668747e-06, "loss": 0.54, "step": 1248 }, { "epoch": 7.66, "learning_rate": 2.7315687430119097e-06, "loss": 0.6719, "step": 1249 }, { "epoch": 7.67, "learning_rate": 2.7179356832036142e-06, "loss": 0.6846, "step": 1250 }, { "epoch": 7.67, "learning_rate": 2.704331376872581e-06, "loss": 0.5723, "step": 1251 }, { "epoch": 7.68, "learning_rate": 2.6907558777358756e-06, "loss": 0.5562, "step": 1252 }, { "epoch": 7.69, "learning_rate": 2.677209239396811e-06, "loss": 0.5967, "step": 1253 }, { "epoch": 7.69, "learning_rate": 2.6636915153447494e-06, "loss": 0.4829, "step": 1254 }, { "epoch": 7.7, "learning_rate": 2.650202758954886e-06, "loss": 0.6201, "step": 1255 }, { "epoch": 7.71, "learning_rate": 2.6367430234880286e-06, "loss": 0.4766, "step": 1256 }, { "epoch": 7.71, "learning_rate": 2.6233123620903946e-06, "loss": 0.583, "step": 1257 }, { "epoch": 7.72, "learning_rate": 2.6099108277934105e-06, "loss": 0.5054, "step": 1258 }, { "epoch": 7.72, "learning_rate": 2.5965384735134825e-06, "loss": 0.5459, "step": 1259 }, { "epoch": 7.73, "learning_rate": 2.583195352051808e-06, "loss": 0.5312, "step": 1260 }, { "epoch": 7.74, "learning_rate": 2.5698815160941494e-06, "loss": 0.584, "step": 1261 }, { "epoch": 7.74, "learning_rate": 2.5565970182106425e-06, "loss": 0.5928, "step": 1262 }, { "epoch": 7.75, "learning_rate": 2.5433419108555758e-06, "loss": 0.5205, "step": 1263 }, { "epoch": 7.75, "learning_rate": 2.5301162463671845e-06, "loss": 0.5303, "step": 1264 }, { "epoch": 7.76, "learning_rate": 2.516920076967455e-06, "loss": 0.5615, "step": 1265 }, { "epoch": 7.77, "learning_rate": 2.5037534547619125e-06, "loss": 0.6182, "step": 1266 }, { "epoch": 7.77, "learning_rate": 2.4906164317394067e-06, "loss": 0.5088, "step": 1267 }, { "epoch": 7.78, "learning_rate": 2.4775090597719163e-06, "loss": 0.5264, "step": 1268 }, { "epoch": 7.79, "learning_rate": 2.4644313906143414e-06, "loss": 0.5195, "step": 1269 }, { "epoch": 7.79, "learning_rate": 2.451383475904304e-06, "loss": 0.5332, "step": 1270 }, { "epoch": 7.8, "learning_rate": 2.438365367161939e-06, "loss": 0.5718, "step": 1271 }, { "epoch": 7.8, "learning_rate": 2.4253771157896856e-06, "loss": 0.5269, "step": 1272 }, { "epoch": 7.81, "learning_rate": 2.4124187730720916e-06, "loss": 0.563, "step": 1273 }, { "epoch": 7.82, "learning_rate": 2.3994903901756163e-06, "loss": 0.5156, "step": 1274 }, { "epoch": 7.82, "learning_rate": 2.3865920181484127e-06, "loss": 0.478, "step": 1275 }, { "epoch": 7.83, "learning_rate": 2.3737237079201437e-06, "loss": 0.5879, "step": 1276 }, { "epoch": 7.83, "learning_rate": 2.3608855103017613e-06, "loss": 0.5972, "step": 1277 }, { "epoch": 7.84, "learning_rate": 2.3480774759853307e-06, "loss": 0.5254, "step": 1278 }, { "epoch": 7.85, "learning_rate": 2.3352996555438036e-06, "loss": 0.5645, "step": 1279 }, { "epoch": 7.85, "learning_rate": 2.3225520994308382e-06, "loss": 0.5957, "step": 1280 }, { "epoch": 7.86, "learning_rate": 2.309834857980583e-06, "loss": 0.5371, "step": 1281 }, { "epoch": 7.87, "learning_rate": 2.297147981407509e-06, "loss": 0.5508, "step": 1282 }, { "epoch": 7.87, "learning_rate": 2.2844915198061714e-06, "loss": 0.4985, "step": 1283 }, { "epoch": 7.88, "learning_rate": 2.2718655231510368e-06, "loss": 0.5928, "step": 1284 }, { "epoch": 7.88, "learning_rate": 2.2592700412962775e-06, "loss": 0.5928, "step": 1285 }, { "epoch": 7.89, "learning_rate": 2.246705123975582e-06, "loss": 0.6377, "step": 1286 }, { "epoch": 7.9, "learning_rate": 2.234170820801954e-06, "loss": 0.5674, "step": 1287 }, { "epoch": 7.9, "learning_rate": 2.2216671812675118e-06, "loss": 0.4785, "step": 1288 }, { "epoch": 7.91, "learning_rate": 2.209194254743295e-06, "loss": 0.5767, "step": 1289 }, { "epoch": 7.91, "learning_rate": 2.196752090479083e-06, "loss": 0.5601, "step": 1290 }, { "epoch": 7.92, "learning_rate": 2.184340737603178e-06, "loss": 0.4595, "step": 1291 }, { "epoch": 7.93, "learning_rate": 2.1719602451222245e-06, "loss": 0.5625, "step": 1292 }, { "epoch": 7.93, "learning_rate": 2.159610661921018e-06, "loss": 0.5679, "step": 1293 }, { "epoch": 7.94, "learning_rate": 2.1472920367623094e-06, "loss": 0.6499, "step": 1294 }, { "epoch": 7.94, "learning_rate": 2.1350044182866025e-06, "loss": 0.4966, "step": 1295 }, { "epoch": 7.95, "learning_rate": 2.1227478550119763e-06, "loss": 0.5933, "step": 1296 }, { "epoch": 7.96, "learning_rate": 2.1105223953338805e-06, "loss": 0.4814, "step": 1297 }, { "epoch": 7.96, "learning_rate": 2.09832808752496e-06, "loss": 0.5088, "step": 1298 }, { "epoch": 7.97, "learning_rate": 2.086164979734856e-06, "loss": 0.5586, "step": 1299 }, { "epoch": 7.98, "learning_rate": 2.0740331199900053e-06, "loss": 0.5396, "step": 1300 }, { "epoch": 7.98, "learning_rate": 2.0619325561934658e-06, "loss": 0.6182, "step": 1301 }, { "epoch": 7.99, "learning_rate": 2.0498633361247278e-06, "loss": 0.5537, "step": 1302 }, { "epoch": 7.99, "learning_rate": 2.0378255074395094e-06, "loss": 0.5107, "step": 1303 }, { "epoch": 8.0, "learning_rate": 2.0258191176695896e-06, "loss": 0.5176, "step": 1304 }, { "epoch": 8.01, "learning_rate": 2.0138442142226e-06, "loss": 0.4658, "step": 1305 }, { "epoch": 8.01, "learning_rate": 2.001900844381857e-06, "loss": 0.3608, "step": 1306 }, { "epoch": 8.02, "learning_rate": 1.9899890553061565e-06, "loss": 0.4785, "step": 1307 }, { "epoch": 8.02, "learning_rate": 1.978108894029598e-06, "loss": 0.4692, "step": 1308 }, { "epoch": 8.03, "learning_rate": 1.9662604074614044e-06, "loss": 0.4463, "step": 1309 }, { "epoch": 8.04, "learning_rate": 1.954443642385727e-06, "loss": 0.4473, "step": 1310 }, { "epoch": 8.04, "learning_rate": 1.9426586454614617e-06, "loss": 0.3853, "step": 1311 }, { "epoch": 8.05, "learning_rate": 1.9309054632220645e-06, "loss": 0.4043, "step": 1312 }, { "epoch": 8.06, "learning_rate": 1.919184142075372e-06, "loss": 0.3589, "step": 1313 }, { "epoch": 8.06, "learning_rate": 1.9074947283034206e-06, "loss": 0.3608, "step": 1314 }, { "epoch": 8.07, "learning_rate": 1.895837268062256e-06, "loss": 0.499, "step": 1315 }, { "epoch": 8.07, "learning_rate": 1.884211807381755e-06, "loss": 0.4058, "step": 1316 }, { "epoch": 8.08, "learning_rate": 1.8726183921654373e-06, "loss": 0.5142, "step": 1317 }, { "epoch": 8.09, "learning_rate": 1.8610570681903018e-06, "loss": 0.3506, "step": 1318 }, { "epoch": 8.09, "learning_rate": 1.8495278811066197e-06, "loss": 0.4849, "step": 1319 }, { "epoch": 8.1, "learning_rate": 1.8380308764377841e-06, "loss": 0.3979, "step": 1320 }, { "epoch": 8.1, "learning_rate": 1.8265660995801004e-06, "loss": 0.375, "step": 1321 }, { "epoch": 8.11, "learning_rate": 1.8151335958026317e-06, "loss": 0.4575, "step": 1322 }, { "epoch": 8.12, "learning_rate": 1.803733410247006e-06, "loss": 0.3691, "step": 1323 }, { "epoch": 8.12, "learning_rate": 1.7923655879272395e-06, "loss": 0.4448, "step": 1324 }, { "epoch": 8.13, "learning_rate": 1.7810301737295588e-06, "loss": 0.4111, "step": 1325 }, { "epoch": 8.13, "learning_rate": 1.76972721241224e-06, "loss": 0.3872, "step": 1326 }, { "epoch": 8.14, "learning_rate": 1.7584567486054039e-06, "loss": 0.4336, "step": 1327 }, { "epoch": 8.15, "learning_rate": 1.7472188268108569e-06, "loss": 0.3569, "step": 1328 }, { "epoch": 8.15, "learning_rate": 1.7360134914019122e-06, "loss": 0.4526, "step": 1329 }, { "epoch": 8.16, "learning_rate": 1.7248407866232175e-06, "loss": 0.4351, "step": 1330 }, { "epoch": 8.17, "learning_rate": 1.7137007565905772e-06, "loss": 0.3394, "step": 1331 }, { "epoch": 8.17, "learning_rate": 1.7025934452907755e-06, "loss": 0.439, "step": 1332 }, { "epoch": 8.18, "learning_rate": 1.6915188965814034e-06, "loss": 0.437, "step": 1333 }, { "epoch": 8.18, "learning_rate": 1.6804771541906972e-06, "loss": 0.3999, "step": 1334 }, { "epoch": 8.19, "learning_rate": 1.6694682617173452e-06, "loss": 0.3999, "step": 1335 }, { "epoch": 8.2, "learning_rate": 1.6584922626303325e-06, "loss": 0.4165, "step": 1336 }, { "epoch": 8.2, "learning_rate": 1.6475492002687632e-06, "loss": 0.4141, "step": 1337 }, { "epoch": 8.21, "learning_rate": 1.6366391178416918e-06, "loss": 0.397, "step": 1338 }, { "epoch": 8.21, "learning_rate": 1.6257620584279454e-06, "loss": 0.3926, "step": 1339 }, { "epoch": 8.22, "learning_rate": 1.6149180649759622e-06, "loss": 0.3926, "step": 1340 }, { "epoch": 8.23, "learning_rate": 1.60410718030361e-06, "loss": 0.436, "step": 1341 }, { "epoch": 8.23, "learning_rate": 1.5933294470980443e-06, "loss": 0.4141, "step": 1342 }, { "epoch": 8.24, "learning_rate": 1.5825849079155032e-06, "loss": 0.4165, "step": 1343 }, { "epoch": 8.25, "learning_rate": 1.5718736051811634e-06, "loss": 0.4912, "step": 1344 }, { "epoch": 8.25, "learning_rate": 1.5611955811889645e-06, "loss": 0.397, "step": 1345 }, { "epoch": 8.26, "learning_rate": 1.5505508781014489e-06, "loss": 0.4297, "step": 1346 }, { "epoch": 8.26, "learning_rate": 1.539939537949583e-06, "loss": 0.4883, "step": 1347 }, { "epoch": 8.27, "learning_rate": 1.5293616026326053e-06, "loss": 0.3496, "step": 1348 }, { "epoch": 8.28, "learning_rate": 1.5188171139178486e-06, "loss": 0.4014, "step": 1349 }, { "epoch": 8.28, "learning_rate": 1.5083061134405874e-06, "loss": 0.3706, "step": 1350 }, { "epoch": 8.29, "learning_rate": 1.4978286427038602e-06, "loss": 0.4463, "step": 1351 }, { "epoch": 8.29, "learning_rate": 1.4873847430783118e-06, "loss": 0.4316, "step": 1352 }, { "epoch": 8.3, "learning_rate": 1.476974455802036e-06, "loss": 0.4258, "step": 1353 }, { "epoch": 8.31, "learning_rate": 1.4665978219804056e-06, "loss": 0.3833, "step": 1354 }, { "epoch": 8.31, "learning_rate": 1.4562548825859092e-06, "loss": 0.3687, "step": 1355 }, { "epoch": 8.32, "learning_rate": 1.4459456784579917e-06, "loss": 0.4141, "step": 1356 }, { "epoch": 8.33, "learning_rate": 1.435670250302892e-06, "loss": 0.4692, "step": 1357 }, { "epoch": 8.33, "learning_rate": 1.425428638693489e-06, "loss": 0.3999, "step": 1358 }, { "epoch": 8.34, "learning_rate": 1.415220884069135e-06, "loss": 0.4443, "step": 1359 }, { "epoch": 8.34, "learning_rate": 1.405047026735491e-06, "loss": 0.3403, "step": 1360 }, { "epoch": 8.35, "learning_rate": 1.394907106864375e-06, "loss": 0.4438, "step": 1361 }, { "epoch": 8.36, "learning_rate": 1.3848011644936077e-06, "loss": 0.3643, "step": 1362 }, { "epoch": 8.36, "learning_rate": 1.3747292395268407e-06, "loss": 0.4121, "step": 1363 }, { "epoch": 8.37, "learning_rate": 1.3646913717334142e-06, "loss": 0.394, "step": 1364 }, { "epoch": 8.37, "learning_rate": 1.3546876007481847e-06, "loss": 0.4102, "step": 1365 }, { "epoch": 8.38, "learning_rate": 1.344717966071385e-06, "loss": 0.3857, "step": 1366 }, { "epoch": 8.39, "learning_rate": 1.3347825070684518e-06, "loss": 0.3726, "step": 1367 }, { "epoch": 8.39, "learning_rate": 1.3248812629698815e-06, "loss": 0.4077, "step": 1368 }, { "epoch": 8.4, "learning_rate": 1.3150142728710669e-06, "loss": 0.4009, "step": 1369 }, { "epoch": 8.4, "learning_rate": 1.3051815757321607e-06, "loss": 0.3789, "step": 1370 }, { "epoch": 8.41, "learning_rate": 1.295383210377895e-06, "loss": 0.3452, "step": 1371 }, { "epoch": 8.42, "learning_rate": 1.2856192154974488e-06, "loss": 0.4043, "step": 1372 }, { "epoch": 8.42, "learning_rate": 1.2758896296442834e-06, "loss": 0.4385, "step": 1373 }, { "epoch": 8.43, "learning_rate": 1.266194491235998e-06, "loss": 0.4263, "step": 1374 }, { "epoch": 8.44, "learning_rate": 1.2565338385541792e-06, "loss": 0.416, "step": 1375 }, { "epoch": 8.44, "learning_rate": 1.2469077097442372e-06, "loss": 0.4087, "step": 1376 }, { "epoch": 8.45, "learning_rate": 1.2373161428152647e-06, "loss": 0.4033, "step": 1377 }, { "epoch": 8.45, "learning_rate": 1.2277591756398933e-06, "loss": 0.3394, "step": 1378 }, { "epoch": 8.46, "learning_rate": 1.2182368459541294e-06, "loss": 0.4214, "step": 1379 }, { "epoch": 8.47, "learning_rate": 1.2087491913572103e-06, "loss": 0.4229, "step": 1380 }, { "epoch": 8.47, "learning_rate": 1.1992962493114645e-06, "loss": 0.3779, "step": 1381 }, { "epoch": 8.48, "learning_rate": 1.1898780571421554e-06, "loss": 0.4639, "step": 1382 }, { "epoch": 8.48, "learning_rate": 1.1804946520373307e-06, "loss": 0.4116, "step": 1383 }, { "epoch": 8.49, "learning_rate": 1.171146071047683e-06, "loss": 0.3823, "step": 1384 }, { "epoch": 8.5, "learning_rate": 1.161832351086396e-06, "loss": 0.4209, "step": 1385 }, { "epoch": 8.5, "learning_rate": 1.1525535289290168e-06, "loss": 0.3936, "step": 1386 }, { "epoch": 8.51, "learning_rate": 1.1433096412132838e-06, "loss": 0.3999, "step": 1387 }, { "epoch": 8.52, "learning_rate": 1.1341007244390023e-06, "loss": 0.437, "step": 1388 }, { "epoch": 8.52, "learning_rate": 1.124926814967887e-06, "loss": 0.3521, "step": 1389 }, { "epoch": 8.53, "learning_rate": 1.1157879490234346e-06, "loss": 0.4141, "step": 1390 }, { "epoch": 8.53, "learning_rate": 1.1066841626907633e-06, "loss": 0.418, "step": 1391 }, { "epoch": 8.54, "learning_rate": 1.097615491916485e-06, "loss": 0.4189, "step": 1392 }, { "epoch": 8.55, "learning_rate": 1.088581972508549e-06, "loss": 0.4517, "step": 1393 }, { "epoch": 8.55, "learning_rate": 1.0795836401361148e-06, "loss": 0.4067, "step": 1394 }, { "epoch": 8.56, "learning_rate": 1.0706205303294025e-06, "loss": 0.375, "step": 1395 }, { "epoch": 8.56, "learning_rate": 1.0616926784795511e-06, "loss": 0.3359, "step": 1396 }, { "epoch": 8.57, "learning_rate": 1.0528001198384862e-06, "loss": 0.4092, "step": 1397 }, { "epoch": 8.58, "learning_rate": 1.043942889518782e-06, "loss": 0.3726, "step": 1398 }, { "epoch": 8.58, "learning_rate": 1.035121022493506e-06, "loss": 0.4136, "step": 1399 }, { "epoch": 8.59, "learning_rate": 1.026334553596101e-06, "loss": 0.3877, "step": 1400 }, { "epoch": 8.6, "learning_rate": 1.0175835175202341e-06, "loss": 0.4268, "step": 1401 }, { "epoch": 8.6, "learning_rate": 1.0088679488196695e-06, "loss": 0.4053, "step": 1402 }, { "epoch": 8.61, "learning_rate": 1.0001878819081268e-06, "loss": 0.3955, "step": 1403 }, { "epoch": 8.61, "learning_rate": 9.91543351059141e-07, "loss": 0.3677, "step": 1404 }, { "epoch": 8.62, "learning_rate": 9.829343904059342e-07, "loss": 0.3691, "step": 1405 }, { "epoch": 8.63, "learning_rate": 9.743610339412801e-07, "loss": 0.4097, "step": 1406 }, { "epoch": 8.63, "learning_rate": 9.658233155173657e-07, "loss": 0.4043, "step": 1407 }, { "epoch": 8.64, "learning_rate": 9.573212688456635e-07, "loss": 0.4346, "step": 1408 }, { "epoch": 8.64, "learning_rate": 9.488549274967873e-07, "loss": 0.3755, "step": 1409 }, { "epoch": 8.65, "learning_rate": 9.404243249003786e-07, "loss": 0.373, "step": 1410 }, { "epoch": 8.66, "learning_rate": 9.320294943449537e-07, "loss": 0.4517, "step": 1411 }, { "epoch": 8.66, "learning_rate": 9.236704689777842e-07, "loss": 0.4087, "step": 1412 }, { "epoch": 8.67, "learning_rate": 9.153472818047627e-07, "loss": 0.4146, "step": 1413 }, { "epoch": 8.67, "learning_rate": 9.070599656902801e-07, "loss": 0.3848, "step": 1414 }, { "epoch": 8.68, "learning_rate": 8.988085533570833e-07, "loss": 0.3652, "step": 1415 }, { "epoch": 8.69, "learning_rate": 8.905930773861527e-07, "loss": 0.3765, "step": 1416 }, { "epoch": 8.69, "learning_rate": 8.824135702165693e-07, "loss": 0.395, "step": 1417 }, { "epoch": 8.7, "learning_rate": 8.74270064145396e-07, "loss": 0.3818, "step": 1418 }, { "epoch": 8.71, "learning_rate": 8.661625913275463e-07, "loss": 0.375, "step": 1419 }, { "epoch": 8.71, "learning_rate": 8.580911837756467e-07, "loss": 0.3896, "step": 1420 }, { "epoch": 8.72, "learning_rate": 8.500558733599206e-07, "loss": 0.3535, "step": 1421 }, { "epoch": 8.72, "learning_rate": 8.420566918080686e-07, "loss": 0.4189, "step": 1422 }, { "epoch": 8.73, "learning_rate": 8.340936707051273e-07, "loss": 0.4199, "step": 1423 }, { "epoch": 8.74, "learning_rate": 8.261668414933521e-07, "loss": 0.4771, "step": 1424 }, { "epoch": 8.74, "learning_rate": 8.182762354720985e-07, "loss": 0.3779, "step": 1425 }, { "epoch": 8.75, "learning_rate": 8.10421883797694e-07, "loss": 0.3979, "step": 1426 }, { "epoch": 8.75, "learning_rate": 8.026038174833085e-07, "loss": 0.4072, "step": 1427 }, { "epoch": 8.76, "learning_rate": 7.948220673988427e-07, "loss": 0.4141, "step": 1428 }, { "epoch": 8.77, "learning_rate": 7.87076664270795e-07, "loss": 0.3457, "step": 1429 }, { "epoch": 8.77, "learning_rate": 7.793676386821602e-07, "loss": 0.395, "step": 1430 }, { "epoch": 8.78, "learning_rate": 7.716950210722818e-07, "loss": 0.4409, "step": 1431 }, { "epoch": 8.79, "learning_rate": 7.6405884173675e-07, "loss": 0.4697, "step": 1432 }, { "epoch": 8.79, "learning_rate": 7.564591308272773e-07, "loss": 0.3926, "step": 1433 }, { "epoch": 8.8, "learning_rate": 7.488959183515809e-07, "loss": 0.3809, "step": 1434 }, { "epoch": 8.8, "learning_rate": 7.413692341732582e-07, "loss": 0.3564, "step": 1435 }, { "epoch": 8.81, "learning_rate": 7.338791080116792e-07, "loss": 0.3618, "step": 1436 }, { "epoch": 8.82, "learning_rate": 7.264255694418576e-07, "loss": 0.4092, "step": 1437 }, { "epoch": 8.82, "learning_rate": 7.190086478943459e-07, "loss": 0.4375, "step": 1438 }, { "epoch": 8.83, "learning_rate": 7.116283726551077e-07, "loss": 0.3667, "step": 1439 }, { "epoch": 8.83, "learning_rate": 7.042847728654078e-07, "loss": 0.3511, "step": 1440 }, { "epoch": 8.84, "learning_rate": 6.969778775217007e-07, "loss": 0.3926, "step": 1441 }, { "epoch": 8.85, "learning_rate": 6.897077154755094e-07, "loss": 0.4565, "step": 1442 }, { "epoch": 8.85, "learning_rate": 6.824743154333157e-07, "loss": 0.3608, "step": 1443 }, { "epoch": 8.86, "learning_rate": 6.752777059564431e-07, "loss": 0.4204, "step": 1444 }, { "epoch": 8.87, "learning_rate": 6.681179154609463e-07, "loss": 0.4058, "step": 1445 }, { "epoch": 8.87, "learning_rate": 6.609949722175013e-07, "loss": 0.3936, "step": 1446 }, { "epoch": 8.88, "learning_rate": 6.539089043512914e-07, "loss": 0.4004, "step": 1447 }, { "epoch": 8.88, "learning_rate": 6.468597398418952e-07, "loss": 0.3545, "step": 1448 }, { "epoch": 8.89, "learning_rate": 6.398475065231746e-07, "loss": 0.3264, "step": 1449 }, { "epoch": 8.9, "learning_rate": 6.328722320831737e-07, "loss": 0.3521, "step": 1450 }, { "epoch": 8.9, "learning_rate": 6.259339440639966e-07, "loss": 0.3779, "step": 1451 }, { "epoch": 8.91, "learning_rate": 6.1903266986171e-07, "loss": 0.397, "step": 1452 }, { "epoch": 8.91, "learning_rate": 6.121684367262271e-07, "loss": 0.4111, "step": 1453 }, { "epoch": 8.92, "learning_rate": 6.053412717612061e-07, "loss": 0.373, "step": 1454 }, { "epoch": 8.93, "learning_rate": 5.985512019239392e-07, "loss": 0.4199, "step": 1455 }, { "epoch": 8.93, "learning_rate": 5.917982540252442e-07, "loss": 0.3833, "step": 1456 }, { "epoch": 8.94, "learning_rate": 5.850824547293655e-07, "loss": 0.3838, "step": 1457 }, { "epoch": 8.94, "learning_rate": 5.784038305538653e-07, "loss": 0.4448, "step": 1458 }, { "epoch": 8.95, "learning_rate": 5.71762407869515e-07, "loss": 0.4224, "step": 1459 }, { "epoch": 8.96, "learning_rate": 5.651582129001987e-07, "loss": 0.3784, "step": 1460 }, { "epoch": 8.96, "learning_rate": 5.585912717228015e-07, "loss": 0.3955, "step": 1461 }, { "epoch": 8.97, "learning_rate": 5.520616102671128e-07, "loss": 0.4287, "step": 1462 }, { "epoch": 8.98, "learning_rate": 5.455692543157243e-07, "loss": 0.4048, "step": 1463 }, { "epoch": 8.98, "learning_rate": 5.391142295039209e-07, "loss": 0.4062, "step": 1464 }, { "epoch": 8.99, "learning_rate": 5.326965613195867e-07, "loss": 0.4785, "step": 1465 }, { "epoch": 8.99, "learning_rate": 5.263162751031025e-07, "loss": 0.4512, "step": 1466 }, { "epoch": 9.0, "learning_rate": 5.199733960472431e-07, "loss": 0.416, "step": 1467 }, { "epoch": 9.01, "learning_rate": 5.136679491970809e-07, "loss": 0.3584, "step": 1468 }, { "epoch": 9.01, "learning_rate": 5.073999594498869e-07, "loss": 0.3274, "step": 1469 }, { "epoch": 9.02, "learning_rate": 5.011694515550303e-07, "loss": 0.3901, "step": 1470 }, { "epoch": 9.02, "learning_rate": 4.949764501138832e-07, "loss": 0.3359, "step": 1471 }, { "epoch": 9.03, "learning_rate": 4.888209795797205e-07, "loss": 0.3325, "step": 1472 }, { "epoch": 9.04, "learning_rate": 4.827030642576236e-07, "loss": 0.3188, "step": 1473 }, { "epoch": 9.04, "learning_rate": 4.766227283043912e-07, "loss": 0.3936, "step": 1474 }, { "epoch": 9.05, "learning_rate": 4.7057999572843516e-07, "loss": 0.3057, "step": 1475 }, { "epoch": 9.06, "learning_rate": 4.645748903896885e-07, "loss": 0.3564, "step": 1476 }, { "epoch": 9.06, "learning_rate": 4.5860743599951186e-07, "loss": 0.3252, "step": 1477 }, { "epoch": 9.07, "learning_rate": 4.5267765612060253e-07, "loss": 0.355, "step": 1478 }, { "epoch": 9.07, "learning_rate": 4.4678557416689586e-07, "loss": 0.332, "step": 1479 }, { "epoch": 9.08, "learning_rate": 4.4093121340347824e-07, "loss": 0.3267, "step": 1480 }, { "epoch": 9.09, "learning_rate": 4.3511459694648873e-07, "loss": 0.3574, "step": 1481 }, { "epoch": 9.09, "learning_rate": 4.2933574776303664e-07, "loss": 0.3354, "step": 1482 }, { "epoch": 9.1, "learning_rate": 4.235946886711018e-07, "loss": 0.3193, "step": 1483 }, { "epoch": 9.1, "learning_rate": 4.1789144233945087e-07, "loss": 0.3301, "step": 1484 }, { "epoch": 9.11, "learning_rate": 4.122260312875437e-07, "loss": 0.3311, "step": 1485 }, { "epoch": 9.12, "learning_rate": 4.0659847788544926e-07, "loss": 0.3257, "step": 1486 }, { "epoch": 9.12, "learning_rate": 4.010088043537519e-07, "loss": 0.3389, "step": 1487 }, { "epoch": 9.13, "learning_rate": 3.954570327634677e-07, "loss": 0.3252, "step": 1488 }, { "epoch": 9.13, "learning_rate": 3.899431850359503e-07, "loss": 0.3359, "step": 1489 }, { "epoch": 9.14, "learning_rate": 3.8446728294281865e-07, "loss": 0.3408, "step": 1490 }, { "epoch": 9.15, "learning_rate": 3.7902934810585603e-07, "loss": 0.3555, "step": 1491 }, { "epoch": 9.15, "learning_rate": 3.736294019969311e-07, "loss": 0.3066, "step": 1492 }, { "epoch": 9.16, "learning_rate": 3.682674659379137e-07, "loss": 0.3354, "step": 1493 }, { "epoch": 9.17, "learning_rate": 3.629435611005916e-07, "loss": 0.3721, "step": 1494 }, { "epoch": 9.17, "learning_rate": 3.5765770850658244e-07, "loss": 0.3271, "step": 1495 }, { "epoch": 9.18, "learning_rate": 3.5240992902725204e-07, "loss": 0.2993, "step": 1496 }, { "epoch": 9.18, "learning_rate": 3.4720024338363633e-07, "loss": 0.3398, "step": 1497 }, { "epoch": 9.19, "learning_rate": 3.420286721463562e-07, "loss": 0.3213, "step": 1498 }, { "epoch": 9.2, "learning_rate": 3.3689523573553597e-07, "loss": 0.3203, "step": 1499 }, { "epoch": 9.2, "learning_rate": 3.3179995442071956e-07, "loss": 0.3105, "step": 1500 }, { "epoch": 9.21, "learning_rate": 3.2674284832080127e-07, "loss": 0.3369, "step": 1501 }, { "epoch": 9.21, "learning_rate": 3.217239374039338e-07, "loss": 0.3384, "step": 1502 }, { "epoch": 9.22, "learning_rate": 3.1674324148745827e-07, "loss": 0.2983, "step": 1503 }, { "epoch": 9.23, "learning_rate": 3.118007802378198e-07, "loss": 0.374, "step": 1504 }, { "epoch": 9.23, "learning_rate": 3.0689657317049205e-07, "loss": 0.3257, "step": 1505 }, { "epoch": 9.24, "learning_rate": 3.020306396499062e-07, "loss": 0.3735, "step": 1506 }, { "epoch": 9.25, "learning_rate": 2.972029988893621e-07, "loss": 0.3589, "step": 1507 }, { "epoch": 9.25, "learning_rate": 2.9241366995096387e-07, "loss": 0.2961, "step": 1508 }, { "epoch": 9.26, "learning_rate": 2.8766267174553884e-07, "loss": 0.2913, "step": 1509 }, { "epoch": 9.26, "learning_rate": 2.8295002303256546e-07, "loss": 0.3169, "step": 1510 }, { "epoch": 9.27, "learning_rate": 2.7827574242009434e-07, "loss": 0.355, "step": 1511 }, { "epoch": 9.28, "learning_rate": 2.736398483646807e-07, "loss": 0.3374, "step": 1512 }, { "epoch": 9.28, "learning_rate": 2.6904235917131094e-07, "loss": 0.334, "step": 1513 }, { "epoch": 9.29, "learning_rate": 2.64483292993325e-07, "loss": 0.3369, "step": 1514 }, { "epoch": 9.29, "learning_rate": 2.599626678323508e-07, "loss": 0.3076, "step": 1515 }, { "epoch": 9.3, "learning_rate": 2.554805015382289e-07, "loss": 0.3066, "step": 1516 }, { "epoch": 9.31, "learning_rate": 2.5103681180894566e-07, "loss": 0.3735, "step": 1517 }, { "epoch": 9.31, "learning_rate": 2.4663161619055797e-07, "loss": 0.3203, "step": 1518 }, { "epoch": 9.32, "learning_rate": 2.422649320771331e-07, "loss": 0.2974, "step": 1519 }, { "epoch": 9.33, "learning_rate": 2.3793677671066882e-07, "loss": 0.2905, "step": 1520 }, { "epoch": 9.33, "learning_rate": 2.3364716718103143e-07, "loss": 0.3438, "step": 1521 }, { "epoch": 9.34, "learning_rate": 2.293961204258932e-07, "loss": 0.3091, "step": 1522 }, { "epoch": 9.34, "learning_rate": 2.2518365323065284e-07, "loss": 0.3037, "step": 1523 }, { "epoch": 9.35, "learning_rate": 2.2100978222838186e-07, "loss": 0.4043, "step": 1524 }, { "epoch": 9.36, "learning_rate": 2.1687452389974829e-07, "loss": 0.3203, "step": 1525 }, { "epoch": 9.36, "learning_rate": 2.1277789457296306e-07, "loss": 0.4023, "step": 1526 }, { "epoch": 9.37, "learning_rate": 2.0871991042370255e-07, "loss": 0.3345, "step": 1527 }, { "epoch": 9.37, "learning_rate": 2.0470058747505516e-07, "loss": 0.3618, "step": 1528 }, { "epoch": 9.38, "learning_rate": 2.0071994159745367e-07, "loss": 0.333, "step": 1529 }, { "epoch": 9.39, "learning_rate": 1.9677798850861517e-07, "loss": 0.3579, "step": 1530 }, { "epoch": 9.39, "learning_rate": 1.9287474377347238e-07, "loss": 0.3389, "step": 1531 }, { "epoch": 9.4, "learning_rate": 1.8901022280411906e-07, "loss": 0.292, "step": 1532 }, { "epoch": 9.4, "learning_rate": 1.8518444085974697e-07, "loss": 0.3896, "step": 1533 }, { "epoch": 9.41, "learning_rate": 1.8139741304658566e-07, "loss": 0.3501, "step": 1534 }, { "epoch": 9.42, "learning_rate": 1.776491543178438e-07, "loss": 0.3237, "step": 1535 }, { "epoch": 9.42, "learning_rate": 1.739396794736481e-07, "loss": 0.334, "step": 1536 }, { "epoch": 9.43, "learning_rate": 1.7026900316098217e-07, "loss": 0.332, "step": 1537 }, { "epoch": 9.44, "learning_rate": 1.6663713987363882e-07, "loss": 0.3452, "step": 1538 }, { "epoch": 9.44, "learning_rate": 1.6304410395215243e-07, "loss": 0.3301, "step": 1539 }, { "epoch": 9.45, "learning_rate": 1.5948990958374543e-07, "loss": 0.3374, "step": 1540 }, { "epoch": 9.45, "learning_rate": 1.559745708022753e-07, "loss": 0.2935, "step": 1541 }, { "epoch": 9.46, "learning_rate": 1.5249810148817658e-07, "loss": 0.3643, "step": 1542 }, { "epoch": 9.47, "learning_rate": 1.490605153684066e-07, "loss": 0.3765, "step": 1543 }, { "epoch": 9.47, "learning_rate": 1.4566182601638779e-07, "loss": 0.335, "step": 1544 }, { "epoch": 9.48, "learning_rate": 1.4230204685196202e-07, "loss": 0.3569, "step": 1545 }, { "epoch": 9.48, "learning_rate": 1.3898119114133192e-07, "loss": 0.356, "step": 1546 }, { "epoch": 9.49, "learning_rate": 1.3569927199700628e-07, "loss": 0.3247, "step": 1547 }, { "epoch": 9.5, "learning_rate": 1.3245630237775585e-07, "loss": 0.3125, "step": 1548 }, { "epoch": 9.5, "learning_rate": 1.292522950885533e-07, "loss": 0.3115, "step": 1549 }, { "epoch": 9.51, "learning_rate": 1.2608726278053208e-07, "loss": 0.3647, "step": 1550 }, { "epoch": 9.52, "learning_rate": 1.2296121795092874e-07, "loss": 0.3447, "step": 1551 }, { "epoch": 9.52, "learning_rate": 1.1987417294303748e-07, "loss": 0.3105, "step": 1552 }, { "epoch": 9.53, "learning_rate": 1.1682613994615788e-07, "loss": 0.3765, "step": 1553 }, { "epoch": 9.53, "learning_rate": 1.1381713099555381e-07, "loss": 0.3472, "step": 1554 }, { "epoch": 9.54, "learning_rate": 1.1084715797239798e-07, "loss": 0.2969, "step": 1555 }, { "epoch": 9.55, "learning_rate": 1.0791623260372863e-07, "loss": 0.3467, "step": 1556 }, { "epoch": 9.55, "learning_rate": 1.0502436646240399e-07, "loss": 0.3164, "step": 1557 }, { "epoch": 9.56, "learning_rate": 1.0217157096705676e-07, "loss": 0.3633, "step": 1558 }, { "epoch": 9.56, "learning_rate": 9.935785738204417e-08, "loss": 0.3267, "step": 1559 }, { "epoch": 9.57, "learning_rate": 9.658323681741133e-08, "loss": 0.3037, "step": 1560 }, { "epoch": 9.58, "learning_rate": 9.384772022884015e-08, "loss": 0.3833, "step": 1561 }, { "epoch": 9.58, "learning_rate": 9.11513184176116e-08, "loss": 0.3452, "step": 1562 }, { "epoch": 9.59, "learning_rate": 8.8494042030558e-08, "loss": 0.3096, "step": 1563 }, { "epoch": 9.6, "learning_rate": 8.587590156002635e-08, "loss": 0.3167, "step": 1564 }, { "epoch": 9.6, "learning_rate": 8.329690734383278e-08, "loss": 0.3413, "step": 1565 }, { "epoch": 9.61, "learning_rate": 8.075706956522156e-08, "loss": 0.3936, "step": 1566 }, { "epoch": 9.61, "learning_rate": 7.825639825282949e-08, "loss": 0.3364, "step": 1567 }, { "epoch": 9.62, "learning_rate": 7.579490328064265e-08, "loss": 0.3911, "step": 1568 }, { "epoch": 9.63, "learning_rate": 7.33725943679553e-08, "loss": 0.2969, "step": 1569 }, { "epoch": 9.63, "learning_rate": 7.098948107933656e-08, "loss": 0.3291, "step": 1570 }, { "epoch": 9.64, "learning_rate": 6.864557282459162e-08, "loss": 0.3184, "step": 1571 }, { "epoch": 9.64, "learning_rate": 6.634087885871832e-08, "loss": 0.335, "step": 1572 }, { "epoch": 9.65, "learning_rate": 6.407540828188175e-08, "loss": 0.3523, "step": 1573 }, { "epoch": 9.66, "learning_rate": 6.184917003936752e-08, "loss": 0.2961, "step": 1574 }, { "epoch": 9.66, "learning_rate": 5.966217292155296e-08, "loss": 0.3701, "step": 1575 }, { "epoch": 9.67, "learning_rate": 5.7514425563870436e-08, "loss": 0.3662, "step": 1576 }, { "epoch": 9.67, "learning_rate": 5.540593644677295e-08, "loss": 0.3115, "step": 1577 }, { "epoch": 9.68, "learning_rate": 5.333671389569972e-08, "loss": 0.3164, "step": 1578 }, { "epoch": 9.69, "learning_rate": 5.1306766081048456e-08, "loss": 0.3003, "step": 1579 }, { "epoch": 9.69, "learning_rate": 4.931610101813533e-08, "loss": 0.3164, "step": 1580 }, { "epoch": 9.7, "learning_rate": 4.73647265671684e-08, "loss": 0.3521, "step": 1581 }, { "epoch": 9.71, "learning_rate": 4.545265043321645e-08, "loss": 0.2876, "step": 1582 }, { "epoch": 9.71, "learning_rate": 4.357988016617687e-08, "loss": 0.2947, "step": 1583 }, { "epoch": 9.72, "learning_rate": 4.174642316074562e-08, "loss": 0.3423, "step": 1584 }, { "epoch": 9.72, "learning_rate": 3.9952286656389506e-08, "loss": 0.3438, "step": 1585 }, { "epoch": 9.73, "learning_rate": 3.819747773731841e-08, "loss": 0.3872, "step": 1586 }, { "epoch": 9.74, "learning_rate": 3.648200333245422e-08, "loss": 0.3247, "step": 1587 }, { "epoch": 9.74, "learning_rate": 3.480587021540527e-08, "loss": 0.3091, "step": 1588 }, { "epoch": 9.75, "learning_rate": 3.316908500443972e-08, "loss": 0.3633, "step": 1589 }, { "epoch": 9.75, "learning_rate": 3.1571654162461107e-08, "loss": 0.3281, "step": 1590 }, { "epoch": 9.76, "learning_rate": 3.001358399697618e-08, "loss": 0.3545, "step": 1591 }, { "epoch": 9.77, "learning_rate": 2.8494880660080437e-08, "loss": 0.3472, "step": 1592 }, { "epoch": 9.77, "learning_rate": 2.7015550148423718e-08, "loss": 0.3682, "step": 1593 }, { "epoch": 9.78, "learning_rate": 2.557559830319245e-08, "loss": 0.3105, "step": 1594 }, { "epoch": 9.79, "learning_rate": 2.417503081008632e-08, "loss": 0.3003, "step": 1595 }, { "epoch": 9.79, "learning_rate": 2.2813853199292745e-08, "loss": 0.3608, "step": 1596 }, { "epoch": 9.8, "learning_rate": 2.1492070845468005e-08, "loss": 0.2871, "step": 1597 }, { "epoch": 9.8, "learning_rate": 2.0209688967713914e-08, "loss": 0.3169, "step": 1598 }, { "epoch": 9.81, "learning_rate": 1.896671262955896e-08, "loss": 0.3218, "step": 1599 }, { "epoch": 9.82, "learning_rate": 1.7763146738938307e-08, "loss": 0.332, "step": 1600 }, { "epoch": 9.82, "learning_rate": 1.659899604816939e-08, "loss": 0.3013, "step": 1601 }, { "epoch": 9.83, "learning_rate": 1.5474265153944124e-08, "loss": 0.3262, "step": 1602 }, { "epoch": 9.83, "learning_rate": 1.4388958497300043e-08, "loss": 0.2925, "step": 1603 }, { "epoch": 9.84, "learning_rate": 1.3343080363604766e-08, "loss": 0.314, "step": 1604 }, { "epoch": 9.85, "learning_rate": 1.2336634882544885e-08, "loss": 0.3696, "step": 1605 }, { "epoch": 9.85, "learning_rate": 1.1369626028104874e-08, "loss": 0.3647, "step": 1606 }, { "epoch": 9.86, "learning_rate": 1.0442057618551549e-08, "loss": 0.3306, "step": 1607 }, { "epoch": 9.87, "learning_rate": 9.553933316420739e-09, "loss": 0.3916, "step": 1608 }, { "epoch": 9.87, "learning_rate": 8.705256628499525e-09, "loss": 0.3525, "step": 1609 }, { "epoch": 9.88, "learning_rate": 7.896030905818474e-09, "loss": 0.3662, "step": 1610 }, { "epoch": 9.88, "learning_rate": 7.126259343631648e-09, "loss": 0.3042, "step": 1611 }, { "epoch": 9.89, "learning_rate": 6.39594498140883e-09, "loss": 0.3257, "step": 1612 }, { "epoch": 9.9, "learning_rate": 5.705090702819993e-09, "loss": 0.3237, "step": 1613 }, { "epoch": 9.9, "learning_rate": 5.053699235726406e-09, "loss": 0.29, "step": 1614 }, { "epoch": 9.91, "learning_rate": 4.4417731521717576e-09, "loss": 0.3081, "step": 1615 }, { "epoch": 9.91, "learning_rate": 3.869314868363283e-09, "loss": 0.2944, "step": 1616 }, { "epoch": 9.92, "learning_rate": 3.3363266446750918e-09, "loss": 0.2676, "step": 1617 }, { "epoch": 9.93, "learning_rate": 2.842810585627076e-09, "loss": 0.3086, "step": 1618 }, { "epoch": 9.93, "learning_rate": 2.388768639886019e-09, "loss": 0.3047, "step": 1619 }, { "epoch": 9.94, "learning_rate": 1.9742026002500526e-09, "loss": 0.3242, "step": 1620 }, { "epoch": 9.94, "learning_rate": 1.5991141036475478e-09, "loss": 0.3086, "step": 1621 }, { "epoch": 9.95, "learning_rate": 1.263504631129342e-09, "loss": 0.3174, "step": 1622 }, { "epoch": 9.96, "learning_rate": 9.673755078598578e-10, "loss": 0.354, "step": 1623 }, { "epoch": 9.96, "learning_rate": 7.107279031148828e-10, "loss": 0.3208, "step": 1624 }, { "epoch": 9.97, "learning_rate": 4.935628302760175e-10, "loss": 0.3721, "step": 1625 }, { "epoch": 9.98, "learning_rate": 3.158811468273459e-10, "loss": 0.3354, "step": 1626 }, { "epoch": 9.98, "learning_rate": 1.776835543509936e-10, "loss": 0.3215, "step": 1627 }, { "epoch": 9.99, "learning_rate": 7.897059852490785e-11, "loss": 0.2866, "step": 1628 }, { "epoch": 9.99, "learning_rate": 1.9742669119526824e-11, "loss": 0.3057, "step": 1629 }, { "epoch": 10.0, "learning_rate": 0.0, "loss": 0.3101, "step": 1630 }, { "epoch": 10.0, "step": 1630, "total_flos": 41842376695808.0, "train_loss": 1.8768900678201688, "train_runtime": 1493.3467, "train_samples_per_second": 69.736, "train_steps_per_second": 1.092 } ], "logging_steps": 1.0, "max_steps": 1630, "num_train_epochs": 10, "save_steps": 10000, "total_flos": 41842376695808.0, "trial_name": null, "trial_params": null }