{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999861795734095, "eval_steps": 500, "global_step": 5426, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.226993865030675e-06, "loss": 2.8397, "step": 1 }, { "epoch": 0.0, "learning_rate": 2.45398773006135e-06, "loss": 2.9611, "step": 2 }, { "epoch": 0.0, "learning_rate": 3.680981595092025e-06, "loss": 2.9096, "step": 3 }, { "epoch": 0.0, "learning_rate": 4.9079754601227e-06, "loss": 2.9153, "step": 4 }, { "epoch": 0.0, "learning_rate": 6.134969325153374e-06, "loss": 2.8089, "step": 5 }, { "epoch": 0.0, "learning_rate": 7.36196319018405e-06, "loss": 2.959, "step": 6 }, { "epoch": 0.0, "learning_rate": 8.588957055214725e-06, "loss": 2.6347, "step": 7 }, { "epoch": 0.0, "learning_rate": 9.8159509202454e-06, "loss": 2.2857, "step": 8 }, { "epoch": 0.0, "learning_rate": 1.1042944785276074e-05, "loss": 2.5173, "step": 9 }, { "epoch": 0.0, "learning_rate": 1.2269938650306748e-05, "loss": 2.1649, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.3496932515337424e-05, "loss": 2.1054, "step": 11 }, { "epoch": 0.0, "learning_rate": 1.47239263803681e-05, "loss": 2.0367, "step": 12 }, { "epoch": 0.0, "learning_rate": 1.5950920245398772e-05, "loss": 1.9808, "step": 13 }, { "epoch": 0.0, "learning_rate": 1.717791411042945e-05, "loss": 2.0746, "step": 14 }, { "epoch": 0.0, "learning_rate": 1.8404907975460123e-05, "loss": 1.9622, "step": 15 }, { "epoch": 0.0, "learning_rate": 1.96319018404908e-05, "loss": 1.7615, "step": 16 }, { "epoch": 0.0, "learning_rate": 2.085889570552147e-05, "loss": 2.1153, "step": 17 }, { "epoch": 0.0, "learning_rate": 2.208588957055215e-05, "loss": 1.7562, "step": 18 }, { "epoch": 0.0, "learning_rate": 2.3312883435582822e-05, "loss": 1.9286, "step": 19 }, { "epoch": 0.0, "learning_rate": 2.4539877300613496e-05, "loss": 1.937, "step": 20 }, { "epoch": 0.0, "learning_rate": 2.5766871165644174e-05, "loss": 1.835, "step": 21 }, { "epoch": 0.0, "learning_rate": 2.6993865030674848e-05, "loss": 1.7864, "step": 22 }, { "epoch": 0.0, "learning_rate": 2.822085889570552e-05, "loss": 1.6748, "step": 23 }, { "epoch": 0.0, "learning_rate": 2.94478527607362e-05, "loss": 1.5393, "step": 24 }, { "epoch": 0.0, "learning_rate": 3.067484662576687e-05, "loss": 1.5585, "step": 25 }, { "epoch": 0.0, "learning_rate": 3.1901840490797544e-05, "loss": 1.6378, "step": 26 }, { "epoch": 0.0, "learning_rate": 3.312883435582822e-05, "loss": 1.6824, "step": 27 }, { "epoch": 0.01, "learning_rate": 3.43558282208589e-05, "loss": 1.6693, "step": 28 }, { "epoch": 0.01, "learning_rate": 3.558282208588957e-05, "loss": 1.6395, "step": 29 }, { "epoch": 0.01, "learning_rate": 3.6809815950920246e-05, "loss": 1.5974, "step": 30 }, { "epoch": 0.01, "learning_rate": 3.8036809815950924e-05, "loss": 1.646, "step": 31 }, { "epoch": 0.01, "learning_rate": 3.92638036809816e-05, "loss": 1.5409, "step": 32 }, { "epoch": 0.01, "learning_rate": 4.049079754601227e-05, "loss": 1.5372, "step": 33 }, { "epoch": 0.01, "learning_rate": 4.171779141104294e-05, "loss": 1.4771, "step": 34 }, { "epoch": 0.01, "learning_rate": 4.2944785276073626e-05, "loss": 1.5276, "step": 35 }, { "epoch": 0.01, "learning_rate": 4.41717791411043e-05, "loss": 1.5981, "step": 36 }, { "epoch": 0.01, "learning_rate": 4.539877300613497e-05, "loss": 1.4838, "step": 37 }, { "epoch": 0.01, "learning_rate": 4.6625766871165645e-05, "loss": 1.594, "step": 38 }, { "epoch": 0.01, "learning_rate": 4.785276073619632e-05, "loss": 1.4278, "step": 39 }, { "epoch": 0.01, "learning_rate": 4.907975460122699e-05, "loss": 1.4571, "step": 40 }, { "epoch": 0.01, "learning_rate": 5.030674846625767e-05, "loss": 1.4489, "step": 41 }, { "epoch": 0.01, "learning_rate": 5.153374233128835e-05, "loss": 1.6419, "step": 42 }, { "epoch": 0.01, "learning_rate": 5.276073619631902e-05, "loss": 1.542, "step": 43 }, { "epoch": 0.01, "learning_rate": 5.3987730061349695e-05, "loss": 1.5106, "step": 44 }, { "epoch": 0.01, "learning_rate": 5.521472392638037e-05, "loss": 1.6279, "step": 45 }, { "epoch": 0.01, "learning_rate": 5.644171779141104e-05, "loss": 1.4189, "step": 46 }, { "epoch": 0.01, "learning_rate": 5.766871165644172e-05, "loss": 1.4459, "step": 47 }, { "epoch": 0.01, "learning_rate": 5.88957055214724e-05, "loss": 1.621, "step": 48 }, { "epoch": 0.01, "learning_rate": 6.012269938650307e-05, "loss": 1.5234, "step": 49 }, { "epoch": 0.01, "learning_rate": 6.134969325153375e-05, "loss": 1.4422, "step": 50 }, { "epoch": 0.01, "learning_rate": 6.257668711656443e-05, "loss": 1.5495, "step": 51 }, { "epoch": 0.01, "learning_rate": 6.380368098159509e-05, "loss": 1.468, "step": 52 }, { "epoch": 0.01, "learning_rate": 6.503067484662577e-05, "loss": 1.3441, "step": 53 }, { "epoch": 0.01, "learning_rate": 6.625766871165644e-05, "loss": 1.4212, "step": 54 }, { "epoch": 0.01, "learning_rate": 6.748466257668711e-05, "loss": 1.3305, "step": 55 }, { "epoch": 0.01, "learning_rate": 6.87116564417178e-05, "loss": 1.4897, "step": 56 }, { "epoch": 0.01, "learning_rate": 6.993865030674847e-05, "loss": 1.3964, "step": 57 }, { "epoch": 0.01, "learning_rate": 7.116564417177914e-05, "loss": 1.3808, "step": 58 }, { "epoch": 0.01, "learning_rate": 7.239263803680982e-05, "loss": 1.5384, "step": 59 }, { "epoch": 0.01, "learning_rate": 7.361963190184049e-05, "loss": 1.4844, "step": 60 }, { "epoch": 0.01, "learning_rate": 7.484662576687118e-05, "loss": 1.4919, "step": 61 }, { "epoch": 0.01, "learning_rate": 7.607361963190185e-05, "loss": 1.3338, "step": 62 }, { "epoch": 0.01, "learning_rate": 7.730061349693252e-05, "loss": 1.3451, "step": 63 }, { "epoch": 0.01, "learning_rate": 7.85276073619632e-05, "loss": 1.4295, "step": 64 }, { "epoch": 0.01, "learning_rate": 7.975460122699386e-05, "loss": 1.4654, "step": 65 }, { "epoch": 0.01, "learning_rate": 8.098159509202454e-05, "loss": 1.4628, "step": 66 }, { "epoch": 0.01, "learning_rate": 8.220858895705523e-05, "loss": 1.2933, "step": 67 }, { "epoch": 0.01, "learning_rate": 8.343558282208588e-05, "loss": 1.4727, "step": 68 }, { "epoch": 0.01, "learning_rate": 8.466257668711657e-05, "loss": 1.3447, "step": 69 }, { "epoch": 0.01, "learning_rate": 8.588957055214725e-05, "loss": 1.5133, "step": 70 }, { "epoch": 0.01, "learning_rate": 8.711656441717791e-05, "loss": 1.3124, "step": 71 }, { "epoch": 0.01, "learning_rate": 8.83435582822086e-05, "loss": 1.3604, "step": 72 }, { "epoch": 0.01, "learning_rate": 8.957055214723928e-05, "loss": 1.3943, "step": 73 }, { "epoch": 0.01, "learning_rate": 9.079754601226993e-05, "loss": 1.3381, "step": 74 }, { "epoch": 0.01, "learning_rate": 9.202453987730062e-05, "loss": 1.3608, "step": 75 }, { "epoch": 0.01, "learning_rate": 9.325153374233129e-05, "loss": 1.449, "step": 76 }, { "epoch": 0.01, "learning_rate": 9.447852760736196e-05, "loss": 1.1355, "step": 77 }, { "epoch": 0.01, "learning_rate": 9.570552147239264e-05, "loss": 1.3707, "step": 78 }, { "epoch": 0.01, "learning_rate": 9.693251533742331e-05, "loss": 1.4066, "step": 79 }, { "epoch": 0.01, "learning_rate": 9.815950920245399e-05, "loss": 1.2411, "step": 80 }, { "epoch": 0.01, "learning_rate": 9.938650306748467e-05, "loss": 1.4193, "step": 81 }, { "epoch": 0.02, "learning_rate": 0.00010061349693251534, "loss": 1.1368, "step": 82 }, { "epoch": 0.02, "learning_rate": 0.00010184049079754601, "loss": 1.2284, "step": 83 }, { "epoch": 0.02, "learning_rate": 0.0001030674846625767, "loss": 1.3363, "step": 84 }, { "epoch": 0.02, "learning_rate": 0.00010429447852760737, "loss": 1.2982, "step": 85 }, { "epoch": 0.02, "learning_rate": 0.00010552147239263804, "loss": 1.5776, "step": 86 }, { "epoch": 0.02, "learning_rate": 0.00010674846625766872, "loss": 1.4204, "step": 87 }, { "epoch": 0.02, "learning_rate": 0.00010797546012269939, "loss": 1.4114, "step": 88 }, { "epoch": 0.02, "learning_rate": 0.00010920245398773006, "loss": 1.2703, "step": 89 }, { "epoch": 0.02, "learning_rate": 0.00011042944785276075, "loss": 1.2745, "step": 90 }, { "epoch": 0.02, "learning_rate": 0.00011165644171779142, "loss": 1.3376, "step": 91 }, { "epoch": 0.02, "learning_rate": 0.00011288343558282209, "loss": 1.4291, "step": 92 }, { "epoch": 0.02, "learning_rate": 0.00011411042944785277, "loss": 1.2831, "step": 93 }, { "epoch": 0.02, "learning_rate": 0.00011533742331288344, "loss": 1.3875, "step": 94 }, { "epoch": 0.02, "learning_rate": 0.00011656441717791411, "loss": 1.3987, "step": 95 }, { "epoch": 0.02, "learning_rate": 0.0001177914110429448, "loss": 1.442, "step": 96 }, { "epoch": 0.02, "learning_rate": 0.00011901840490797547, "loss": 1.3423, "step": 97 }, { "epoch": 0.02, "learning_rate": 0.00012024539877300614, "loss": 1.485, "step": 98 }, { "epoch": 0.02, "learning_rate": 0.00012147239263803682, "loss": 1.2586, "step": 99 }, { "epoch": 0.02, "learning_rate": 0.0001226993865030675, "loss": 1.4191, "step": 100 }, { "epoch": 0.02, "learning_rate": 0.00012392638036809816, "loss": 1.3004, "step": 101 }, { "epoch": 0.02, "learning_rate": 0.00012515337423312886, "loss": 1.2591, "step": 102 }, { "epoch": 0.02, "learning_rate": 0.00012638036809815953, "loss": 1.2847, "step": 103 }, { "epoch": 0.02, "learning_rate": 0.00012760736196319017, "loss": 1.3381, "step": 104 }, { "epoch": 0.02, "learning_rate": 0.00012883435582822084, "loss": 1.3939, "step": 105 }, { "epoch": 0.02, "learning_rate": 0.00013006134969325154, "loss": 1.5244, "step": 106 }, { "epoch": 0.02, "learning_rate": 0.0001312883435582822, "loss": 1.1643, "step": 107 }, { "epoch": 0.02, "learning_rate": 0.00013251533742331288, "loss": 1.2588, "step": 108 }, { "epoch": 0.02, "learning_rate": 0.00013374233128834358, "loss": 1.3284, "step": 109 }, { "epoch": 0.02, "learning_rate": 0.00013496932515337422, "loss": 1.3669, "step": 110 }, { "epoch": 0.02, "learning_rate": 0.0001361963190184049, "loss": 1.2505, "step": 111 }, { "epoch": 0.02, "learning_rate": 0.0001374233128834356, "loss": 1.2742, "step": 112 }, { "epoch": 0.02, "learning_rate": 0.00013865030674846626, "loss": 1.3732, "step": 113 }, { "epoch": 0.02, "learning_rate": 0.00013987730061349693, "loss": 1.4146, "step": 114 }, { "epoch": 0.02, "learning_rate": 0.00014110429447852763, "loss": 1.1562, "step": 115 }, { "epoch": 0.02, "learning_rate": 0.00014233128834355828, "loss": 1.4526, "step": 116 }, { "epoch": 0.02, "learning_rate": 0.00014355828220858895, "loss": 1.2858, "step": 117 }, { "epoch": 0.02, "learning_rate": 0.00014478527607361964, "loss": 1.3028, "step": 118 }, { "epoch": 0.02, "learning_rate": 0.00014601226993865031, "loss": 1.246, "step": 119 }, { "epoch": 0.02, "learning_rate": 0.00014723926380368098, "loss": 1.406, "step": 120 }, { "epoch": 0.02, "learning_rate": 0.00014846625766871168, "loss": 1.3271, "step": 121 }, { "epoch": 0.02, "learning_rate": 0.00014969325153374235, "loss": 1.2386, "step": 122 }, { "epoch": 0.02, "learning_rate": 0.000150920245398773, "loss": 1.3339, "step": 123 }, { "epoch": 0.02, "learning_rate": 0.0001521472392638037, "loss": 1.2535, "step": 124 }, { "epoch": 0.02, "learning_rate": 0.00015337423312883436, "loss": 1.2994, "step": 125 }, { "epoch": 0.02, "learning_rate": 0.00015460122699386504, "loss": 1.1147, "step": 126 }, { "epoch": 0.02, "learning_rate": 0.00015582822085889573, "loss": 1.3064, "step": 127 }, { "epoch": 0.02, "learning_rate": 0.0001570552147239264, "loss": 1.2719, "step": 128 }, { "epoch": 0.02, "learning_rate": 0.00015828220858895705, "loss": 1.3109, "step": 129 }, { "epoch": 0.02, "learning_rate": 0.00015950920245398772, "loss": 1.211, "step": 130 }, { "epoch": 0.02, "learning_rate": 0.00016073619631901842, "loss": 1.2319, "step": 131 }, { "epoch": 0.02, "learning_rate": 0.00016196319018404909, "loss": 1.2851, "step": 132 }, { "epoch": 0.02, "learning_rate": 0.00016319018404907976, "loss": 1.3331, "step": 133 }, { "epoch": 0.02, "learning_rate": 0.00016441717791411045, "loss": 1.238, "step": 134 }, { "epoch": 0.02, "learning_rate": 0.0001656441717791411, "loss": 1.2655, "step": 135 }, { "epoch": 0.03, "learning_rate": 0.00016687116564417177, "loss": 1.3217, "step": 136 }, { "epoch": 0.03, "learning_rate": 0.00016809815950920247, "loss": 1.1768, "step": 137 }, { "epoch": 0.03, "learning_rate": 0.00016932515337423314, "loss": 1.3265, "step": 138 }, { "epoch": 0.03, "learning_rate": 0.0001705521472392638, "loss": 1.2316, "step": 139 }, { "epoch": 0.03, "learning_rate": 0.0001717791411042945, "loss": 1.4138, "step": 140 }, { "epoch": 0.03, "learning_rate": 0.00017300613496932518, "loss": 1.3504, "step": 141 }, { "epoch": 0.03, "learning_rate": 0.00017423312883435582, "loss": 1.2091, "step": 142 }, { "epoch": 0.03, "learning_rate": 0.00017546012269938652, "loss": 1.2455, "step": 143 }, { "epoch": 0.03, "learning_rate": 0.0001766871165644172, "loss": 1.3073, "step": 144 }, { "epoch": 0.03, "learning_rate": 0.00017791411042944786, "loss": 1.2201, "step": 145 }, { "epoch": 0.03, "learning_rate": 0.00017914110429447856, "loss": 1.2481, "step": 146 }, { "epoch": 0.03, "learning_rate": 0.00018036809815950923, "loss": 1.3003, "step": 147 }, { "epoch": 0.03, "learning_rate": 0.00018159509202453987, "loss": 1.1667, "step": 148 }, { "epoch": 0.03, "learning_rate": 0.00018282208588957057, "loss": 1.2664, "step": 149 }, { "epoch": 0.03, "learning_rate": 0.00018404907975460124, "loss": 1.2105, "step": 150 }, { "epoch": 0.03, "learning_rate": 0.0001852760736196319, "loss": 1.1856, "step": 151 }, { "epoch": 0.03, "learning_rate": 0.00018650306748466258, "loss": 1.2326, "step": 152 }, { "epoch": 0.03, "learning_rate": 0.00018773006134969328, "loss": 1.2581, "step": 153 }, { "epoch": 0.03, "learning_rate": 0.00018895705521472392, "loss": 1.3022, "step": 154 }, { "epoch": 0.03, "learning_rate": 0.0001901840490797546, "loss": 1.2152, "step": 155 }, { "epoch": 0.03, "learning_rate": 0.0001914110429447853, "loss": 1.1308, "step": 156 }, { "epoch": 0.03, "learning_rate": 0.00019263803680981596, "loss": 1.1871, "step": 157 }, { "epoch": 0.03, "learning_rate": 0.00019386503067484663, "loss": 1.1359, "step": 158 }, { "epoch": 0.03, "learning_rate": 0.00019509202453987733, "loss": 1.3105, "step": 159 }, { "epoch": 0.03, "learning_rate": 0.00019631901840490797, "loss": 1.3347, "step": 160 }, { "epoch": 0.03, "learning_rate": 0.00019754601226993864, "loss": 1.2766, "step": 161 }, { "epoch": 0.03, "learning_rate": 0.00019877300613496934, "loss": 1.1587, "step": 162 }, { "epoch": 0.03, "learning_rate": 0.0002, "loss": 1.3573, "step": 163 }, { "epoch": 0.03, "learning_rate": 0.00019999998218429567, "loss": 1.2893, "step": 164 }, { "epoch": 0.03, "learning_rate": 0.00019999992873718898, "loss": 1.2959, "step": 165 }, { "epoch": 0.03, "learning_rate": 0.000199999839658699, "loss": 1.3842, "step": 166 }, { "epoch": 0.03, "learning_rate": 0.0001999997149488575, "loss": 1.2838, "step": 167 }, { "epoch": 0.03, "learning_rate": 0.00019999955460770887, "loss": 1.2141, "step": 168 }, { "epoch": 0.03, "learning_rate": 0.00019999935863531025, "loss": 1.1628, "step": 169 }, { "epoch": 0.03, "learning_rate": 0.00019999912703173148, "loss": 1.2137, "step": 170 }, { "epoch": 0.03, "learning_rate": 0.00019999885979705512, "loss": 1.2212, "step": 171 }, { "epoch": 0.03, "learning_rate": 0.0001999985569313763, "loss": 1.2414, "step": 172 }, { "epoch": 0.03, "learning_rate": 0.00019999821843480297, "loss": 1.2036, "step": 173 }, { "epoch": 0.03, "learning_rate": 0.00019999784430745575, "loss": 1.3559, "step": 174 }, { "epoch": 0.03, "learning_rate": 0.00019999743454946799, "loss": 1.3637, "step": 175 }, { "epoch": 0.03, "learning_rate": 0.00019999698916098562, "loss": 1.491, "step": 176 }, { "epoch": 0.03, "learning_rate": 0.00019999650814216742, "loss": 1.1458, "step": 177 }, { "epoch": 0.03, "learning_rate": 0.0001999959914931847, "loss": 1.1719, "step": 178 }, { "epoch": 0.03, "learning_rate": 0.0001999954392142216, "loss": 1.3666, "step": 179 }, { "epoch": 0.03, "learning_rate": 0.0001999948513054749, "loss": 1.3032, "step": 180 }, { "epoch": 0.03, "learning_rate": 0.00019999422776715404, "loss": 1.3408, "step": 181 }, { "epoch": 0.03, "learning_rate": 0.00019999356859948123, "loss": 1.3187, "step": 182 }, { "epoch": 0.03, "learning_rate": 0.00019999287380269133, "loss": 1.2885, "step": 183 }, { "epoch": 0.03, "learning_rate": 0.00019999214337703195, "loss": 1.4999, "step": 184 }, { "epoch": 0.03, "learning_rate": 0.00019999137732276326, "loss": 1.0932, "step": 185 }, { "epoch": 0.03, "learning_rate": 0.00019999057564015833, "loss": 1.3197, "step": 186 }, { "epoch": 0.03, "learning_rate": 0.0001999897383295027, "loss": 1.3814, "step": 187 }, { "epoch": 0.03, "learning_rate": 0.00019998886539109479, "loss": 1.2937, "step": 188 }, { "epoch": 0.03, "learning_rate": 0.0001999879568252456, "loss": 1.3366, "step": 189 }, { "epoch": 0.04, "learning_rate": 0.00019998701263227887, "loss": 1.145, "step": 190 }, { "epoch": 0.04, "learning_rate": 0.00019998603281253107, "loss": 1.275, "step": 191 }, { "epoch": 0.04, "learning_rate": 0.0001999850173663513, "loss": 1.2714, "step": 192 }, { "epoch": 0.04, "learning_rate": 0.00019998396629410133, "loss": 1.1932, "step": 193 }, { "epoch": 0.04, "learning_rate": 0.00019998287959615573, "loss": 1.1911, "step": 194 }, { "epoch": 0.04, "learning_rate": 0.0001999817572729017, "loss": 1.1506, "step": 195 }, { "epoch": 0.04, "learning_rate": 0.00019998059932473914, "loss": 1.2786, "step": 196 }, { "epoch": 0.04, "learning_rate": 0.0001999794057520806, "loss": 1.1796, "step": 197 }, { "epoch": 0.04, "learning_rate": 0.0001999781765553514, "loss": 1.2314, "step": 198 }, { "epoch": 0.04, "learning_rate": 0.00019997691173498955, "loss": 1.3415, "step": 199 }, { "epoch": 0.04, "learning_rate": 0.00019997561129144565, "loss": 1.3487, "step": 200 }, { "epoch": 0.04, "learning_rate": 0.00019997427522518315, "loss": 1.2016, "step": 201 }, { "epoch": 0.04, "learning_rate": 0.00019997290353667804, "loss": 1.1929, "step": 202 }, { "epoch": 0.04, "learning_rate": 0.0001999714962264191, "loss": 1.3436, "step": 203 }, { "epoch": 0.04, "learning_rate": 0.00019997005329490776, "loss": 1.3609, "step": 204 }, { "epoch": 0.04, "learning_rate": 0.0001999685747426582, "loss": 1.0451, "step": 205 }, { "epoch": 0.04, "learning_rate": 0.00019996706057019722, "loss": 1.2579, "step": 206 }, { "epoch": 0.04, "learning_rate": 0.00019996551077806435, "loss": 1.2273, "step": 207 }, { "epoch": 0.04, "learning_rate": 0.00019996392536681174, "loss": 1.2686, "step": 208 }, { "epoch": 0.04, "learning_rate": 0.0001999623043370044, "loss": 1.4125, "step": 209 }, { "epoch": 0.04, "learning_rate": 0.00019996064768921983, "loss": 1.1006, "step": 210 }, { "epoch": 0.04, "learning_rate": 0.0001999589554240484, "loss": 1.1632, "step": 211 }, { "epoch": 0.04, "learning_rate": 0.00019995722754209303, "loss": 1.2017, "step": 212 }, { "epoch": 0.04, "learning_rate": 0.0001999554640439694, "loss": 1.2351, "step": 213 }, { "epoch": 0.04, "learning_rate": 0.00019995366493030589, "loss": 1.0224, "step": 214 }, { "epoch": 0.04, "learning_rate": 0.00019995183020174353, "loss": 1.2512, "step": 215 }, { "epoch": 0.04, "learning_rate": 0.00019994995985893606, "loss": 1.3269, "step": 216 }, { "epoch": 0.04, "learning_rate": 0.0001999480539025499, "loss": 1.2849, "step": 217 }, { "epoch": 0.04, "learning_rate": 0.00019994611233326422, "loss": 1.4398, "step": 218 }, { "epoch": 0.04, "learning_rate": 0.0001999441351517708, "loss": 1.2344, "step": 219 }, { "epoch": 0.04, "learning_rate": 0.00019994212235877406, "loss": 1.4602, "step": 220 }, { "epoch": 0.04, "learning_rate": 0.00019994007395499134, "loss": 1.2153, "step": 221 }, { "epoch": 0.04, "learning_rate": 0.00019993798994115237, "loss": 1.2751, "step": 222 }, { "epoch": 0.04, "learning_rate": 0.00019993587031799984, "loss": 1.2281, "step": 223 }, { "epoch": 0.04, "learning_rate": 0.00019993371508628886, "loss": 1.1809, "step": 224 }, { "epoch": 0.04, "learning_rate": 0.00019993152424678748, "loss": 1.1694, "step": 225 }, { "epoch": 0.04, "learning_rate": 0.0001999292978002763, "loss": 1.2658, "step": 226 }, { "epoch": 0.04, "learning_rate": 0.00019992703574754864, "loss": 1.1517, "step": 227 }, { "epoch": 0.04, "learning_rate": 0.00019992473808941045, "loss": 1.2579, "step": 228 }, { "epoch": 0.04, "learning_rate": 0.00019992240482668048, "loss": 1.1628, "step": 229 }, { "epoch": 0.04, "learning_rate": 0.0001999200359601901, "loss": 1.2483, "step": 230 }, { "epoch": 0.04, "learning_rate": 0.0001999176314907833, "loss": 1.3095, "step": 231 }, { "epoch": 0.04, "learning_rate": 0.00019991519141931692, "loss": 1.2543, "step": 232 }, { "epoch": 0.04, "learning_rate": 0.00019991271574666035, "loss": 1.2375, "step": 233 }, { "epoch": 0.04, "learning_rate": 0.00019991020447369568, "loss": 1.3303, "step": 234 }, { "epoch": 0.04, "learning_rate": 0.00019990765760131776, "loss": 1.1639, "step": 235 }, { "epoch": 0.04, "learning_rate": 0.00019990507513043405, "loss": 1.3152, "step": 236 }, { "epoch": 0.04, "learning_rate": 0.0001999024570619647, "loss": 1.183, "step": 237 }, { "epoch": 0.04, "learning_rate": 0.00019989980339684261, "loss": 1.2075, "step": 238 }, { "epoch": 0.04, "learning_rate": 0.00019989711413601332, "loss": 1.1494, "step": 239 }, { "epoch": 0.04, "learning_rate": 0.000199894389280435, "loss": 1.2425, "step": 240 }, { "epoch": 0.04, "learning_rate": 0.0001998916288310786, "loss": 1.2171, "step": 241 }, { "epoch": 0.04, "learning_rate": 0.0001998888327889277, "loss": 1.2453, "step": 242 }, { "epoch": 0.04, "learning_rate": 0.00019988600115497854, "loss": 1.3037, "step": 243 }, { "epoch": 0.04, "learning_rate": 0.0001998831339302401, "loss": 1.2325, "step": 244 }, { "epoch": 0.05, "learning_rate": 0.000199880231115734, "loss": 1.3913, "step": 245 }, { "epoch": 0.05, "learning_rate": 0.00019987729271249457, "loss": 1.48, "step": 246 }, { "epoch": 0.05, "learning_rate": 0.00019987431872156878, "loss": 1.2755, "step": 247 }, { "epoch": 0.05, "learning_rate": 0.00019987130914401633, "loss": 1.2634, "step": 248 }, { "epoch": 0.05, "learning_rate": 0.00019986826398090955, "loss": 1.2751, "step": 249 }, { "epoch": 0.05, "learning_rate": 0.0001998651832333335, "loss": 1.1711, "step": 250 }, { "epoch": 0.05, "learning_rate": 0.00019986206690238587, "loss": 1.3211, "step": 251 }, { "epoch": 0.05, "learning_rate": 0.00019985891498917706, "loss": 1.2091, "step": 252 }, { "epoch": 0.05, "learning_rate": 0.00019985572749483014, "loss": 1.2467, "step": 253 }, { "epoch": 0.05, "learning_rate": 0.0001998525044204809, "loss": 1.2268, "step": 254 }, { "epoch": 0.05, "learning_rate": 0.0001998492457672777, "loss": 1.1616, "step": 255 }, { "epoch": 0.05, "learning_rate": 0.00019984595153638165, "loss": 1.2705, "step": 256 }, { "epoch": 0.05, "learning_rate": 0.0001998426217289666, "loss": 1.2053, "step": 257 }, { "epoch": 0.05, "learning_rate": 0.00019983925634621893, "loss": 1.3416, "step": 258 }, { "epoch": 0.05, "learning_rate": 0.00019983585538933785, "loss": 1.4156, "step": 259 }, { "epoch": 0.05, "learning_rate": 0.0001998324188595351, "loss": 1.2407, "step": 260 }, { "epoch": 0.05, "learning_rate": 0.00019982894675803518, "loss": 1.2285, "step": 261 }, { "epoch": 0.05, "learning_rate": 0.00019982543908607524, "loss": 1.2838, "step": 262 }, { "epoch": 0.05, "learning_rate": 0.00019982189584490519, "loss": 1.1768, "step": 263 }, { "epoch": 0.05, "learning_rate": 0.00019981831703578742, "loss": 1.2377, "step": 264 }, { "epoch": 0.05, "learning_rate": 0.0001998147026599972, "loss": 1.3483, "step": 265 }, { "epoch": 0.05, "learning_rate": 0.00019981105271882235, "loss": 1.1509, "step": 266 }, { "epoch": 0.05, "learning_rate": 0.00019980736721356336, "loss": 1.2331, "step": 267 }, { "epoch": 0.05, "learning_rate": 0.00019980364614553348, "loss": 1.2639, "step": 268 }, { "epoch": 0.05, "learning_rate": 0.00019979988951605856, "loss": 1.3577, "step": 269 }, { "epoch": 0.05, "learning_rate": 0.00019979609732647717, "loss": 1.3327, "step": 270 }, { "epoch": 0.05, "learning_rate": 0.00019979226957814045, "loss": 1.0867, "step": 271 }, { "epoch": 0.05, "learning_rate": 0.00019978840627241236, "loss": 1.3015, "step": 272 }, { "epoch": 0.05, "learning_rate": 0.0001997845074106694, "loss": 1.2434, "step": 273 }, { "epoch": 0.05, "learning_rate": 0.00019978057299430079, "loss": 1.2731, "step": 274 }, { "epoch": 0.05, "learning_rate": 0.00019977660302470843, "loss": 1.2024, "step": 275 }, { "epoch": 0.05, "learning_rate": 0.0001997725975033069, "loss": 1.2571, "step": 276 }, { "epoch": 0.05, "learning_rate": 0.00019976855643152338, "loss": 1.2678, "step": 277 }, { "epoch": 0.05, "learning_rate": 0.00019976447981079782, "loss": 1.1214, "step": 278 }, { "epoch": 0.05, "learning_rate": 0.0001997603676425827, "loss": 1.2492, "step": 279 }, { "epoch": 0.05, "learning_rate": 0.00019975621992834327, "loss": 1.1923, "step": 280 }, { "epoch": 0.05, "learning_rate": 0.00019975203666955745, "loss": 1.1346, "step": 281 }, { "epoch": 0.05, "learning_rate": 0.0001997478178677158, "loss": 1.2489, "step": 282 }, { "epoch": 0.05, "learning_rate": 0.00019974356352432147, "loss": 1.2822, "step": 283 }, { "epoch": 0.05, "learning_rate": 0.00019973927364089043, "loss": 1.1404, "step": 284 }, { "epoch": 0.05, "learning_rate": 0.0001997349482189512, "loss": 1.1498, "step": 285 }, { "epoch": 0.05, "learning_rate": 0.0001997305872600449, "loss": 1.31, "step": 286 }, { "epoch": 0.05, "learning_rate": 0.0001997261907657255, "loss": 1.4395, "step": 287 }, { "epoch": 0.05, "learning_rate": 0.0001997217587375595, "loss": 1.2779, "step": 288 }, { "epoch": 0.05, "learning_rate": 0.0001997172911771261, "loss": 1.2425, "step": 289 }, { "epoch": 0.05, "learning_rate": 0.00019971278808601716, "loss": 1.1404, "step": 290 }, { "epoch": 0.05, "learning_rate": 0.0001997082494658372, "loss": 1.2428, "step": 291 }, { "epoch": 0.05, "learning_rate": 0.00019970367531820337, "loss": 1.3711, "step": 292 }, { "epoch": 0.05, "learning_rate": 0.0001996990656447455, "loss": 1.382, "step": 293 }, { "epoch": 0.05, "learning_rate": 0.00019969442044710613, "loss": 1.3265, "step": 294 }, { "epoch": 0.05, "learning_rate": 0.00019968973972694037, "loss": 1.2355, "step": 295 }, { "epoch": 0.05, "learning_rate": 0.000199685023485916, "loss": 1.0992, "step": 296 }, { "epoch": 0.05, "learning_rate": 0.00019968027172571356, "loss": 1.2816, "step": 297 }, { "epoch": 0.05, "learning_rate": 0.00019967548444802612, "loss": 1.2267, "step": 298 }, { "epoch": 0.06, "learning_rate": 0.00019967066165455942, "loss": 1.2255, "step": 299 }, { "epoch": 0.06, "learning_rate": 0.00019966580334703196, "loss": 1.3147, "step": 300 }, { "epoch": 0.06, "learning_rate": 0.0001996609095271748, "loss": 1.1106, "step": 301 }, { "epoch": 0.06, "learning_rate": 0.00019965598019673167, "loss": 1.1723, "step": 302 }, { "epoch": 0.06, "learning_rate": 0.00019965101535745895, "loss": 1.3691, "step": 303 }, { "epoch": 0.06, "learning_rate": 0.00019964601501112566, "loss": 1.3531, "step": 304 }, { "epoch": 0.06, "learning_rate": 0.00019964097915951357, "loss": 1.2003, "step": 305 }, { "epoch": 0.06, "learning_rate": 0.00019963590780441696, "loss": 1.2816, "step": 306 }, { "epoch": 0.06, "learning_rate": 0.00019963080094764286, "loss": 1.2862, "step": 307 }, { "epoch": 0.06, "learning_rate": 0.00019962565859101087, "loss": 1.1749, "step": 308 }, { "epoch": 0.06, "learning_rate": 0.00019962048073635336, "loss": 1.2648, "step": 309 }, { "epoch": 0.06, "learning_rate": 0.00019961526738551522, "loss": 1.4031, "step": 310 }, { "epoch": 0.06, "learning_rate": 0.000199610018540354, "loss": 1.0919, "step": 311 }, { "epoch": 0.06, "learning_rate": 0.00019960473420274004, "loss": 1.2464, "step": 312 }, { "epoch": 0.06, "learning_rate": 0.00019959941437455614, "loss": 1.2753, "step": 313 }, { "epoch": 0.06, "learning_rate": 0.00019959405905769785, "loss": 1.22, "step": 314 }, { "epoch": 0.06, "learning_rate": 0.0001995886682540734, "loss": 1.3627, "step": 315 }, { "epoch": 0.06, "learning_rate": 0.0001995832419656035, "loss": 1.2478, "step": 316 }, { "epoch": 0.06, "learning_rate": 0.00019957778019422172, "loss": 1.1603, "step": 317 }, { "epoch": 0.06, "learning_rate": 0.0001995722829418741, "loss": 1.1416, "step": 318 }, { "epoch": 0.06, "learning_rate": 0.00019956675021051945, "loss": 1.2748, "step": 319 }, { "epoch": 0.06, "learning_rate": 0.0001995611820021291, "loss": 1.3461, "step": 320 }, { "epoch": 0.06, "learning_rate": 0.00019955557831868707, "loss": 1.1568, "step": 321 }, { "epoch": 0.06, "learning_rate": 0.00019954993916219008, "loss": 1.1858, "step": 322 }, { "epoch": 0.06, "learning_rate": 0.00019954426453464746, "loss": 1.2819, "step": 323 }, { "epoch": 0.06, "learning_rate": 0.00019953855443808108, "loss": 1.1442, "step": 324 }, { "epoch": 0.06, "learning_rate": 0.0001995328088745256, "loss": 1.1556, "step": 325 }, { "epoch": 0.06, "learning_rate": 0.00019952702784602818, "loss": 1.0588, "step": 326 }, { "epoch": 0.06, "learning_rate": 0.00019952121135464874, "loss": 1.3484, "step": 327 }, { "epoch": 0.06, "learning_rate": 0.00019951535940245977, "loss": 1.1588, "step": 328 }, { "epoch": 0.06, "learning_rate": 0.00019950947199154638, "loss": 1.312, "step": 329 }, { "epoch": 0.06, "learning_rate": 0.00019950354912400635, "loss": 1.2141, "step": 330 }, { "epoch": 0.06, "learning_rate": 0.00019949759080195006, "loss": 1.2976, "step": 331 }, { "epoch": 0.06, "learning_rate": 0.0001994915970275006, "loss": 1.3022, "step": 332 }, { "epoch": 0.06, "learning_rate": 0.00019948556780279356, "loss": 1.1101, "step": 333 }, { "epoch": 0.06, "learning_rate": 0.0001994795031299773, "loss": 1.2766, "step": 334 }, { "epoch": 0.06, "learning_rate": 0.00019947340301121268, "loss": 1.1803, "step": 335 }, { "epoch": 0.06, "learning_rate": 0.00019946726744867336, "loss": 1.198, "step": 336 }, { "epoch": 0.06, "learning_rate": 0.00019946109644454544, "loss": 1.1501, "step": 337 }, { "epoch": 0.06, "learning_rate": 0.00019945489000102777, "loss": 1.4131, "step": 338 }, { "epoch": 0.06, "learning_rate": 0.00019944864812033178, "loss": 1.381, "step": 339 }, { "epoch": 0.06, "learning_rate": 0.0001994423708046816, "loss": 1.1395, "step": 340 }, { "epoch": 0.06, "learning_rate": 0.0001994360580563138, "loss": 1.1941, "step": 341 }, { "epoch": 0.06, "learning_rate": 0.0001994297098774778, "loss": 1.3061, "step": 342 }, { "epoch": 0.06, "learning_rate": 0.00019942332627043556, "loss": 1.2008, "step": 343 }, { "epoch": 0.06, "learning_rate": 0.0001994169072374616, "loss": 1.1016, "step": 344 }, { "epoch": 0.06, "learning_rate": 0.00019941045278084308, "loss": 1.2342, "step": 345 }, { "epoch": 0.06, "learning_rate": 0.00019940396290287987, "loss": 1.2221, "step": 346 }, { "epoch": 0.06, "learning_rate": 0.00019939743760588442, "loss": 1.2108, "step": 347 }, { "epoch": 0.06, "learning_rate": 0.00019939087689218175, "loss": 1.2256, "step": 348 }, { "epoch": 0.06, "learning_rate": 0.00019938428076410954, "loss": 1.2655, "step": 349 }, { "epoch": 0.06, "learning_rate": 0.00019937764922401806, "loss": 1.3665, "step": 350 }, { "epoch": 0.06, "learning_rate": 0.0001993709822742703, "loss": 1.1971, "step": 351 }, { "epoch": 0.06, "learning_rate": 0.00019936427991724167, "loss": 1.1877, "step": 352 }, { "epoch": 0.07, "learning_rate": 0.00019935754215532042, "loss": 1.1082, "step": 353 }, { "epoch": 0.07, "learning_rate": 0.00019935076899090724, "loss": 1.2556, "step": 354 }, { "epoch": 0.07, "learning_rate": 0.00019934396042641555, "loss": 1.1988, "step": 355 }, { "epoch": 0.07, "learning_rate": 0.0001993371164642713, "loss": 1.209, "step": 356 }, { "epoch": 0.07, "learning_rate": 0.00019933023710691315, "loss": 1.1013, "step": 357 }, { "epoch": 0.07, "learning_rate": 0.00019932332235679225, "loss": 1.1668, "step": 358 }, { "epoch": 0.07, "learning_rate": 0.00019931637221637242, "loss": 1.2788, "step": 359 }, { "epoch": 0.07, "learning_rate": 0.00019930938668813016, "loss": 1.2009, "step": 360 }, { "epoch": 0.07, "learning_rate": 0.00019930236577455444, "loss": 1.156, "step": 361 }, { "epoch": 0.07, "learning_rate": 0.00019929530947814693, "loss": 1.0555, "step": 362 }, { "epoch": 0.07, "learning_rate": 0.00019928821780142194, "loss": 1.3202, "step": 363 }, { "epoch": 0.07, "learning_rate": 0.00019928109074690624, "loss": 1.2736, "step": 364 }, { "epoch": 0.07, "learning_rate": 0.0001992739283171394, "loss": 1.1188, "step": 365 }, { "epoch": 0.07, "learning_rate": 0.0001992667305146734, "loss": 1.3308, "step": 366 }, { "epoch": 0.07, "learning_rate": 0.00019925949734207299, "loss": 1.0747, "step": 367 }, { "epoch": 0.07, "learning_rate": 0.0001992522288019154, "loss": 1.1174, "step": 368 }, { "epoch": 0.07, "learning_rate": 0.0001992449248967906, "loss": 1.0753, "step": 369 }, { "epoch": 0.07, "learning_rate": 0.00019923758562930094, "loss": 1.1652, "step": 370 }, { "epoch": 0.07, "learning_rate": 0.00019923021100206164, "loss": 1.2185, "step": 371 }, { "epoch": 0.07, "learning_rate": 0.0001992228010177003, "loss": 1.2191, "step": 372 }, { "epoch": 0.07, "learning_rate": 0.0001992153556788572, "loss": 1.2828, "step": 373 }, { "epoch": 0.07, "learning_rate": 0.00019920787498818528, "loss": 1.1691, "step": 374 }, { "epoch": 0.07, "learning_rate": 0.00019920035894834993, "loss": 1.1362, "step": 375 }, { "epoch": 0.07, "learning_rate": 0.00019919280756202932, "loss": 1.2577, "step": 376 }, { "epoch": 0.07, "learning_rate": 0.00019918522083191403, "loss": 1.1471, "step": 377 }, { "epoch": 0.07, "learning_rate": 0.00019917759876070736, "loss": 1.1152, "step": 378 }, { "epoch": 0.07, "learning_rate": 0.00019916994135112516, "loss": 1.1049, "step": 379 }, { "epoch": 0.07, "learning_rate": 0.00019916224860589587, "loss": 1.2101, "step": 380 }, { "epoch": 0.07, "learning_rate": 0.00019915452052776051, "loss": 1.2618, "step": 381 }, { "epoch": 0.07, "learning_rate": 0.00019914675711947273, "loss": 1.1626, "step": 382 }, { "epoch": 0.07, "learning_rate": 0.00019913895838379872, "loss": 1.1685, "step": 383 }, { "epoch": 0.07, "learning_rate": 0.00019913112432351726, "loss": 1.2111, "step": 384 }, { "epoch": 0.07, "learning_rate": 0.00019912325494141978, "loss": 1.1577, "step": 385 }, { "epoch": 0.07, "learning_rate": 0.00019911535024031026, "loss": 1.3218, "step": 386 }, { "epoch": 0.07, "learning_rate": 0.00019910741022300519, "loss": 1.1899, "step": 387 }, { "epoch": 0.07, "learning_rate": 0.00019909943489233376, "loss": 1.1442, "step": 388 }, { "epoch": 0.07, "learning_rate": 0.00019909142425113767, "loss": 1.1172, "step": 389 }, { "epoch": 0.07, "learning_rate": 0.00019908337830227123, "loss": 1.1798, "step": 390 }, { "epoch": 0.07, "learning_rate": 0.00019907529704860137, "loss": 1.1527, "step": 391 }, { "epoch": 0.07, "learning_rate": 0.00019906718049300749, "loss": 1.1001, "step": 392 }, { "epoch": 0.07, "learning_rate": 0.00019905902863838167, "loss": 1.1234, "step": 393 }, { "epoch": 0.07, "learning_rate": 0.0001990508414876285, "loss": 1.2372, "step": 394 }, { "epoch": 0.07, "learning_rate": 0.0001990426190436652, "loss": 1.2549, "step": 395 }, { "epoch": 0.07, "learning_rate": 0.00019903436130942157, "loss": 1.0752, "step": 396 }, { "epoch": 0.07, "learning_rate": 0.0001990260682878399, "loss": 1.3167, "step": 397 }, { "epoch": 0.07, "learning_rate": 0.00019901773998187513, "loss": 1.1125, "step": 398 }, { "epoch": 0.07, "learning_rate": 0.00019900937639449477, "loss": 1.2724, "step": 399 }, { "epoch": 0.07, "learning_rate": 0.00019900097752867886, "loss": 1.2467, "step": 400 }, { "epoch": 0.07, "learning_rate": 0.00019899254338742007, "loss": 1.1831, "step": 401 }, { "epoch": 0.07, "learning_rate": 0.00019898407397372355, "loss": 1.2247, "step": 402 }, { "epoch": 0.07, "learning_rate": 0.0001989755692906071, "loss": 1.2216, "step": 403 }, { "epoch": 0.07, "learning_rate": 0.0001989670293411011, "loss": 1.4211, "step": 404 }, { "epoch": 0.07, "learning_rate": 0.0001989584541282484, "loss": 1.2358, "step": 405 }, { "epoch": 0.07, "learning_rate": 0.00019894984365510449, "loss": 1.0646, "step": 406 }, { "epoch": 0.07, "learning_rate": 0.00019894119792473734, "loss": 1.1803, "step": 407 }, { "epoch": 0.08, "learning_rate": 0.00019893251694022768, "loss": 1.2573, "step": 408 }, { "epoch": 0.08, "learning_rate": 0.00019892380070466855, "loss": 1.1105, "step": 409 }, { "epoch": 0.08, "learning_rate": 0.0001989150492211657, "loss": 1.3072, "step": 410 }, { "epoch": 0.08, "learning_rate": 0.00019890626249283744, "loss": 1.111, "step": 411 }, { "epoch": 0.08, "learning_rate": 0.00019889744052281456, "loss": 1.1981, "step": 412 }, { "epoch": 0.08, "learning_rate": 0.00019888858331424048, "loss": 1.208, "step": 413 }, { "epoch": 0.08, "learning_rate": 0.00019887969087027113, "loss": 1.1014, "step": 414 }, { "epoch": 0.08, "learning_rate": 0.00019887076319407502, "loss": 1.1917, "step": 415 }, { "epoch": 0.08, "learning_rate": 0.00019886180028883321, "loss": 1.2398, "step": 416 }, { "epoch": 0.08, "learning_rate": 0.00019885280215773933, "loss": 1.309, "step": 417 }, { "epoch": 0.08, "learning_rate": 0.0001988437688039995, "loss": 1.119, "step": 418 }, { "epoch": 0.08, "learning_rate": 0.00019883470023083244, "loss": 1.2742, "step": 419 }, { "epoch": 0.08, "learning_rate": 0.00019882559644146945, "loss": 1.2363, "step": 420 }, { "epoch": 0.08, "learning_rate": 0.00019881645743915428, "loss": 1.1619, "step": 421 }, { "epoch": 0.08, "learning_rate": 0.00019880728322714333, "loss": 1.3076, "step": 422 }, { "epoch": 0.08, "learning_rate": 0.0001987980738087055, "loss": 1.4345, "step": 423 }, { "epoch": 0.08, "learning_rate": 0.00019878882918712218, "loss": 1.2312, "step": 424 }, { "epoch": 0.08, "learning_rate": 0.00019877954936568744, "loss": 1.118, "step": 425 }, { "epoch": 0.08, "learning_rate": 0.00019877023434770774, "loss": 1.0994, "step": 426 }, { "epoch": 0.08, "learning_rate": 0.0001987608841365022, "loss": 1.348, "step": 427 }, { "epoch": 0.08, "learning_rate": 0.0001987514987354024, "loss": 1.2062, "step": 428 }, { "epoch": 0.08, "learning_rate": 0.00019874207814775253, "loss": 1.077, "step": 429 }, { "epoch": 0.08, "learning_rate": 0.00019873262237690923, "loss": 1.2571, "step": 430 }, { "epoch": 0.08, "learning_rate": 0.00019872313142624176, "loss": 1.2442, "step": 431 }, { "epoch": 0.08, "learning_rate": 0.00019871360529913186, "loss": 1.2032, "step": 432 }, { "epoch": 0.08, "learning_rate": 0.00019870404399897383, "loss": 1.2832, "step": 433 }, { "epoch": 0.08, "learning_rate": 0.00019869444752917452, "loss": 1.2266, "step": 434 }, { "epoch": 0.08, "learning_rate": 0.00019868481589315325, "loss": 1.3428, "step": 435 }, { "epoch": 0.08, "learning_rate": 0.00019867514909434193, "loss": 1.0681, "step": 436 }, { "epoch": 0.08, "learning_rate": 0.00019866544713618493, "loss": 1.2713, "step": 437 }, { "epoch": 0.08, "learning_rate": 0.00019865571002213928, "loss": 1.124, "step": 438 }, { "epoch": 0.08, "learning_rate": 0.00019864593775567436, "loss": 1.2602, "step": 439 }, { "epoch": 0.08, "learning_rate": 0.00019863613034027224, "loss": 1.0385, "step": 440 }, { "epoch": 0.08, "learning_rate": 0.0001986262877794274, "loss": 1.2134, "step": 441 }, { "epoch": 0.08, "learning_rate": 0.0001986164100766469, "loss": 1.2797, "step": 442 }, { "epoch": 0.08, "learning_rate": 0.0001986064972354503, "loss": 1.1044, "step": 443 }, { "epoch": 0.08, "learning_rate": 0.00019859654925936967, "loss": 1.1791, "step": 444 }, { "epoch": 0.08, "learning_rate": 0.00019858656615194965, "loss": 1.3044, "step": 445 }, { "epoch": 0.08, "learning_rate": 0.00019857654791674732, "loss": 1.5085, "step": 446 }, { "epoch": 0.08, "learning_rate": 0.00019856649455733232, "loss": 1.2118, "step": 447 }, { "epoch": 0.08, "learning_rate": 0.00019855640607728683, "loss": 1.2573, "step": 448 }, { "epoch": 0.08, "learning_rate": 0.00019854628248020553, "loss": 1.1443, "step": 449 }, { "epoch": 0.08, "learning_rate": 0.00019853612376969558, "loss": 1.2298, "step": 450 }, { "epoch": 0.08, "learning_rate": 0.00019852592994937663, "loss": 1.1882, "step": 451 }, { "epoch": 0.08, "learning_rate": 0.00019851570102288096, "loss": 1.1064, "step": 452 }, { "epoch": 0.08, "learning_rate": 0.0001985054369938532, "loss": 1.2463, "step": 453 }, { "epoch": 0.08, "learning_rate": 0.00019849513786595067, "loss": 1.1564, "step": 454 }, { "epoch": 0.08, "learning_rate": 0.000198484803642843, "loss": 1.2053, "step": 455 }, { "epoch": 0.08, "learning_rate": 0.00019847443432821244, "loss": 1.1426, "step": 456 }, { "epoch": 0.08, "learning_rate": 0.00019846402992575377, "loss": 1.2395, "step": 457 }, { "epoch": 0.08, "learning_rate": 0.00019845359043917416, "loss": 1.207, "step": 458 }, { "epoch": 0.08, "learning_rate": 0.0001984431158721934, "loss": 1.0262, "step": 459 }, { "epoch": 0.08, "learning_rate": 0.0001984326062285437, "loss": 1.2998, "step": 460 }, { "epoch": 0.08, "learning_rate": 0.00019842206151196977, "loss": 1.2073, "step": 461 }, { "epoch": 0.09, "learning_rate": 0.0001984114817262289, "loss": 1.2545, "step": 462 }, { "epoch": 0.09, "learning_rate": 0.00019840086687509078, "loss": 1.2251, "step": 463 }, { "epoch": 0.09, "learning_rate": 0.00019839021696233764, "loss": 1.2677, "step": 464 }, { "epoch": 0.09, "learning_rate": 0.0001983795319917642, "loss": 1.0433, "step": 465 }, { "epoch": 0.09, "learning_rate": 0.0001983688119671776, "loss": 1.1925, "step": 466 }, { "epoch": 0.09, "learning_rate": 0.00019835805689239768, "loss": 1.2566, "step": 467 }, { "epoch": 0.09, "learning_rate": 0.00019834726677125648, "loss": 1.2067, "step": 468 }, { "epoch": 0.09, "learning_rate": 0.00019833644160759874, "loss": 1.4158, "step": 469 }, { "epoch": 0.09, "learning_rate": 0.00019832558140528162, "loss": 1.1794, "step": 470 }, { "epoch": 0.09, "learning_rate": 0.00019831468616817471, "loss": 1.1727, "step": 471 }, { "epoch": 0.09, "learning_rate": 0.00019830375590016022, "loss": 1.091, "step": 472 }, { "epoch": 0.09, "learning_rate": 0.00019829279060513272, "loss": 1.2055, "step": 473 }, { "epoch": 0.09, "learning_rate": 0.00019828179028699925, "loss": 1.2429, "step": 474 }, { "epoch": 0.09, "learning_rate": 0.00019827075494967944, "loss": 1.232, "step": 475 }, { "epoch": 0.09, "learning_rate": 0.00019825968459710532, "loss": 1.2556, "step": 476 }, { "epoch": 0.09, "learning_rate": 0.0001982485792332214, "loss": 1.1901, "step": 477 }, { "epoch": 0.09, "learning_rate": 0.0001982374388619847, "loss": 1.2657, "step": 478 }, { "epoch": 0.09, "learning_rate": 0.0001982262634873647, "loss": 1.219, "step": 479 }, { "epoch": 0.09, "learning_rate": 0.00019821505311334333, "loss": 1.1886, "step": 480 }, { "epoch": 0.09, "learning_rate": 0.00019820380774391495, "loss": 1.0978, "step": 481 }, { "epoch": 0.09, "learning_rate": 0.0001981925273830865, "loss": 1.1865, "step": 482 }, { "epoch": 0.09, "learning_rate": 0.00019818121203487736, "loss": 1.0804, "step": 483 }, { "epoch": 0.09, "learning_rate": 0.0001981698617033193, "loss": 1.2821, "step": 484 }, { "epoch": 0.09, "learning_rate": 0.00019815847639245658, "loss": 1.0862, "step": 485 }, { "epoch": 0.09, "learning_rate": 0.000198147056106346, "loss": 1.0903, "step": 486 }, { "epoch": 0.09, "learning_rate": 0.00019813560084905676, "loss": 1.1038, "step": 487 }, { "epoch": 0.09, "learning_rate": 0.00019812411062467052, "loss": 1.2229, "step": 488 }, { "epoch": 0.09, "learning_rate": 0.0001981125854372814, "loss": 1.2232, "step": 489 }, { "epoch": 0.09, "learning_rate": 0.00019810102529099597, "loss": 1.1316, "step": 490 }, { "epoch": 0.09, "learning_rate": 0.00019808943018993332, "loss": 1.1599, "step": 491 }, { "epoch": 0.09, "learning_rate": 0.00019807780013822492, "loss": 1.2835, "step": 492 }, { "epoch": 0.09, "learning_rate": 0.00019806613514001474, "loss": 1.2658, "step": 493 }, { "epoch": 0.09, "learning_rate": 0.00019805443519945915, "loss": 1.1632, "step": 494 }, { "epoch": 0.09, "learning_rate": 0.00019804270032072703, "loss": 1.2162, "step": 495 }, { "epoch": 0.09, "learning_rate": 0.00019803093050799967, "loss": 1.2386, "step": 496 }, { "epoch": 0.09, "learning_rate": 0.00019801912576547083, "loss": 1.2901, "step": 497 }, { "epoch": 0.09, "learning_rate": 0.0001980072860973467, "loss": 1.2749, "step": 498 }, { "epoch": 0.09, "learning_rate": 0.00019799541150784594, "loss": 1.2116, "step": 499 }, { "epoch": 0.09, "learning_rate": 0.0001979835020011996, "loss": 1.2822, "step": 500 }, { "epoch": 0.09, "learning_rate": 0.0001979715575816512, "loss": 1.2343, "step": 501 }, { "epoch": 0.09, "learning_rate": 0.00019795957825345678, "loss": 1.3571, "step": 502 }, { "epoch": 0.09, "learning_rate": 0.00019794756402088462, "loss": 1.1497, "step": 503 }, { "epoch": 0.09, "learning_rate": 0.0001979355148882157, "loss": 1.1505, "step": 504 }, { "epoch": 0.09, "learning_rate": 0.00019792343085974317, "loss": 1.2298, "step": 505 }, { "epoch": 0.09, "learning_rate": 0.0001979113119397728, "loss": 1.2133, "step": 506 }, { "epoch": 0.09, "learning_rate": 0.00019789915813262274, "loss": 1.1817, "step": 507 }, { "epoch": 0.09, "learning_rate": 0.00019788696944262357, "loss": 1.1928, "step": 508 }, { "epoch": 0.09, "learning_rate": 0.00019787474587411823, "loss": 1.3481, "step": 509 }, { "epoch": 0.09, "learning_rate": 0.0001978624874314622, "loss": 1.1347, "step": 510 }, { "epoch": 0.09, "learning_rate": 0.00019785019411902336, "loss": 1.117, "step": 511 }, { "epoch": 0.09, "learning_rate": 0.00019783786594118193, "loss": 1.225, "step": 512 }, { "epoch": 0.09, "learning_rate": 0.00019782550290233064, "loss": 1.1332, "step": 513 }, { "epoch": 0.09, "learning_rate": 0.00019781310500687464, "loss": 1.3701, "step": 514 }, { "epoch": 0.09, "learning_rate": 0.00019780067225923142, "loss": 1.2036, "step": 515 }, { "epoch": 0.1, "learning_rate": 0.00019778820466383098, "loss": 1.0846, "step": 516 }, { "epoch": 0.1, "learning_rate": 0.0001977757022251157, "loss": 1.2197, "step": 517 }, { "epoch": 0.1, "learning_rate": 0.00019776316494754035, "loss": 1.1338, "step": 518 }, { "epoch": 0.1, "learning_rate": 0.00019775059283557217, "loss": 1.2594, "step": 519 }, { "epoch": 0.1, "learning_rate": 0.0001977379858936908, "loss": 1.1529, "step": 520 }, { "epoch": 0.1, "learning_rate": 0.0001977253441263882, "loss": 1.2322, "step": 521 }, { "epoch": 0.1, "learning_rate": 0.00019771266753816885, "loss": 1.1423, "step": 522 }, { "epoch": 0.1, "learning_rate": 0.00019769995613354958, "loss": 1.3717, "step": 523 }, { "epoch": 0.1, "learning_rate": 0.0001976872099170597, "loss": 1.178, "step": 524 }, { "epoch": 0.1, "learning_rate": 0.0001976744288932408, "loss": 1.2057, "step": 525 }, { "epoch": 0.1, "learning_rate": 0.00019766161306664694, "loss": 1.135, "step": 526 }, { "epoch": 0.1, "learning_rate": 0.00019764876244184466, "loss": 1.1521, "step": 527 }, { "epoch": 0.1, "learning_rate": 0.0001976358770234127, "loss": 1.2875, "step": 528 }, { "epoch": 0.1, "learning_rate": 0.0001976229568159424, "loss": 1.1083, "step": 529 }, { "epoch": 0.1, "learning_rate": 0.0001976100018240374, "loss": 1.1937, "step": 530 }, { "epoch": 0.1, "learning_rate": 0.00019759701205231372, "loss": 1.1522, "step": 531 }, { "epoch": 0.1, "learning_rate": 0.00019758398750539984, "loss": 1.0904, "step": 532 }, { "epoch": 0.1, "learning_rate": 0.00019757092818793655, "loss": 1.1633, "step": 533 }, { "epoch": 0.1, "learning_rate": 0.00019755783410457705, "loss": 1.2325, "step": 534 }, { "epoch": 0.1, "learning_rate": 0.00019754470525998704, "loss": 1.203, "step": 535 }, { "epoch": 0.1, "learning_rate": 0.0001975315416588444, "loss": 1.0906, "step": 536 }, { "epoch": 0.1, "learning_rate": 0.00019751834330583956, "loss": 0.9769, "step": 537 }, { "epoch": 0.1, "learning_rate": 0.00019750511020567532, "loss": 1.1717, "step": 538 }, { "epoch": 0.1, "learning_rate": 0.00019749184236306672, "loss": 1.3062, "step": 539 }, { "epoch": 0.1, "learning_rate": 0.00019747853978274135, "loss": 1.2284, "step": 540 }, { "epoch": 0.1, "learning_rate": 0.0001974652024694391, "loss": 1.1096, "step": 541 }, { "epoch": 0.1, "learning_rate": 0.00019745183042791227, "loss": 1.2186, "step": 542 }, { "epoch": 0.1, "learning_rate": 0.0001974384236629254, "loss": 1.176, "step": 543 }, { "epoch": 0.1, "learning_rate": 0.00019742498217925565, "loss": 1.194, "step": 544 }, { "epoch": 0.1, "learning_rate": 0.00019741150598169232, "loss": 1.1384, "step": 545 }, { "epoch": 0.1, "learning_rate": 0.0001973979950750372, "loss": 1.0886, "step": 546 }, { "epoch": 0.1, "learning_rate": 0.00019738444946410436, "loss": 1.0808, "step": 547 }, { "epoch": 0.1, "learning_rate": 0.00019737086915372037, "loss": 1.2143, "step": 548 }, { "epoch": 0.1, "learning_rate": 0.00019735725414872404, "loss": 1.2247, "step": 549 }, { "epoch": 0.1, "learning_rate": 0.0001973436044539666, "loss": 1.2038, "step": 550 }, { "epoch": 0.1, "learning_rate": 0.00019732992007431168, "loss": 1.2721, "step": 551 }, { "epoch": 0.1, "learning_rate": 0.00019731620101463512, "loss": 1.2642, "step": 552 }, { "epoch": 0.1, "learning_rate": 0.00019730244727982527, "loss": 1.1178, "step": 553 }, { "epoch": 0.1, "learning_rate": 0.00019728865887478278, "loss": 1.2442, "step": 554 }, { "epoch": 0.1, "learning_rate": 0.00019727483580442065, "loss": 1.1002, "step": 555 }, { "epoch": 0.1, "learning_rate": 0.0001972609780736642, "loss": 1.1314, "step": 556 }, { "epoch": 0.1, "learning_rate": 0.00019724708568745117, "loss": 1.1812, "step": 557 }, { "epoch": 0.1, "learning_rate": 0.00019723315865073163, "loss": 1.2379, "step": 558 }, { "epoch": 0.1, "learning_rate": 0.00019721919696846795, "loss": 1.191, "step": 559 }, { "epoch": 0.1, "learning_rate": 0.0001972052006456349, "loss": 1.331, "step": 560 }, { "epoch": 0.1, "learning_rate": 0.0001971911696872195, "loss": 1.1976, "step": 561 }, { "epoch": 0.1, "learning_rate": 0.00019717710409822127, "loss": 1.0579, "step": 562 }, { "epoch": 0.1, "learning_rate": 0.00019716300388365193, "loss": 1.2482, "step": 563 }, { "epoch": 0.1, "learning_rate": 0.00019714886904853554, "loss": 1.2387, "step": 564 }, { "epoch": 0.1, "learning_rate": 0.00019713469959790864, "loss": 1.2468, "step": 565 }, { "epoch": 0.1, "learning_rate": 0.00019712049553681992, "loss": 1.3166, "step": 566 }, { "epoch": 0.1, "learning_rate": 0.00019710625687033054, "loss": 1.0676, "step": 567 }, { "epoch": 0.1, "learning_rate": 0.00019709198360351392, "loss": 1.2862, "step": 568 }, { "epoch": 0.1, "learning_rate": 0.0001970776757414558, "loss": 1.1312, "step": 569 }, { "epoch": 0.11, "learning_rate": 0.00019706333328925433, "loss": 1.149, "step": 570 }, { "epoch": 0.11, "learning_rate": 0.00019704895625201986, "loss": 1.3665, "step": 571 }, { "epoch": 0.11, "learning_rate": 0.00019703454463487521, "loss": 1.0719, "step": 572 }, { "epoch": 0.11, "learning_rate": 0.00019702009844295537, "loss": 1.1427, "step": 573 }, { "epoch": 0.11, "learning_rate": 0.00019700561768140775, "loss": 1.2704, "step": 574 }, { "epoch": 0.11, "learning_rate": 0.00019699110235539205, "loss": 1.143, "step": 575 }, { "epoch": 0.11, "learning_rate": 0.00019697655247008028, "loss": 1.0463, "step": 576 }, { "epoch": 0.11, "learning_rate": 0.00019696196803065677, "loss": 1.33, "step": 577 }, { "epoch": 0.11, "learning_rate": 0.00019694734904231818, "loss": 1.2367, "step": 578 }, { "epoch": 0.11, "learning_rate": 0.00019693269551027346, "loss": 1.1844, "step": 579 }, { "epoch": 0.11, "learning_rate": 0.00019691800743974384, "loss": 1.3012, "step": 580 }, { "epoch": 0.11, "learning_rate": 0.00019690328483596288, "loss": 1.2324, "step": 581 }, { "epoch": 0.11, "learning_rate": 0.0001968885277041765, "loss": 1.1778, "step": 582 }, { "epoch": 0.11, "learning_rate": 0.00019687373604964284, "loss": 1.2012, "step": 583 }, { "epoch": 0.11, "learning_rate": 0.0001968589098776324, "loss": 1.1676, "step": 584 }, { "epoch": 0.11, "learning_rate": 0.0001968440491934279, "loss": 1.2482, "step": 585 }, { "epoch": 0.11, "learning_rate": 0.00019682915400232447, "loss": 1.0683, "step": 586 }, { "epoch": 0.11, "learning_rate": 0.00019681422430962943, "loss": 1.2391, "step": 587 }, { "epoch": 0.11, "learning_rate": 0.00019679926012066247, "loss": 1.0742, "step": 588 }, { "epoch": 0.11, "learning_rate": 0.00019678426144075558, "loss": 1.2497, "step": 589 }, { "epoch": 0.11, "learning_rate": 0.0001967692282752529, "loss": 1.2041, "step": 590 }, { "epoch": 0.11, "learning_rate": 0.00019675416062951104, "loss": 1.0776, "step": 591 }, { "epoch": 0.11, "learning_rate": 0.0001967390585088988, "loss": 1.1932, "step": 592 }, { "epoch": 0.11, "learning_rate": 0.00019672392191879724, "loss": 1.1755, "step": 593 }, { "epoch": 0.11, "learning_rate": 0.0001967087508645998, "loss": 1.2152, "step": 594 }, { "epoch": 0.11, "learning_rate": 0.00019669354535171207, "loss": 1.2041, "step": 595 }, { "epoch": 0.11, "learning_rate": 0.00019667830538555204, "loss": 1.2402, "step": 596 }, { "epoch": 0.11, "learning_rate": 0.00019666303097154992, "loss": 1.1113, "step": 597 }, { "epoch": 0.11, "learning_rate": 0.00019664772211514818, "loss": 1.1334, "step": 598 }, { "epoch": 0.11, "learning_rate": 0.0001966323788218016, "loss": 1.2671, "step": 599 }, { "epoch": 0.11, "learning_rate": 0.0001966170010969772, "loss": 1.283, "step": 600 }, { "epoch": 0.11, "learning_rate": 0.0001966015889461543, "loss": 1.1629, "step": 601 }, { "epoch": 0.11, "learning_rate": 0.0001965861423748244, "loss": 1.2332, "step": 602 }, { "epoch": 0.11, "learning_rate": 0.00019657066138849142, "loss": 1.2196, "step": 603 }, { "epoch": 0.11, "learning_rate": 0.0001965551459926714, "loss": 1.0983, "step": 604 }, { "epoch": 0.11, "learning_rate": 0.0001965395961928927, "loss": 1.2048, "step": 605 }, { "epoch": 0.11, "learning_rate": 0.00019652401199469596, "loss": 1.2282, "step": 606 }, { "epoch": 0.11, "learning_rate": 0.00019650839340363402, "loss": 1.2745, "step": 607 }, { "epoch": 0.11, "learning_rate": 0.00019649274042527203, "loss": 1.0351, "step": 608 }, { "epoch": 0.11, "learning_rate": 0.0001964770530651873, "loss": 1.1326, "step": 609 }, { "epoch": 0.11, "learning_rate": 0.00019646133132896957, "loss": 1.1223, "step": 610 }, { "epoch": 0.11, "learning_rate": 0.0001964455752222206, "loss": 1.1617, "step": 611 }, { "epoch": 0.11, "learning_rate": 0.0001964297847505546, "loss": 0.9469, "step": 612 }, { "epoch": 0.11, "learning_rate": 0.0001964139599195979, "loss": 1.3478, "step": 613 }, { "epoch": 0.11, "learning_rate": 0.0001963981007349891, "loss": 1.228, "step": 614 }, { "epoch": 0.11, "learning_rate": 0.00019638220720237908, "loss": 1.0367, "step": 615 }, { "epoch": 0.11, "learning_rate": 0.0001963662793274309, "loss": 1.1571, "step": 616 }, { "epoch": 0.11, "learning_rate": 0.0001963503171158199, "loss": 1.1625, "step": 617 }, { "epoch": 0.11, "learning_rate": 0.00019633432057323367, "loss": 1.0762, "step": 618 }, { "epoch": 0.11, "learning_rate": 0.00019631828970537196, "loss": 1.1159, "step": 619 }, { "epoch": 0.11, "learning_rate": 0.0001963022245179468, "loss": 1.1197, "step": 620 }, { "epoch": 0.11, "learning_rate": 0.00019628612501668244, "loss": 1.1938, "step": 621 }, { "epoch": 0.11, "learning_rate": 0.0001962699912073154, "loss": 1.2787, "step": 622 }, { "epoch": 0.11, "learning_rate": 0.00019625382309559433, "loss": 1.2692, "step": 623 }, { "epoch": 0.11, "learning_rate": 0.0001962376206872802, "loss": 1.2699, "step": 624 }, { "epoch": 0.12, "learning_rate": 0.0001962213839881461, "loss": 1.0435, "step": 625 }, { "epoch": 0.12, "learning_rate": 0.00019620511300397748, "loss": 1.0805, "step": 626 }, { "epoch": 0.12, "learning_rate": 0.00019618880774057186, "loss": 1.198, "step": 627 }, { "epoch": 0.12, "learning_rate": 0.00019617246820373904, "loss": 1.0487, "step": 628 }, { "epoch": 0.12, "learning_rate": 0.00019615609439930102, "loss": 1.2051, "step": 629 }, { "epoch": 0.12, "learning_rate": 0.00019613968633309204, "loss": 1.1842, "step": 630 }, { "epoch": 0.12, "learning_rate": 0.00019612324401095855, "loss": 1.1456, "step": 631 }, { "epoch": 0.12, "learning_rate": 0.00019610676743875912, "loss": 0.998, "step": 632 }, { "epoch": 0.12, "learning_rate": 0.0001960902566223646, "loss": 1.0864, "step": 633 }, { "epoch": 0.12, "learning_rate": 0.00019607371156765805, "loss": 1.0404, "step": 634 }, { "epoch": 0.12, "learning_rate": 0.0001960571322805347, "loss": 1.1791, "step": 635 }, { "epoch": 0.12, "learning_rate": 0.00019604051876690197, "loss": 1.1075, "step": 636 }, { "epoch": 0.12, "learning_rate": 0.0001960238710326795, "loss": 1.0304, "step": 637 }, { "epoch": 0.12, "learning_rate": 0.00019600718908379912, "loss": 1.1064, "step": 638 }, { "epoch": 0.12, "learning_rate": 0.00019599047292620482, "loss": 1.1932, "step": 639 }, { "epoch": 0.12, "learning_rate": 0.0001959737225658528, "loss": 1.2289, "step": 640 }, { "epoch": 0.12, "learning_rate": 0.00019595693800871148, "loss": 1.221, "step": 641 }, { "epoch": 0.12, "learning_rate": 0.00019594011926076145, "loss": 1.1202, "step": 642 }, { "epoch": 0.12, "learning_rate": 0.0001959232663279954, "loss": 1.1035, "step": 643 }, { "epoch": 0.12, "learning_rate": 0.00019590637921641834, "loss": 1.3434, "step": 644 }, { "epoch": 0.12, "learning_rate": 0.0001958894579320473, "loss": 1.2162, "step": 645 }, { "epoch": 0.12, "learning_rate": 0.00019587250248091165, "loss": 1.2895, "step": 646 }, { "epoch": 0.12, "learning_rate": 0.00019585551286905282, "loss": 1.1358, "step": 647 }, { "epoch": 0.12, "learning_rate": 0.00019583848910252445, "loss": 1.2333, "step": 648 }, { "epoch": 0.12, "learning_rate": 0.00019582143118739234, "loss": 1.241, "step": 649 }, { "epoch": 0.12, "learning_rate": 0.0001958043391297345, "loss": 1.1452, "step": 650 }, { "epoch": 0.12, "learning_rate": 0.00019578721293564104, "loss": 1.2529, "step": 651 }, { "epoch": 0.12, "learning_rate": 0.00019577005261121427, "loss": 1.1906, "step": 652 }, { "epoch": 0.12, "learning_rate": 0.00019575285816256867, "loss": 1.0766, "step": 653 }, { "epoch": 0.12, "learning_rate": 0.00019573562959583084, "loss": 1.1954, "step": 654 }, { "epoch": 0.12, "learning_rate": 0.00019571836691713957, "loss": 1.0932, "step": 655 }, { "epoch": 0.12, "learning_rate": 0.00019570107013264581, "loss": 1.2837, "step": 656 }, { "epoch": 0.12, "learning_rate": 0.00019568373924851265, "loss": 1.159, "step": 657 }, { "epoch": 0.12, "learning_rate": 0.00019566637427091533, "loss": 1.1073, "step": 658 }, { "epoch": 0.12, "learning_rate": 0.00019564897520604117, "loss": 1.1395, "step": 659 }, { "epoch": 0.12, "learning_rate": 0.0001956315420600898, "loss": 1.2631, "step": 660 }, { "epoch": 0.12, "learning_rate": 0.00019561407483927284, "loss": 1.2867, "step": 661 }, { "epoch": 0.12, "learning_rate": 0.00019559657354981412, "loss": 1.0759, "step": 662 }, { "epoch": 0.12, "learning_rate": 0.00019557903819794956, "loss": 1.0898, "step": 663 }, { "epoch": 0.12, "learning_rate": 0.00019556146878992728, "loss": 1.2107, "step": 664 }, { "epoch": 0.12, "learning_rate": 0.00019554386533200755, "loss": 1.1191, "step": 665 }, { "epoch": 0.12, "learning_rate": 0.00019552622783046268, "loss": 1.1446, "step": 666 }, { "epoch": 0.12, "learning_rate": 0.00019550855629157717, "loss": 1.1514, "step": 667 }, { "epoch": 0.12, "learning_rate": 0.00019549085072164765, "loss": 1.306, "step": 668 }, { "epoch": 0.12, "learning_rate": 0.00019547311112698282, "loss": 1.1008, "step": 669 }, { "epoch": 0.12, "learning_rate": 0.00019545533751390363, "loss": 1.0854, "step": 670 }, { "epoch": 0.12, "learning_rate": 0.000195437529888743, "loss": 1.1968, "step": 671 }, { "epoch": 0.12, "learning_rate": 0.00019541968825784607, "loss": 1.1018, "step": 672 }, { "epoch": 0.12, "learning_rate": 0.00019540181262757005, "loss": 1.1339, "step": 673 }, { "epoch": 0.12, "learning_rate": 0.00019538390300428426, "loss": 1.1211, "step": 674 }, { "epoch": 0.12, "learning_rate": 0.00019536595939437018, "loss": 1.122, "step": 675 }, { "epoch": 0.12, "learning_rate": 0.00019534798180422138, "loss": 1.199, "step": 676 }, { "epoch": 0.12, "learning_rate": 0.0001953299702402435, "loss": 1.1657, "step": 677 }, { "epoch": 0.12, "learning_rate": 0.00019531192470885434, "loss": 1.2262, "step": 678 }, { "epoch": 0.13, "learning_rate": 0.00019529384521648377, "loss": 1.0857, "step": 679 }, { "epoch": 0.13, "learning_rate": 0.00019527573176957375, "loss": 1.1846, "step": 680 }, { "epoch": 0.13, "learning_rate": 0.00019525758437457835, "loss": 1.0988, "step": 681 }, { "epoch": 0.13, "learning_rate": 0.0001952394030379638, "loss": 1.2344, "step": 682 }, { "epoch": 0.13, "learning_rate": 0.00019522118776620827, "loss": 1.1866, "step": 683 }, { "epoch": 0.13, "learning_rate": 0.00019520293856580223, "loss": 1.1222, "step": 684 }, { "epoch": 0.13, "learning_rate": 0.00019518465544324806, "loss": 1.2921, "step": 685 }, { "epoch": 0.13, "learning_rate": 0.0001951663384050603, "loss": 1.0969, "step": 686 }, { "epoch": 0.13, "learning_rate": 0.00019514798745776557, "loss": 1.1727, "step": 687 }, { "epoch": 0.13, "learning_rate": 0.00019512960260790256, "loss": 1.0635, "step": 688 }, { "epoch": 0.13, "learning_rate": 0.0001951111838620221, "loss": 1.1204, "step": 689 }, { "epoch": 0.13, "learning_rate": 0.00019509273122668698, "loss": 1.1238, "step": 690 }, { "epoch": 0.13, "learning_rate": 0.0001950742447084722, "loss": 1.2426, "step": 691 }, { "epoch": 0.13, "learning_rate": 0.0001950557243139647, "loss": 1.2987, "step": 692 }, { "epoch": 0.13, "learning_rate": 0.00019503717004976364, "loss": 1.157, "step": 693 }, { "epoch": 0.13, "learning_rate": 0.00019501858192248007, "loss": 1.064, "step": 694 }, { "epoch": 0.13, "learning_rate": 0.0001949999599387373, "loss": 1.1298, "step": 695 }, { "epoch": 0.13, "learning_rate": 0.00019498130410517054, "loss": 1.1154, "step": 696 }, { "epoch": 0.13, "learning_rate": 0.00019496261442842713, "loss": 1.1822, "step": 697 }, { "epoch": 0.13, "learning_rate": 0.0001949438909151665, "loss": 1.2467, "step": 698 }, { "epoch": 0.13, "learning_rate": 0.00019492513357206005, "loss": 1.2198, "step": 699 }, { "epoch": 0.13, "learning_rate": 0.00019490634240579134, "loss": 1.1778, "step": 700 }, { "epoch": 0.13, "learning_rate": 0.00019488751742305588, "loss": 1.0721, "step": 701 }, { "epoch": 0.13, "learning_rate": 0.0001948686586305613, "loss": 1.153, "step": 702 }, { "epoch": 0.13, "learning_rate": 0.00019484976603502727, "loss": 1.2153, "step": 703 }, { "epoch": 0.13, "learning_rate": 0.00019483083964318544, "loss": 1.1328, "step": 704 }, { "epoch": 0.13, "learning_rate": 0.0001948118794617796, "loss": 1.1401, "step": 705 }, { "epoch": 0.13, "learning_rate": 0.00019479288549756555, "loss": 1.1606, "step": 706 }, { "epoch": 0.13, "learning_rate": 0.000194773857757311, "loss": 1.149, "step": 707 }, { "epoch": 0.13, "learning_rate": 0.0001947547962477959, "loss": 1.1166, "step": 708 }, { "epoch": 0.13, "learning_rate": 0.0001947357009758121, "loss": 1.0795, "step": 709 }, { "epoch": 0.13, "learning_rate": 0.0001947165719481635, "loss": 1.1281, "step": 710 }, { "epoch": 0.13, "learning_rate": 0.00019469740917166608, "loss": 1.2369, "step": 711 }, { "epoch": 0.13, "learning_rate": 0.00019467821265314777, "loss": 1.1901, "step": 712 }, { "epoch": 0.13, "learning_rate": 0.0001946589823994486, "loss": 1.0921, "step": 713 }, { "epoch": 0.13, "learning_rate": 0.00019463971841742056, "loss": 1.3153, "step": 714 }, { "epoch": 0.13, "learning_rate": 0.00019462042071392762, "loss": 1.1921, "step": 715 }, { "epoch": 0.13, "learning_rate": 0.00019460108929584592, "loss": 1.1313, "step": 716 }, { "epoch": 0.13, "learning_rate": 0.00019458172417006347, "loss": 1.1211, "step": 717 }, { "epoch": 0.13, "learning_rate": 0.00019456232534348033, "loss": 1.167, "step": 718 }, { "epoch": 0.13, "learning_rate": 0.0001945428928230086, "loss": 1.1677, "step": 719 }, { "epoch": 0.13, "learning_rate": 0.00019452342661557237, "loss": 1.1769, "step": 720 }, { "epoch": 0.13, "learning_rate": 0.0001945039267281077, "loss": 1.222, "step": 721 }, { "epoch": 0.13, "learning_rate": 0.0001944843931675626, "loss": 1.2112, "step": 722 }, { "epoch": 0.13, "learning_rate": 0.0001944648259408973, "loss": 1.1015, "step": 723 }, { "epoch": 0.13, "learning_rate": 0.0001944452250550838, "loss": 1.1372, "step": 724 }, { "epoch": 0.13, "learning_rate": 0.00019442559051710618, "loss": 1.1051, "step": 725 }, { "epoch": 0.13, "learning_rate": 0.0001944059223339605, "loss": 1.1826, "step": 726 }, { "epoch": 0.13, "learning_rate": 0.0001943862205126548, "loss": 1.2769, "step": 727 }, { "epoch": 0.13, "learning_rate": 0.00019436648506020916, "loss": 1.2244, "step": 728 }, { "epoch": 0.13, "learning_rate": 0.00019434671598365553, "loss": 1.1627, "step": 729 }, { "epoch": 0.13, "learning_rate": 0.00019432691329003795, "loss": 1.1513, "step": 730 }, { "epoch": 0.13, "learning_rate": 0.00019430707698641242, "loss": 1.1415, "step": 731 }, { "epoch": 0.13, "learning_rate": 0.00019428720707984686, "loss": 1.1584, "step": 732 }, { "epoch": 0.14, "learning_rate": 0.00019426730357742123, "loss": 1.259, "step": 733 }, { "epoch": 0.14, "learning_rate": 0.0001942473664862274, "loss": 1.1255, "step": 734 }, { "epoch": 0.14, "learning_rate": 0.00019422739581336922, "loss": 0.9694, "step": 735 }, { "epoch": 0.14, "learning_rate": 0.00019420739156596258, "loss": 1.1176, "step": 736 }, { "epoch": 0.14, "learning_rate": 0.00019418735375113523, "loss": 1.2572, "step": 737 }, { "epoch": 0.14, "learning_rate": 0.00019416728237602694, "loss": 1.2664, "step": 738 }, { "epoch": 0.14, "learning_rate": 0.0001941471774477894, "loss": 1.1395, "step": 739 }, { "epoch": 0.14, "learning_rate": 0.00019412703897358634, "loss": 1.1221, "step": 740 }, { "epoch": 0.14, "learning_rate": 0.0001941068669605933, "loss": 1.127, "step": 741 }, { "epoch": 0.14, "learning_rate": 0.00019408666141599792, "loss": 1.227, "step": 742 }, { "epoch": 0.14, "learning_rate": 0.00019406642234699967, "loss": 1.2881, "step": 743 }, { "epoch": 0.14, "learning_rate": 0.00019404614976081008, "loss": 1.1664, "step": 744 }, { "epoch": 0.14, "learning_rate": 0.00019402584366465248, "loss": 1.2446, "step": 745 }, { "epoch": 0.14, "learning_rate": 0.00019400550406576227, "loss": 1.2914, "step": 746 }, { "epoch": 0.14, "learning_rate": 0.00019398513097138676, "loss": 1.215, "step": 747 }, { "epoch": 0.14, "learning_rate": 0.00019396472438878507, "loss": 1.2395, "step": 748 }, { "epoch": 0.14, "learning_rate": 0.00019394428432522847, "loss": 1.1152, "step": 749 }, { "epoch": 0.14, "learning_rate": 0.00019392381078799998, "loss": 0.9188, "step": 750 }, { "epoch": 0.14, "learning_rate": 0.00019390330378439458, "loss": 1.0684, "step": 751 }, { "epoch": 0.14, "learning_rate": 0.0001938827633217193, "loss": 1.1729, "step": 752 }, { "epoch": 0.14, "learning_rate": 0.00019386218940729288, "loss": 1.1021, "step": 753 }, { "epoch": 0.14, "learning_rate": 0.0001938415820484462, "loss": 1.0569, "step": 754 }, { "epoch": 0.14, "learning_rate": 0.00019382094125252187, "loss": 1.0709, "step": 755 }, { "epoch": 0.14, "learning_rate": 0.00019380026702687454, "loss": 1.1981, "step": 756 }, { "epoch": 0.14, "learning_rate": 0.00019377955937887071, "loss": 1.208, "step": 757 }, { "epoch": 0.14, "learning_rate": 0.00019375881831588886, "loss": 1.134, "step": 758 }, { "epoch": 0.14, "learning_rate": 0.00019373804384531927, "loss": 1.1296, "step": 759 }, { "epoch": 0.14, "learning_rate": 0.00019371723597456415, "loss": 1.2149, "step": 760 }, { "epoch": 0.14, "learning_rate": 0.00019369639471103768, "loss": 1.1748, "step": 761 }, { "epoch": 0.14, "learning_rate": 0.00019367552006216592, "loss": 1.1413, "step": 762 }, { "epoch": 0.14, "learning_rate": 0.00019365461203538675, "loss": 1.1725, "step": 763 }, { "epoch": 0.14, "learning_rate": 0.00019363367063815004, "loss": 1.1203, "step": 764 }, { "epoch": 0.14, "learning_rate": 0.00019361269587791745, "loss": 1.17, "step": 765 }, { "epoch": 0.14, "learning_rate": 0.00019359168776216263, "loss": 1.1384, "step": 766 }, { "epoch": 0.14, "learning_rate": 0.00019357064629837103, "loss": 1.0382, "step": 767 }, { "epoch": 0.14, "learning_rate": 0.00019354957149404008, "loss": 1.2516, "step": 768 }, { "epoch": 0.14, "learning_rate": 0.00019352846335667895, "loss": 1.2667, "step": 769 }, { "epoch": 0.14, "learning_rate": 0.00019350732189380882, "loss": 1.0647, "step": 770 }, { "epoch": 0.14, "learning_rate": 0.0001934861471129627, "loss": 1.2072, "step": 771 }, { "epoch": 0.14, "learning_rate": 0.0001934649390216854, "loss": 1.0222, "step": 772 }, { "epoch": 0.14, "learning_rate": 0.00019344369762753374, "loss": 1.2319, "step": 773 }, { "epoch": 0.14, "learning_rate": 0.00019342242293807628, "loss": 1.2308, "step": 774 }, { "epoch": 0.14, "learning_rate": 0.0001934011149608935, "loss": 1.3369, "step": 775 }, { "epoch": 0.14, "learning_rate": 0.00019337977370357772, "loss": 1.1224, "step": 776 }, { "epoch": 0.14, "learning_rate": 0.00019335839917373316, "loss": 1.1725, "step": 777 }, { "epoch": 0.14, "learning_rate": 0.00019333699137897585, "loss": 1.1872, "step": 778 }, { "epoch": 0.14, "learning_rate": 0.00019331555032693366, "loss": 1.2696, "step": 779 }, { "epoch": 0.14, "learning_rate": 0.0001932940760252464, "loss": 1.225, "step": 780 }, { "epoch": 0.14, "learning_rate": 0.00019327256848156564, "loss": 1.1504, "step": 781 }, { "epoch": 0.14, "learning_rate": 0.0001932510277035548, "loss": 1.0201, "step": 782 }, { "epoch": 0.14, "learning_rate": 0.0001932294536988892, "loss": 1.2908, "step": 783 }, { "epoch": 0.14, "learning_rate": 0.00019320784647525595, "loss": 1.2668, "step": 784 }, { "epoch": 0.14, "learning_rate": 0.00019318620604035394, "loss": 1.2523, "step": 785 }, { "epoch": 0.14, "learning_rate": 0.00019316453240189406, "loss": 1.2744, "step": 786 }, { "epoch": 0.15, "learning_rate": 0.0001931428255675989, "loss": 1.1417, "step": 787 }, { "epoch": 0.15, "learning_rate": 0.0001931210855452029, "loss": 1.0461, "step": 788 }, { "epoch": 0.15, "learning_rate": 0.00019309931234245232, "loss": 1.1046, "step": 789 }, { "epoch": 0.15, "learning_rate": 0.00019307750596710528, "loss": 1.0584, "step": 790 }, { "epoch": 0.15, "learning_rate": 0.0001930556664269317, "loss": 1.2042, "step": 791 }, { "epoch": 0.15, "learning_rate": 0.00019303379372971334, "loss": 1.201, "step": 792 }, { "epoch": 0.15, "learning_rate": 0.0001930118878832437, "loss": 1.1559, "step": 793 }, { "epoch": 0.15, "learning_rate": 0.00019298994889532816, "loss": 1.1905, "step": 794 }, { "epoch": 0.15, "learning_rate": 0.0001929679767737839, "loss": 1.075, "step": 795 }, { "epoch": 0.15, "learning_rate": 0.0001929459715264399, "loss": 1.1922, "step": 796 }, { "epoch": 0.15, "learning_rate": 0.00019292393316113694, "loss": 0.993, "step": 797 }, { "epoch": 0.15, "learning_rate": 0.00019290186168572756, "loss": 1.2722, "step": 798 }, { "epoch": 0.15, "learning_rate": 0.00019287975710807618, "loss": 1.2157, "step": 799 }, { "epoch": 0.15, "learning_rate": 0.00019285761943605898, "loss": 1.2078, "step": 800 }, { "epoch": 0.15, "learning_rate": 0.00019283544867756389, "loss": 1.1616, "step": 801 }, { "epoch": 0.15, "learning_rate": 0.0001928132448404907, "loss": 1.2044, "step": 802 }, { "epoch": 0.15, "learning_rate": 0.00019279100793275088, "loss": 1.2562, "step": 803 }, { "epoch": 0.15, "learning_rate": 0.00019276873796226785, "loss": 1.2121, "step": 804 }, { "epoch": 0.15, "learning_rate": 0.00019274643493697667, "loss": 1.1315, "step": 805 }, { "epoch": 0.15, "learning_rate": 0.0001927240988648242, "loss": 1.056, "step": 806 }, { "epoch": 0.15, "learning_rate": 0.00019270172975376912, "loss": 1.0753, "step": 807 }, { "epoch": 0.15, "learning_rate": 0.00019267932761178185, "loss": 1.1741, "step": 808 }, { "epoch": 0.15, "learning_rate": 0.0001926568924468446, "loss": 1.2032, "step": 809 }, { "epoch": 0.15, "learning_rate": 0.00019263442426695134, "loss": 1.282, "step": 810 }, { "epoch": 0.15, "learning_rate": 0.00019261192308010777, "loss": 1.2495, "step": 811 }, { "epoch": 0.15, "learning_rate": 0.0001925893888943314, "loss": 1.1594, "step": 812 }, { "epoch": 0.15, "learning_rate": 0.0001925668217176515, "loss": 1.0805, "step": 813 }, { "epoch": 0.15, "learning_rate": 0.00019254422155810902, "loss": 1.164, "step": 814 }, { "epoch": 0.15, "learning_rate": 0.00019252158842375676, "loss": 1.2381, "step": 815 }, { "epoch": 0.15, "learning_rate": 0.0001924989223226592, "loss": 1.2027, "step": 816 }, { "epoch": 0.15, "learning_rate": 0.0001924762232628926, "loss": 1.0764, "step": 817 }, { "epoch": 0.15, "learning_rate": 0.00019245349125254497, "loss": 1.1155, "step": 818 }, { "epoch": 0.15, "learning_rate": 0.00019243072629971603, "loss": 1.062, "step": 819 }, { "epoch": 0.15, "learning_rate": 0.00019240792841251724, "loss": 1.1586, "step": 820 }, { "epoch": 0.15, "learning_rate": 0.00019238509759907183, "loss": 1.117, "step": 821 }, { "epoch": 0.15, "learning_rate": 0.00019236223386751475, "loss": 1.0203, "step": 822 }, { "epoch": 0.15, "learning_rate": 0.00019233933722599263, "loss": 1.2701, "step": 823 }, { "epoch": 0.15, "learning_rate": 0.00019231640768266388, "loss": 1.2084, "step": 824 }, { "epoch": 0.15, "learning_rate": 0.00019229344524569864, "loss": 1.1774, "step": 825 }, { "epoch": 0.15, "learning_rate": 0.00019227044992327873, "loss": 1.1546, "step": 826 }, { "epoch": 0.15, "learning_rate": 0.00019224742172359774, "loss": 1.1367, "step": 827 }, { "epoch": 0.15, "learning_rate": 0.00019222436065486088, "loss": 1.1312, "step": 828 }, { "epoch": 0.15, "learning_rate": 0.0001922012667252852, "loss": 1.113, "step": 829 }, { "epoch": 0.15, "learning_rate": 0.00019217813994309933, "loss": 1.1101, "step": 830 }, { "epoch": 0.15, "learning_rate": 0.00019215498031654375, "loss": 1.0705, "step": 831 }, { "epoch": 0.15, "learning_rate": 0.00019213178785387048, "loss": 1.1857, "step": 832 }, { "epoch": 0.15, "learning_rate": 0.00019210856256334337, "loss": 1.1017, "step": 833 }, { "epoch": 0.15, "learning_rate": 0.0001920853044532379, "loss": 1.2028, "step": 834 }, { "epoch": 0.15, "learning_rate": 0.00019206201353184123, "loss": 1.1582, "step": 835 }, { "epoch": 0.15, "learning_rate": 0.0001920386898074523, "loss": 1.2937, "step": 836 }, { "epoch": 0.15, "learning_rate": 0.00019201533328838167, "loss": 1.2035, "step": 837 }, { "epoch": 0.15, "learning_rate": 0.00019199194398295157, "loss": 1.1217, "step": 838 }, { "epoch": 0.15, "learning_rate": 0.00019196852189949594, "loss": 1.2427, "step": 839 }, { "epoch": 0.15, "learning_rate": 0.00019194506704636047, "loss": 1.1224, "step": 840 }, { "epoch": 0.15, "learning_rate": 0.00019192157943190234, "loss": 1.2114, "step": 841 }, { "epoch": 0.16, "learning_rate": 0.00019189805906449057, "loss": 1.0557, "step": 842 }, { "epoch": 0.16, "learning_rate": 0.00019187450595250586, "loss": 1.1446, "step": 843 }, { "epoch": 0.16, "learning_rate": 0.00019185092010434043, "loss": 1.1961, "step": 844 }, { "epoch": 0.16, "learning_rate": 0.00019182730152839827, "loss": 1.1163, "step": 845 }, { "epoch": 0.16, "learning_rate": 0.00019180365023309504, "loss": 1.2352, "step": 846 }, { "epoch": 0.16, "learning_rate": 0.00019177996622685799, "loss": 1.1084, "step": 847 }, { "epoch": 0.16, "learning_rate": 0.00019175624951812606, "loss": 1.1205, "step": 848 }, { "epoch": 0.16, "learning_rate": 0.00019173250011534994, "loss": 1.1958, "step": 849 }, { "epoch": 0.16, "learning_rate": 0.00019170871802699177, "loss": 1.1395, "step": 850 }, { "epoch": 0.16, "learning_rate": 0.0001916849032615255, "loss": 1.1679, "step": 851 }, { "epoch": 0.16, "learning_rate": 0.00019166105582743662, "loss": 1.1547, "step": 852 }, { "epoch": 0.16, "learning_rate": 0.00019163717573322236, "loss": 1.0585, "step": 853 }, { "epoch": 0.16, "learning_rate": 0.0001916132629873915, "loss": 1.1843, "step": 854 }, { "epoch": 0.16, "learning_rate": 0.00019158931759846447, "loss": 1.1736, "step": 855 }, { "epoch": 0.16, "learning_rate": 0.00019156533957497343, "loss": 1.2642, "step": 856 }, { "epoch": 0.16, "learning_rate": 0.000191541328925462, "loss": 1.1637, "step": 857 }, { "epoch": 0.16, "learning_rate": 0.00019151728565848557, "loss": 1.1909, "step": 858 }, { "epoch": 0.16, "learning_rate": 0.00019149320978261106, "loss": 1.2414, "step": 859 }, { "epoch": 0.16, "learning_rate": 0.00019146910130641705, "loss": 1.1156, "step": 860 }, { "epoch": 0.16, "learning_rate": 0.00019144496023849373, "loss": 1.2659, "step": 861 }, { "epoch": 0.16, "learning_rate": 0.00019142078658744292, "loss": 1.1558, "step": 862 }, { "epoch": 0.16, "learning_rate": 0.00019139658036187803, "loss": 1.2439, "step": 863 }, { "epoch": 0.16, "learning_rate": 0.00019137234157042405, "loss": 1.1165, "step": 864 }, { "epoch": 0.16, "learning_rate": 0.00019134807022171764, "loss": 1.3674, "step": 865 }, { "epoch": 0.16, "learning_rate": 0.00019132376632440695, "loss": 1.2452, "step": 866 }, { "epoch": 0.16, "learning_rate": 0.00019129942988715192, "loss": 1.2254, "step": 867 }, { "epoch": 0.16, "learning_rate": 0.00019127506091862388, "loss": 1.0608, "step": 868 }, { "epoch": 0.16, "learning_rate": 0.00019125065942750586, "loss": 1.2101, "step": 869 }, { "epoch": 0.16, "learning_rate": 0.00019122622542249248, "loss": 1.1225, "step": 870 }, { "epoch": 0.16, "learning_rate": 0.00019120175891228986, "loss": 1.0681, "step": 871 }, { "epoch": 0.16, "learning_rate": 0.0001911772599056158, "loss": 1.0894, "step": 872 }, { "epoch": 0.16, "learning_rate": 0.00019115272841119967, "loss": 1.2561, "step": 873 }, { "epoch": 0.16, "learning_rate": 0.0001911281644377823, "loss": 1.0703, "step": 874 }, { "epoch": 0.16, "learning_rate": 0.0001911035679941163, "loss": 1.0727, "step": 875 }, { "epoch": 0.16, "learning_rate": 0.00019107893908896562, "loss": 1.2796, "step": 876 }, { "epoch": 0.16, "learning_rate": 0.00019105427773110593, "loss": 1.0337, "step": 877 }, { "epoch": 0.16, "learning_rate": 0.0001910295839293244, "loss": 1.1831, "step": 878 }, { "epoch": 0.16, "learning_rate": 0.00019100485769241984, "loss": 1.1625, "step": 879 }, { "epoch": 0.16, "learning_rate": 0.0001909800990292025, "loss": 1.1808, "step": 880 }, { "epoch": 0.16, "learning_rate": 0.00019095530794849426, "loss": 1.1427, "step": 881 }, { "epoch": 0.16, "learning_rate": 0.0001909304844591285, "loss": 1.196, "step": 882 }, { "epoch": 0.16, "learning_rate": 0.00019090562856995024, "loss": 1.139, "step": 883 }, { "epoch": 0.16, "learning_rate": 0.00019088074028981595, "loss": 1.0081, "step": 884 }, { "epoch": 0.16, "learning_rate": 0.00019085581962759366, "loss": 1.0277, "step": 885 }, { "epoch": 0.16, "learning_rate": 0.00019083086659216295, "loss": 1.0885, "step": 886 }, { "epoch": 0.16, "learning_rate": 0.000190805881192415, "loss": 1.1959, "step": 887 }, { "epoch": 0.16, "learning_rate": 0.00019078086343725237, "loss": 1.334, "step": 888 }, { "epoch": 0.16, "learning_rate": 0.0001907558133355893, "loss": 1.1697, "step": 889 }, { "epoch": 0.16, "learning_rate": 0.00019073073089635148, "loss": 1.2287, "step": 890 }, { "epoch": 0.16, "learning_rate": 0.00019070561612847612, "loss": 1.1088, "step": 891 }, { "epoch": 0.16, "learning_rate": 0.000190680469040912, "loss": 1.1256, "step": 892 }, { "epoch": 0.16, "learning_rate": 0.00019065528964261935, "loss": 0.9924, "step": 893 }, { "epoch": 0.16, "learning_rate": 0.00019063007794256994, "loss": 1.0082, "step": 894 }, { "epoch": 0.16, "learning_rate": 0.00019060483394974707, "loss": 1.1958, "step": 895 }, { "epoch": 0.17, "learning_rate": 0.00019057955767314556, "loss": 1.083, "step": 896 }, { "epoch": 0.17, "learning_rate": 0.00019055424912177164, "loss": 1.0688, "step": 897 }, { "epoch": 0.17, "learning_rate": 0.00019052890830464317, "loss": 1.1013, "step": 898 }, { "epoch": 0.17, "learning_rate": 0.0001905035352307894, "loss": 1.0773, "step": 899 }, { "epoch": 0.17, "learning_rate": 0.00019047812990925108, "loss": 1.2037, "step": 900 }, { "epoch": 0.17, "learning_rate": 0.00019045269234908057, "loss": 1.1815, "step": 901 }, { "epoch": 0.17, "learning_rate": 0.00019042722255934155, "loss": 1.0521, "step": 902 }, { "epoch": 0.17, "learning_rate": 0.00019040172054910932, "loss": 1.2437, "step": 903 }, { "epoch": 0.17, "learning_rate": 0.0001903761863274706, "loss": 1.2353, "step": 904 }, { "epoch": 0.17, "learning_rate": 0.00019035061990352355, "loss": 1.1777, "step": 905 }, { "epoch": 0.17, "learning_rate": 0.00019032502128637788, "loss": 1.1796, "step": 906 }, { "epoch": 0.17, "learning_rate": 0.00019029939048515474, "loss": 1.143, "step": 907 }, { "epoch": 0.17, "learning_rate": 0.00019027372750898673, "loss": 1.1784, "step": 908 }, { "epoch": 0.17, "learning_rate": 0.00019024803236701793, "loss": 1.1441, "step": 909 }, { "epoch": 0.17, "learning_rate": 0.0001902223050684039, "loss": 1.2279, "step": 910 }, { "epoch": 0.17, "learning_rate": 0.00019019654562231163, "loss": 1.2027, "step": 911 }, { "epoch": 0.17, "learning_rate": 0.0001901707540379196, "loss": 1.2721, "step": 912 }, { "epoch": 0.17, "learning_rate": 0.00019014493032441766, "loss": 1.2191, "step": 913 }, { "epoch": 0.17, "learning_rate": 0.00019011907449100718, "loss": 1.0954, "step": 914 }, { "epoch": 0.17, "learning_rate": 0.00019009318654690098, "loss": 1.2982, "step": 915 }, { "epoch": 0.17, "learning_rate": 0.0001900672665013233, "loss": 1.2199, "step": 916 }, { "epoch": 0.17, "learning_rate": 0.00019004131436350978, "loss": 1.1168, "step": 917 }, { "epoch": 0.17, "learning_rate": 0.00019001533014270755, "loss": 1.3054, "step": 918 }, { "epoch": 0.17, "learning_rate": 0.0001899893138481752, "loss": 1.0935, "step": 919 }, { "epoch": 0.17, "learning_rate": 0.0001899632654891826, "loss": 1.0488, "step": 920 }, { "epoch": 0.17, "learning_rate": 0.00018993718507501125, "loss": 1.2181, "step": 921 }, { "epoch": 0.17, "learning_rate": 0.00018991107261495394, "loss": 1.1086, "step": 922 }, { "epoch": 0.17, "learning_rate": 0.00018988492811831487, "loss": 1.0624, "step": 923 }, { "epoch": 0.17, "learning_rate": 0.00018985875159440973, "loss": 1.1398, "step": 924 }, { "epoch": 0.17, "learning_rate": 0.00018983254305256558, "loss": 1.3288, "step": 925 }, { "epoch": 0.17, "learning_rate": 0.0001898063025021209, "loss": 1.168, "step": 926 }, { "epoch": 0.17, "learning_rate": 0.00018978002995242548, "loss": 1.1931, "step": 927 }, { "epoch": 0.17, "learning_rate": 0.00018975372541284074, "loss": 1.1076, "step": 928 }, { "epoch": 0.17, "learning_rate": 0.0001897273888927393, "loss": 1.0829, "step": 929 }, { "epoch": 0.17, "learning_rate": 0.00018970102040150516, "loss": 1.2046, "step": 930 }, { "epoch": 0.17, "learning_rate": 0.0001896746199485339, "loss": 1.1628, "step": 931 }, { "epoch": 0.17, "learning_rate": 0.00018964818754323227, "loss": 1.2699, "step": 932 }, { "epoch": 0.17, "learning_rate": 0.00018962172319501863, "loss": 1.0871, "step": 933 }, { "epoch": 0.17, "learning_rate": 0.0001895952269133225, "loss": 1.1243, "step": 934 }, { "epoch": 0.17, "learning_rate": 0.00018956869870758488, "loss": 1.2092, "step": 935 }, { "epoch": 0.17, "learning_rate": 0.0001895421385872582, "loss": 1.1775, "step": 936 }, { "epoch": 0.17, "learning_rate": 0.0001895155465618062, "loss": 1.1265, "step": 937 }, { "epoch": 0.17, "learning_rate": 0.0001894889226407039, "loss": 1.0746, "step": 938 }, { "epoch": 0.17, "learning_rate": 0.0001894622668334379, "loss": 1.1149, "step": 939 }, { "epoch": 0.17, "learning_rate": 0.00018943557914950597, "loss": 1.179, "step": 940 }, { "epoch": 0.17, "learning_rate": 0.00018940885959841736, "loss": 1.0054, "step": 941 }, { "epoch": 0.17, "learning_rate": 0.00018938210818969254, "loss": 1.1344, "step": 942 }, { "epoch": 0.17, "learning_rate": 0.00018935532493286349, "loss": 1.0631, "step": 943 }, { "epoch": 0.17, "learning_rate": 0.00018932850983747343, "loss": 1.1429, "step": 944 }, { "epoch": 0.17, "learning_rate": 0.00018930166291307695, "loss": 1.2052, "step": 945 }, { "epoch": 0.17, "learning_rate": 0.00018927478416924003, "loss": 1.1611, "step": 946 }, { "epoch": 0.17, "learning_rate": 0.00018924787361553988, "loss": 1.1195, "step": 947 }, { "epoch": 0.17, "learning_rate": 0.00018922093126156516, "loss": 1.0474, "step": 948 }, { "epoch": 0.17, "learning_rate": 0.00018919395711691577, "loss": 1.1082, "step": 949 }, { "epoch": 0.18, "learning_rate": 0.000189166951191203, "loss": 1.2659, "step": 950 }, { "epoch": 0.18, "learning_rate": 0.00018913991349404948, "loss": 1.1568, "step": 951 }, { "epoch": 0.18, "learning_rate": 0.00018911284403508904, "loss": 1.1727, "step": 952 }, { "epoch": 0.18, "learning_rate": 0.00018908574282396697, "loss": 1.1706, "step": 953 }, { "epoch": 0.18, "learning_rate": 0.00018905860987033978, "loss": 1.1061, "step": 954 }, { "epoch": 0.18, "learning_rate": 0.00018903144518387537, "loss": 1.0538, "step": 955 }, { "epoch": 0.18, "learning_rate": 0.00018900424877425287, "loss": 1.1841, "step": 956 }, { "epoch": 0.18, "learning_rate": 0.00018897702065116275, "loss": 1.1456, "step": 957 }, { "epoch": 0.18, "learning_rate": 0.0001889497608243067, "loss": 1.0687, "step": 958 }, { "epoch": 0.18, "learning_rate": 0.00018892246930339793, "loss": 1.1529, "step": 959 }, { "epoch": 0.18, "learning_rate": 0.00018889514609816068, "loss": 1.1785, "step": 960 }, { "epoch": 0.18, "learning_rate": 0.00018886779121833063, "loss": 1.2733, "step": 961 }, { "epoch": 0.18, "learning_rate": 0.00018884040467365468, "loss": 1.1143, "step": 962 }, { "epoch": 0.18, "learning_rate": 0.0001888129864738911, "loss": 1.2375, "step": 963 }, { "epoch": 0.18, "learning_rate": 0.00018878553662880933, "loss": 1.1297, "step": 964 }, { "epoch": 0.18, "learning_rate": 0.00018875805514819013, "loss": 1.0753, "step": 965 }, { "epoch": 0.18, "learning_rate": 0.00018873054204182559, "loss": 1.0885, "step": 966 }, { "epoch": 0.18, "learning_rate": 0.000188702997319519, "loss": 1.2351, "step": 967 }, { "epoch": 0.18, "learning_rate": 0.0001886754209910849, "loss": 1.0491, "step": 968 }, { "epoch": 0.18, "learning_rate": 0.00018864781306634915, "loss": 1.1015, "step": 969 }, { "epoch": 0.18, "learning_rate": 0.00018862017355514884, "loss": 1.0521, "step": 970 }, { "epoch": 0.18, "learning_rate": 0.00018859250246733232, "loss": 1.2601, "step": 971 }, { "epoch": 0.18, "learning_rate": 0.0001885647998127592, "loss": 1.1256, "step": 972 }, { "epoch": 0.18, "learning_rate": 0.00018853706560130026, "loss": 1.1283, "step": 973 }, { "epoch": 0.18, "learning_rate": 0.00018850929984283766, "loss": 1.1367, "step": 974 }, { "epoch": 0.18, "learning_rate": 0.00018848150254726473, "loss": 1.0826, "step": 975 }, { "epoch": 0.18, "learning_rate": 0.00018845367372448602, "loss": 1.1743, "step": 976 }, { "epoch": 0.18, "learning_rate": 0.00018842581338441731, "loss": 1.0855, "step": 977 }, { "epoch": 0.18, "learning_rate": 0.00018839792153698567, "loss": 1.2949, "step": 978 }, { "epoch": 0.18, "learning_rate": 0.00018836999819212932, "loss": 1.201, "step": 979 }, { "epoch": 0.18, "learning_rate": 0.00018834204335979777, "loss": 1.1713, "step": 980 }, { "epoch": 0.18, "learning_rate": 0.00018831405704995173, "loss": 1.2791, "step": 981 }, { "epoch": 0.18, "learning_rate": 0.00018828603927256309, "loss": 1.2563, "step": 982 }, { "epoch": 0.18, "learning_rate": 0.00018825799003761498, "loss": 1.131, "step": 983 }, { "epoch": 0.18, "learning_rate": 0.00018822990935510173, "loss": 1.362, "step": 984 }, { "epoch": 0.18, "learning_rate": 0.00018820179723502893, "loss": 1.1201, "step": 985 }, { "epoch": 0.18, "learning_rate": 0.00018817365368741328, "loss": 1.1539, "step": 986 }, { "epoch": 0.18, "learning_rate": 0.00018814547872228273, "loss": 1.1054, "step": 987 }, { "epoch": 0.18, "learning_rate": 0.0001881172723496764, "loss": 1.0969, "step": 988 }, { "epoch": 0.18, "learning_rate": 0.00018808903457964468, "loss": 1.009, "step": 989 }, { "epoch": 0.18, "learning_rate": 0.00018806076542224903, "loss": 1.1791, "step": 990 }, { "epoch": 0.18, "learning_rate": 0.00018803246488756215, "loss": 1.1707, "step": 991 }, { "epoch": 0.18, "learning_rate": 0.00018800413298566794, "loss": 1.2645, "step": 992 }, { "epoch": 0.18, "learning_rate": 0.00018797576972666145, "loss": 1.1737, "step": 993 }, { "epoch": 0.18, "learning_rate": 0.0001879473751206489, "loss": 1.2314, "step": 994 }, { "epoch": 0.18, "learning_rate": 0.00018791894917774774, "loss": 1.1548, "step": 995 }, { "epoch": 0.18, "learning_rate": 0.00018789049190808644, "loss": 1.1928, "step": 996 }, { "epoch": 0.18, "learning_rate": 0.00018786200332180476, "loss": 1.1593, "step": 997 }, { "epoch": 0.18, "learning_rate": 0.00018783348342905364, "loss": 1.1794, "step": 998 }, { "epoch": 0.18, "learning_rate": 0.00018780493223999507, "loss": 1.1932, "step": 999 }, { "epoch": 0.18, "learning_rate": 0.00018777634976480226, "loss": 1.208, "step": 1000 }, { "epoch": 0.18, "learning_rate": 0.00018774773601365951, "loss": 1.1733, "step": 1001 }, { "epoch": 0.18, "learning_rate": 0.00018771909099676238, "loss": 1.2368, "step": 1002 }, { "epoch": 0.18, "learning_rate": 0.00018769041472431737, "loss": 1.1991, "step": 1003 }, { "epoch": 0.19, "learning_rate": 0.00018766170720654237, "loss": 1.0367, "step": 1004 }, { "epoch": 0.19, "learning_rate": 0.00018763296845366621, "loss": 1.1956, "step": 1005 }, { "epoch": 0.19, "learning_rate": 0.0001876041984759289, "loss": 1.3435, "step": 1006 }, { "epoch": 0.19, "learning_rate": 0.00018757539728358162, "loss": 1.1803, "step": 1007 }, { "epoch": 0.19, "learning_rate": 0.00018754656488688664, "loss": 1.3236, "step": 1008 }, { "epoch": 0.19, "learning_rate": 0.00018751770129611732, "loss": 1.1199, "step": 1009 }, { "epoch": 0.19, "learning_rate": 0.00018748880652155818, "loss": 1.0677, "step": 1010 }, { "epoch": 0.19, "learning_rate": 0.00018745988057350486, "loss": 1.1551, "step": 1011 }, { "epoch": 0.19, "learning_rate": 0.00018743092346226403, "loss": 1.1776, "step": 1012 }, { "epoch": 0.19, "learning_rate": 0.00018740193519815355, "loss": 1.1914, "step": 1013 }, { "epoch": 0.19, "learning_rate": 0.00018737291579150239, "loss": 1.0744, "step": 1014 }, { "epoch": 0.19, "learning_rate": 0.00018734386525265045, "loss": 1.1707, "step": 1015 }, { "epoch": 0.19, "learning_rate": 0.000187314783591949, "loss": 1.0574, "step": 1016 }, { "epoch": 0.19, "learning_rate": 0.00018728567081976013, "loss": 1.1384, "step": 1017 }, { "epoch": 0.19, "learning_rate": 0.00018725652694645716, "loss": 1.0857, "step": 1018 }, { "epoch": 0.19, "learning_rate": 0.0001872273519824245, "loss": 1.1997, "step": 1019 }, { "epoch": 0.19, "learning_rate": 0.00018719814593805753, "loss": 1.1895, "step": 1020 }, { "epoch": 0.19, "learning_rate": 0.00018716890882376284, "loss": 1.1537, "step": 1021 }, { "epoch": 0.19, "learning_rate": 0.000187139640649958, "loss": 1.1141, "step": 1022 }, { "epoch": 0.19, "learning_rate": 0.00018711034142707172, "loss": 1.0761, "step": 1023 }, { "epoch": 0.19, "learning_rate": 0.0001870810111655436, "loss": 1.0808, "step": 1024 }, { "epoch": 0.19, "learning_rate": 0.00018705164987582458, "loss": 1.1256, "step": 1025 }, { "epoch": 0.19, "learning_rate": 0.0001870222575683764, "loss": 0.9746, "step": 1026 }, { "epoch": 0.19, "learning_rate": 0.00018699283425367195, "loss": 1.0283, "step": 1027 }, { "epoch": 0.19, "learning_rate": 0.00018696337994219525, "loss": 1.0097, "step": 1028 }, { "epoch": 0.19, "learning_rate": 0.00018693389464444123, "loss": 1.1904, "step": 1029 }, { "epoch": 0.19, "learning_rate": 0.0001869043783709159, "loss": 1.2381, "step": 1030 }, { "epoch": 0.19, "learning_rate": 0.00018687483113213635, "loss": 1.1629, "step": 1031 }, { "epoch": 0.19, "learning_rate": 0.0001868452529386307, "loss": 1.1967, "step": 1032 }, { "epoch": 0.19, "learning_rate": 0.00018681564380093802, "loss": 1.1981, "step": 1033 }, { "epoch": 0.19, "learning_rate": 0.00018678600372960854, "loss": 1.11, "step": 1034 }, { "epoch": 0.19, "learning_rate": 0.00018675633273520338, "loss": 0.9778, "step": 1035 }, { "epoch": 0.19, "learning_rate": 0.00018672663082829473, "loss": 1.165, "step": 1036 }, { "epoch": 0.19, "learning_rate": 0.00018669689801946584, "loss": 1.2124, "step": 1037 }, { "epoch": 0.19, "learning_rate": 0.0001866671343193109, "loss": 1.028, "step": 1038 }, { "epoch": 0.19, "learning_rate": 0.0001866373397384351, "loss": 1.26, "step": 1039 }, { "epoch": 0.19, "learning_rate": 0.00018660751428745473, "loss": 1.1378, "step": 1040 }, { "epoch": 0.19, "learning_rate": 0.00018657765797699697, "loss": 1.0887, "step": 1041 }, { "epoch": 0.19, "learning_rate": 0.00018654777081770013, "loss": 1.2335, "step": 1042 }, { "epoch": 0.19, "learning_rate": 0.0001865178528202133, "loss": 1.1645, "step": 1043 }, { "epoch": 0.19, "learning_rate": 0.00018648790399519678, "loss": 1.2294, "step": 1044 }, { "epoch": 0.19, "learning_rate": 0.00018645792435332173, "loss": 1.0591, "step": 1045 }, { "epoch": 0.19, "learning_rate": 0.0001864279139052703, "loss": 1.1842, "step": 1046 }, { "epoch": 0.19, "learning_rate": 0.00018639787266173566, "loss": 1.1776, "step": 1047 }, { "epoch": 0.19, "learning_rate": 0.00018636780063342192, "loss": 1.1369, "step": 1048 }, { "epoch": 0.19, "learning_rate": 0.00018633769783104416, "loss": 1.1448, "step": 1049 }, { "epoch": 0.19, "learning_rate": 0.00018630756426532842, "loss": 1.1329, "step": 1050 }, { "epoch": 0.19, "learning_rate": 0.00018627739994701175, "loss": 0.9909, "step": 1051 }, { "epoch": 0.19, "learning_rate": 0.00018624720488684212, "loss": 1.2071, "step": 1052 }, { "epoch": 0.19, "learning_rate": 0.00018621697909557844, "loss": 1.0895, "step": 1053 }, { "epoch": 0.19, "learning_rate": 0.00018618672258399058, "loss": 1.1098, "step": 1054 }, { "epoch": 0.19, "learning_rate": 0.00018615643536285933, "loss": 1.1168, "step": 1055 }, { "epoch": 0.19, "learning_rate": 0.0001861261174429765, "loss": 1.2041, "step": 1056 }, { "epoch": 0.19, "learning_rate": 0.00018609576883514483, "loss": 0.9113, "step": 1057 }, { "epoch": 0.19, "learning_rate": 0.00018606538955017788, "loss": 1.0584, "step": 1058 }, { "epoch": 0.2, "learning_rate": 0.00018603497959890025, "loss": 1.1258, "step": 1059 }, { "epoch": 0.2, "learning_rate": 0.0001860045389921474, "loss": 1.1334, "step": 1060 }, { "epoch": 0.2, "learning_rate": 0.0001859740677407658, "loss": 1.0702, "step": 1061 }, { "epoch": 0.2, "learning_rate": 0.00018594356585561276, "loss": 1.1926, "step": 1062 }, { "epoch": 0.2, "learning_rate": 0.00018591303334755655, "loss": 1.0822, "step": 1063 }, { "epoch": 0.2, "learning_rate": 0.00018588247022747632, "loss": 1.0718, "step": 1064 }, { "epoch": 0.2, "learning_rate": 0.0001858518765062621, "loss": 1.0414, "step": 1065 }, { "epoch": 0.2, "learning_rate": 0.0001858212521948149, "loss": 1.2626, "step": 1066 }, { "epoch": 0.2, "learning_rate": 0.00018579059730404663, "loss": 1.1357, "step": 1067 }, { "epoch": 0.2, "learning_rate": 0.00018575991184488, "loss": 1.1079, "step": 1068 }, { "epoch": 0.2, "learning_rate": 0.00018572919582824867, "loss": 1.0306, "step": 1069 }, { "epoch": 0.2, "learning_rate": 0.00018569844926509728, "loss": 1.1594, "step": 1070 }, { "epoch": 0.2, "learning_rate": 0.00018566767216638116, "loss": 1.1093, "step": 1071 }, { "epoch": 0.2, "learning_rate": 0.00018563686454306665, "loss": 1.1133, "step": 1072 }, { "epoch": 0.2, "learning_rate": 0.00018560602640613097, "loss": 1.2774, "step": 1073 }, { "epoch": 0.2, "learning_rate": 0.00018557515776656218, "loss": 1.1543, "step": 1074 }, { "epoch": 0.2, "learning_rate": 0.00018554425863535917, "loss": 1.1018, "step": 1075 }, { "epoch": 0.2, "learning_rate": 0.00018551332902353175, "loss": 1.0834, "step": 1076 }, { "epoch": 0.2, "learning_rate": 0.00018548236894210064, "loss": 1.0105, "step": 1077 }, { "epoch": 0.2, "learning_rate": 0.00018545137840209725, "loss": 1.1797, "step": 1078 }, { "epoch": 0.2, "learning_rate": 0.000185420357414564, "loss": 1.1162, "step": 1079 }, { "epoch": 0.2, "learning_rate": 0.0001853893059905541, "loss": 1.1372, "step": 1080 }, { "epoch": 0.2, "learning_rate": 0.00018535822414113165, "loss": 1.0843, "step": 1081 }, { "epoch": 0.2, "learning_rate": 0.00018532711187737149, "loss": 1.1074, "step": 1082 }, { "epoch": 0.2, "learning_rate": 0.00018529596921035936, "loss": 1.2032, "step": 1083 }, { "epoch": 0.2, "learning_rate": 0.00018526479615119188, "loss": 0.9857, "step": 1084 }, { "epoch": 0.2, "learning_rate": 0.0001852335927109764, "loss": 1.2846, "step": 1085 }, { "epoch": 0.2, "learning_rate": 0.0001852023589008312, "loss": 1.1732, "step": 1086 }, { "epoch": 0.2, "learning_rate": 0.00018517109473188527, "loss": 1.1386, "step": 1087 }, { "epoch": 0.2, "learning_rate": 0.00018513980021527847, "loss": 1.0949, "step": 1088 }, { "epoch": 0.2, "learning_rate": 0.00018510847536216154, "loss": 1.0823, "step": 1089 }, { "epoch": 0.2, "learning_rate": 0.0001850771201836959, "loss": 1.2144, "step": 1090 }, { "epoch": 0.2, "learning_rate": 0.0001850457346910539, "loss": 1.0937, "step": 1091 }, { "epoch": 0.2, "learning_rate": 0.0001850143188954186, "loss": 1.1427, "step": 1092 }, { "epoch": 0.2, "learning_rate": 0.00018498287280798389, "loss": 1.0916, "step": 1093 }, { "epoch": 0.2, "learning_rate": 0.00018495139643995447, "loss": 1.2332, "step": 1094 }, { "epoch": 0.2, "learning_rate": 0.0001849198898025458, "loss": 1.1256, "step": 1095 }, { "epoch": 0.2, "learning_rate": 0.00018488835290698412, "loss": 1.1664, "step": 1096 }, { "epoch": 0.2, "learning_rate": 0.00018485678576450653, "loss": 1.1949, "step": 1097 }, { "epoch": 0.2, "learning_rate": 0.00018482518838636077, "loss": 1.1937, "step": 1098 }, { "epoch": 0.2, "learning_rate": 0.0001847935607838055, "loss": 1.3027, "step": 1099 }, { "epoch": 0.2, "learning_rate": 0.00018476190296811003, "loss": 1.1832, "step": 1100 }, { "epoch": 0.2, "learning_rate": 0.00018473021495055454, "loss": 1.0535, "step": 1101 }, { "epoch": 0.2, "learning_rate": 0.00018469849674242986, "loss": 1.2079, "step": 1102 }, { "epoch": 0.2, "learning_rate": 0.00018466674835503763, "loss": 1.1468, "step": 1103 }, { "epoch": 0.2, "learning_rate": 0.0001846349697996903, "loss": 1.1826, "step": 1104 }, { "epoch": 0.2, "learning_rate": 0.000184603161087711, "loss": 1.183, "step": 1105 }, { "epoch": 0.2, "learning_rate": 0.00018457132223043363, "loss": 1.1913, "step": 1106 }, { "epoch": 0.2, "learning_rate": 0.00018453945323920276, "loss": 0.9751, "step": 1107 }, { "epoch": 0.2, "learning_rate": 0.00018450755412537383, "loss": 1.0642, "step": 1108 }, { "epoch": 0.2, "learning_rate": 0.00018447562490031297, "loss": 1.171, "step": 1109 }, { "epoch": 0.2, "learning_rate": 0.0001844436655753969, "loss": 1.1429, "step": 1110 }, { "epoch": 0.2, "learning_rate": 0.00018441167616201328, "loss": 1.1839, "step": 1111 }, { "epoch": 0.2, "learning_rate": 0.0001843796566715603, "loss": 1.1642, "step": 1112 }, { "epoch": 0.21, "learning_rate": 0.00018434760711544707, "loss": 0.9978, "step": 1113 }, { "epoch": 0.21, "learning_rate": 0.0001843155275050932, "loss": 1.2501, "step": 1114 }, { "epoch": 0.21, "learning_rate": 0.00018428341785192915, "loss": 1.28, "step": 1115 }, { "epoch": 0.21, "learning_rate": 0.00018425127816739604, "loss": 1.1838, "step": 1116 }, { "epoch": 0.21, "learning_rate": 0.0001842191084629457, "loss": 1.173, "step": 1117 }, { "epoch": 0.21, "learning_rate": 0.00018418690875004058, "loss": 1.2435, "step": 1118 }, { "epoch": 0.21, "learning_rate": 0.000184154679040154, "loss": 1.1003, "step": 1119 }, { "epoch": 0.21, "learning_rate": 0.0001841224193447698, "loss": 1.1295, "step": 1120 }, { "epoch": 0.21, "learning_rate": 0.00018409012967538253, "loss": 1.1559, "step": 1121 }, { "epoch": 0.21, "learning_rate": 0.00018405781004349753, "loss": 1.2857, "step": 1122 }, { "epoch": 0.21, "learning_rate": 0.00018402546046063068, "loss": 1.1656, "step": 1123 }, { "epoch": 0.21, "learning_rate": 0.00018399308093830863, "loss": 1.1866, "step": 1124 }, { "epoch": 0.21, "learning_rate": 0.00018396067148806865, "loss": 1.1171, "step": 1125 }, { "epoch": 0.21, "learning_rate": 0.0001839282321214587, "loss": 1.2225, "step": 1126 }, { "epoch": 0.21, "learning_rate": 0.00018389576285003732, "loss": 1.1334, "step": 1127 }, { "epoch": 0.21, "learning_rate": 0.00018386326368537384, "loss": 1.228, "step": 1128 }, { "epoch": 0.21, "learning_rate": 0.00018383073463904811, "loss": 1.0579, "step": 1129 }, { "epoch": 0.21, "learning_rate": 0.00018379817572265075, "loss": 1.1619, "step": 1130 }, { "epoch": 0.21, "learning_rate": 0.00018376558694778292, "loss": 1.0494, "step": 1131 }, { "epoch": 0.21, "learning_rate": 0.00018373296832605647, "loss": 1.2112, "step": 1132 }, { "epoch": 0.21, "learning_rate": 0.00018370031986909385, "loss": 1.1648, "step": 1133 }, { "epoch": 0.21, "learning_rate": 0.00018366764158852825, "loss": 1.1345, "step": 1134 }, { "epoch": 0.21, "learning_rate": 0.0001836349334960033, "loss": 1.181, "step": 1135 }, { "epoch": 0.21, "learning_rate": 0.0001836021956031734, "loss": 1.1633, "step": 1136 }, { "epoch": 0.21, "learning_rate": 0.00018356942792170351, "loss": 1.2239, "step": 1137 }, { "epoch": 0.21, "learning_rate": 0.00018353663046326922, "loss": 1.0757, "step": 1138 }, { "epoch": 0.21, "learning_rate": 0.00018350380323955674, "loss": 1.0987, "step": 1139 }, { "epoch": 0.21, "learning_rate": 0.00018347094626226283, "loss": 1.2331, "step": 1140 }, { "epoch": 0.21, "learning_rate": 0.000183438059543095, "loss": 1.0209, "step": 1141 }, { "epoch": 0.21, "learning_rate": 0.00018340514309377114, "loss": 1.264, "step": 1142 }, { "epoch": 0.21, "learning_rate": 0.00018337219692601985, "loss": 1.0896, "step": 1143 }, { "epoch": 0.21, "learning_rate": 0.0001833392210515804, "loss": 1.1982, "step": 1144 }, { "epoch": 0.21, "learning_rate": 0.00018330621548220247, "loss": 1.2288, "step": 1145 }, { "epoch": 0.21, "learning_rate": 0.00018327318022964644, "loss": 1.0163, "step": 1146 }, { "epoch": 0.21, "learning_rate": 0.00018324011530568326, "loss": 1.0365, "step": 1147 }, { "epoch": 0.21, "learning_rate": 0.00018320702072209438, "loss": 1.2515, "step": 1148 }, { "epoch": 0.21, "learning_rate": 0.00018317389649067192, "loss": 1.1944, "step": 1149 }, { "epoch": 0.21, "learning_rate": 0.00018314074262321846, "loss": 1.0729, "step": 1150 }, { "epoch": 0.21, "learning_rate": 0.00018310755913154725, "loss": 1.0912, "step": 1151 }, { "epoch": 0.21, "learning_rate": 0.00018307434602748198, "loss": 1.0646, "step": 1152 }, { "epoch": 0.21, "learning_rate": 0.00018304110332285694, "loss": 1.2401, "step": 1153 }, { "epoch": 0.21, "learning_rate": 0.00018300783102951704, "loss": 1.1103, "step": 1154 }, { "epoch": 0.21, "learning_rate": 0.00018297452915931763, "loss": 1.0473, "step": 1155 }, { "epoch": 0.21, "learning_rate": 0.0001829411977241246, "loss": 1.1031, "step": 1156 }, { "epoch": 0.21, "learning_rate": 0.00018290783673581445, "loss": 1.1733, "step": 1157 }, { "epoch": 0.21, "learning_rate": 0.00018287444620627418, "loss": 1.1519, "step": 1158 }, { "epoch": 0.21, "learning_rate": 0.0001828410261474013, "loss": 1.0747, "step": 1159 }, { "epoch": 0.21, "learning_rate": 0.00018280757657110382, "loss": 1.1735, "step": 1160 }, { "epoch": 0.21, "learning_rate": 0.0001827740974893003, "loss": 1.1201, "step": 1161 }, { "epoch": 0.21, "learning_rate": 0.00018274058891391986, "loss": 1.1125, "step": 1162 }, { "epoch": 0.21, "learning_rate": 0.000182707050856902, "loss": 1.0967, "step": 1163 }, { "epoch": 0.21, "learning_rate": 0.00018267348333019686, "loss": 1.1319, "step": 1164 }, { "epoch": 0.21, "learning_rate": 0.00018263988634576497, "loss": 1.1892, "step": 1165 }, { "epoch": 0.21, "learning_rate": 0.00018260625991557748, "loss": 1.1391, "step": 1166 }, { "epoch": 0.22, "learning_rate": 0.0001825726040516159, "loss": 1.0474, "step": 1167 }, { "epoch": 0.22, "learning_rate": 0.0001825389187658723, "loss": 1.0985, "step": 1168 }, { "epoch": 0.22, "learning_rate": 0.00018250520407034926, "loss": 1.0105, "step": 1169 }, { "epoch": 0.22, "learning_rate": 0.00018247145997705975, "loss": 1.2798, "step": 1170 }, { "epoch": 0.22, "learning_rate": 0.00018243768649802732, "loss": 1.1049, "step": 1171 }, { "epoch": 0.22, "learning_rate": 0.00018240388364528586, "loss": 1.0739, "step": 1172 }, { "epoch": 0.22, "learning_rate": 0.00018237005143087988, "loss": 1.2523, "step": 1173 }, { "epoch": 0.22, "learning_rate": 0.00018233618986686424, "loss": 1.1264, "step": 1174 }, { "epoch": 0.22, "learning_rate": 0.0001823022989653043, "loss": 1.1163, "step": 1175 }, { "epoch": 0.22, "learning_rate": 0.00018226837873827585, "loss": 1.1482, "step": 1176 }, { "epoch": 0.22, "learning_rate": 0.00018223442919786516, "loss": 1.115, "step": 1177 }, { "epoch": 0.22, "learning_rate": 0.0001822004503561689, "loss": 1.1256, "step": 1178 }, { "epoch": 0.22, "learning_rate": 0.00018216644222529427, "loss": 1.1014, "step": 1179 }, { "epoch": 0.22, "learning_rate": 0.0001821324048173588, "loss": 1.1273, "step": 1180 }, { "epoch": 0.22, "learning_rate": 0.00018209833814449052, "loss": 1.2287, "step": 1181 }, { "epoch": 0.22, "learning_rate": 0.00018206424221882782, "loss": 1.0664, "step": 1182 }, { "epoch": 0.22, "learning_rate": 0.00018203011705251963, "loss": 1.112, "step": 1183 }, { "epoch": 0.22, "learning_rate": 0.0001819959626577252, "loss": 1.2173, "step": 1184 }, { "epoch": 0.22, "learning_rate": 0.00018196177904661415, "loss": 1.284, "step": 1185 }, { "epoch": 0.22, "learning_rate": 0.0001819275662313667, "loss": 1.1648, "step": 1186 }, { "epoch": 0.22, "learning_rate": 0.00018189332422417325, "loss": 1.0553, "step": 1187 }, { "epoch": 0.22, "learning_rate": 0.00018185905303723482, "loss": 1.1683, "step": 1188 }, { "epoch": 0.22, "learning_rate": 0.00018182475268276263, "loss": 1.1895, "step": 1189 }, { "epoch": 0.22, "learning_rate": 0.00018179042317297844, "loss": 1.2604, "step": 1190 }, { "epoch": 0.22, "learning_rate": 0.00018175606452011425, "loss": 1.0706, "step": 1191 }, { "epoch": 0.22, "learning_rate": 0.00018172167673641263, "loss": 1.1129, "step": 1192 }, { "epoch": 0.22, "learning_rate": 0.00018168725983412637, "loss": 1.0985, "step": 1193 }, { "epoch": 0.22, "learning_rate": 0.00018165281382551872, "loss": 1.1075, "step": 1194 }, { "epoch": 0.22, "learning_rate": 0.0001816183387228633, "loss": 1.0862, "step": 1195 }, { "epoch": 0.22, "learning_rate": 0.00018158383453844402, "loss": 1.1427, "step": 1196 }, { "epoch": 0.22, "learning_rate": 0.00018154930128455523, "loss": 1.1996, "step": 1197 }, { "epoch": 0.22, "learning_rate": 0.00018151473897350164, "loss": 1.1389, "step": 1198 }, { "epoch": 0.22, "learning_rate": 0.00018148014761759827, "loss": 1.2285, "step": 1199 }, { "epoch": 0.22, "learning_rate": 0.0001814455272291705, "loss": 1.1522, "step": 1200 }, { "epoch": 0.22, "learning_rate": 0.0001814108778205541, "loss": 1.2579, "step": 1201 }, { "epoch": 0.22, "learning_rate": 0.00018137619940409506, "loss": 1.2163, "step": 1202 }, { "epoch": 0.22, "learning_rate": 0.00018134149199214988, "loss": 1.2267, "step": 1203 }, { "epoch": 0.22, "learning_rate": 0.00018130675559708525, "loss": 1.1091, "step": 1204 }, { "epoch": 0.22, "learning_rate": 0.00018127199023127823, "loss": 1.2734, "step": 1205 }, { "epoch": 0.22, "learning_rate": 0.00018123719590711624, "loss": 1.1816, "step": 1206 }, { "epoch": 0.22, "learning_rate": 0.000181202372636997, "loss": 1.1651, "step": 1207 }, { "epoch": 0.22, "learning_rate": 0.00018116752043332847, "loss": 1.1129, "step": 1208 }, { "epoch": 0.22, "learning_rate": 0.00018113263930852902, "loss": 1.169, "step": 1209 }, { "epoch": 0.22, "learning_rate": 0.0001810977292750273, "loss": 1.1342, "step": 1210 }, { "epoch": 0.22, "learning_rate": 0.0001810627903452622, "loss": 1.2205, "step": 1211 }, { "epoch": 0.22, "learning_rate": 0.000181027822531683, "loss": 1.0813, "step": 1212 }, { "epoch": 0.22, "learning_rate": 0.0001809928258467492, "loss": 1.1605, "step": 1213 }, { "epoch": 0.22, "learning_rate": 0.00018095780030293062, "loss": 1.0965, "step": 1214 }, { "epoch": 0.22, "learning_rate": 0.0001809227459127074, "loss": 1.0504, "step": 1215 }, { "epoch": 0.22, "learning_rate": 0.00018088766268856984, "loss": 1.2001, "step": 1216 }, { "epoch": 0.22, "learning_rate": 0.00018085255064301858, "loss": 1.1605, "step": 1217 }, { "epoch": 0.22, "learning_rate": 0.0001808174097885646, "loss": 1.0814, "step": 1218 }, { "epoch": 0.22, "learning_rate": 0.00018078224013772903, "loss": 1.2572, "step": 1219 }, { "epoch": 0.22, "learning_rate": 0.00018074704170304334, "loss": 1.0227, "step": 1220 }, { "epoch": 0.22, "learning_rate": 0.00018071181449704927, "loss": 1.0951, "step": 1221 }, { "epoch": 0.23, "learning_rate": 0.00018067655853229865, "loss": 1.0586, "step": 1222 }, { "epoch": 0.23, "learning_rate": 0.0001806412738213538, "loss": 1.068, "step": 1223 }, { "epoch": 0.23, "learning_rate": 0.00018060596037678707, "loss": 1.1153, "step": 1224 }, { "epoch": 0.23, "learning_rate": 0.0001805706182111812, "loss": 1.1916, "step": 1225 }, { "epoch": 0.23, "learning_rate": 0.00018053524733712908, "loss": 0.9766, "step": 1226 }, { "epoch": 0.23, "learning_rate": 0.00018049984776723384, "loss": 1.061, "step": 1227 }, { "epoch": 0.23, "learning_rate": 0.00018046441951410883, "loss": 1.0675, "step": 1228 }, { "epoch": 0.23, "learning_rate": 0.0001804289625903777, "loss": 1.1113, "step": 1229 }, { "epoch": 0.23, "learning_rate": 0.0001803934770086742, "loss": 1.1922, "step": 1230 }, { "epoch": 0.23, "learning_rate": 0.00018035796278164233, "loss": 1.1963, "step": 1231 }, { "epoch": 0.23, "learning_rate": 0.00018032241992193635, "loss": 1.0978, "step": 1232 }, { "epoch": 0.23, "learning_rate": 0.00018028684844222066, "loss": 1.1786, "step": 1233 }, { "epoch": 0.23, "learning_rate": 0.00018025124835516988, "loss": 1.176, "step": 1234 }, { "epoch": 0.23, "learning_rate": 0.00018021561967346882, "loss": 1.1399, "step": 1235 }, { "epoch": 0.23, "learning_rate": 0.00018017996240981253, "loss": 1.0939, "step": 1236 }, { "epoch": 0.23, "learning_rate": 0.0001801442765769061, "loss": 1.2004, "step": 1237 }, { "epoch": 0.23, "learning_rate": 0.00018010856218746496, "loss": 0.8943, "step": 1238 }, { "epoch": 0.23, "learning_rate": 0.00018007281925421463, "loss": 1.0589, "step": 1239 }, { "epoch": 0.23, "learning_rate": 0.00018003704778989086, "loss": 1.0251, "step": 1240 }, { "epoch": 0.23, "learning_rate": 0.0001800012478072395, "loss": 1.2631, "step": 1241 }, { "epoch": 0.23, "learning_rate": 0.00017996541931901652, "loss": 1.092, "step": 1242 }, { "epoch": 0.23, "learning_rate": 0.00017992956233798825, "loss": 1.0832, "step": 1243 }, { "epoch": 0.23, "learning_rate": 0.0001798936768769309, "loss": 1.032, "step": 1244 }, { "epoch": 0.23, "learning_rate": 0.0001798577629486311, "loss": 1.1117, "step": 1245 }, { "epoch": 0.23, "learning_rate": 0.00017982182056588535, "loss": 1.1862, "step": 1246 }, { "epoch": 0.23, "learning_rate": 0.00017978584974150054, "loss": 1.0098, "step": 1247 }, { "epoch": 0.23, "learning_rate": 0.00017974985048829352, "loss": 1.1441, "step": 1248 }, { "epoch": 0.23, "learning_rate": 0.00017971382281909134, "loss": 1.0339, "step": 1249 }, { "epoch": 0.23, "learning_rate": 0.0001796777667467312, "loss": 1.1925, "step": 1250 }, { "epoch": 0.23, "learning_rate": 0.00017964168228406035, "loss": 1.2725, "step": 1251 }, { "epoch": 0.23, "learning_rate": 0.0001796055694439362, "loss": 1.1273, "step": 1252 }, { "epoch": 0.23, "learning_rate": 0.00017956942823922625, "loss": 1.083, "step": 1253 }, { "epoch": 0.23, "learning_rate": 0.0001795332586828082, "loss": 1.0878, "step": 1254 }, { "epoch": 0.23, "learning_rate": 0.00017949706078756967, "loss": 0.9667, "step": 1255 }, { "epoch": 0.23, "learning_rate": 0.00017946083456640851, "loss": 1.0684, "step": 1256 }, { "epoch": 0.23, "learning_rate": 0.00017942458003223266, "loss": 1.2116, "step": 1257 }, { "epoch": 0.23, "learning_rate": 0.0001793882971979601, "loss": 1.2066, "step": 1258 }, { "epoch": 0.23, "learning_rate": 0.0001793519860765189, "loss": 1.1052, "step": 1259 }, { "epoch": 0.23, "learning_rate": 0.00017931564668084726, "loss": 1.0518, "step": 1260 }, { "epoch": 0.23, "learning_rate": 0.00017927927902389342, "loss": 0.976, "step": 1261 }, { "epoch": 0.23, "learning_rate": 0.00017924288311861565, "loss": 1.1661, "step": 1262 }, { "epoch": 0.23, "learning_rate": 0.00017920645897798236, "loss": 1.0506, "step": 1263 }, { "epoch": 0.23, "learning_rate": 0.00017917000661497195, "loss": 1.2016, "step": 1264 }, { "epoch": 0.23, "learning_rate": 0.0001791335260425729, "loss": 1.0053, "step": 1265 }, { "epoch": 0.23, "learning_rate": 0.00017909701727378385, "loss": 1.0983, "step": 1266 }, { "epoch": 0.23, "learning_rate": 0.00017906048032161328, "loss": 1.2099, "step": 1267 }, { "epoch": 0.23, "learning_rate": 0.00017902391519907987, "loss": 1.1663, "step": 1268 }, { "epoch": 0.23, "learning_rate": 0.0001789873219192123, "loss": 1.3001, "step": 1269 }, { "epoch": 0.23, "learning_rate": 0.0001789507004950492, "loss": 1.2232, "step": 1270 }, { "epoch": 0.23, "learning_rate": 0.00017891405093963938, "loss": 1.1593, "step": 1271 }, { "epoch": 0.23, "learning_rate": 0.00017887737326604156, "loss": 1.0837, "step": 1272 }, { "epoch": 0.23, "learning_rate": 0.0001788406674873245, "loss": 1.2559, "step": 1273 }, { "epoch": 0.23, "learning_rate": 0.000178803933616567, "loss": 1.0915, "step": 1274 }, { "epoch": 0.23, "learning_rate": 0.00017876717166685787, "loss": 1.1036, "step": 1275 }, { "epoch": 0.24, "learning_rate": 0.00017873038165129587, "loss": 1.112, "step": 1276 }, { "epoch": 0.24, "learning_rate": 0.00017869356358298988, "loss": 1.3669, "step": 1277 }, { "epoch": 0.24, "learning_rate": 0.00017865671747505858, "loss": 1.0737, "step": 1278 }, { "epoch": 0.24, "learning_rate": 0.00017861984334063084, "loss": 1.0191, "step": 1279 }, { "epoch": 0.24, "learning_rate": 0.00017858294119284544, "loss": 1.0946, "step": 1280 }, { "epoch": 0.24, "learning_rate": 0.0001785460110448511, "loss": 1.1609, "step": 1281 }, { "epoch": 0.24, "learning_rate": 0.00017850905290980656, "loss": 1.1217, "step": 1282 }, { "epoch": 0.24, "learning_rate": 0.0001784720668008805, "loss": 1.1732, "step": 1283 }, { "epoch": 0.24, "learning_rate": 0.00017843505273125164, "loss": 1.1269, "step": 1284 }, { "epoch": 0.24, "learning_rate": 0.0001783980107141086, "loss": 1.2919, "step": 1285 }, { "epoch": 0.24, "learning_rate": 0.00017836094076264996, "loss": 1.2194, "step": 1286 }, { "epoch": 0.24, "learning_rate": 0.00017832384289008423, "loss": 1.1685, "step": 1287 }, { "epoch": 0.24, "learning_rate": 0.00017828671710963, "loss": 1.0132, "step": 1288 }, { "epoch": 0.24, "learning_rate": 0.0001782495634345156, "loss": 1.0959, "step": 1289 }, { "epoch": 0.24, "learning_rate": 0.00017821238187797948, "loss": 1.1356, "step": 1290 }, { "epoch": 0.24, "learning_rate": 0.00017817517245326996, "loss": 1.2902, "step": 1291 }, { "epoch": 0.24, "learning_rate": 0.00017813793517364523, "loss": 1.1389, "step": 1292 }, { "epoch": 0.24, "learning_rate": 0.00017810067005237347, "loss": 1.2142, "step": 1293 }, { "epoch": 0.24, "learning_rate": 0.0001780633771027328, "loss": 1.2478, "step": 1294 }, { "epoch": 0.24, "learning_rate": 0.00017802605633801118, "loss": 0.9825, "step": 1295 }, { "epoch": 0.24, "learning_rate": 0.0001779887077715066, "loss": 1.1931, "step": 1296 }, { "epoch": 0.24, "learning_rate": 0.00017795133141652677, "loss": 1.0975, "step": 1297 }, { "epoch": 0.24, "learning_rate": 0.0001779139272863895, "loss": 1.1604, "step": 1298 }, { "epoch": 0.24, "learning_rate": 0.00017787649539442236, "loss": 1.2067, "step": 1299 }, { "epoch": 0.24, "learning_rate": 0.0001778390357539629, "loss": 1.0618, "step": 1300 }, { "epoch": 0.24, "learning_rate": 0.00017780154837835848, "loss": 1.2179, "step": 1301 }, { "epoch": 0.24, "learning_rate": 0.00017776403328096643, "loss": 1.1279, "step": 1302 }, { "epoch": 0.24, "learning_rate": 0.00017772649047515383, "loss": 1.1834, "step": 1303 }, { "epoch": 0.24, "learning_rate": 0.00017768891997429776, "loss": 1.1475, "step": 1304 }, { "epoch": 0.24, "learning_rate": 0.00017765132179178514, "loss": 1.2549, "step": 1305 }, { "epoch": 0.24, "learning_rate": 0.00017761369594101266, "loss": 1.1063, "step": 1306 }, { "epoch": 0.24, "learning_rate": 0.00017757604243538703, "loss": 1.241, "step": 1307 }, { "epoch": 0.24, "learning_rate": 0.00017753836128832465, "loss": 1.279, "step": 1308 }, { "epoch": 0.24, "learning_rate": 0.0001775006525132519, "loss": 1.0934, "step": 1309 }, { "epoch": 0.24, "learning_rate": 0.0001774629161236049, "loss": 1.0658, "step": 1310 }, { "epoch": 0.24, "learning_rate": 0.0001774251521328297, "loss": 1.2117, "step": 1311 }, { "epoch": 0.24, "learning_rate": 0.0001773873605543821, "loss": 1.0868, "step": 1312 }, { "epoch": 0.24, "learning_rate": 0.0001773495414017278, "loss": 1.1813, "step": 1313 }, { "epoch": 0.24, "learning_rate": 0.0001773116946883423, "loss": 1.0414, "step": 1314 }, { "epoch": 0.24, "learning_rate": 0.0001772738204277109, "loss": 1.1038, "step": 1315 }, { "epoch": 0.24, "learning_rate": 0.00017723591863332873, "loss": 1.145, "step": 1316 }, { "epoch": 0.24, "learning_rate": 0.00017719798931870075, "loss": 1.1797, "step": 1317 }, { "epoch": 0.24, "learning_rate": 0.0001771600324973417, "loss": 1.0969, "step": 1318 }, { "epoch": 0.24, "learning_rate": 0.00017712204818277614, "loss": 1.0548, "step": 1319 }, { "epoch": 0.24, "learning_rate": 0.00017708403638853838, "loss": 1.1176, "step": 1320 }, { "epoch": 0.24, "learning_rate": 0.00017704599712817262, "loss": 1.1137, "step": 1321 }, { "epoch": 0.24, "learning_rate": 0.00017700793041523273, "loss": 1.1895, "step": 1322 }, { "epoch": 0.24, "learning_rate": 0.00017696983626328244, "loss": 1.0518, "step": 1323 }, { "epoch": 0.24, "learning_rate": 0.00017693171468589523, "loss": 1.1616, "step": 1324 }, { "epoch": 0.24, "learning_rate": 0.00017689356569665433, "loss": 1.1451, "step": 1325 }, { "epoch": 0.24, "learning_rate": 0.00017685538930915283, "loss": 1.2883, "step": 1326 }, { "epoch": 0.24, "learning_rate": 0.00017681718553699341, "loss": 1.1159, "step": 1327 }, { "epoch": 0.24, "learning_rate": 0.00017677895439378874, "loss": 1.1791, "step": 1328 }, { "epoch": 0.24, "learning_rate": 0.000176740695893161, "loss": 1.2099, "step": 1329 }, { "epoch": 0.25, "learning_rate": 0.00017670241004874232, "loss": 1.1095, "step": 1330 }, { "epoch": 0.25, "learning_rate": 0.0001766640968741744, "loss": 1.1675, "step": 1331 }, { "epoch": 0.25, "learning_rate": 0.00017662575638310887, "loss": 0.9656, "step": 1332 }, { "epoch": 0.25, "learning_rate": 0.00017658738858920687, "loss": 1.1185, "step": 1333 }, { "epoch": 0.25, "learning_rate": 0.00017654899350613948, "loss": 1.178, "step": 1334 }, { "epoch": 0.25, "learning_rate": 0.00017651057114758734, "loss": 1.1227, "step": 1335 }, { "epoch": 0.25, "learning_rate": 0.0001764721215272409, "loss": 1.134, "step": 1336 }, { "epoch": 0.25, "learning_rate": 0.00017643364465880036, "loss": 1.2178, "step": 1337 }, { "epoch": 0.25, "learning_rate": 0.00017639514055597548, "loss": 1.1845, "step": 1338 }, { "epoch": 0.25, "learning_rate": 0.0001763566092324859, "loss": 1.1328, "step": 1339 }, { "epoch": 0.25, "learning_rate": 0.0001763180507020608, "loss": 0.8868, "step": 1340 }, { "epoch": 0.25, "learning_rate": 0.00017627946497843916, "loss": 1.1834, "step": 1341 }, { "epoch": 0.25, "learning_rate": 0.0001762408520753696, "loss": 1.0949, "step": 1342 }, { "epoch": 0.25, "learning_rate": 0.00017620221200661046, "loss": 1.3042, "step": 1343 }, { "epoch": 0.25, "learning_rate": 0.00017616354478592973, "loss": 1.1574, "step": 1344 }, { "epoch": 0.25, "learning_rate": 0.0001761248504271051, "loss": 1.1155, "step": 1345 }, { "epoch": 0.25, "learning_rate": 0.00017608612894392392, "loss": 1.0375, "step": 1346 }, { "epoch": 0.25, "learning_rate": 0.00017604738035018316, "loss": 1.1162, "step": 1347 }, { "epoch": 0.25, "learning_rate": 0.00017600860465968952, "loss": 1.1341, "step": 1348 }, { "epoch": 0.25, "learning_rate": 0.00017596980188625932, "loss": 1.1362, "step": 1349 }, { "epoch": 0.25, "learning_rate": 0.00017593097204371852, "loss": 1.0246, "step": 1350 }, { "epoch": 0.25, "learning_rate": 0.0001758921151459028, "loss": 1.0979, "step": 1351 }, { "epoch": 0.25, "learning_rate": 0.00017585323120665732, "loss": 1.0518, "step": 1352 }, { "epoch": 0.25, "learning_rate": 0.00017581432023983706, "loss": 1.1524, "step": 1353 }, { "epoch": 0.25, "learning_rate": 0.00017577538225930654, "loss": 1.1183, "step": 1354 }, { "epoch": 0.25, "learning_rate": 0.00017573641727893985, "loss": 1.075, "step": 1355 }, { "epoch": 0.25, "learning_rate": 0.0001756974253126208, "loss": 1.0962, "step": 1356 }, { "epoch": 0.25, "learning_rate": 0.0001756584063742428, "loss": 1.0839, "step": 1357 }, { "epoch": 0.25, "learning_rate": 0.00017561936047770879, "loss": 1.22, "step": 1358 }, { "epoch": 0.25, "learning_rate": 0.0001755802876369314, "loss": 1.0859, "step": 1359 }, { "epoch": 0.25, "learning_rate": 0.0001755411878658329, "loss": 1.0569, "step": 1360 }, { "epoch": 0.25, "learning_rate": 0.00017550206117834498, "loss": 1.1243, "step": 1361 }, { "epoch": 0.25, "learning_rate": 0.00017546290758840906, "loss": 1.2134, "step": 1362 }, { "epoch": 0.25, "learning_rate": 0.00017542372710997617, "loss": 1.0116, "step": 1363 }, { "epoch": 0.25, "learning_rate": 0.0001753845197570068, "loss": 1.1316, "step": 1364 }, { "epoch": 0.25, "learning_rate": 0.00017534528554347115, "loss": 1.0104, "step": 1365 }, { "epoch": 0.25, "learning_rate": 0.00017530602448334888, "loss": 1.2082, "step": 1366 }, { "epoch": 0.25, "learning_rate": 0.00017526673659062922, "loss": 1.0042, "step": 1367 }, { "epoch": 0.25, "learning_rate": 0.00017522742187931108, "loss": 1.1336, "step": 1368 }, { "epoch": 0.25, "learning_rate": 0.00017518808036340279, "loss": 1.0691, "step": 1369 }, { "epoch": 0.25, "learning_rate": 0.00017514871205692232, "loss": 1.1323, "step": 1370 }, { "epoch": 0.25, "learning_rate": 0.00017510931697389714, "loss": 1.1299, "step": 1371 }, { "epoch": 0.25, "learning_rate": 0.00017506989512836425, "loss": 1.12, "step": 1372 }, { "epoch": 0.25, "learning_rate": 0.00017503044653437025, "loss": 1.1017, "step": 1373 }, { "epoch": 0.25, "learning_rate": 0.0001749909712059712, "loss": 0.9117, "step": 1374 }, { "epoch": 0.25, "learning_rate": 0.00017495146915723272, "loss": 1.0883, "step": 1375 }, { "epoch": 0.25, "learning_rate": 0.00017491194040222994, "loss": 1.1469, "step": 1376 }, { "epoch": 0.25, "learning_rate": 0.0001748723849550476, "loss": 1.1673, "step": 1377 }, { "epoch": 0.25, "learning_rate": 0.0001748328028297797, "loss": 1.1791, "step": 1378 }, { "epoch": 0.25, "learning_rate": 0.00017479319404053003, "loss": 1.278, "step": 1379 }, { "epoch": 0.25, "learning_rate": 0.00017475355860141172, "loss": 1.1328, "step": 1380 }, { "epoch": 0.25, "learning_rate": 0.00017471389652654745, "loss": 1.2299, "step": 1381 }, { "epoch": 0.25, "learning_rate": 0.00017467420783006933, "loss": 1.3093, "step": 1382 }, { "epoch": 0.25, "learning_rate": 0.00017463449252611905, "loss": 1.0613, "step": 1383 }, { "epoch": 0.26, "learning_rate": 0.00017459475062884776, "loss": 1.0898, "step": 1384 }, { "epoch": 0.26, "learning_rate": 0.00017455498215241598, "loss": 1.1228, "step": 1385 }, { "epoch": 0.26, "learning_rate": 0.0001745151871109938, "loss": 1.115, "step": 1386 }, { "epoch": 0.26, "learning_rate": 0.0001744753655187608, "loss": 1.1015, "step": 1387 }, { "epoch": 0.26, "learning_rate": 0.00017443551738990595, "loss": 1.0386, "step": 1388 }, { "epoch": 0.26, "learning_rate": 0.0001743956427386277, "loss": 1.1385, "step": 1389 }, { "epoch": 0.26, "learning_rate": 0.00017435574157913394, "loss": 1.2132, "step": 1390 }, { "epoch": 0.26, "learning_rate": 0.00017431581392564197, "loss": 1.1192, "step": 1391 }, { "epoch": 0.26, "learning_rate": 0.00017427585979237867, "loss": 1.0509, "step": 1392 }, { "epoch": 0.26, "learning_rate": 0.00017423587919358023, "loss": 1.09, "step": 1393 }, { "epoch": 0.26, "learning_rate": 0.00017419587214349225, "loss": 1.2455, "step": 1394 }, { "epoch": 0.26, "learning_rate": 0.00017415583865636983, "loss": 1.0094, "step": 1395 }, { "epoch": 0.26, "learning_rate": 0.0001741157787464775, "loss": 1.1711, "step": 1396 }, { "epoch": 0.26, "learning_rate": 0.00017407569242808917, "loss": 1.1383, "step": 1397 }, { "epoch": 0.26, "learning_rate": 0.0001740355797154881, "loss": 0.9617, "step": 1398 }, { "epoch": 0.26, "learning_rate": 0.00017399544062296707, "loss": 1.1024, "step": 1399 }, { "epoch": 0.26, "learning_rate": 0.00017395527516482818, "loss": 1.1268, "step": 1400 }, { "epoch": 0.26, "learning_rate": 0.00017391508335538294, "loss": 1.0834, "step": 1401 }, { "epoch": 0.26, "learning_rate": 0.00017387486520895228, "loss": 1.1444, "step": 1402 }, { "epoch": 0.26, "learning_rate": 0.0001738346207398665, "loss": 1.2187, "step": 1403 }, { "epoch": 0.26, "learning_rate": 0.00017379434996246524, "loss": 1.1903, "step": 1404 }, { "epoch": 0.26, "learning_rate": 0.00017375405289109757, "loss": 1.2134, "step": 1405 }, { "epoch": 0.26, "learning_rate": 0.00017371372954012187, "loss": 0.9875, "step": 1406 }, { "epoch": 0.26, "learning_rate": 0.00017367337992390595, "loss": 1.1894, "step": 1407 }, { "epoch": 0.26, "learning_rate": 0.00017363300405682697, "loss": 1.1992, "step": 1408 }, { "epoch": 0.26, "learning_rate": 0.00017359260195327135, "loss": 1.0384, "step": 1409 }, { "epoch": 0.26, "learning_rate": 0.000173552173627635, "loss": 1.1115, "step": 1410 }, { "epoch": 0.26, "learning_rate": 0.00017351171909432304, "loss": 1.3026, "step": 1411 }, { "epoch": 0.26, "learning_rate": 0.00017347123836775006, "loss": 1.0857, "step": 1412 }, { "epoch": 0.26, "learning_rate": 0.00017343073146233983, "loss": 1.0713, "step": 1413 }, { "epoch": 0.26, "learning_rate": 0.0001733901983925256, "loss": 1.1114, "step": 1414 }, { "epoch": 0.26, "learning_rate": 0.00017334963917274985, "loss": 0.9919, "step": 1415 }, { "epoch": 0.26, "learning_rate": 0.0001733090538174644, "loss": 1.0804, "step": 1416 }, { "epoch": 0.26, "learning_rate": 0.00017326844234113038, "loss": 1.0974, "step": 1417 }, { "epoch": 0.26, "learning_rate": 0.00017322780475821824, "loss": 1.0948, "step": 1418 }, { "epoch": 0.26, "learning_rate": 0.0001731871410832077, "loss": 1.1106, "step": 1419 }, { "epoch": 0.26, "learning_rate": 0.0001731464513305878, "loss": 1.079, "step": 1420 }, { "epoch": 0.26, "learning_rate": 0.00017310573551485692, "loss": 1.0264, "step": 1421 }, { "epoch": 0.26, "learning_rate": 0.00017306499365052266, "loss": 1.2128, "step": 1422 }, { "epoch": 0.26, "learning_rate": 0.00017302422575210186, "loss": 1.1316, "step": 1423 }, { "epoch": 0.26, "learning_rate": 0.00017298343183412077, "loss": 1.0981, "step": 1424 }, { "epoch": 0.26, "learning_rate": 0.00017294261191111484, "loss": 1.2253, "step": 1425 }, { "epoch": 0.26, "learning_rate": 0.00017290176599762873, "loss": 1.3071, "step": 1426 }, { "epoch": 0.26, "learning_rate": 0.00017286089410821641, "loss": 1.2407, "step": 1427 }, { "epoch": 0.26, "learning_rate": 0.00017281999625744118, "loss": 1.2183, "step": 1428 }, { "epoch": 0.26, "learning_rate": 0.0001727790724598755, "loss": 1.1105, "step": 1429 }, { "epoch": 0.26, "learning_rate": 0.00017273812273010104, "loss": 1.093, "step": 1430 }, { "epoch": 0.26, "learning_rate": 0.00017269714708270884, "loss": 1.0699, "step": 1431 }, { "epoch": 0.26, "learning_rate": 0.00017265614553229903, "loss": 1.1724, "step": 1432 }, { "epoch": 0.26, "learning_rate": 0.00017261511809348105, "loss": 1.1417, "step": 1433 }, { "epoch": 0.26, "learning_rate": 0.0001725740647808736, "loss": 1.0637, "step": 1434 }, { "epoch": 0.26, "learning_rate": 0.00017253298560910457, "loss": 1.0962, "step": 1435 }, { "epoch": 0.26, "learning_rate": 0.00017249188059281098, "loss": 0.985, "step": 1436 }, { "epoch": 0.26, "learning_rate": 0.00017245074974663914, "loss": 1.1073, "step": 1437 }, { "epoch": 0.26, "learning_rate": 0.00017240959308524457, "loss": 1.1347, "step": 1438 }, { "epoch": 0.27, "learning_rate": 0.00017236841062329196, "loss": 1.1381, "step": 1439 }, { "epoch": 0.27, "learning_rate": 0.00017232720237545524, "loss": 1.1202, "step": 1440 }, { "epoch": 0.27, "learning_rate": 0.0001722859683564174, "loss": 1.171, "step": 1441 }, { "epoch": 0.27, "learning_rate": 0.00017224470858087076, "loss": 1.1605, "step": 1442 }, { "epoch": 0.27, "learning_rate": 0.00017220342306351679, "loss": 0.9542, "step": 1443 }, { "epoch": 0.27, "learning_rate": 0.00017216211181906606, "loss": 1.2257, "step": 1444 }, { "epoch": 0.27, "learning_rate": 0.0001721207748622383, "loss": 1.1149, "step": 1445 }, { "epoch": 0.27, "learning_rate": 0.00017207941220776256, "loss": 1.0967, "step": 1446 }, { "epoch": 0.27, "learning_rate": 0.00017203802387037688, "loss": 0.9002, "step": 1447 }, { "epoch": 0.27, "learning_rate": 0.0001719966098648285, "loss": 1.0337, "step": 1448 }, { "epoch": 0.27, "learning_rate": 0.00017195517020587378, "loss": 1.2566, "step": 1449 }, { "epoch": 0.27, "learning_rate": 0.00017191370490827836, "loss": 1.0719, "step": 1450 }, { "epoch": 0.27, "learning_rate": 0.00017187221398681684, "loss": 1.2022, "step": 1451 }, { "epoch": 0.27, "learning_rate": 0.00017183069745627298, "loss": 1.0821, "step": 1452 }, { "epoch": 0.27, "learning_rate": 0.00017178915533143979, "loss": 1.1278, "step": 1453 }, { "epoch": 0.27, "learning_rate": 0.00017174758762711925, "loss": 1.1033, "step": 1454 }, { "epoch": 0.27, "learning_rate": 0.00017170599435812253, "loss": 1.0134, "step": 1455 }, { "epoch": 0.27, "learning_rate": 0.00017166437553926993, "loss": 1.1387, "step": 1456 }, { "epoch": 0.27, "learning_rate": 0.00017162273118539082, "loss": 1.1618, "step": 1457 }, { "epoch": 0.27, "learning_rate": 0.0001715810613113236, "loss": 1.0213, "step": 1458 }, { "epoch": 0.27, "learning_rate": 0.00017153936593191594, "loss": 1.1398, "step": 1459 }, { "epoch": 0.27, "learning_rate": 0.0001714976450620244, "loss": 1.1195, "step": 1460 }, { "epoch": 0.27, "learning_rate": 0.00017145589871651477, "loss": 1.1192, "step": 1461 }, { "epoch": 0.27, "learning_rate": 0.0001714141269102618, "loss": 1.0469, "step": 1462 }, { "epoch": 0.27, "learning_rate": 0.00017137232965814948, "loss": 0.9653, "step": 1463 }, { "epoch": 0.27, "learning_rate": 0.00017133050697507068, "loss": 1.0403, "step": 1464 }, { "epoch": 0.27, "learning_rate": 0.00017128865887592739, "loss": 1.022, "step": 1465 }, { "epoch": 0.27, "learning_rate": 0.00017124678537563072, "loss": 1.1338, "step": 1466 }, { "epoch": 0.27, "learning_rate": 0.0001712048864891008, "loss": 1.1819, "step": 1467 }, { "epoch": 0.27, "learning_rate": 0.00017116296223126676, "loss": 1.1131, "step": 1468 }, { "epoch": 0.27, "learning_rate": 0.0001711210126170668, "loss": 1.2051, "step": 1469 }, { "epoch": 0.27, "learning_rate": 0.0001710790376614482, "loss": 1.1005, "step": 1470 }, { "epoch": 0.27, "learning_rate": 0.00017103703737936718, "loss": 1.1699, "step": 1471 }, { "epoch": 0.27, "learning_rate": 0.00017099501178578907, "loss": 0.8204, "step": 1472 }, { "epoch": 0.27, "learning_rate": 0.00017095296089568817, "loss": 1.0734, "step": 1473 }, { "epoch": 0.27, "learning_rate": 0.0001709108847240478, "loss": 1.243, "step": 1474 }, { "epoch": 0.27, "learning_rate": 0.0001708687832858603, "loss": 1.0588, "step": 1475 }, { "epoch": 0.27, "learning_rate": 0.000170826656596127, "loss": 1.0408, "step": 1476 }, { "epoch": 0.27, "learning_rate": 0.0001707845046698582, "loss": 1.2251, "step": 1477 }, { "epoch": 0.27, "learning_rate": 0.00017074232752207328, "loss": 1.1121, "step": 1478 }, { "epoch": 0.27, "learning_rate": 0.00017070012516780055, "loss": 0.9745, "step": 1479 }, { "epoch": 0.27, "learning_rate": 0.00017065789762207726, "loss": 1.2142, "step": 1480 }, { "epoch": 0.27, "learning_rate": 0.00017061564489994968, "loss": 1.0552, "step": 1481 }, { "epoch": 0.27, "learning_rate": 0.0001705733670164731, "loss": 1.2674, "step": 1482 }, { "epoch": 0.27, "learning_rate": 0.0001705310639867117, "loss": 1.297, "step": 1483 }, { "epoch": 0.27, "learning_rate": 0.00017048873582573864, "loss": 1.0717, "step": 1484 }, { "epoch": 0.27, "learning_rate": 0.00017044638254863604, "loss": 1.0355, "step": 1485 }, { "epoch": 0.27, "learning_rate": 0.00017040400417049495, "loss": 1.252, "step": 1486 }, { "epoch": 0.27, "learning_rate": 0.00017036160070641544, "loss": 1.2077, "step": 1487 }, { "epoch": 0.27, "learning_rate": 0.0001703191721715064, "loss": 1.1217, "step": 1488 }, { "epoch": 0.27, "learning_rate": 0.00017027671858088576, "loss": 1.0325, "step": 1489 }, { "epoch": 0.27, "learning_rate": 0.0001702342399496803, "loss": 1.0372, "step": 1490 }, { "epoch": 0.27, "learning_rate": 0.00017019173629302577, "loss": 1.1217, "step": 1491 }, { "epoch": 0.27, "learning_rate": 0.00017014920762606687, "loss": 1.029, "step": 1492 }, { "epoch": 0.28, "learning_rate": 0.00017010665396395704, "loss": 0.9062, "step": 1493 }, { "epoch": 0.28, "learning_rate": 0.00017006407532185887, "loss": 1.1254, "step": 1494 }, { "epoch": 0.28, "learning_rate": 0.00017002147171494365, "loss": 1.1374, "step": 1495 }, { "epoch": 0.28, "learning_rate": 0.0001699788431583917, "loss": 1.2352, "step": 1496 }, { "epoch": 0.28, "learning_rate": 0.00016993618966739215, "loss": 1.2428, "step": 1497 }, { "epoch": 0.28, "learning_rate": 0.00016989351125714305, "loss": 1.1255, "step": 1498 }, { "epoch": 0.28, "learning_rate": 0.00016985080794285128, "loss": 1.1308, "step": 1499 }, { "epoch": 0.28, "learning_rate": 0.0001698080797397327, "loss": 1.0851, "step": 1500 }, { "epoch": 0.28, "learning_rate": 0.0001697653266630119, "loss": 1.0122, "step": 1501 }, { "epoch": 0.28, "learning_rate": 0.00016972254872792245, "loss": 1.2966, "step": 1502 }, { "epoch": 0.28, "learning_rate": 0.00016967974594970673, "loss": 1.1721, "step": 1503 }, { "epoch": 0.28, "learning_rate": 0.00016963691834361597, "loss": 1.1804, "step": 1504 }, { "epoch": 0.28, "learning_rate": 0.00016959406592491024, "loss": 1.1464, "step": 1505 }, { "epoch": 0.28, "learning_rate": 0.00016955118870885843, "loss": 1.2678, "step": 1506 }, { "epoch": 0.28, "learning_rate": 0.00016950828671073834, "loss": 1.0738, "step": 1507 }, { "epoch": 0.28, "learning_rate": 0.00016946535994583656, "loss": 1.0724, "step": 1508 }, { "epoch": 0.28, "learning_rate": 0.00016942240842944848, "loss": 1.2079, "step": 1509 }, { "epoch": 0.28, "learning_rate": 0.00016937943217687832, "loss": 1.0133, "step": 1510 }, { "epoch": 0.28, "learning_rate": 0.00016933643120343916, "loss": 1.0644, "step": 1511 }, { "epoch": 0.28, "learning_rate": 0.00016929340552445282, "loss": 1.1266, "step": 1512 }, { "epoch": 0.28, "learning_rate": 0.00016925035515524996, "loss": 1.2338, "step": 1513 }, { "epoch": 0.28, "learning_rate": 0.00016920728011117007, "loss": 1.1524, "step": 1514 }, { "epoch": 0.28, "learning_rate": 0.00016916418040756134, "loss": 1.0511, "step": 1515 }, { "epoch": 0.28, "learning_rate": 0.0001691210560597808, "loss": 1.1321, "step": 1516 }, { "epoch": 0.28, "learning_rate": 0.00016907790708319434, "loss": 1.2242, "step": 1517 }, { "epoch": 0.28, "learning_rate": 0.00016903473349317652, "loss": 1.1471, "step": 1518 }, { "epoch": 0.28, "learning_rate": 0.00016899153530511063, "loss": 1.0687, "step": 1519 }, { "epoch": 0.28, "learning_rate": 0.00016894831253438884, "loss": 1.1843, "step": 1520 }, { "epoch": 0.28, "learning_rate": 0.00016890506519641203, "loss": 1.1126, "step": 1521 }, { "epoch": 0.28, "learning_rate": 0.00016886179330658985, "loss": 1.1287, "step": 1522 }, { "epoch": 0.28, "learning_rate": 0.00016881849688034066, "loss": 1.0347, "step": 1523 }, { "epoch": 0.28, "learning_rate": 0.0001687751759330916, "loss": 1.202, "step": 1524 }, { "epoch": 0.28, "learning_rate": 0.00016873183048027852, "loss": 1.1679, "step": 1525 }, { "epoch": 0.28, "learning_rate": 0.00016868846053734606, "loss": 1.0493, "step": 1526 }, { "epoch": 0.28, "learning_rate": 0.0001686450661197475, "loss": 1.0735, "step": 1527 }, { "epoch": 0.28, "learning_rate": 0.00016860164724294486, "loss": 1.1642, "step": 1528 }, { "epoch": 0.28, "learning_rate": 0.00016855820392240896, "loss": 1.1517, "step": 1529 }, { "epoch": 0.28, "learning_rate": 0.00016851473617361922, "loss": 1.2425, "step": 1530 }, { "epoch": 0.28, "learning_rate": 0.00016847124401206384, "loss": 1.2307, "step": 1531 }, { "epoch": 0.28, "learning_rate": 0.00016842772745323967, "loss": 1.1832, "step": 1532 }, { "epoch": 0.28, "learning_rate": 0.00016838418651265227, "loss": 1.1613, "step": 1533 }, { "epoch": 0.28, "learning_rate": 0.00016834062120581588, "loss": 1.2321, "step": 1534 }, { "epoch": 0.28, "learning_rate": 0.0001682970315482535, "loss": 1.091, "step": 1535 }, { "epoch": 0.28, "learning_rate": 0.00016825341755549665, "loss": 1.1468, "step": 1536 }, { "epoch": 0.28, "learning_rate": 0.00016820977924308567, "loss": 1.1185, "step": 1537 }, { "epoch": 0.28, "learning_rate": 0.00016816611662656946, "loss": 1.1511, "step": 1538 }, { "epoch": 0.28, "learning_rate": 0.00016812242972150568, "loss": 1.1703, "step": 1539 }, { "epoch": 0.28, "learning_rate": 0.0001680787185434606, "loss": 1.2275, "step": 1540 }, { "epoch": 0.28, "learning_rate": 0.00016803498310800902, "loss": 1.1708, "step": 1541 }, { "epoch": 0.28, "learning_rate": 0.0001679912234307346, "loss": 1.1545, "step": 1542 }, { "epoch": 0.28, "learning_rate": 0.0001679474395272295, "loss": 1.1245, "step": 1543 }, { "epoch": 0.28, "learning_rate": 0.00016790363141309448, "loss": 1.1461, "step": 1544 }, { "epoch": 0.28, "learning_rate": 0.0001678597991039391, "loss": 1.0713, "step": 1545 }, { "epoch": 0.28, "learning_rate": 0.00016781594261538132, "loss": 1.1283, "step": 1546 }, { "epoch": 0.29, "learning_rate": 0.0001677720619630479, "loss": 1.1759, "step": 1547 }, { "epoch": 0.29, "learning_rate": 0.00016772815716257412, "loss": 1.1512, "step": 1548 }, { "epoch": 0.29, "learning_rate": 0.00016768422822960387, "loss": 0.9502, "step": 1549 }, { "epoch": 0.29, "learning_rate": 0.00016764027517978961, "loss": 1.2033, "step": 1550 }, { "epoch": 0.29, "learning_rate": 0.00016759629802879248, "loss": 1.0922, "step": 1551 }, { "epoch": 0.29, "learning_rate": 0.00016755229679228217, "loss": 1.1013, "step": 1552 }, { "epoch": 0.29, "learning_rate": 0.00016750827148593692, "loss": 1.1056, "step": 1553 }, { "epoch": 0.29, "learning_rate": 0.00016746422212544353, "loss": 1.1196, "step": 1554 }, { "epoch": 0.29, "learning_rate": 0.00016742014872649745, "loss": 1.1808, "step": 1555 }, { "epoch": 0.29, "learning_rate": 0.00016737605130480264, "loss": 1.2778, "step": 1556 }, { "epoch": 0.29, "learning_rate": 0.00016733192987607165, "loss": 1.1944, "step": 1557 }, { "epoch": 0.29, "learning_rate": 0.00016728778445602554, "loss": 1.1686, "step": 1558 }, { "epoch": 0.29, "learning_rate": 0.00016724361506039397, "loss": 1.0821, "step": 1559 }, { "epoch": 0.29, "learning_rate": 0.00016719942170491508, "loss": 1.0698, "step": 1560 }, { "epoch": 0.29, "learning_rate": 0.00016715520440533564, "loss": 1.1304, "step": 1561 }, { "epoch": 0.29, "learning_rate": 0.00016711096317741087, "loss": 1.0284, "step": 1562 }, { "epoch": 0.29, "learning_rate": 0.00016706669803690449, "loss": 1.0902, "step": 1563 }, { "epoch": 0.29, "learning_rate": 0.00016702240899958888, "loss": 1.0714, "step": 1564 }, { "epoch": 0.29, "learning_rate": 0.00016697809608124482, "loss": 1.1288, "step": 1565 }, { "epoch": 0.29, "learning_rate": 0.0001669337592976616, "loss": 0.9726, "step": 1566 }, { "epoch": 0.29, "learning_rate": 0.00016688939866463706, "loss": 1.225, "step": 1567 }, { "epoch": 0.29, "learning_rate": 0.0001668450141979775, "loss": 1.1053, "step": 1568 }, { "epoch": 0.29, "learning_rate": 0.00016680060591349775, "loss": 1.2708, "step": 1569 }, { "epoch": 0.29, "learning_rate": 0.00016675617382702114, "loss": 1.1276, "step": 1570 }, { "epoch": 0.29, "learning_rate": 0.00016671171795437938, "loss": 1.0904, "step": 1571 }, { "epoch": 0.29, "learning_rate": 0.00016666723831141277, "loss": 1.0266, "step": 1572 }, { "epoch": 0.29, "learning_rate": 0.00016662273491397004, "loss": 1.2083, "step": 1573 }, { "epoch": 0.29, "learning_rate": 0.00016657820777790834, "loss": 0.9427, "step": 1574 }, { "epoch": 0.29, "learning_rate": 0.00016653365691909333, "loss": 1.1877, "step": 1575 }, { "epoch": 0.29, "learning_rate": 0.0001664890823533991, "loss": 1.0494, "step": 1576 }, { "epoch": 0.29, "learning_rate": 0.00016644448409670822, "loss": 0.9943, "step": 1577 }, { "epoch": 0.29, "learning_rate": 0.00016639986216491167, "loss": 1.064, "step": 1578 }, { "epoch": 0.29, "learning_rate": 0.00016635521657390885, "loss": 1.0884, "step": 1579 }, { "epoch": 0.29, "learning_rate": 0.00016631054733960764, "loss": 1.0635, "step": 1580 }, { "epoch": 0.29, "learning_rate": 0.0001662658544779243, "loss": 1.1092, "step": 1581 }, { "epoch": 0.29, "learning_rate": 0.00016622113800478355, "loss": 1.0571, "step": 1582 }, { "epoch": 0.29, "learning_rate": 0.00016617639793611847, "loss": 1.1242, "step": 1583 }, { "epoch": 0.29, "learning_rate": 0.00016613163428787063, "loss": 1.1433, "step": 1584 }, { "epoch": 0.29, "learning_rate": 0.00016608684707598986, "loss": 1.1537, "step": 1585 }, { "epoch": 0.29, "learning_rate": 0.00016604203631643454, "loss": 1.1668, "step": 1586 }, { "epoch": 0.29, "learning_rate": 0.00016599720202517136, "loss": 1.0825, "step": 1587 }, { "epoch": 0.29, "learning_rate": 0.0001659523442181754, "loss": 1.0482, "step": 1588 }, { "epoch": 0.29, "learning_rate": 0.00016590746291143013, "loss": 1.0754, "step": 1589 }, { "epoch": 0.29, "learning_rate": 0.0001658625581209274, "loss": 1.2639, "step": 1590 }, { "epoch": 0.29, "learning_rate": 0.00016581762986266744, "loss": 0.9764, "step": 1591 }, { "epoch": 0.29, "learning_rate": 0.00016577267815265877, "loss": 1.1082, "step": 1592 }, { "epoch": 0.29, "learning_rate": 0.00016572770300691837, "loss": 1.1214, "step": 1593 }, { "epoch": 0.29, "learning_rate": 0.0001656827044414715, "loss": 1.0094, "step": 1594 }, { "epoch": 0.29, "learning_rate": 0.00016563768247235174, "loss": 1.1866, "step": 1595 }, { "epoch": 0.29, "learning_rate": 0.0001655926371156011, "loss": 1.0673, "step": 1596 }, { "epoch": 0.29, "learning_rate": 0.0001655475683872699, "loss": 1.1022, "step": 1597 }, { "epoch": 0.29, "learning_rate": 0.00016550247630341667, "loss": 1.1754, "step": 1598 }, { "epoch": 0.29, "learning_rate": 0.00016545736088010844, "loss": 1.1106, "step": 1599 }, { "epoch": 0.29, "learning_rate": 0.00016541222213342044, "loss": 1.253, "step": 1600 }, { "epoch": 0.3, "learning_rate": 0.00016536706007943625, "loss": 1.2112, "step": 1601 }, { "epoch": 0.3, "learning_rate": 0.00016532187473424772, "loss": 1.0953, "step": 1602 }, { "epoch": 0.3, "learning_rate": 0.0001652766661139551, "loss": 1.0976, "step": 1603 }, { "epoch": 0.3, "learning_rate": 0.00016523143423466673, "loss": 0.9909, "step": 1604 }, { "epoch": 0.3, "learning_rate": 0.00016518617911249946, "loss": 1.206, "step": 1605 }, { "epoch": 0.3, "learning_rate": 0.00016514090076357834, "loss": 1.1539, "step": 1606 }, { "epoch": 0.3, "learning_rate": 0.0001650955992040366, "loss": 1.2416, "step": 1607 }, { "epoch": 0.3, "learning_rate": 0.00016505027445001594, "loss": 1.0632, "step": 1608 }, { "epoch": 0.3, "learning_rate": 0.0001650049265176661, "loss": 1.1039, "step": 1609 }, { "epoch": 0.3, "learning_rate": 0.0001649595554231452, "loss": 1.1229, "step": 1610 }, { "epoch": 0.3, "learning_rate": 0.00016491416118261966, "loss": 1.0625, "step": 1611 }, { "epoch": 0.3, "learning_rate": 0.00016486874381226407, "loss": 1.1701, "step": 1612 }, { "epoch": 0.3, "learning_rate": 0.00016482330332826125, "loss": 1.0572, "step": 1613 }, { "epoch": 0.3, "learning_rate": 0.0001647778397468023, "loss": 1.2117, "step": 1614 }, { "epoch": 0.3, "learning_rate": 0.00016473235308408653, "loss": 1.1299, "step": 1615 }, { "epoch": 0.3, "learning_rate": 0.00016468684335632148, "loss": 1.2136, "step": 1616 }, { "epoch": 0.3, "learning_rate": 0.00016464131057972292, "loss": 1.1894, "step": 1617 }, { "epoch": 0.3, "learning_rate": 0.0001645957547705148, "loss": 1.1061, "step": 1618 }, { "epoch": 0.3, "learning_rate": 0.00016455017594492932, "loss": 1.1087, "step": 1619 }, { "epoch": 0.3, "learning_rate": 0.00016450457411920684, "loss": 1.112, "step": 1620 }, { "epoch": 0.3, "learning_rate": 0.00016445894930959591, "loss": 1.1829, "step": 1621 }, { "epoch": 0.3, "learning_rate": 0.00016441330153235334, "loss": 1.1496, "step": 1622 }, { "epoch": 0.3, "learning_rate": 0.00016436763080374407, "loss": 1.1019, "step": 1623 }, { "epoch": 0.3, "learning_rate": 0.00016432193714004117, "loss": 1.1389, "step": 1624 }, { "epoch": 0.3, "learning_rate": 0.00016427622055752603, "loss": 1.11, "step": 1625 }, { "epoch": 0.3, "learning_rate": 0.00016423048107248802, "loss": 0.9988, "step": 1626 }, { "epoch": 0.3, "learning_rate": 0.00016418471870122483, "loss": 1.1202, "step": 1627 }, { "epoch": 0.3, "learning_rate": 0.00016413893346004215, "loss": 1.153, "step": 1628 }, { "epoch": 0.3, "learning_rate": 0.00016409312536525401, "loss": 1.1047, "step": 1629 }, { "epoch": 0.3, "learning_rate": 0.00016404729443318243, "loss": 1.0228, "step": 1630 }, { "epoch": 0.3, "learning_rate": 0.0001640014406801576, "loss": 1.0847, "step": 1631 }, { "epoch": 0.3, "learning_rate": 0.0001639555641225179, "loss": 1.1058, "step": 1632 }, { "epoch": 0.3, "learning_rate": 0.00016390966477660977, "loss": 1.1048, "step": 1633 }, { "epoch": 0.3, "learning_rate": 0.00016386374265878778, "loss": 1.1536, "step": 1634 }, { "epoch": 0.3, "learning_rate": 0.00016381779778541468, "loss": 1.1205, "step": 1635 }, { "epoch": 0.3, "learning_rate": 0.0001637718301728612, "loss": 1.1035, "step": 1636 }, { "epoch": 0.3, "learning_rate": 0.0001637258398375063, "loss": 1.0896, "step": 1637 }, { "epoch": 0.3, "learning_rate": 0.00016367982679573698, "loss": 1.0613, "step": 1638 }, { "epoch": 0.3, "learning_rate": 0.0001636337910639483, "loss": 1.1278, "step": 1639 }, { "epoch": 0.3, "learning_rate": 0.0001635877326585435, "loss": 1.1298, "step": 1640 }, { "epoch": 0.3, "learning_rate": 0.0001635416515959338, "loss": 1.0206, "step": 1641 }, { "epoch": 0.3, "learning_rate": 0.00016349554789253851, "loss": 1.1401, "step": 1642 }, { "epoch": 0.3, "learning_rate": 0.00016344942156478506, "loss": 1.0577, "step": 1643 }, { "epoch": 0.3, "learning_rate": 0.00016340327262910894, "loss": 1.0823, "step": 1644 }, { "epoch": 0.3, "learning_rate": 0.0001633571011019536, "loss": 1.1719, "step": 1645 }, { "epoch": 0.3, "learning_rate": 0.00016331090699977062, "loss": 1.3157, "step": 1646 }, { "epoch": 0.3, "learning_rate": 0.0001632646903390197, "loss": 1.0233, "step": 1647 }, { "epoch": 0.3, "learning_rate": 0.00016321845113616835, "loss": 1.0767, "step": 1648 }, { "epoch": 0.3, "learning_rate": 0.00016317218940769233, "loss": 1.262, "step": 1649 }, { "epoch": 0.3, "learning_rate": 0.0001631259051700753, "loss": 1.1998, "step": 1650 }, { "epoch": 0.3, "learning_rate": 0.00016307959843980905, "loss": 1.0256, "step": 1651 }, { "epoch": 0.3, "learning_rate": 0.0001630332692333933, "loss": 1.0979, "step": 1652 }, { "epoch": 0.3, "learning_rate": 0.00016298691756733573, "loss": 1.2192, "step": 1653 }, { "epoch": 0.3, "learning_rate": 0.00016294054345815216, "loss": 1.1645, "step": 1654 }, { "epoch": 0.3, "learning_rate": 0.00016289414692236635, "loss": 1.1847, "step": 1655 }, { "epoch": 0.31, "learning_rate": 0.00016284772797651, "loss": 1.1516, "step": 1656 }, { "epoch": 0.31, "learning_rate": 0.00016280128663712285, "loss": 1.0559, "step": 1657 }, { "epoch": 0.31, "learning_rate": 0.0001627548229207526, "loss": 1.1608, "step": 1658 }, { "epoch": 0.31, "learning_rate": 0.00016270833684395492, "loss": 1.0376, "step": 1659 }, { "epoch": 0.31, "learning_rate": 0.00016266182842329349, "loss": 1.0618, "step": 1660 }, { "epoch": 0.31, "learning_rate": 0.00016261529767533985, "loss": 1.1043, "step": 1661 }, { "epoch": 0.31, "learning_rate": 0.00016256874461667358, "loss": 1.1171, "step": 1662 }, { "epoch": 0.31, "learning_rate": 0.00016252216926388228, "loss": 1.1866, "step": 1663 }, { "epoch": 0.31, "learning_rate": 0.00016247557163356127, "loss": 1.2633, "step": 1664 }, { "epoch": 0.31, "learning_rate": 0.000162428951742314, "loss": 1.1605, "step": 1665 }, { "epoch": 0.31, "learning_rate": 0.00016238230960675183, "loss": 1.0475, "step": 1666 }, { "epoch": 0.31, "learning_rate": 0.000162335645243494, "loss": 1.197, "step": 1667 }, { "epoch": 0.31, "learning_rate": 0.0001622889586691676, "loss": 1.1622, "step": 1668 }, { "epoch": 0.31, "learning_rate": 0.00016224224990040782, "loss": 1.0498, "step": 1669 }, { "epoch": 0.31, "learning_rate": 0.00016219551895385758, "loss": 1.0729, "step": 1670 }, { "epoch": 0.31, "learning_rate": 0.00016214876584616782, "loss": 1.2183, "step": 1671 }, { "epoch": 0.31, "learning_rate": 0.0001621019905939973, "loss": 1.0453, "step": 1672 }, { "epoch": 0.31, "learning_rate": 0.00016205519321401267, "loss": 1.1741, "step": 1673 }, { "epoch": 0.31, "learning_rate": 0.00016200837372288863, "loss": 1.1927, "step": 1674 }, { "epoch": 0.31, "learning_rate": 0.00016196153213730746, "loss": 1.0963, "step": 1675 }, { "epoch": 0.31, "learning_rate": 0.00016191466847395954, "loss": 1.1538, "step": 1676 }, { "epoch": 0.31, "learning_rate": 0.0001618677827495431, "loss": 1.1899, "step": 1677 }, { "epoch": 0.31, "learning_rate": 0.00016182087498076416, "loss": 1.1077, "step": 1678 }, { "epoch": 0.31, "learning_rate": 0.00016177394518433654, "loss": 1.0136, "step": 1679 }, { "epoch": 0.31, "learning_rate": 0.0001617269933769821, "loss": 1.1585, "step": 1680 }, { "epoch": 0.31, "learning_rate": 0.00016168001957543037, "loss": 1.017, "step": 1681 }, { "epoch": 0.31, "learning_rate": 0.0001616330237964188, "loss": 1.0575, "step": 1682 }, { "epoch": 0.31, "learning_rate": 0.00016158600605669263, "loss": 1.2953, "step": 1683 }, { "epoch": 0.31, "learning_rate": 0.00016153896637300497, "loss": 1.0954, "step": 1684 }, { "epoch": 0.31, "learning_rate": 0.00016149190476211667, "loss": 1.0756, "step": 1685 }, { "epoch": 0.31, "learning_rate": 0.00016144482124079649, "loss": 1.1154, "step": 1686 }, { "epoch": 0.31, "learning_rate": 0.00016139771582582093, "loss": 1.1345, "step": 1687 }, { "epoch": 0.31, "learning_rate": 0.00016135058853397434, "loss": 1.1875, "step": 1688 }, { "epoch": 0.31, "learning_rate": 0.00016130343938204882, "loss": 1.0864, "step": 1689 }, { "epoch": 0.31, "learning_rate": 0.00016125626838684425, "loss": 1.1815, "step": 1690 }, { "epoch": 0.31, "learning_rate": 0.00016120907556516837, "loss": 1.1379, "step": 1691 }, { "epoch": 0.31, "learning_rate": 0.00016116186093383662, "loss": 1.1617, "step": 1692 }, { "epoch": 0.31, "learning_rate": 0.00016111462450967226, "loss": 1.2622, "step": 1693 }, { "epoch": 0.31, "learning_rate": 0.00016106736630950628, "loss": 1.173, "step": 1694 }, { "epoch": 0.31, "learning_rate": 0.0001610200863501774, "loss": 1.0628, "step": 1695 }, { "epoch": 0.31, "learning_rate": 0.0001609727846485322, "loss": 1.1041, "step": 1696 }, { "epoch": 0.31, "learning_rate": 0.0001609254612214249, "loss": 1.2239, "step": 1697 }, { "epoch": 0.31, "learning_rate": 0.00016087811608571752, "loss": 1.1919, "step": 1698 }, { "epoch": 0.31, "learning_rate": 0.0001608307492582798, "loss": 1.061, "step": 1699 }, { "epoch": 0.31, "learning_rate": 0.00016078336075598925, "loss": 1.0981, "step": 1700 }, { "epoch": 0.31, "learning_rate": 0.00016073595059573097, "loss": 1.1565, "step": 1701 }, { "epoch": 0.31, "learning_rate": 0.00016068851879439793, "loss": 1.0114, "step": 1702 }, { "epoch": 0.31, "learning_rate": 0.00016064106536889074, "loss": 1.0849, "step": 1703 }, { "epoch": 0.31, "learning_rate": 0.00016059359033611772, "loss": 1.1335, "step": 1704 }, { "epoch": 0.31, "learning_rate": 0.00016054609371299487, "loss": 1.1138, "step": 1705 }, { "epoch": 0.31, "learning_rate": 0.00016049857551644596, "loss": 1.0939, "step": 1706 }, { "epoch": 0.31, "learning_rate": 0.00016045103576340237, "loss": 1.0178, "step": 1707 }, { "epoch": 0.31, "learning_rate": 0.00016040347447080313, "loss": 1.0686, "step": 1708 }, { "epoch": 0.31, "learning_rate": 0.00016035589165559508, "loss": 1.1193, "step": 1709 }, { "epoch": 0.32, "learning_rate": 0.00016030828733473257, "loss": 1.078, "step": 1710 }, { "epoch": 0.32, "learning_rate": 0.00016026066152517775, "loss": 1.1417, "step": 1711 }, { "epoch": 0.32, "learning_rate": 0.00016021301424390036, "loss": 1.1554, "step": 1712 }, { "epoch": 0.32, "learning_rate": 0.0001601653455078778, "loss": 1.2413, "step": 1713 }, { "epoch": 0.32, "learning_rate": 0.00016011765533409505, "loss": 1.1857, "step": 1714 }, { "epoch": 0.32, "learning_rate": 0.0001600699437395449, "loss": 1.1559, "step": 1715 }, { "epoch": 0.32, "learning_rate": 0.0001600222107412275, "loss": 1.1916, "step": 1716 }, { "epoch": 0.32, "learning_rate": 0.00015997445635615099, "loss": 1.2695, "step": 1717 }, { "epoch": 0.32, "learning_rate": 0.0001599266806013308, "loss": 1.1569, "step": 1718 }, { "epoch": 0.32, "learning_rate": 0.0001598788834937901, "loss": 1.0289, "step": 1719 }, { "epoch": 0.32, "learning_rate": 0.00015983106505055976, "loss": 1.1454, "step": 1720 }, { "epoch": 0.32, "learning_rate": 0.00015978322528867808, "loss": 1.1219, "step": 1721 }, { "epoch": 0.32, "learning_rate": 0.0001597353642251911, "loss": 1.1634, "step": 1722 }, { "epoch": 0.32, "learning_rate": 0.00015968748187715235, "loss": 1.189, "step": 1723 }, { "epoch": 0.32, "learning_rate": 0.000159639578261623, "loss": 1.0833, "step": 1724 }, { "epoch": 0.32, "learning_rate": 0.00015959165339567174, "loss": 1.1915, "step": 1725 }, { "epoch": 0.32, "learning_rate": 0.00015954370729637498, "loss": 1.1384, "step": 1726 }, { "epoch": 0.32, "learning_rate": 0.0001594957399808165, "loss": 1.1074, "step": 1727 }, { "epoch": 0.32, "learning_rate": 0.00015944775146608777, "loss": 1.0866, "step": 1728 }, { "epoch": 0.32, "learning_rate": 0.00015939974176928777, "loss": 1.0694, "step": 1729 }, { "epoch": 0.32, "learning_rate": 0.00015935171090752302, "loss": 1.1496, "step": 1730 }, { "epoch": 0.32, "learning_rate": 0.00015930365889790758, "loss": 1.1811, "step": 1731 }, { "epoch": 0.32, "learning_rate": 0.0001592555857575631, "loss": 1.1192, "step": 1732 }, { "epoch": 0.32, "learning_rate": 0.00015920749150361867, "loss": 1.1363, "step": 1733 }, { "epoch": 0.32, "learning_rate": 0.00015915937615321098, "loss": 1.0747, "step": 1734 }, { "epoch": 0.32, "learning_rate": 0.00015911123972348417, "loss": 1.0347, "step": 1735 }, { "epoch": 0.32, "learning_rate": 0.00015906308223158998, "loss": 1.0385, "step": 1736 }, { "epoch": 0.32, "learning_rate": 0.00015901490369468762, "loss": 1.1429, "step": 1737 }, { "epoch": 0.32, "learning_rate": 0.00015896670412994366, "loss": 1.1539, "step": 1738 }, { "epoch": 0.32, "learning_rate": 0.0001589184835545324, "loss": 1.1181, "step": 1739 }, { "epoch": 0.32, "learning_rate": 0.00015887024198563552, "loss": 1.1746, "step": 1740 }, { "epoch": 0.32, "learning_rate": 0.00015882197944044207, "loss": 1.1808, "step": 1741 }, { "epoch": 0.32, "learning_rate": 0.00015877369593614874, "loss": 1.0374, "step": 1742 }, { "epoch": 0.32, "learning_rate": 0.00015872539148995964, "loss": 1.2183, "step": 1743 }, { "epoch": 0.32, "learning_rate": 0.00015867706611908625, "loss": 1.23, "step": 1744 }, { "epoch": 0.32, "learning_rate": 0.0001586287198407477, "loss": 1.0478, "step": 1745 }, { "epoch": 0.32, "learning_rate": 0.00015858035267217032, "loss": 1.0459, "step": 1746 }, { "epoch": 0.32, "learning_rate": 0.0001585319646305881, "loss": 0.9512, "step": 1747 }, { "epoch": 0.32, "learning_rate": 0.00015848355573324234, "loss": 1.0838, "step": 1748 }, { "epoch": 0.32, "learning_rate": 0.0001584351259973818, "loss": 1.1183, "step": 1749 }, { "epoch": 0.32, "learning_rate": 0.00015838667544026275, "loss": 1.0153, "step": 1750 }, { "epoch": 0.32, "learning_rate": 0.00015833820407914874, "loss": 1.1337, "step": 1751 }, { "epoch": 0.32, "learning_rate": 0.0001582897119313108, "loss": 1.1365, "step": 1752 }, { "epoch": 0.32, "learning_rate": 0.0001582411990140274, "loss": 1.1545, "step": 1753 }, { "epoch": 0.32, "learning_rate": 0.00015819266534458434, "loss": 1.0872, "step": 1754 }, { "epoch": 0.32, "learning_rate": 0.0001581441109402749, "loss": 1.3105, "step": 1755 }, { "epoch": 0.32, "learning_rate": 0.00015809553581839966, "loss": 1.1293, "step": 1756 }, { "epoch": 0.32, "learning_rate": 0.0001580469399962666, "loss": 0.9969, "step": 1757 }, { "epoch": 0.32, "learning_rate": 0.00015799832349119112, "loss": 1.0131, "step": 1758 }, { "epoch": 0.32, "learning_rate": 0.000157949686320496, "loss": 1.066, "step": 1759 }, { "epoch": 0.32, "learning_rate": 0.00015790102850151127, "loss": 1.1068, "step": 1760 }, { "epoch": 0.32, "learning_rate": 0.00015785235005157447, "loss": 1.06, "step": 1761 }, { "epoch": 0.32, "learning_rate": 0.00015780365098803042, "loss": 1.0526, "step": 1762 }, { "epoch": 0.32, "learning_rate": 0.0001577549313282312, "loss": 1.05, "step": 1763 }, { "epoch": 0.33, "learning_rate": 0.00015770619108953642, "loss": 1.1532, "step": 1764 }, { "epoch": 0.33, "learning_rate": 0.00015765743028931285, "loss": 1.0622, "step": 1765 }, { "epoch": 0.33, "learning_rate": 0.0001576086489449346, "loss": 1.104, "step": 1766 }, { "epoch": 0.33, "learning_rate": 0.00015755984707378326, "loss": 1.2554, "step": 1767 }, { "epoch": 0.33, "learning_rate": 0.00015751102469324755, "loss": 0.9946, "step": 1768 }, { "epoch": 0.33, "learning_rate": 0.00015746218182072357, "loss": 1.0868, "step": 1769 }, { "epoch": 0.33, "learning_rate": 0.0001574133184736148, "loss": 0.9782, "step": 1770 }, { "epoch": 0.33, "learning_rate": 0.0001573644346693319, "loss": 1.1432, "step": 1771 }, { "epoch": 0.33, "learning_rate": 0.00015731553042529277, "loss": 1.1656, "step": 1772 }, { "epoch": 0.33, "learning_rate": 0.0001572666057589228, "loss": 1.0021, "step": 1773 }, { "epoch": 0.33, "learning_rate": 0.00015721766068765448, "loss": 1.205, "step": 1774 }, { "epoch": 0.33, "learning_rate": 0.00015716869522892766, "loss": 1.0893, "step": 1775 }, { "epoch": 0.33, "learning_rate": 0.0001571197094001894, "loss": 1.2197, "step": 1776 }, { "epoch": 0.33, "learning_rate": 0.00015707070321889404, "loss": 1.1019, "step": 1777 }, { "epoch": 0.33, "learning_rate": 0.0001570216767025032, "loss": 1.1473, "step": 1778 }, { "epoch": 0.33, "learning_rate": 0.00015697262986848564, "loss": 1.1782, "step": 1779 }, { "epoch": 0.33, "learning_rate": 0.0001569235627343175, "loss": 1.1586, "step": 1780 }, { "epoch": 0.33, "learning_rate": 0.00015687447531748214, "loss": 1.1508, "step": 1781 }, { "epoch": 0.33, "learning_rate": 0.00015682536763547003, "loss": 1.1356, "step": 1782 }, { "epoch": 0.33, "learning_rate": 0.00015677623970577893, "loss": 1.0877, "step": 1783 }, { "epoch": 0.33, "learning_rate": 0.0001567270915459138, "loss": 1.2133, "step": 1784 }, { "epoch": 0.33, "learning_rate": 0.0001566779231733869, "loss": 1.2109, "step": 1785 }, { "epoch": 0.33, "learning_rate": 0.00015662873460571753, "loss": 1.0441, "step": 1786 }, { "epoch": 0.33, "learning_rate": 0.0001565795258604323, "loss": 1.2442, "step": 1787 }, { "epoch": 0.33, "learning_rate": 0.000156530296955065, "loss": 1.0387, "step": 1788 }, { "epoch": 0.33, "learning_rate": 0.00015648104790715658, "loss": 1.299, "step": 1789 }, { "epoch": 0.33, "learning_rate": 0.00015643177873425507, "loss": 1.1702, "step": 1790 }, { "epoch": 0.33, "learning_rate": 0.00015638248945391592, "loss": 1.1624, "step": 1791 }, { "epoch": 0.33, "learning_rate": 0.00015633318008370151, "loss": 1.0872, "step": 1792 }, { "epoch": 0.33, "learning_rate": 0.00015628385064118147, "loss": 1.2172, "step": 1793 }, { "epoch": 0.33, "learning_rate": 0.0001562345011439326, "loss": 1.1674, "step": 1794 }, { "epoch": 0.33, "learning_rate": 0.00015618513160953882, "loss": 1.0997, "step": 1795 }, { "epoch": 0.33, "learning_rate": 0.00015613574205559116, "loss": 1.1239, "step": 1796 }, { "epoch": 0.33, "learning_rate": 0.00015608633249968783, "loss": 1.0679, "step": 1797 }, { "epoch": 0.33, "learning_rate": 0.00015603690295943413, "loss": 1.035, "step": 1798 }, { "epoch": 0.33, "learning_rate": 0.00015598745345244257, "loss": 1.0696, "step": 1799 }, { "epoch": 0.33, "learning_rate": 0.00015593798399633264, "loss": 1.0933, "step": 1800 }, { "epoch": 0.33, "learning_rate": 0.00015588849460873103, "loss": 1.2306, "step": 1801 }, { "epoch": 0.33, "learning_rate": 0.00015583898530727148, "loss": 0.9633, "step": 1802 }, { "epoch": 0.33, "learning_rate": 0.0001557894561095949, "loss": 0.9473, "step": 1803 }, { "epoch": 0.33, "learning_rate": 0.00015573990703334918, "loss": 1.2104, "step": 1804 }, { "epoch": 0.33, "learning_rate": 0.0001556903380961894, "loss": 1.1492, "step": 1805 }, { "epoch": 0.33, "learning_rate": 0.00015564074931577766, "loss": 1.0566, "step": 1806 }, { "epoch": 0.33, "learning_rate": 0.0001555911407097831, "loss": 1.0445, "step": 1807 }, { "epoch": 0.33, "learning_rate": 0.00015554151229588205, "loss": 1.0848, "step": 1808 }, { "epoch": 0.33, "learning_rate": 0.00015549186409175776, "loss": 1.1179, "step": 1809 }, { "epoch": 0.33, "learning_rate": 0.00015544219611510058, "loss": 1.1619, "step": 1810 }, { "epoch": 0.33, "learning_rate": 0.00015539250838360794, "loss": 1.2628, "step": 1811 }, { "epoch": 0.33, "learning_rate": 0.0001553428009149842, "loss": 1.1167, "step": 1812 }, { "epoch": 0.33, "learning_rate": 0.00015529307372694095, "loss": 1.183, "step": 1813 }, { "epoch": 0.33, "learning_rate": 0.00015524332683719663, "loss": 1.1672, "step": 1814 }, { "epoch": 0.33, "learning_rate": 0.0001551935602634767, "loss": 1.1823, "step": 1815 }, { "epoch": 0.33, "learning_rate": 0.00015514377402351377, "loss": 1.1223, "step": 1816 }, { "epoch": 0.33, "learning_rate": 0.0001550939681350474, "loss": 0.93, "step": 1817 }, { "epoch": 0.34, "learning_rate": 0.000155044142615824, "loss": 1.1862, "step": 1818 }, { "epoch": 0.34, "learning_rate": 0.00015499429748359726, "loss": 1.1107, "step": 1819 }, { "epoch": 0.34, "learning_rate": 0.00015494443275612763, "loss": 1.1582, "step": 1820 }, { "epoch": 0.34, "learning_rate": 0.00015489454845118257, "loss": 1.0126, "step": 1821 }, { "epoch": 0.34, "learning_rate": 0.00015484464458653664, "loss": 1.0228, "step": 1822 }, { "epoch": 0.34, "learning_rate": 0.00015479472117997122, "loss": 1.1545, "step": 1823 }, { "epoch": 0.34, "learning_rate": 0.0001547447782492748, "loss": 1.3054, "step": 1824 }, { "epoch": 0.34, "learning_rate": 0.00015469481581224272, "loss": 1.1207, "step": 1825 }, { "epoch": 0.34, "learning_rate": 0.00015464483388667724, "loss": 1.0655, "step": 1826 }, { "epoch": 0.34, "learning_rate": 0.0001545948324903877, "loss": 1.0955, "step": 1827 }, { "epoch": 0.34, "learning_rate": 0.00015454481164119027, "loss": 0.9898, "step": 1828 }, { "epoch": 0.34, "learning_rate": 0.00015449477135690812, "loss": 1.1803, "step": 1829 }, { "epoch": 0.34, "learning_rate": 0.00015444471165537124, "loss": 1.102, "step": 1830 }, { "epoch": 0.34, "learning_rate": 0.00015439463255441667, "loss": 1.1106, "step": 1831 }, { "epoch": 0.34, "learning_rate": 0.00015434453407188825, "loss": 1.1499, "step": 1832 }, { "epoch": 0.34, "learning_rate": 0.00015429441622563683, "loss": 1.1034, "step": 1833 }, { "epoch": 0.34, "learning_rate": 0.00015424427903352003, "loss": 1.0744, "step": 1834 }, { "epoch": 0.34, "learning_rate": 0.0001541941225134025, "loss": 0.9993, "step": 1835 }, { "epoch": 0.34, "learning_rate": 0.0001541439466831557, "loss": 1.2144, "step": 1836 }, { "epoch": 0.34, "learning_rate": 0.00015409375156065798, "loss": 1.0872, "step": 1837 }, { "epoch": 0.34, "learning_rate": 0.00015404353716379454, "loss": 1.1095, "step": 1838 }, { "epoch": 0.34, "learning_rate": 0.00015399330351045754, "loss": 1.372, "step": 1839 }, { "epoch": 0.34, "learning_rate": 0.00015394305061854592, "loss": 1.2603, "step": 1840 }, { "epoch": 0.34, "learning_rate": 0.00015389277850596546, "loss": 1.2164, "step": 1841 }, { "epoch": 0.34, "learning_rate": 0.00015384248719062883, "loss": 1.1368, "step": 1842 }, { "epoch": 0.34, "learning_rate": 0.00015379217669045554, "loss": 1.1774, "step": 1843 }, { "epoch": 0.34, "learning_rate": 0.00015374184702337198, "loss": 1.0483, "step": 1844 }, { "epoch": 0.34, "learning_rate": 0.00015369149820731122, "loss": 1.1876, "step": 1845 }, { "epoch": 0.34, "learning_rate": 0.00015364113026021333, "loss": 1.1091, "step": 1846 }, { "epoch": 0.34, "learning_rate": 0.0001535907432000251, "loss": 1.2094, "step": 1847 }, { "epoch": 0.34, "learning_rate": 0.00015354033704470012, "loss": 1.1184, "step": 1848 }, { "epoch": 0.34, "learning_rate": 0.00015348991181219886, "loss": 1.0746, "step": 1849 }, { "epoch": 0.34, "learning_rate": 0.00015343946752048853, "loss": 0.9717, "step": 1850 }, { "epoch": 0.34, "learning_rate": 0.00015338900418754309, "loss": 1.0376, "step": 1851 }, { "epoch": 0.34, "learning_rate": 0.0001533385218313434, "loss": 1.266, "step": 1852 }, { "epoch": 0.34, "learning_rate": 0.000153288020469877, "loss": 1.0979, "step": 1853 }, { "epoch": 0.34, "learning_rate": 0.00015323750012113827, "loss": 1.0834, "step": 1854 }, { "epoch": 0.34, "learning_rate": 0.00015318696080312828, "loss": 1.1798, "step": 1855 }, { "epoch": 0.34, "learning_rate": 0.00015313640253385494, "loss": 1.2633, "step": 1856 }, { "epoch": 0.34, "learning_rate": 0.00015308582533133283, "loss": 1.2035, "step": 1857 }, { "epoch": 0.34, "learning_rate": 0.00015303522921358335, "loss": 1.0961, "step": 1858 }, { "epoch": 0.34, "learning_rate": 0.0001529846141986346, "loss": 1.0881, "step": 1859 }, { "epoch": 0.34, "learning_rate": 0.00015293398030452143, "loss": 1.1825, "step": 1860 }, { "epoch": 0.34, "learning_rate": 0.00015288332754928542, "loss": 0.9918, "step": 1861 }, { "epoch": 0.34, "learning_rate": 0.00015283265595097481, "loss": 1.1433, "step": 1862 }, { "epoch": 0.34, "learning_rate": 0.0001527819655276447, "loss": 0.9664, "step": 1863 }, { "epoch": 0.34, "learning_rate": 0.0001527312562973567, "loss": 1.025, "step": 1864 }, { "epoch": 0.34, "learning_rate": 0.00015268052827817927, "loss": 1.205, "step": 1865 }, { "epoch": 0.34, "learning_rate": 0.00015262978148818755, "loss": 1.1829, "step": 1866 }, { "epoch": 0.34, "learning_rate": 0.00015257901594546326, "loss": 1.1833, "step": 1867 }, { "epoch": 0.34, "learning_rate": 0.0001525282316680949, "loss": 1.0703, "step": 1868 }, { "epoch": 0.34, "learning_rate": 0.00015247742867417768, "loss": 1.131, "step": 1869 }, { "epoch": 0.34, "learning_rate": 0.00015242660698181336, "loss": 1.099, "step": 1870 }, { "epoch": 0.34, "learning_rate": 0.00015237576660911042, "loss": 1.0313, "step": 1871 }, { "epoch": 0.34, "learning_rate": 0.00015232490757418406, "loss": 1.1617, "step": 1872 }, { "epoch": 0.35, "learning_rate": 0.00015227402989515608, "loss": 1.1903, "step": 1873 }, { "epoch": 0.35, "learning_rate": 0.00015222313359015483, "loss": 1.1493, "step": 1874 }, { "epoch": 0.35, "learning_rate": 0.00015217221867731544, "loss": 0.9945, "step": 1875 }, { "epoch": 0.35, "learning_rate": 0.00015212128517477956, "loss": 1.0876, "step": 1876 }, { "epoch": 0.35, "learning_rate": 0.00015207033310069557, "loss": 1.2332, "step": 1877 }, { "epoch": 0.35, "learning_rate": 0.00015201936247321841, "loss": 1.0252, "step": 1878 }, { "epoch": 0.35, "learning_rate": 0.00015196837331050959, "loss": 1.0791, "step": 1879 }, { "epoch": 0.35, "learning_rate": 0.0001519173656307373, "loss": 1.1238, "step": 1880 }, { "epoch": 0.35, "learning_rate": 0.0001518663394520763, "loss": 1.0482, "step": 1881 }, { "epoch": 0.35, "learning_rate": 0.00015181529479270787, "loss": 1.2131, "step": 1882 }, { "epoch": 0.35, "learning_rate": 0.00015176423167082007, "loss": 1.0052, "step": 1883 }, { "epoch": 0.35, "learning_rate": 0.00015171315010460728, "loss": 1.1433, "step": 1884 }, { "epoch": 0.35, "learning_rate": 0.00015166205011227061, "loss": 1.1047, "step": 1885 }, { "epoch": 0.35, "learning_rate": 0.00015161093171201778, "loss": 1.278, "step": 1886 }, { "epoch": 0.35, "learning_rate": 0.0001515597949220629, "loss": 1.271, "step": 1887 }, { "epoch": 0.35, "learning_rate": 0.0001515086397606268, "loss": 1.0238, "step": 1888 }, { "epoch": 0.35, "learning_rate": 0.0001514574662459368, "loss": 1.1586, "step": 1889 }, { "epoch": 0.35, "learning_rate": 0.00015140627439622667, "loss": 1.112, "step": 1890 }, { "epoch": 0.35, "learning_rate": 0.0001513550642297368, "loss": 1.0883, "step": 1891 }, { "epoch": 0.35, "learning_rate": 0.00015130383576471415, "loss": 1.1201, "step": 1892 }, { "epoch": 0.35, "learning_rate": 0.00015125258901941207, "loss": 1.2324, "step": 1893 }, { "epoch": 0.35, "learning_rate": 0.00015120132401209055, "loss": 1.0532, "step": 1894 }, { "epoch": 0.35, "learning_rate": 0.00015115004076101608, "loss": 0.9708, "step": 1895 }, { "epoch": 0.35, "learning_rate": 0.00015109873928446148, "loss": 0.9788, "step": 1896 }, { "epoch": 0.35, "learning_rate": 0.00015104741960070625, "loss": 1.1381, "step": 1897 }, { "epoch": 0.35, "learning_rate": 0.00015099608172803634, "loss": 1.0847, "step": 1898 }, { "epoch": 0.35, "learning_rate": 0.00015094472568474415, "loss": 1.1362, "step": 1899 }, { "epoch": 0.35, "learning_rate": 0.0001508933514891285, "loss": 1.1509, "step": 1900 }, { "epoch": 0.35, "learning_rate": 0.00015084195915949486, "loss": 1.1063, "step": 1901 }, { "epoch": 0.35, "learning_rate": 0.00015079054871415494, "loss": 1.0895, "step": 1902 }, { "epoch": 0.35, "learning_rate": 0.00015073912017142703, "loss": 1.0888, "step": 1903 }, { "epoch": 0.35, "learning_rate": 0.00015068767354963584, "loss": 1.0672, "step": 1904 }, { "epoch": 0.35, "learning_rate": 0.00015063620886711254, "loss": 1.0521, "step": 1905 }, { "epoch": 0.35, "learning_rate": 0.00015058472614219473, "loss": 1.0922, "step": 1906 }, { "epoch": 0.35, "learning_rate": 0.00015053322539322644, "loss": 1.1362, "step": 1907 }, { "epoch": 0.35, "learning_rate": 0.00015048170663855802, "loss": 1.0942, "step": 1908 }, { "epoch": 0.35, "learning_rate": 0.00015043016989654647, "loss": 1.1677, "step": 1909 }, { "epoch": 0.35, "learning_rate": 0.00015037861518555493, "loss": 1.1284, "step": 1910 }, { "epoch": 0.35, "learning_rate": 0.00015032704252395315, "loss": 1.0595, "step": 1911 }, { "epoch": 0.35, "learning_rate": 0.00015027545193011717, "loss": 1.2428, "step": 1912 }, { "epoch": 0.35, "learning_rate": 0.00015022384342242945, "loss": 1.0471, "step": 1913 }, { "epoch": 0.35, "learning_rate": 0.0001501722170192788, "loss": 1.1869, "step": 1914 }, { "epoch": 0.35, "learning_rate": 0.00015012057273906048, "loss": 1.189, "step": 1915 }, { "epoch": 0.35, "learning_rate": 0.00015006891060017607, "loss": 1.1232, "step": 1916 }, { "epoch": 0.35, "learning_rate": 0.00015001723062103348, "loss": 1.0378, "step": 1917 }, { "epoch": 0.35, "learning_rate": 0.00014996553282004704, "loss": 1.0002, "step": 1918 }, { "epoch": 0.35, "learning_rate": 0.0001499138172156374, "loss": 1.0332, "step": 1919 }, { "epoch": 0.35, "learning_rate": 0.00014986208382623156, "loss": 1.1433, "step": 1920 }, { "epoch": 0.35, "learning_rate": 0.00014981033267026284, "loss": 1.1308, "step": 1921 }, { "epoch": 0.35, "learning_rate": 0.00014975856376617093, "loss": 1.1142, "step": 1922 }, { "epoch": 0.35, "learning_rate": 0.0001497067771324018, "loss": 1.1349, "step": 1923 }, { "epoch": 0.35, "learning_rate": 0.0001496549727874078, "loss": 1.1479, "step": 1924 }, { "epoch": 0.35, "learning_rate": 0.0001496031507496475, "loss": 1.1595, "step": 1925 }, { "epoch": 0.35, "learning_rate": 0.00014955131103758582, "loss": 1.0329, "step": 1926 }, { "epoch": 0.36, "learning_rate": 0.000149499453669694, "loss": 1.1821, "step": 1927 }, { "epoch": 0.36, "learning_rate": 0.00014944757866444956, "loss": 1.1012, "step": 1928 }, { "epoch": 0.36, "learning_rate": 0.0001493956860403363, "loss": 1.1233, "step": 1929 }, { "epoch": 0.36, "learning_rate": 0.00014934377581584424, "loss": 1.1281, "step": 1930 }, { "epoch": 0.36, "learning_rate": 0.00014929184800946976, "loss": 1.0847, "step": 1931 }, { "epoch": 0.36, "learning_rate": 0.0001492399026397155, "loss": 1.1407, "step": 1932 }, { "epoch": 0.36, "learning_rate": 0.00014918793972509026, "loss": 1.2242, "step": 1933 }, { "epoch": 0.36, "learning_rate": 0.0001491359592841092, "loss": 1.1155, "step": 1934 }, { "epoch": 0.36, "learning_rate": 0.00014908396133529373, "loss": 1.1632, "step": 1935 }, { "epoch": 0.36, "learning_rate": 0.00014903194589717134, "loss": 1.2467, "step": 1936 }, { "epoch": 0.36, "learning_rate": 0.00014897991298827595, "loss": 1.0904, "step": 1937 }, { "epoch": 0.36, "learning_rate": 0.00014892786262714756, "loss": 1.1917, "step": 1938 }, { "epoch": 0.36, "learning_rate": 0.0001488757948323325, "loss": 1.2044, "step": 1939 }, { "epoch": 0.36, "learning_rate": 0.00014882370962238323, "loss": 1.0371, "step": 1940 }, { "epoch": 0.36, "learning_rate": 0.00014877160701585847, "loss": 1.0899, "step": 1941 }, { "epoch": 0.36, "learning_rate": 0.00014871948703132305, "loss": 1.1576, "step": 1942 }, { "epoch": 0.36, "learning_rate": 0.00014866734968734815, "loss": 1.0228, "step": 1943 }, { "epoch": 0.36, "learning_rate": 0.00014861519500251096, "loss": 1.1251, "step": 1944 }, { "epoch": 0.36, "learning_rate": 0.00014856302299539494, "loss": 1.0608, "step": 1945 }, { "epoch": 0.36, "learning_rate": 0.0001485108336845898, "loss": 1.0086, "step": 1946 }, { "epoch": 0.36, "learning_rate": 0.0001484586270886912, "loss": 1.2011, "step": 1947 }, { "epoch": 0.36, "learning_rate": 0.00014840640322630113, "loss": 1.1605, "step": 1948 }, { "epoch": 0.36, "learning_rate": 0.0001483541621160277, "loss": 1.0319, "step": 1949 }, { "epoch": 0.36, "learning_rate": 0.00014830190377648517, "loss": 1.1399, "step": 1950 }, { "epoch": 0.36, "learning_rate": 0.0001482496282262939, "loss": 1.0714, "step": 1951 }, { "epoch": 0.36, "learning_rate": 0.00014819733548408038, "loss": 1.0102, "step": 1952 }, { "epoch": 0.36, "learning_rate": 0.00014814502556847732, "loss": 1.0841, "step": 1953 }, { "epoch": 0.36, "learning_rate": 0.00014809269849812344, "loss": 0.9905, "step": 1954 }, { "epoch": 0.36, "learning_rate": 0.00014804035429166358, "loss": 1.0418, "step": 1955 }, { "epoch": 0.36, "learning_rate": 0.00014798799296774877, "loss": 1.0093, "step": 1956 }, { "epoch": 0.36, "learning_rate": 0.0001479356145450361, "loss": 1.0132, "step": 1957 }, { "epoch": 0.36, "learning_rate": 0.00014788321904218865, "loss": 1.1207, "step": 1958 }, { "epoch": 0.36, "learning_rate": 0.00014783080647787575, "loss": 1.0049, "step": 1959 }, { "epoch": 0.36, "learning_rate": 0.00014777837687077273, "loss": 1.0536, "step": 1960 }, { "epoch": 0.36, "learning_rate": 0.00014772593023956097, "loss": 1.1634, "step": 1961 }, { "epoch": 0.36, "learning_rate": 0.00014767346660292797, "loss": 1.0799, "step": 1962 }, { "epoch": 0.36, "learning_rate": 0.00014762098597956725, "loss": 1.0463, "step": 1963 }, { "epoch": 0.36, "learning_rate": 0.00014756848838817836, "loss": 1.1692, "step": 1964 }, { "epoch": 0.36, "learning_rate": 0.00014751597384746702, "loss": 1.2748, "step": 1965 }, { "epoch": 0.36, "learning_rate": 0.00014746344237614482, "loss": 1.1197, "step": 1966 }, { "epoch": 0.36, "learning_rate": 0.00014741089399292952, "loss": 0.9392, "step": 1967 }, { "epoch": 0.36, "learning_rate": 0.0001473583287165448, "loss": 1.0307, "step": 1968 }, { "epoch": 0.36, "learning_rate": 0.00014730574656572043, "loss": 1.127, "step": 1969 }, { "epoch": 0.36, "learning_rate": 0.00014725314755919218, "loss": 1.0488, "step": 1970 }, { "epoch": 0.36, "learning_rate": 0.0001472005317157018, "loss": 1.0954, "step": 1971 }, { "epoch": 0.36, "learning_rate": 0.00014714789905399708, "loss": 1.1949, "step": 1972 }, { "epoch": 0.36, "learning_rate": 0.00014709524959283175, "loss": 1.0672, "step": 1973 }, { "epoch": 0.36, "learning_rate": 0.00014704258335096557, "loss": 1.1574, "step": 1974 }, { "epoch": 0.36, "learning_rate": 0.00014698990034716424, "loss": 1.2398, "step": 1975 }, { "epoch": 0.36, "learning_rate": 0.00014693720060019954, "loss": 1.1366, "step": 1976 }, { "epoch": 0.36, "learning_rate": 0.000146884484128849, "loss": 1.1929, "step": 1977 }, { "epoch": 0.36, "learning_rate": 0.00014683175095189633, "loss": 0.9773, "step": 1978 }, { "epoch": 0.36, "learning_rate": 0.00014677900108813112, "loss": 0.9611, "step": 1979 }, { "epoch": 0.36, "learning_rate": 0.0001467262345563488, "loss": 1.0296, "step": 1980 }, { "epoch": 0.37, "learning_rate": 0.00014667345137535094, "loss": 0.9679, "step": 1981 }, { "epoch": 0.37, "learning_rate": 0.00014662065156394484, "loss": 1.1741, "step": 1982 }, { "epoch": 0.37, "learning_rate": 0.00014656783514094385, "loss": 1.0755, "step": 1983 }, { "epoch": 0.37, "learning_rate": 0.00014651500212516723, "loss": 1.1751, "step": 1984 }, { "epoch": 0.37, "learning_rate": 0.00014646215253544007, "loss": 0.9348, "step": 1985 }, { "epoch": 0.37, "learning_rate": 0.0001464092863905935, "loss": 1.0493, "step": 1986 }, { "epoch": 0.37, "learning_rate": 0.0001463564037094644, "loss": 1.0477, "step": 1987 }, { "epoch": 0.37, "learning_rate": 0.00014630350451089562, "loss": 1.262, "step": 1988 }, { "epoch": 0.37, "learning_rate": 0.00014625058881373595, "loss": 1.2439, "step": 1989 }, { "epoch": 0.37, "learning_rate": 0.00014619765663683996, "loss": 1.1934, "step": 1990 }, { "epoch": 0.37, "learning_rate": 0.0001461447079990681, "loss": 1.1298, "step": 1991 }, { "epoch": 0.37, "learning_rate": 0.0001460917429192868, "loss": 1.0951, "step": 1992 }, { "epoch": 0.37, "learning_rate": 0.0001460387614163682, "loss": 1.0536, "step": 1993 }, { "epoch": 0.37, "learning_rate": 0.00014598576350919034, "loss": 1.231, "step": 1994 }, { "epoch": 0.37, "learning_rate": 0.00014593274921663717, "loss": 1.1123, "step": 1995 }, { "epoch": 0.37, "learning_rate": 0.0001458797185575984, "loss": 1.1122, "step": 1996 }, { "epoch": 0.37, "learning_rate": 0.0001458266715509696, "loss": 1.0509, "step": 1997 }, { "epoch": 0.37, "learning_rate": 0.0001457736082156522, "loss": 1.0682, "step": 1998 }, { "epoch": 0.37, "learning_rate": 0.00014572052857055333, "loss": 1.175, "step": 1999 }, { "epoch": 0.37, "learning_rate": 0.0001456674326345861, "loss": 1.1064, "step": 2000 }, { "epoch": 0.37, "learning_rate": 0.00014561432042666935, "loss": 1.049, "step": 2001 }, { "epoch": 0.37, "learning_rate": 0.00014556119196572763, "loss": 0.9718, "step": 2002 }, { "epoch": 0.37, "learning_rate": 0.00014550804727069143, "loss": 1.1257, "step": 2003 }, { "epoch": 0.37, "learning_rate": 0.0001454548863604969, "loss": 1.1471, "step": 2004 }, { "epoch": 0.37, "learning_rate": 0.00014540170925408607, "loss": 1.1593, "step": 2005 }, { "epoch": 0.37, "learning_rate": 0.00014534851597040665, "loss": 1.2186, "step": 2006 }, { "epoch": 0.37, "learning_rate": 0.00014529530652841217, "loss": 1.1696, "step": 2007 }, { "epoch": 0.37, "learning_rate": 0.00014524208094706192, "loss": 1.0447, "step": 2008 }, { "epoch": 0.37, "learning_rate": 0.0001451888392453209, "loss": 1.1368, "step": 2009 }, { "epoch": 0.37, "learning_rate": 0.0001451355814421599, "loss": 1.1789, "step": 2010 }, { "epoch": 0.37, "learning_rate": 0.00014508230755655538, "loss": 1.1475, "step": 2011 }, { "epoch": 0.37, "learning_rate": 0.00014502901760748965, "loss": 1.0393, "step": 2012 }, { "epoch": 0.37, "learning_rate": 0.00014497571161395061, "loss": 1.1644, "step": 2013 }, { "epoch": 0.37, "learning_rate": 0.000144922389594932, "loss": 1.1219, "step": 2014 }, { "epoch": 0.37, "learning_rate": 0.0001448690515694331, "loss": 1.0809, "step": 2015 }, { "epoch": 0.37, "learning_rate": 0.0001448156975564591, "loss": 1.0896, "step": 2016 }, { "epoch": 0.37, "learning_rate": 0.00014476232757502076, "loss": 1.1586, "step": 2017 }, { "epoch": 0.37, "learning_rate": 0.0001447089416441345, "loss": 1.1945, "step": 2018 }, { "epoch": 0.37, "learning_rate": 0.00014465553978282257, "loss": 1.1993, "step": 2019 }, { "epoch": 0.37, "learning_rate": 0.00014460212201011276, "loss": 1.0292, "step": 2020 }, { "epoch": 0.37, "learning_rate": 0.00014454868834503853, "loss": 1.1593, "step": 2021 }, { "epoch": 0.37, "learning_rate": 0.00014449523880663914, "loss": 1.1005, "step": 2022 }, { "epoch": 0.37, "learning_rate": 0.00014444177341395935, "loss": 0.9938, "step": 2023 }, { "epoch": 0.37, "learning_rate": 0.00014438829218604962, "loss": 1.0274, "step": 2024 }, { "epoch": 0.37, "learning_rate": 0.00014433479514196616, "loss": 1.0154, "step": 2025 }, { "epoch": 0.37, "learning_rate": 0.0001442812823007706, "loss": 1.0725, "step": 2026 }, { "epoch": 0.37, "learning_rate": 0.00014422775368153038, "loss": 1.1067, "step": 2027 }, { "epoch": 0.37, "learning_rate": 0.00014417420930331852, "loss": 1.2531, "step": 2028 }, { "epoch": 0.37, "learning_rate": 0.00014412064918521358, "loss": 1.1407, "step": 2029 }, { "epoch": 0.37, "learning_rate": 0.00014406707334629982, "loss": 1.1453, "step": 2030 }, { "epoch": 0.37, "learning_rate": 0.0001440134818056671, "loss": 1.1687, "step": 2031 }, { "epoch": 0.37, "learning_rate": 0.0001439598745824108, "loss": 1.0439, "step": 2032 }, { "epoch": 0.37, "learning_rate": 0.00014390625169563188, "loss": 1.0992, "step": 2033 }, { "epoch": 0.37, "learning_rate": 0.000143852613164437, "loss": 1.1441, "step": 2034 }, { "epoch": 0.37, "learning_rate": 0.00014379895900793832, "loss": 1.0701, "step": 2035 }, { "epoch": 0.38, "learning_rate": 0.00014374528924525354, "loss": 1.0195, "step": 2036 }, { "epoch": 0.38, "learning_rate": 0.000143691603895506, "loss": 0.9798, "step": 2037 }, { "epoch": 0.38, "learning_rate": 0.0001436379029778245, "loss": 1.1374, "step": 2038 }, { "epoch": 0.38, "learning_rate": 0.0001435841865113434, "loss": 1.1109, "step": 2039 }, { "epoch": 0.38, "learning_rate": 0.00014353045451520277, "loss": 1.0831, "step": 2040 }, { "epoch": 0.38, "learning_rate": 0.00014347670700854797, "loss": 1.1469, "step": 2041 }, { "epoch": 0.38, "learning_rate": 0.00014342294401052997, "loss": 1.0829, "step": 2042 }, { "epoch": 0.38, "learning_rate": 0.00014336916554030538, "loss": 1.124, "step": 2043 }, { "epoch": 0.38, "learning_rate": 0.0001433153716170361, "loss": 1.083, "step": 2044 }, { "epoch": 0.38, "learning_rate": 0.0001432615622598898, "loss": 1.2464, "step": 2045 }, { "epoch": 0.38, "learning_rate": 0.00014320773748803943, "loss": 1.068, "step": 2046 }, { "epoch": 0.38, "learning_rate": 0.00014315389732066352, "loss": 1.1755, "step": 2047 }, { "epoch": 0.38, "learning_rate": 0.00014310004177694611, "loss": 1.0832, "step": 2048 }, { "epoch": 0.38, "learning_rate": 0.0001430461708760767, "loss": 1.1371, "step": 2049 }, { "epoch": 0.38, "learning_rate": 0.00014299228463725017, "loss": 1.0222, "step": 2050 }, { "epoch": 0.38, "learning_rate": 0.00014293838307966703, "loss": 1.1106, "step": 2051 }, { "epoch": 0.38, "learning_rate": 0.00014288446622253312, "loss": 1.1772, "step": 2052 }, { "epoch": 0.38, "learning_rate": 0.0001428305340850598, "loss": 1.2387, "step": 2053 }, { "epoch": 0.38, "learning_rate": 0.00014277658668646382, "loss": 1.1106, "step": 2054 }, { "epoch": 0.38, "learning_rate": 0.00014272262404596746, "loss": 0.9711, "step": 2055 }, { "epoch": 0.38, "learning_rate": 0.00014266864618279828, "loss": 1.0931, "step": 2056 }, { "epoch": 0.38, "learning_rate": 0.0001426146531161894, "loss": 0.9573, "step": 2057 }, { "epoch": 0.38, "learning_rate": 0.00014256064486537935, "loss": 1.0644, "step": 2058 }, { "epoch": 0.38, "learning_rate": 0.00014250662144961197, "loss": 1.0962, "step": 2059 }, { "epoch": 0.38, "learning_rate": 0.00014245258288813655, "loss": 1.3016, "step": 2060 }, { "epoch": 0.38, "learning_rate": 0.00014239852920020787, "loss": 1.0784, "step": 2061 }, { "epoch": 0.38, "learning_rate": 0.00014234446040508597, "loss": 1.0913, "step": 2062 }, { "epoch": 0.38, "learning_rate": 0.00014229037652203628, "loss": 1.1124, "step": 2063 }, { "epoch": 0.38, "learning_rate": 0.00014223627757032977, "loss": 1.2437, "step": 2064 }, { "epoch": 0.38, "learning_rate": 0.00014218216356924254, "loss": 1.0255, "step": 2065 }, { "epoch": 0.38, "learning_rate": 0.00014212803453805624, "loss": 1.0024, "step": 2066 }, { "epoch": 0.38, "learning_rate": 0.00014207389049605777, "loss": 1.0569, "step": 2067 }, { "epoch": 0.38, "learning_rate": 0.00014201973146253945, "loss": 1.0942, "step": 2068 }, { "epoch": 0.38, "learning_rate": 0.00014196555745679888, "loss": 1.234, "step": 2069 }, { "epoch": 0.38, "learning_rate": 0.000141911368498139, "loss": 1.0601, "step": 2070 }, { "epoch": 0.38, "learning_rate": 0.00014185716460586814, "loss": 1.1266, "step": 2071 }, { "epoch": 0.38, "learning_rate": 0.00014180294579929992, "loss": 1.1474, "step": 2072 }, { "epoch": 0.38, "learning_rate": 0.0001417487120977532, "loss": 1.1278, "step": 2073 }, { "epoch": 0.38, "learning_rate": 0.00014169446352055228, "loss": 1.0265, "step": 2074 }, { "epoch": 0.38, "learning_rate": 0.00014164020008702665, "loss": 0.9987, "step": 2075 }, { "epoch": 0.38, "learning_rate": 0.00014158592181651115, "loss": 1.1179, "step": 2076 }, { "epoch": 0.38, "learning_rate": 0.0001415316287283459, "loss": 1.1688, "step": 2077 }, { "epoch": 0.38, "learning_rate": 0.0001414773208418763, "loss": 1.2869, "step": 2078 }, { "epoch": 0.38, "learning_rate": 0.00014142299817645297, "loss": 1.0145, "step": 2079 }, { "epoch": 0.38, "learning_rate": 0.0001413686607514319, "loss": 1.0989, "step": 2080 }, { "epoch": 0.38, "learning_rate": 0.00014131430858617424, "loss": 1.1505, "step": 2081 }, { "epoch": 0.38, "learning_rate": 0.00014125994170004644, "loss": 1.1531, "step": 2082 }, { "epoch": 0.38, "learning_rate": 0.00014120556011242022, "loss": 1.0556, "step": 2083 }, { "epoch": 0.38, "learning_rate": 0.00014115116384267243, "loss": 1.0396, "step": 2084 }, { "epoch": 0.38, "learning_rate": 0.0001410967529101853, "loss": 1.2427, "step": 2085 }, { "epoch": 0.38, "learning_rate": 0.00014104232733434617, "loss": 1.0223, "step": 2086 }, { "epoch": 0.38, "learning_rate": 0.00014098788713454768, "loss": 1.1618, "step": 2087 }, { "epoch": 0.38, "learning_rate": 0.0001409334323301876, "loss": 1.2263, "step": 2088 }, { "epoch": 0.38, "learning_rate": 0.00014087896294066892, "loss": 1.1024, "step": 2089 }, { "epoch": 0.39, "learning_rate": 0.00014082447898539993, "loss": 1.0658, "step": 2090 }, { "epoch": 0.39, "learning_rate": 0.00014076998048379402, "loss": 1.1509, "step": 2091 }, { "epoch": 0.39, "learning_rate": 0.0001407154674552697, "loss": 1.1659, "step": 2092 }, { "epoch": 0.39, "learning_rate": 0.00014066093991925077, "loss": 1.0947, "step": 2093 }, { "epoch": 0.39, "learning_rate": 0.00014060639789516619, "loss": 1.1595, "step": 2094 }, { "epoch": 0.39, "learning_rate": 0.00014055184140245, "loss": 1.0872, "step": 2095 }, { "epoch": 0.39, "learning_rate": 0.00014049727046054148, "loss": 1.1511, "step": 2096 }, { "epoch": 0.39, "learning_rate": 0.00014044268508888504, "loss": 1.3408, "step": 2097 }, { "epoch": 0.39, "learning_rate": 0.00014038808530693017, "loss": 1.2369, "step": 2098 }, { "epoch": 0.39, "learning_rate": 0.00014033347113413157, "loss": 1.1983, "step": 2099 }, { "epoch": 0.39, "learning_rate": 0.00014027884258994904, "loss": 1.1678, "step": 2100 }, { "epoch": 0.39, "learning_rate": 0.00014022419969384747, "loss": 1.0276, "step": 2101 }, { "epoch": 0.39, "learning_rate": 0.00014016954246529696, "loss": 1.2045, "step": 2102 }, { "epoch": 0.39, "learning_rate": 0.00014011487092377257, "loss": 1.0824, "step": 2103 }, { "epoch": 0.39, "learning_rate": 0.0001400601850887546, "loss": 1.1615, "step": 2104 }, { "epoch": 0.39, "learning_rate": 0.00014000548497972837, "loss": 1.0654, "step": 2105 }, { "epoch": 0.39, "learning_rate": 0.00013995077061618427, "loss": 1.1594, "step": 2106 }, { "epoch": 0.39, "learning_rate": 0.00013989604201761782, "loss": 1.123, "step": 2107 }, { "epoch": 0.39, "learning_rate": 0.0001398412992035296, "loss": 1.0737, "step": 2108 }, { "epoch": 0.39, "learning_rate": 0.00013978654219342523, "loss": 1.0598, "step": 2109 }, { "epoch": 0.39, "learning_rate": 0.00013973177100681542, "loss": 1.0244, "step": 2110 }, { "epoch": 0.39, "learning_rate": 0.0001396769856632159, "loss": 1.1173, "step": 2111 }, { "epoch": 0.39, "learning_rate": 0.00013962218618214747, "loss": 1.0863, "step": 2112 }, { "epoch": 0.39, "learning_rate": 0.00013956737258313595, "loss": 1.0013, "step": 2113 }, { "epoch": 0.39, "learning_rate": 0.00013951254488571218, "loss": 1.2538, "step": 2114 }, { "epoch": 0.39, "learning_rate": 0.00013945770310941209, "loss": 1.2037, "step": 2115 }, { "epoch": 0.39, "learning_rate": 0.00013940284727377656, "loss": 1.0286, "step": 2116 }, { "epoch": 0.39, "learning_rate": 0.00013934797739835144, "loss": 1.0649, "step": 2117 }, { "epoch": 0.39, "learning_rate": 0.0001392930935026877, "loss": 1.0628, "step": 2118 }, { "epoch": 0.39, "learning_rate": 0.00013923819560634126, "loss": 1.1608, "step": 2119 }, { "epoch": 0.39, "learning_rate": 0.00013918328372887294, "loss": 1.0145, "step": 2120 }, { "epoch": 0.39, "learning_rate": 0.0001391283578898487, "loss": 1.1519, "step": 2121 }, { "epoch": 0.39, "learning_rate": 0.00013907341810883933, "loss": 1.2104, "step": 2122 }, { "epoch": 0.39, "learning_rate": 0.00013901846440542064, "loss": 1.2135, "step": 2123 }, { "epoch": 0.39, "learning_rate": 0.0001389634967991735, "loss": 1.1382, "step": 2124 }, { "epoch": 0.39, "learning_rate": 0.00013890851530968355, "loss": 1.1351, "step": 2125 }, { "epoch": 0.39, "learning_rate": 0.00013885351995654148, "loss": 1.046, "step": 2126 }, { "epoch": 0.39, "learning_rate": 0.00013879851075934295, "loss": 1.1706, "step": 2127 }, { "epoch": 0.39, "learning_rate": 0.00013874348773768846, "loss": 1.1835, "step": 2128 }, { "epoch": 0.39, "learning_rate": 0.0001386884509111835, "loss": 1.0161, "step": 2129 }, { "epoch": 0.39, "learning_rate": 0.00013863340029943857, "loss": 1.1218, "step": 2130 }, { "epoch": 0.39, "learning_rate": 0.00013857833592206882, "loss": 1.0719, "step": 2131 }, { "epoch": 0.39, "learning_rate": 0.00013852325779869457, "loss": 1.0204, "step": 2132 }, { "epoch": 0.39, "learning_rate": 0.00013846816594894086, "loss": 1.1602, "step": 2133 }, { "epoch": 0.39, "learning_rate": 0.00013841306039243776, "loss": 1.0507, "step": 2134 }, { "epoch": 0.39, "learning_rate": 0.0001383579411488201, "loss": 1.0267, "step": 2135 }, { "epoch": 0.39, "learning_rate": 0.00013830280823772767, "loss": 1.01, "step": 2136 }, { "epoch": 0.39, "learning_rate": 0.0001382476616788051, "loss": 1.1666, "step": 2137 }, { "epoch": 0.39, "learning_rate": 0.0001381925014917019, "loss": 1.0796, "step": 2138 }, { "epoch": 0.39, "learning_rate": 0.00013813732769607238, "loss": 1.0532, "step": 2139 }, { "epoch": 0.39, "learning_rate": 0.0001380821403115758, "loss": 1.1733, "step": 2140 }, { "epoch": 0.39, "learning_rate": 0.00013802693935787618, "loss": 1.2117, "step": 2141 }, { "epoch": 0.39, "learning_rate": 0.00013797172485464237, "loss": 1.0698, "step": 2142 }, { "epoch": 0.39, "learning_rate": 0.0001379164968215481, "loss": 0.9445, "step": 2143 }, { "epoch": 0.4, "learning_rate": 0.00013786125527827188, "loss": 1.0617, "step": 2144 }, { "epoch": 0.4, "learning_rate": 0.00013780600024449702, "loss": 1.1813, "step": 2145 }, { "epoch": 0.4, "learning_rate": 0.0001377507317399118, "loss": 1.1934, "step": 2146 }, { "epoch": 0.4, "learning_rate": 0.000137695449784209, "loss": 1.0196, "step": 2147 }, { "epoch": 0.4, "learning_rate": 0.00013764015439708641, "loss": 1.1566, "step": 2148 }, { "epoch": 0.4, "learning_rate": 0.00013758484559824665, "loss": 0.8798, "step": 2149 }, { "epoch": 0.4, "learning_rate": 0.0001375295234073969, "loss": 1.1912, "step": 2150 }, { "epoch": 0.4, "learning_rate": 0.00013747418784424933, "loss": 1.1139, "step": 2151 }, { "epoch": 0.4, "learning_rate": 0.00013741883892852072, "loss": 0.9986, "step": 2152 }, { "epoch": 0.4, "learning_rate": 0.0001373634766799327, "loss": 1.1676, "step": 2153 }, { "epoch": 0.4, "learning_rate": 0.00013730810111821156, "loss": 1.0212, "step": 2154 }, { "epoch": 0.4, "learning_rate": 0.0001372527122630885, "loss": 0.992, "step": 2155 }, { "epoch": 0.4, "learning_rate": 0.00013719731013429924, "loss": 1.0496, "step": 2156 }, { "epoch": 0.4, "learning_rate": 0.00013714189475158442, "loss": 1.1261, "step": 2157 }, { "epoch": 0.4, "learning_rate": 0.00013708646613468925, "loss": 0.9894, "step": 2158 }, { "epoch": 0.4, "learning_rate": 0.0001370310243033638, "loss": 1.1177, "step": 2159 }, { "epoch": 0.4, "learning_rate": 0.00013697556927736275, "loss": 1.0744, "step": 2160 }, { "epoch": 0.4, "learning_rate": 0.00013692010107644546, "loss": 1.0729, "step": 2161 }, { "epoch": 0.4, "learning_rate": 0.00013686461972037609, "loss": 1.0896, "step": 2162 }, { "epoch": 0.4, "learning_rate": 0.00013680912522892342, "loss": 1.1156, "step": 2163 }, { "epoch": 0.4, "learning_rate": 0.00013675361762186086, "loss": 1.0383, "step": 2164 }, { "epoch": 0.4, "learning_rate": 0.00013669809691896668, "loss": 0.8603, "step": 2165 }, { "epoch": 0.4, "learning_rate": 0.00013664256314002354, "loss": 1.0517, "step": 2166 }, { "epoch": 0.4, "learning_rate": 0.00013658701630481898, "loss": 1.1169, "step": 2167 }, { "epoch": 0.4, "learning_rate": 0.00013653145643314514, "loss": 1.111, "step": 2168 }, { "epoch": 0.4, "learning_rate": 0.00013647588354479876, "loss": 1.0902, "step": 2169 }, { "epoch": 0.4, "learning_rate": 0.00013642029765958125, "loss": 1.0725, "step": 2170 }, { "epoch": 0.4, "learning_rate": 0.00013636469879729863, "loss": 1.1614, "step": 2171 }, { "epoch": 0.4, "learning_rate": 0.00013630908697776154, "loss": 1.288, "step": 2172 }, { "epoch": 0.4, "learning_rate": 0.00013625346222078533, "loss": 1.0718, "step": 2173 }, { "epoch": 0.4, "learning_rate": 0.0001361978245461898, "loss": 1.0644, "step": 2174 }, { "epoch": 0.4, "learning_rate": 0.00013614217397379946, "loss": 1.1828, "step": 2175 }, { "epoch": 0.4, "learning_rate": 0.00013608651052344342, "loss": 1.116, "step": 2176 }, { "epoch": 0.4, "learning_rate": 0.00013603083421495534, "loss": 1.0076, "step": 2177 }, { "epoch": 0.4, "learning_rate": 0.00013597514506817347, "loss": 1.176, "step": 2178 }, { "epoch": 0.4, "learning_rate": 0.00013591944310294065, "loss": 1.2904, "step": 2179 }, { "epoch": 0.4, "learning_rate": 0.00013586372833910423, "loss": 0.9739, "step": 2180 }, { "epoch": 0.4, "learning_rate": 0.0001358080007965162, "loss": 1.0625, "step": 2181 }, { "epoch": 0.4, "learning_rate": 0.00013575226049503312, "loss": 1.0457, "step": 2182 }, { "epoch": 0.4, "learning_rate": 0.00013569650745451592, "loss": 1.138, "step": 2183 }, { "epoch": 0.4, "learning_rate": 0.0001356407416948303, "loss": 1.0841, "step": 2184 }, { "epoch": 0.4, "learning_rate": 0.00013558496323584632, "loss": 1.0765, "step": 2185 }, { "epoch": 0.4, "learning_rate": 0.00013552917209743869, "loss": 1.1372, "step": 2186 }, { "epoch": 0.4, "learning_rate": 0.00013547336829948653, "loss": 1.1035, "step": 2187 }, { "epoch": 0.4, "learning_rate": 0.00013541755186187356, "loss": 1.1973, "step": 2188 }, { "epoch": 0.4, "learning_rate": 0.00013536172280448794, "loss": 1.1078, "step": 2189 }, { "epoch": 0.4, "learning_rate": 0.0001353058811472223, "loss": 1.1597, "step": 2190 }, { "epoch": 0.4, "learning_rate": 0.00013525002690997393, "loss": 0.964, "step": 2191 }, { "epoch": 0.4, "learning_rate": 0.00013519416011264435, "loss": 0.9475, "step": 2192 }, { "epoch": 0.4, "learning_rate": 0.0001351382807751398, "loss": 1.0705, "step": 2193 }, { "epoch": 0.4, "learning_rate": 0.0001350823889173708, "loss": 1.0737, "step": 2194 }, { "epoch": 0.4, "learning_rate": 0.00013502648455925243, "loss": 1.1121, "step": 2195 }, { "epoch": 0.4, "learning_rate": 0.00013497056772070418, "loss": 1.1648, "step": 2196 }, { "epoch": 0.4, "learning_rate": 0.00013491463842165008, "loss": 1.1471, "step": 2197 }, { "epoch": 0.41, "learning_rate": 0.0001348586966820184, "loss": 1.1237, "step": 2198 }, { "epoch": 0.41, "learning_rate": 0.00013480274252174206, "loss": 1.1834, "step": 2199 }, { "epoch": 0.41, "learning_rate": 0.00013474677596075836, "loss": 1.0819, "step": 2200 }, { "epoch": 0.41, "learning_rate": 0.00013469079701900887, "loss": 1.1108, "step": 2201 }, { "epoch": 0.41, "learning_rate": 0.0001346348057164397, "loss": 1.16, "step": 2202 }, { "epoch": 0.41, "learning_rate": 0.0001345788020730014, "loss": 1.0896, "step": 2203 }, { "epoch": 0.41, "learning_rate": 0.00013452278610864878, "loss": 1.251, "step": 2204 }, { "epoch": 0.41, "learning_rate": 0.00013446675784334116, "loss": 1.0292, "step": 2205 }, { "epoch": 0.41, "learning_rate": 0.00013441071729704225, "loss": 1.0816, "step": 2206 }, { "epoch": 0.41, "learning_rate": 0.00013435466448971997, "loss": 1.0875, "step": 2207 }, { "epoch": 0.41, "learning_rate": 0.0001342985994413468, "loss": 1.1057, "step": 2208 }, { "epoch": 0.41, "learning_rate": 0.0001342425221718995, "loss": 1.3622, "step": 2209 }, { "epoch": 0.41, "learning_rate": 0.00013418643270135915, "loss": 1.1988, "step": 2210 }, { "epoch": 0.41, "learning_rate": 0.00013413033104971126, "loss": 1.0612, "step": 2211 }, { "epoch": 0.41, "learning_rate": 0.00013407421723694567, "loss": 1.215, "step": 2212 }, { "epoch": 0.41, "learning_rate": 0.00013401809128305643, "loss": 1.1484, "step": 2213 }, { "epoch": 0.41, "learning_rate": 0.00013396195320804208, "loss": 1.1393, "step": 2214 }, { "epoch": 0.41, "learning_rate": 0.0001339058030319054, "loss": 1.1187, "step": 2215 }, { "epoch": 0.41, "learning_rate": 0.00013384964077465346, "loss": 1.2449, "step": 2216 }, { "epoch": 0.41, "learning_rate": 0.00013379346645629768, "loss": 1.0657, "step": 2217 }, { "epoch": 0.41, "learning_rate": 0.00013373728009685376, "loss": 1.1494, "step": 2218 }, { "epoch": 0.41, "learning_rate": 0.00013368108171634167, "loss": 1.2723, "step": 2219 }, { "epoch": 0.41, "learning_rate": 0.00013362487133478573, "loss": 1.2211, "step": 2220 }, { "epoch": 0.41, "learning_rate": 0.00013356864897221446, "loss": 1.0991, "step": 2221 }, { "epoch": 0.41, "learning_rate": 0.0001335124146486607, "loss": 1.0572, "step": 2222 }, { "epoch": 0.41, "learning_rate": 0.00013345616838416152, "loss": 1.0586, "step": 2223 }, { "epoch": 0.41, "learning_rate": 0.0001333999101987582, "loss": 1.0581, "step": 2224 }, { "epoch": 0.41, "learning_rate": 0.00013334364011249642, "loss": 0.9822, "step": 2225 }, { "epoch": 0.41, "learning_rate": 0.00013328735814542597, "loss": 1.1641, "step": 2226 }, { "epoch": 0.41, "learning_rate": 0.00013323106431760087, "loss": 1.2663, "step": 2227 }, { "epoch": 0.41, "learning_rate": 0.00013317475864907947, "loss": 0.9792, "step": 2228 }, { "epoch": 0.41, "learning_rate": 0.0001331184411599242, "loss": 1.1241, "step": 2229 }, { "epoch": 0.41, "learning_rate": 0.0001330621118702018, "loss": 1.1401, "step": 2230 }, { "epoch": 0.41, "learning_rate": 0.00013300577079998325, "loss": 1.107, "step": 2231 }, { "epoch": 0.41, "learning_rate": 0.0001329494179693436, "loss": 1.1001, "step": 2232 }, { "epoch": 0.41, "learning_rate": 0.00013289305339836215, "loss": 1.1966, "step": 2233 }, { "epoch": 0.41, "learning_rate": 0.00013283667710712243, "loss": 1.1118, "step": 2234 }, { "epoch": 0.41, "learning_rate": 0.0001327802891157121, "loss": 1.0544, "step": 2235 }, { "epoch": 0.41, "learning_rate": 0.00013272388944422298, "loss": 1.0099, "step": 2236 }, { "epoch": 0.41, "learning_rate": 0.0001326674781127511, "loss": 1.2911, "step": 2237 }, { "epoch": 0.41, "learning_rate": 0.00013261105514139656, "loss": 1.1267, "step": 2238 }, { "epoch": 0.41, "learning_rate": 0.00013255462055026369, "loss": 1.1014, "step": 2239 }, { "epoch": 0.41, "learning_rate": 0.00013249817435946092, "loss": 1.1383, "step": 2240 }, { "epoch": 0.41, "learning_rate": 0.00013244171658910084, "loss": 1.0347, "step": 2241 }, { "epoch": 0.41, "learning_rate": 0.00013238524725930014, "loss": 1.0669, "step": 2242 }, { "epoch": 0.41, "learning_rate": 0.00013232876639017964, "loss": 1.0736, "step": 2243 }, { "epoch": 0.41, "learning_rate": 0.00013227227400186426, "loss": 1.2003, "step": 2244 }, { "epoch": 0.41, "learning_rate": 0.00013221577011448304, "loss": 1.1759, "step": 2245 }, { "epoch": 0.41, "learning_rate": 0.0001321592547481691, "loss": 1.1288, "step": 2246 }, { "epoch": 0.41, "learning_rate": 0.00013210272792305968, "loss": 1.0791, "step": 2247 }, { "epoch": 0.41, "learning_rate": 0.00013204618965929608, "loss": 1.1416, "step": 2248 }, { "epoch": 0.41, "learning_rate": 0.00013198963997702366, "loss": 1.0905, "step": 2249 }, { "epoch": 0.41, "learning_rate": 0.00013193307889639193, "loss": 1.1015, "step": 2250 }, { "epoch": 0.41, "learning_rate": 0.0001318765064375543, "loss": 1.1297, "step": 2251 }, { "epoch": 0.41, "learning_rate": 0.0001318199226206684, "loss": 0.9098, "step": 2252 }, { "epoch": 0.42, "learning_rate": 0.00013176332746589586, "loss": 0.9851, "step": 2253 }, { "epoch": 0.42, "learning_rate": 0.00013170672099340226, "loss": 1.1369, "step": 2254 }, { "epoch": 0.42, "learning_rate": 0.00013165010322335733, "loss": 1.1591, "step": 2255 }, { "epoch": 0.42, "learning_rate": 0.0001315934741759348, "loss": 1.2614, "step": 2256 }, { "epoch": 0.42, "learning_rate": 0.00013153683387131233, "loss": 1.0529, "step": 2257 }, { "epoch": 0.42, "learning_rate": 0.00013148018232967171, "loss": 1.0684, "step": 2258 }, { "epoch": 0.42, "learning_rate": 0.0001314235195711987, "loss": 1.2117, "step": 2259 }, { "epoch": 0.42, "learning_rate": 0.00013136684561608296, "loss": 1.1993, "step": 2260 }, { "epoch": 0.42, "learning_rate": 0.00013131016048451832, "loss": 1.1845, "step": 2261 }, { "epoch": 0.42, "learning_rate": 0.0001312534641967024, "loss": 0.8912, "step": 2262 }, { "epoch": 0.42, "learning_rate": 0.00013119675677283695, "loss": 1.1396, "step": 2263 }, { "epoch": 0.42, "learning_rate": 0.0001311400382331276, "loss": 1.119, "step": 2264 }, { "epoch": 0.42, "learning_rate": 0.00013108330859778393, "loss": 1.0467, "step": 2265 }, { "epoch": 0.42, "learning_rate": 0.00013102656788701954, "loss": 1.0872, "step": 2266 }, { "epoch": 0.42, "learning_rate": 0.000130969816121052, "loss": 1.1802, "step": 2267 }, { "epoch": 0.42, "learning_rate": 0.00013091305332010267, "loss": 1.1302, "step": 2268 }, { "epoch": 0.42, "learning_rate": 0.00013085627950439697, "loss": 1.1481, "step": 2269 }, { "epoch": 0.42, "learning_rate": 0.00013079949469416423, "loss": 0.9951, "step": 2270 }, { "epoch": 0.42, "learning_rate": 0.00013074269890963767, "loss": 1.1103, "step": 2271 }, { "epoch": 0.42, "learning_rate": 0.00013068589217105441, "loss": 1.0434, "step": 2272 }, { "epoch": 0.42, "learning_rate": 0.00013062907449865548, "loss": 0.9354, "step": 2273 }, { "epoch": 0.42, "learning_rate": 0.00013057224591268588, "loss": 1.052, "step": 2274 }, { "epoch": 0.42, "learning_rate": 0.0001305154064333944, "loss": 1.1371, "step": 2275 }, { "epoch": 0.42, "learning_rate": 0.0001304585560810337, "loss": 1.1267, "step": 2276 }, { "epoch": 0.42, "learning_rate": 0.00013040169487586043, "loss": 1.1609, "step": 2277 }, { "epoch": 0.42, "learning_rate": 0.00013034482283813501, "loss": 1.1738, "step": 2278 }, { "epoch": 0.42, "learning_rate": 0.00013028793998812174, "loss": 1.0807, "step": 2279 }, { "epoch": 0.42, "learning_rate": 0.00013023104634608883, "loss": 1.0762, "step": 2280 }, { "epoch": 0.42, "learning_rate": 0.0001301741419323082, "loss": 1.0894, "step": 2281 }, { "epoch": 0.42, "learning_rate": 0.00013011722676705575, "loss": 1.1312, "step": 2282 }, { "epoch": 0.42, "learning_rate": 0.00013006030087061118, "loss": 1.0495, "step": 2283 }, { "epoch": 0.42, "learning_rate": 0.0001300033642632579, "loss": 1.157, "step": 2284 }, { "epoch": 0.42, "learning_rate": 0.00012994641696528332, "loss": 1.0955, "step": 2285 }, { "epoch": 0.42, "learning_rate": 0.00012988945899697853, "loss": 1.076, "step": 2286 }, { "epoch": 0.42, "learning_rate": 0.00012983249037863844, "loss": 1.1398, "step": 2287 }, { "epoch": 0.42, "learning_rate": 0.00012977551113056178, "loss": 0.9886, "step": 2288 }, { "epoch": 0.42, "learning_rate": 0.00012971852127305108, "loss": 1.072, "step": 2289 }, { "epoch": 0.42, "learning_rate": 0.0001296615208264126, "loss": 1.0663, "step": 2290 }, { "epoch": 0.42, "learning_rate": 0.00012960450981095643, "loss": 1.1249, "step": 2291 }, { "epoch": 0.42, "learning_rate": 0.00012954748824699634, "loss": 1.1455, "step": 2292 }, { "epoch": 0.42, "learning_rate": 0.00012949045615484996, "loss": 1.1779, "step": 2293 }, { "epoch": 0.42, "learning_rate": 0.00012943341355483865, "loss": 1.2061, "step": 2294 }, { "epoch": 0.42, "learning_rate": 0.00012937636046728748, "loss": 1.2008, "step": 2295 }, { "epoch": 0.42, "learning_rate": 0.0001293192969125252, "loss": 1.1078, "step": 2296 }, { "epoch": 0.42, "learning_rate": 0.0001292622229108845, "loss": 1.1787, "step": 2297 }, { "epoch": 0.42, "learning_rate": 0.00012920513848270148, "loss": 1.1882, "step": 2298 }, { "epoch": 0.42, "learning_rate": 0.00012914804364831623, "loss": 1.1264, "step": 2299 }, { "epoch": 0.42, "learning_rate": 0.00012909093842807247, "loss": 1.1767, "step": 2300 }, { "epoch": 0.42, "learning_rate": 0.0001290338228423175, "loss": 0.95, "step": 2301 }, { "epoch": 0.42, "learning_rate": 0.0001289766969114025, "loss": 1.1099, "step": 2302 }, { "epoch": 0.42, "learning_rate": 0.00012891956065568216, "loss": 1.1008, "step": 2303 }, { "epoch": 0.42, "learning_rate": 0.00012886241409551497, "loss": 1.1755, "step": 2304 }, { "epoch": 0.42, "learning_rate": 0.0001288052572512631, "loss": 1.2235, "step": 2305 }, { "epoch": 0.42, "learning_rate": 0.00012874809014329228, "loss": 1.0822, "step": 2306 }, { "epoch": 0.43, "learning_rate": 0.00012869091279197196, "loss": 1.1569, "step": 2307 }, { "epoch": 0.43, "learning_rate": 0.00012863372521767526, "loss": 0.971, "step": 2308 }, { "epoch": 0.43, "learning_rate": 0.00012857652744077892, "loss": 1.1384, "step": 2309 }, { "epoch": 0.43, "learning_rate": 0.00012851931948166327, "loss": 1.1972, "step": 2310 }, { "epoch": 0.43, "learning_rate": 0.00012846210136071236, "loss": 1.1149, "step": 2311 }, { "epoch": 0.43, "learning_rate": 0.0001284048730983138, "loss": 1.0019, "step": 2312 }, { "epoch": 0.43, "learning_rate": 0.0001283476347148588, "loss": 1.1165, "step": 2313 }, { "epoch": 0.43, "learning_rate": 0.00012829038623074222, "loss": 0.895, "step": 2314 }, { "epoch": 0.43, "learning_rate": 0.00012823312766636254, "loss": 1.0395, "step": 2315 }, { "epoch": 0.43, "learning_rate": 0.00012817585904212174, "loss": 1.0981, "step": 2316 }, { "epoch": 0.43, "learning_rate": 0.00012811858037842542, "loss": 1.115, "step": 2317 }, { "epoch": 0.43, "learning_rate": 0.00012806129169568283, "loss": 1.084, "step": 2318 }, { "epoch": 0.43, "learning_rate": 0.00012800399301430676, "loss": 1.055, "step": 2319 }, { "epoch": 0.43, "learning_rate": 0.00012794668435471345, "loss": 1.2665, "step": 2320 }, { "epoch": 0.43, "learning_rate": 0.00012788936573732281, "loss": 1.0661, "step": 2321 }, { "epoch": 0.43, "learning_rate": 0.0001278320371825583, "loss": 1.0815, "step": 2322 }, { "epoch": 0.43, "learning_rate": 0.0001277746987108469, "loss": 1.2374, "step": 2323 }, { "epoch": 0.43, "learning_rate": 0.0001277173503426191, "loss": 1.125, "step": 2324 }, { "epoch": 0.43, "learning_rate": 0.00012765999209830888, "loss": 1.0776, "step": 2325 }, { "epoch": 0.43, "learning_rate": 0.00012760262399835384, "loss": 1.1436, "step": 2326 }, { "epoch": 0.43, "learning_rate": 0.00012754524606319506, "loss": 1.0801, "step": 2327 }, { "epoch": 0.43, "learning_rate": 0.00012748785831327708, "loss": 1.2052, "step": 2328 }, { "epoch": 0.43, "learning_rate": 0.00012743046076904793, "loss": 1.1377, "step": 2329 }, { "epoch": 0.43, "learning_rate": 0.00012737305345095925, "loss": 1.0264, "step": 2330 }, { "epoch": 0.43, "learning_rate": 0.000127315636379466, "loss": 1.1464, "step": 2331 }, { "epoch": 0.43, "learning_rate": 0.0001272582095750267, "loss": 1.0857, "step": 2332 }, { "epoch": 0.43, "learning_rate": 0.00012720077305810337, "loss": 1.0307, "step": 2333 }, { "epoch": 0.43, "learning_rate": 0.00012714332684916145, "loss": 1.113, "step": 2334 }, { "epoch": 0.43, "learning_rate": 0.00012708587096866975, "loss": 1.0013, "step": 2335 }, { "epoch": 0.43, "learning_rate": 0.00012702840543710073, "loss": 1.1261, "step": 2336 }, { "epoch": 0.43, "learning_rate": 0.00012697093027493007, "loss": 1.1046, "step": 2337 }, { "epoch": 0.43, "learning_rate": 0.00012691344550263701, "loss": 1.0765, "step": 2338 }, { "epoch": 0.43, "learning_rate": 0.00012685595114070422, "loss": 1.1049, "step": 2339 }, { "epoch": 0.43, "learning_rate": 0.00012679844720961774, "loss": 1.1698, "step": 2340 }, { "epoch": 0.43, "learning_rate": 0.00012674093372986697, "loss": 1.3197, "step": 2341 }, { "epoch": 0.43, "learning_rate": 0.00012668341072194484, "loss": 1.0857, "step": 2342 }, { "epoch": 0.43, "learning_rate": 0.0001266258782063476, "loss": 1.0108, "step": 2343 }, { "epoch": 0.43, "learning_rate": 0.00012656833620357485, "loss": 1.0257, "step": 2344 }, { "epoch": 0.43, "learning_rate": 0.00012651078473412968, "loss": 1.0975, "step": 2345 }, { "epoch": 0.43, "learning_rate": 0.00012645322381851844, "loss": 1.1253, "step": 2346 }, { "epoch": 0.43, "learning_rate": 0.0001263956534772509, "loss": 1.1208, "step": 2347 }, { "epoch": 0.43, "learning_rate": 0.0001263380737308402, "loss": 1.1998, "step": 2348 }, { "epoch": 0.43, "learning_rate": 0.00012628048459980282, "loss": 1.1365, "step": 2349 }, { "epoch": 0.43, "learning_rate": 0.00012622288610465856, "loss": 1.155, "step": 2350 }, { "epoch": 0.43, "learning_rate": 0.00012616527826593058, "loss": 1.0693, "step": 2351 }, { "epoch": 0.43, "learning_rate": 0.0001261076611041454, "loss": 1.0889, "step": 2352 }, { "epoch": 0.43, "learning_rate": 0.00012605003463983272, "loss": 1.075, "step": 2353 }, { "epoch": 0.43, "learning_rate": 0.0001259923988935258, "loss": 1.2563, "step": 2354 }, { "epoch": 0.43, "learning_rate": 0.000125934753885761, "loss": 1.1541, "step": 2355 }, { "epoch": 0.43, "learning_rate": 0.00012587709963707804, "loss": 1.0392, "step": 2356 }, { "epoch": 0.43, "learning_rate": 0.00012581943616801996, "loss": 0.9799, "step": 2357 }, { "epoch": 0.43, "learning_rate": 0.00012576176349913303, "loss": 1.2093, "step": 2358 }, { "epoch": 0.43, "learning_rate": 0.00012570408165096689, "loss": 1.16, "step": 2359 }, { "epoch": 0.43, "learning_rate": 0.0001256463906440744, "loss": 1.1445, "step": 2360 }, { "epoch": 0.44, "learning_rate": 0.00012558869049901158, "loss": 1.0989, "step": 2361 }, { "epoch": 0.44, "learning_rate": 0.0001255309812363379, "loss": 1.0478, "step": 2362 }, { "epoch": 0.44, "learning_rate": 0.00012547326287661598, "loss": 1.1003, "step": 2363 }, { "epoch": 0.44, "learning_rate": 0.00012541553544041163, "loss": 1.1824, "step": 2364 }, { "epoch": 0.44, "learning_rate": 0.00012535779894829398, "loss": 1.2142, "step": 2365 }, { "epoch": 0.44, "learning_rate": 0.00012530005342083537, "loss": 1.1751, "step": 2366 }, { "epoch": 0.44, "learning_rate": 0.00012524229887861133, "loss": 1.1593, "step": 2367 }, { "epoch": 0.44, "learning_rate": 0.00012518453534220064, "loss": 1.1398, "step": 2368 }, { "epoch": 0.44, "learning_rate": 0.00012512676283218518, "loss": 1.0954, "step": 2369 }, { "epoch": 0.44, "learning_rate": 0.00012506898136915019, "loss": 1.1862, "step": 2370 }, { "epoch": 0.44, "learning_rate": 0.00012501119097368398, "loss": 1.1005, "step": 2371 }, { "epoch": 0.44, "learning_rate": 0.00012495339166637812, "loss": 1.1249, "step": 2372 }, { "epoch": 0.44, "learning_rate": 0.00012489558346782728, "loss": 1.1381, "step": 2373 }, { "epoch": 0.44, "learning_rate": 0.00012483776639862938, "loss": 1.1912, "step": 2374 }, { "epoch": 0.44, "learning_rate": 0.0001247799404793854, "loss": 0.9786, "step": 2375 }, { "epoch": 0.44, "learning_rate": 0.00012472210573069955, "loss": 1.1206, "step": 2376 }, { "epoch": 0.44, "learning_rate": 0.00012466426217317921, "loss": 1.3199, "step": 2377 }, { "epoch": 0.44, "learning_rate": 0.0001246064098274348, "loss": 1.0963, "step": 2378 }, { "epoch": 0.44, "learning_rate": 0.00012454854871407994, "loss": 1.2755, "step": 2379 }, { "epoch": 0.44, "learning_rate": 0.00012449067885373133, "loss": 1.0797, "step": 2380 }, { "epoch": 0.44, "learning_rate": 0.0001244328002670089, "loss": 1.0929, "step": 2381 }, { "epoch": 0.44, "learning_rate": 0.00012437491297453553, "loss": 1.1049, "step": 2382 }, { "epoch": 0.44, "learning_rate": 0.0001243170169969373, "loss": 1.0515, "step": 2383 }, { "epoch": 0.44, "learning_rate": 0.00012425911235484333, "loss": 1.0808, "step": 2384 }, { "epoch": 0.44, "learning_rate": 0.00012420119906888597, "loss": 1.2188, "step": 2385 }, { "epoch": 0.44, "learning_rate": 0.0001241432771597004, "loss": 1.1136, "step": 2386 }, { "epoch": 0.44, "learning_rate": 0.00012408534664792512, "loss": 1.1651, "step": 2387 }, { "epoch": 0.44, "learning_rate": 0.0001240274075542015, "loss": 1.0326, "step": 2388 }, { "epoch": 0.44, "learning_rate": 0.00012396945989917411, "loss": 1.1284, "step": 2389 }, { "epoch": 0.44, "learning_rate": 0.00012391150370349055, "loss": 1.1085, "step": 2390 }, { "epoch": 0.44, "learning_rate": 0.00012385353898780135, "loss": 1.1715, "step": 2391 }, { "epoch": 0.44, "learning_rate": 0.00012379556577276016, "loss": 1.1318, "step": 2392 }, { "epoch": 0.44, "learning_rate": 0.00012373758407902374, "loss": 1.0629, "step": 2393 }, { "epoch": 0.44, "learning_rate": 0.0001236795939272517, "loss": 1.1442, "step": 2394 }, { "epoch": 0.44, "learning_rate": 0.00012362159533810672, "loss": 1.0948, "step": 2395 }, { "epoch": 0.44, "learning_rate": 0.00012356358833225464, "loss": 1.0541, "step": 2396 }, { "epoch": 0.44, "learning_rate": 0.00012350557293036406, "loss": 1.0875, "step": 2397 }, { "epoch": 0.44, "learning_rate": 0.00012344754915310678, "loss": 0.9822, "step": 2398 }, { "epoch": 0.44, "learning_rate": 0.00012338951702115737, "loss": 1.2031, "step": 2399 }, { "epoch": 0.44, "learning_rate": 0.0001233314765551936, "loss": 0.987, "step": 2400 }, { "epoch": 0.44, "learning_rate": 0.00012327342777589606, "loss": 1.0359, "step": 2401 }, { "epoch": 0.44, "learning_rate": 0.00012321537070394836, "loss": 1.0591, "step": 2402 }, { "epoch": 0.44, "learning_rate": 0.00012315730536003702, "loss": 1.1836, "step": 2403 }, { "epoch": 0.44, "learning_rate": 0.0001230992317648516, "loss": 1.0706, "step": 2404 }, { "epoch": 0.44, "learning_rate": 0.00012304114993908448, "loss": 1.1776, "step": 2405 }, { "epoch": 0.44, "learning_rate": 0.00012298305990343108, "loss": 1.0886, "step": 2406 }, { "epoch": 0.44, "learning_rate": 0.00012292496167858966, "loss": 1.0796, "step": 2407 }, { "epoch": 0.44, "learning_rate": 0.00012286685528526146, "loss": 1.048, "step": 2408 }, { "epoch": 0.44, "learning_rate": 0.00012280874074415064, "loss": 1.1571, "step": 2409 }, { "epoch": 0.44, "learning_rate": 0.00012275061807596416, "loss": 1.0824, "step": 2410 }, { "epoch": 0.44, "learning_rate": 0.00012269248730141193, "loss": 1.0355, "step": 2411 }, { "epoch": 0.44, "learning_rate": 0.00012263434844120687, "loss": 1.0895, "step": 2412 }, { "epoch": 0.44, "learning_rate": 0.0001225762015160646, "loss": 1.1499, "step": 2413 }, { "epoch": 0.44, "learning_rate": 0.0001225180465467037, "loss": 1.2146, "step": 2414 }, { "epoch": 0.45, "learning_rate": 0.00012245988355384562, "loss": 1.0729, "step": 2415 }, { "epoch": 0.45, "learning_rate": 0.0001224017125582146, "loss": 1.0257, "step": 2416 }, { "epoch": 0.45, "learning_rate": 0.00012234353358053784, "loss": 1.1301, "step": 2417 }, { "epoch": 0.45, "learning_rate": 0.00012228534664154536, "loss": 1.1316, "step": 2418 }, { "epoch": 0.45, "learning_rate": 0.00012222715176196991, "loss": 1.065, "step": 2419 }, { "epoch": 0.45, "learning_rate": 0.0001221689489625472, "loss": 0.9693, "step": 2420 }, { "epoch": 0.45, "learning_rate": 0.00012211073826401566, "loss": 0.9748, "step": 2421 }, { "epoch": 0.45, "learning_rate": 0.00012205251968711657, "loss": 1.0449, "step": 2422 }, { "epoch": 0.45, "learning_rate": 0.00012199429325259412, "loss": 1.1211, "step": 2423 }, { "epoch": 0.45, "learning_rate": 0.00012193605898119514, "loss": 0.9598, "step": 2424 }, { "epoch": 0.45, "learning_rate": 0.00012187781689366933, "loss": 1.0285, "step": 2425 }, { "epoch": 0.45, "learning_rate": 0.00012181956701076918, "loss": 1.1806, "step": 2426 }, { "epoch": 0.45, "learning_rate": 0.00012176130935324991, "loss": 1.0715, "step": 2427 }, { "epoch": 0.45, "learning_rate": 0.0001217030439418696, "loss": 1.1763, "step": 2428 }, { "epoch": 0.45, "learning_rate": 0.00012164477079738901, "loss": 0.9499, "step": 2429 }, { "epoch": 0.45, "learning_rate": 0.00012158648994057167, "loss": 1.1704, "step": 2430 }, { "epoch": 0.45, "learning_rate": 0.00012152820139218389, "loss": 0.9716, "step": 2431 }, { "epoch": 0.45, "learning_rate": 0.00012146990517299466, "loss": 1.2877, "step": 2432 }, { "epoch": 0.45, "learning_rate": 0.00012141160130377578, "loss": 1.1016, "step": 2433 }, { "epoch": 0.45, "learning_rate": 0.00012135328980530176, "loss": 0.9014, "step": 2434 }, { "epoch": 0.45, "learning_rate": 0.00012129497069834977, "loss": 1.0053, "step": 2435 }, { "epoch": 0.45, "learning_rate": 0.00012123664400369972, "loss": 1.1807, "step": 2436 }, { "epoch": 0.45, "learning_rate": 0.00012117830974213428, "loss": 1.0391, "step": 2437 }, { "epoch": 0.45, "learning_rate": 0.00012111996793443874, "loss": 1.1402, "step": 2438 }, { "epoch": 0.45, "learning_rate": 0.0001210616186014011, "loss": 1.0232, "step": 2439 }, { "epoch": 0.45, "learning_rate": 0.00012100326176381208, "loss": 1.1348, "step": 2440 }, { "epoch": 0.45, "learning_rate": 0.000120944897442465, "loss": 1.2386, "step": 2441 }, { "epoch": 0.45, "learning_rate": 0.00012088652565815594, "loss": 1.0825, "step": 2442 }, { "epoch": 0.45, "learning_rate": 0.00012082814643168357, "loss": 1.0642, "step": 2443 }, { "epoch": 0.45, "learning_rate": 0.00012076975978384917, "loss": 1.0572, "step": 2444 }, { "epoch": 0.45, "learning_rate": 0.00012071136573545683, "loss": 0.9592, "step": 2445 }, { "epoch": 0.45, "learning_rate": 0.0001206529643073131, "loss": 1.2214, "step": 2446 }, { "epoch": 0.45, "learning_rate": 0.00012059455552022725, "loss": 1.0173, "step": 2447 }, { "epoch": 0.45, "learning_rate": 0.00012053613939501119, "loss": 1.0838, "step": 2448 }, { "epoch": 0.45, "learning_rate": 0.00012047771595247934, "loss": 1.0727, "step": 2449 }, { "epoch": 0.45, "learning_rate": 0.00012041928521344883, "loss": 1.0588, "step": 2450 }, { "epoch": 0.45, "learning_rate": 0.00012036084719873936, "loss": 1.1277, "step": 2451 }, { "epoch": 0.45, "learning_rate": 0.00012030240192917321, "loss": 1.1543, "step": 2452 }, { "epoch": 0.45, "learning_rate": 0.00012024394942557529, "loss": 1.0771, "step": 2453 }, { "epoch": 0.45, "learning_rate": 0.00012018548970877299, "loss": 1.1867, "step": 2454 }, { "epoch": 0.45, "learning_rate": 0.00012012702279959633, "loss": 1.0833, "step": 2455 }, { "epoch": 0.45, "learning_rate": 0.00012006854871887798, "loss": 1.1735, "step": 2456 }, { "epoch": 0.45, "learning_rate": 0.00012001006748745299, "loss": 1.1007, "step": 2457 }, { "epoch": 0.45, "learning_rate": 0.00011995157912615905, "loss": 1.1014, "step": 2458 }, { "epoch": 0.45, "learning_rate": 0.00011989308365583645, "loss": 1.0674, "step": 2459 }, { "epoch": 0.45, "learning_rate": 0.00011983458109732791, "loss": 1.0823, "step": 2460 }, { "epoch": 0.45, "learning_rate": 0.00011977607147147869, "loss": 1.0536, "step": 2461 }, { "epoch": 0.45, "learning_rate": 0.00011971755479913665, "loss": 1.03, "step": 2462 }, { "epoch": 0.45, "learning_rate": 0.00011965903110115206, "loss": 1.0709, "step": 2463 }, { "epoch": 0.45, "learning_rate": 0.00011960050039837778, "loss": 1.1163, "step": 2464 }, { "epoch": 0.45, "learning_rate": 0.00011954196271166906, "loss": 1.0322, "step": 2465 }, { "epoch": 0.45, "learning_rate": 0.00011948341806188377, "loss": 1.0371, "step": 2466 }, { "epoch": 0.45, "learning_rate": 0.00011942486646988219, "loss": 1.0621, "step": 2467 }, { "epoch": 0.45, "learning_rate": 0.00011936630795652699, "loss": 1.0871, "step": 2468 }, { "epoch": 0.45, "learning_rate": 0.00011930774254268348, "loss": 1.0141, "step": 2469 }, { "epoch": 0.46, "learning_rate": 0.00011924917024921935, "loss": 1.1259, "step": 2470 }, { "epoch": 0.46, "learning_rate": 0.00011919059109700465, "loss": 1.0218, "step": 2471 }, { "epoch": 0.46, "learning_rate": 0.00011913200510691203, "loss": 1.1069, "step": 2472 }, { "epoch": 0.46, "learning_rate": 0.0001190734122998165, "loss": 1.0622, "step": 2473 }, { "epoch": 0.46, "learning_rate": 0.00011901481269659547, "loss": 1.0531, "step": 2474 }, { "epoch": 0.46, "learning_rate": 0.00011895620631812883, "loss": 1.0359, "step": 2475 }, { "epoch": 0.46, "learning_rate": 0.00011889759318529882, "loss": 1.0438, "step": 2476 }, { "epoch": 0.46, "learning_rate": 0.00011883897331899016, "loss": 1.077, "step": 2477 }, { "epoch": 0.46, "learning_rate": 0.00011878034674008993, "loss": 1.1148, "step": 2478 }, { "epoch": 0.46, "learning_rate": 0.0001187217134694876, "loss": 1.1033, "step": 2479 }, { "epoch": 0.46, "learning_rate": 0.00011866307352807503, "loss": 1.0528, "step": 2480 }, { "epoch": 0.46, "learning_rate": 0.00011860442693674647, "loss": 1.0477, "step": 2481 }, { "epoch": 0.46, "learning_rate": 0.00011854577371639848, "loss": 1.0813, "step": 2482 }, { "epoch": 0.46, "learning_rate": 0.00011848711388793008, "loss": 1.0616, "step": 2483 }, { "epoch": 0.46, "learning_rate": 0.0001184284474722426, "loss": 0.9703, "step": 2484 }, { "epoch": 0.46, "learning_rate": 0.00011836977449023967, "loss": 0.9331, "step": 2485 }, { "epoch": 0.46, "learning_rate": 0.0001183110949628273, "loss": 1.1191, "step": 2486 }, { "epoch": 0.46, "learning_rate": 0.0001182524089109139, "loss": 1.036, "step": 2487 }, { "epoch": 0.46, "learning_rate": 0.00011819371635541003, "loss": 1.0684, "step": 2488 }, { "epoch": 0.46, "learning_rate": 0.00011813501731722875, "loss": 1.0942, "step": 2489 }, { "epoch": 0.46, "learning_rate": 0.00011807631181728536, "loss": 1.1642, "step": 2490 }, { "epoch": 0.46, "learning_rate": 0.00011801759987649741, "loss": 1.1738, "step": 2491 }, { "epoch": 0.46, "learning_rate": 0.0001179588815157848, "loss": 0.9156, "step": 2492 }, { "epoch": 0.46, "learning_rate": 0.00011790015675606976, "loss": 0.9947, "step": 2493 }, { "epoch": 0.46, "learning_rate": 0.00011784142561827669, "loss": 1.0142, "step": 2494 }, { "epoch": 0.46, "learning_rate": 0.00011778268812333233, "loss": 1.0981, "step": 2495 }, { "epoch": 0.46, "learning_rate": 0.00011772394429216571, "loss": 1.0445, "step": 2496 }, { "epoch": 0.46, "learning_rate": 0.00011766519414570805, "loss": 1.094, "step": 2497 }, { "epoch": 0.46, "learning_rate": 0.00011760643770489288, "loss": 0.9539, "step": 2498 }, { "epoch": 0.46, "learning_rate": 0.00011754767499065593, "loss": 1.0254, "step": 2499 }, { "epoch": 0.46, "learning_rate": 0.00011748890602393521, "loss": 0.9844, "step": 2500 }, { "epoch": 0.46, "learning_rate": 0.00011743013082567087, "loss": 1.0708, "step": 2501 }, { "epoch": 0.46, "learning_rate": 0.00011737134941680544, "loss": 0.9493, "step": 2502 }, { "epoch": 0.46, "learning_rate": 0.00011731256181828346, "loss": 1.1233, "step": 2503 }, { "epoch": 0.46, "learning_rate": 0.00011725376805105182, "loss": 0.9682, "step": 2504 }, { "epoch": 0.46, "learning_rate": 0.00011719496813605962, "loss": 1.1679, "step": 2505 }, { "epoch": 0.46, "learning_rate": 0.00011713616209425799, "loss": 1.0648, "step": 2506 }, { "epoch": 0.46, "learning_rate": 0.00011707734994660044, "loss": 1.1584, "step": 2507 }, { "epoch": 0.46, "learning_rate": 0.00011701853171404256, "loss": 1.1915, "step": 2508 }, { "epoch": 0.46, "learning_rate": 0.00011695970741754206, "loss": 1.0943, "step": 2509 }, { "epoch": 0.46, "learning_rate": 0.0001169008770780589, "loss": 1.2487, "step": 2510 }, { "epoch": 0.46, "learning_rate": 0.00011684204071655519, "loss": 1.0337, "step": 2511 }, { "epoch": 0.46, "learning_rate": 0.00011678319835399509, "loss": 1.1732, "step": 2512 }, { "epoch": 0.46, "learning_rate": 0.00011672435001134501, "loss": 1.0857, "step": 2513 }, { "epoch": 0.46, "learning_rate": 0.00011666549570957341, "loss": 0.9783, "step": 2514 }, { "epoch": 0.46, "learning_rate": 0.00011660663546965094, "loss": 0.9347, "step": 2515 }, { "epoch": 0.46, "learning_rate": 0.00011654776931255033, "loss": 1.172, "step": 2516 }, { "epoch": 0.46, "learning_rate": 0.0001164888972592464, "loss": 1.0466, "step": 2517 }, { "epoch": 0.46, "learning_rate": 0.00011643001933071609, "loss": 1.2721, "step": 2518 }, { "epoch": 0.46, "learning_rate": 0.00011637113554793846, "loss": 1.2364, "step": 2519 }, { "epoch": 0.46, "learning_rate": 0.00011631224593189461, "loss": 1.0221, "step": 2520 }, { "epoch": 0.46, "learning_rate": 0.00011625335050356771, "loss": 1.0244, "step": 2521 }, { "epoch": 0.46, "learning_rate": 0.00011619444928394312, "loss": 1.0406, "step": 2522 }, { "epoch": 0.46, "learning_rate": 0.00011613554229400809, "loss": 1.0666, "step": 2523 }, { "epoch": 0.47, "learning_rate": 0.00011607662955475204, "loss": 0.9845, "step": 2524 }, { "epoch": 0.47, "learning_rate": 0.00011601771108716646, "loss": 1.0737, "step": 2525 }, { "epoch": 0.47, "learning_rate": 0.00011595878691224473, "loss": 1.0791, "step": 2526 }, { "epoch": 0.47, "learning_rate": 0.00011589985705098245, "loss": 1.05, "step": 2527 }, { "epoch": 0.47, "learning_rate": 0.00011584092152437711, "loss": 1.0166, "step": 2528 }, { "epoch": 0.47, "learning_rate": 0.00011578198035342826, "loss": 1.014, "step": 2529 }, { "epoch": 0.47, "learning_rate": 0.00011572303355913754, "loss": 1.0597, "step": 2530 }, { "epoch": 0.47, "learning_rate": 0.00011566408116250842, "loss": 1.0042, "step": 2531 }, { "epoch": 0.47, "learning_rate": 0.00011560512318454655, "loss": 0.9529, "step": 2532 }, { "epoch": 0.47, "learning_rate": 0.00011554615964625948, "loss": 1.0287, "step": 2533 }, { "epoch": 0.47, "learning_rate": 0.00011548719056865671, "loss": 1.1661, "step": 2534 }, { "epoch": 0.47, "learning_rate": 0.00011542821597274978, "loss": 1.1373, "step": 2535 }, { "epoch": 0.47, "learning_rate": 0.00011536923587955218, "loss": 1.0877, "step": 2536 }, { "epoch": 0.47, "learning_rate": 0.0001153102503100793, "loss": 1.1041, "step": 2537 }, { "epoch": 0.47, "learning_rate": 0.00011525125928534859, "loss": 1.2425, "step": 2538 }, { "epoch": 0.47, "learning_rate": 0.00011519226282637934, "loss": 1.0201, "step": 2539 }, { "epoch": 0.47, "learning_rate": 0.00011513326095419281, "loss": 1.1049, "step": 2540 }, { "epoch": 0.47, "learning_rate": 0.00011507425368981228, "loss": 0.8227, "step": 2541 }, { "epoch": 0.47, "learning_rate": 0.00011501524105426278, "loss": 1.174, "step": 2542 }, { "epoch": 0.47, "learning_rate": 0.00011495622306857136, "loss": 1.1698, "step": 2543 }, { "epoch": 0.47, "learning_rate": 0.000114897199753767, "loss": 1.0361, "step": 2544 }, { "epoch": 0.47, "learning_rate": 0.00011483817113088052, "loss": 1.0362, "step": 2545 }, { "epoch": 0.47, "learning_rate": 0.00011477913722094462, "loss": 1.071, "step": 2546 }, { "epoch": 0.47, "learning_rate": 0.00011472009804499396, "loss": 1.212, "step": 2547 }, { "epoch": 0.47, "learning_rate": 0.00011466105362406498, "loss": 1.0599, "step": 2548 }, { "epoch": 0.47, "learning_rate": 0.0001146020039791961, "loss": 0.9016, "step": 2549 }, { "epoch": 0.47, "learning_rate": 0.00011454294913142747, "loss": 1.0331, "step": 2550 }, { "epoch": 0.47, "learning_rate": 0.00011448388910180121, "loss": 1.0931, "step": 2551 }, { "epoch": 0.47, "learning_rate": 0.00011442482391136124, "loss": 1.2043, "step": 2552 }, { "epoch": 0.47, "learning_rate": 0.00011436575358115331, "loss": 1.1662, "step": 2553 }, { "epoch": 0.47, "learning_rate": 0.000114306678132225, "loss": 1.1793, "step": 2554 }, { "epoch": 0.47, "learning_rate": 0.00011424759758562575, "loss": 1.1494, "step": 2555 }, { "epoch": 0.47, "learning_rate": 0.00011418851196240677, "loss": 0.9071, "step": 2556 }, { "epoch": 0.47, "learning_rate": 0.0001141294212836211, "loss": 1.1037, "step": 2557 }, { "epoch": 0.47, "learning_rate": 0.00011407032557032363, "loss": 1.0746, "step": 2558 }, { "epoch": 0.47, "learning_rate": 0.0001140112248435709, "loss": 1.0621, "step": 2559 }, { "epoch": 0.47, "learning_rate": 0.00011395211912442141, "loss": 1.1837, "step": 2560 }, { "epoch": 0.47, "learning_rate": 0.00011389300843393533, "loss": 1.0299, "step": 2561 }, { "epoch": 0.47, "learning_rate": 0.00011383389279317463, "loss": 1.0579, "step": 2562 }, { "epoch": 0.47, "learning_rate": 0.00011377477222320308, "loss": 1.1226, "step": 2563 }, { "epoch": 0.47, "learning_rate": 0.0001137156467450861, "loss": 1.034, "step": 2564 }, { "epoch": 0.47, "learning_rate": 0.00011365651637989099, "loss": 1.09, "step": 2565 }, { "epoch": 0.47, "learning_rate": 0.00011359738114868674, "loss": 1.0761, "step": 2566 }, { "epoch": 0.47, "learning_rate": 0.000113538241072544, "loss": 1.0722, "step": 2567 }, { "epoch": 0.47, "learning_rate": 0.0001134790961725353, "loss": 1.2275, "step": 2568 }, { "epoch": 0.47, "learning_rate": 0.00011341994646973475, "loss": 1.244, "step": 2569 }, { "epoch": 0.47, "learning_rate": 0.00011336079198521821, "loss": 1.1529, "step": 2570 }, { "epoch": 0.47, "learning_rate": 0.00011330163274006328, "loss": 1.0189, "step": 2571 }, { "epoch": 0.47, "learning_rate": 0.00011324246875534923, "loss": 1.2126, "step": 2572 }, { "epoch": 0.47, "learning_rate": 0.00011318330005215701, "loss": 1.2187, "step": 2573 }, { "epoch": 0.47, "learning_rate": 0.00011312412665156929, "loss": 1.0649, "step": 2574 }, { "epoch": 0.47, "learning_rate": 0.00011306494857467034, "loss": 1.1392, "step": 2575 }, { "epoch": 0.47, "learning_rate": 0.00011300576584254617, "loss": 1.0744, "step": 2576 }, { "epoch": 0.47, "learning_rate": 0.00011294657847628445, "loss": 1.1354, "step": 2577 }, { "epoch": 0.48, "learning_rate": 0.00011288738649697442, "loss": 1.1076, "step": 2578 }, { "epoch": 0.48, "learning_rate": 0.00011282818992570702, "loss": 1.1535, "step": 2579 }, { "epoch": 0.48, "learning_rate": 0.00011276898878357489, "loss": 1.0801, "step": 2580 }, { "epoch": 0.48, "learning_rate": 0.00011270978309167215, "loss": 1.0137, "step": 2581 }, { "epoch": 0.48, "learning_rate": 0.00011265057287109466, "loss": 1.2734, "step": 2582 }, { "epoch": 0.48, "learning_rate": 0.00011259135814293986, "loss": 1.0551, "step": 2583 }, { "epoch": 0.48, "learning_rate": 0.00011253213892830676, "loss": 1.0603, "step": 2584 }, { "epoch": 0.48, "learning_rate": 0.00011247291524829605, "loss": 0.9991, "step": 2585 }, { "epoch": 0.48, "learning_rate": 0.00011241368712400991, "loss": 1.1884, "step": 2586 }, { "epoch": 0.48, "learning_rate": 0.00011235445457655218, "loss": 1.0387, "step": 2587 }, { "epoch": 0.48, "learning_rate": 0.00011229521762702826, "loss": 1.2322, "step": 2588 }, { "epoch": 0.48, "learning_rate": 0.0001122359762965451, "loss": 1.1555, "step": 2589 }, { "epoch": 0.48, "learning_rate": 0.0001121767306062112, "loss": 1.056, "step": 2590 }, { "epoch": 0.48, "learning_rate": 0.00011211748057713666, "loss": 1.0815, "step": 2591 }, { "epoch": 0.48, "learning_rate": 0.0001120582262304331, "loss": 1.0023, "step": 2592 }, { "epoch": 0.48, "learning_rate": 0.00011199896758721369, "loss": 1.0731, "step": 2593 }, { "epoch": 0.48, "learning_rate": 0.00011193970466859308, "loss": 1.1277, "step": 2594 }, { "epoch": 0.48, "learning_rate": 0.00011188043749568751, "loss": 0.9713, "step": 2595 }, { "epoch": 0.48, "learning_rate": 0.0001118211660896147, "loss": 0.9618, "step": 2596 }, { "epoch": 0.48, "learning_rate": 0.00011176189047149388, "loss": 1.0128, "step": 2597 }, { "epoch": 0.48, "learning_rate": 0.0001117026106624458, "loss": 1.0967, "step": 2598 }, { "epoch": 0.48, "learning_rate": 0.00011164332668359272, "loss": 1.0833, "step": 2599 }, { "epoch": 0.48, "learning_rate": 0.00011158403855605828, "loss": 0.9411, "step": 2600 }, { "epoch": 0.48, "learning_rate": 0.00011152474630096775, "loss": 1.0144, "step": 2601 }, { "epoch": 0.48, "learning_rate": 0.00011146544993944776, "loss": 1.1589, "step": 2602 }, { "epoch": 0.48, "learning_rate": 0.00011140614949262642, "loss": 1.0718, "step": 2603 }, { "epoch": 0.48, "learning_rate": 0.0001113468449816334, "loss": 1.0805, "step": 2604 }, { "epoch": 0.48, "learning_rate": 0.00011128753642759961, "loss": 1.115, "step": 2605 }, { "epoch": 0.48, "learning_rate": 0.00011122822385165761, "loss": 1.1553, "step": 2606 }, { "epoch": 0.48, "learning_rate": 0.0001111689072749413, "loss": 0.9043, "step": 2607 }, { "epoch": 0.48, "learning_rate": 0.00011110958671858596, "loss": 1.1683, "step": 2608 }, { "epoch": 0.48, "learning_rate": 0.00011105026220372836, "loss": 1.0626, "step": 2609 }, { "epoch": 0.48, "learning_rate": 0.00011099093375150673, "loss": 1.172, "step": 2610 }, { "epoch": 0.48, "learning_rate": 0.00011093160138306054, "loss": 1.1124, "step": 2611 }, { "epoch": 0.48, "learning_rate": 0.00011087226511953077, "loss": 1.1184, "step": 2612 }, { "epoch": 0.48, "learning_rate": 0.00011081292498205981, "loss": 1.0105, "step": 2613 }, { "epoch": 0.48, "learning_rate": 0.00011075358099179137, "loss": 1.1689, "step": 2614 }, { "epoch": 0.48, "learning_rate": 0.00011069423316987053, "loss": 1.1182, "step": 2615 }, { "epoch": 0.48, "learning_rate": 0.00011063488153744375, "loss": 1.0262, "step": 2616 }, { "epoch": 0.48, "learning_rate": 0.00011057552611565887, "loss": 1.0826, "step": 2617 }, { "epoch": 0.48, "learning_rate": 0.00011051616692566506, "loss": 1.0257, "step": 2618 }, { "epoch": 0.48, "learning_rate": 0.00011045680398861284, "loss": 1.1513, "step": 2619 }, { "epoch": 0.48, "learning_rate": 0.00011039743732565404, "loss": 1.1208, "step": 2620 }, { "epoch": 0.48, "learning_rate": 0.00011033806695794189, "loss": 1.0161, "step": 2621 }, { "epoch": 0.48, "learning_rate": 0.00011027869290663083, "loss": 1.1182, "step": 2622 }, { "epoch": 0.48, "learning_rate": 0.00011021931519287669, "loss": 1.0304, "step": 2623 }, { "epoch": 0.48, "learning_rate": 0.00011015993383783658, "loss": 1.1134, "step": 2624 }, { "epoch": 0.48, "learning_rate": 0.00011010054886266894, "loss": 1.1747, "step": 2625 }, { "epoch": 0.48, "learning_rate": 0.00011004116028853346, "loss": 1.0594, "step": 2626 }, { "epoch": 0.48, "learning_rate": 0.00010998176813659111, "loss": 0.9984, "step": 2627 }, { "epoch": 0.48, "learning_rate": 0.00010992237242800416, "loss": 1.0027, "step": 2628 }, { "epoch": 0.48, "learning_rate": 0.00010986297318393615, "loss": 1.0629, "step": 2629 }, { "epoch": 0.48, "learning_rate": 0.00010980357042555182, "loss": 1.2186, "step": 2630 }, { "epoch": 0.48, "learning_rate": 0.00010974416417401729, "loss": 1.2497, "step": 2631 }, { "epoch": 0.49, "learning_rate": 0.0001096847544504998, "loss": 1.1378, "step": 2632 }, { "epoch": 0.49, "learning_rate": 0.00010962534127616784, "loss": 1.1268, "step": 2633 }, { "epoch": 0.49, "learning_rate": 0.0001095659246721912, "loss": 1.0702, "step": 2634 }, { "epoch": 0.49, "learning_rate": 0.00010950650465974088, "loss": 0.9811, "step": 2635 }, { "epoch": 0.49, "learning_rate": 0.000109447081259989, "loss": 1.0418, "step": 2636 }, { "epoch": 0.49, "learning_rate": 0.00010938765449410899, "loss": 1.2157, "step": 2637 }, { "epoch": 0.49, "learning_rate": 0.00010932822438327546, "loss": 0.9725, "step": 2638 }, { "epoch": 0.49, "learning_rate": 0.00010926879094866416, "loss": 1.0679, "step": 2639 }, { "epoch": 0.49, "learning_rate": 0.00010920935421145206, "loss": 1.0095, "step": 2640 }, { "epoch": 0.49, "learning_rate": 0.00010914991419281738, "loss": 1.0304, "step": 2641 }, { "epoch": 0.49, "learning_rate": 0.00010909047091393931, "loss": 1.0908, "step": 2642 }, { "epoch": 0.49, "learning_rate": 0.00010903102439599844, "loss": 1.1521, "step": 2643 }, { "epoch": 0.49, "learning_rate": 0.00010897157466017636, "loss": 1.0171, "step": 2644 }, { "epoch": 0.49, "learning_rate": 0.00010891212172765583, "loss": 1.054, "step": 2645 }, { "epoch": 0.49, "learning_rate": 0.0001088526656196208, "loss": 1.0379, "step": 2646 }, { "epoch": 0.49, "learning_rate": 0.00010879320635725632, "loss": 1.1599, "step": 2647 }, { "epoch": 0.49, "learning_rate": 0.00010873374396174852, "loss": 1.0448, "step": 2648 }, { "epoch": 0.49, "learning_rate": 0.0001086742784542847, "loss": 1.1297, "step": 2649 }, { "epoch": 0.49, "learning_rate": 0.0001086148098560533, "loss": 1.2667, "step": 2650 }, { "epoch": 0.49, "learning_rate": 0.00010855533818824379, "loss": 1.2, "step": 2651 }, { "epoch": 0.49, "learning_rate": 0.00010849586347204676, "loss": 1.0517, "step": 2652 }, { "epoch": 0.49, "learning_rate": 0.00010843638572865391, "loss": 1.1702, "step": 2653 }, { "epoch": 0.49, "learning_rate": 0.00010837690497925796, "loss": 1.1612, "step": 2654 }, { "epoch": 0.49, "learning_rate": 0.00010831742124505277, "loss": 1.0264, "step": 2655 }, { "epoch": 0.49, "learning_rate": 0.00010825793454723325, "loss": 1.0736, "step": 2656 }, { "epoch": 0.49, "learning_rate": 0.00010819844490699529, "loss": 0.9869, "step": 2657 }, { "epoch": 0.49, "learning_rate": 0.00010813895234553593, "loss": 1.0987, "step": 2658 }, { "epoch": 0.49, "learning_rate": 0.00010807945688405321, "loss": 1.2177, "step": 2659 }, { "epoch": 0.49, "learning_rate": 0.00010801995854374616, "loss": 1.0892, "step": 2660 }, { "epoch": 0.49, "learning_rate": 0.00010796045734581492, "loss": 1.0517, "step": 2661 }, { "epoch": 0.49, "learning_rate": 0.0001079009533114606, "loss": 1.056, "step": 2662 }, { "epoch": 0.49, "learning_rate": 0.0001078414464618853, "loss": 1.193, "step": 2663 }, { "epoch": 0.49, "learning_rate": 0.00010778193681829216, "loss": 1.2229, "step": 2664 }, { "epoch": 0.49, "learning_rate": 0.00010772242440188534, "loss": 1.0591, "step": 2665 }, { "epoch": 0.49, "learning_rate": 0.0001076629092338699, "loss": 1.2058, "step": 2666 }, { "epoch": 0.49, "learning_rate": 0.00010760339133545196, "loss": 1.148, "step": 2667 }, { "epoch": 0.49, "learning_rate": 0.00010754387072783858, "loss": 1.008, "step": 2668 }, { "epoch": 0.49, "learning_rate": 0.00010748434743223777, "loss": 1.134, "step": 2669 }, { "epoch": 0.49, "learning_rate": 0.00010742482146985857, "loss": 1.1691, "step": 2670 }, { "epoch": 0.49, "learning_rate": 0.00010736529286191086, "loss": 1.1032, "step": 2671 }, { "epoch": 0.49, "learning_rate": 0.00010730576162960555, "loss": 1.1496, "step": 2672 }, { "epoch": 0.49, "learning_rate": 0.0001072462277941545, "loss": 1.0748, "step": 2673 }, { "epoch": 0.49, "learning_rate": 0.00010718669137677038, "loss": 1.0459, "step": 2674 }, { "epoch": 0.49, "learning_rate": 0.00010712715239866687, "loss": 1.0272, "step": 2675 }, { "epoch": 0.49, "learning_rate": 0.00010706761088105858, "loss": 1.1141, "step": 2676 }, { "epoch": 0.49, "learning_rate": 0.00010700806684516096, "loss": 1.0837, "step": 2677 }, { "epoch": 0.49, "learning_rate": 0.00010694852031219042, "loss": 1.0413, "step": 2678 }, { "epoch": 0.49, "learning_rate": 0.00010688897130336419, "loss": 1.1249, "step": 2679 }, { "epoch": 0.49, "learning_rate": 0.00010682941983990045, "loss": 1.0789, "step": 2680 }, { "epoch": 0.49, "learning_rate": 0.00010676986594301823, "loss": 1.1816, "step": 2681 }, { "epoch": 0.49, "learning_rate": 0.0001067103096339374, "loss": 1.1431, "step": 2682 }, { "epoch": 0.49, "learning_rate": 0.00010665075093387871, "loss": 1.0199, "step": 2683 }, { "epoch": 0.49, "learning_rate": 0.0001065911898640638, "loss": 0.9806, "step": 2684 }, { "epoch": 0.49, "learning_rate": 0.00010653162644571506, "loss": 1.1226, "step": 2685 }, { "epoch": 0.49, "learning_rate": 0.00010647206070005581, "loss": 0.9506, "step": 2686 }, { "epoch": 0.5, "learning_rate": 0.00010641249264831019, "loss": 1.0302, "step": 2687 }, { "epoch": 0.5, "learning_rate": 0.00010635292231170309, "loss": 1.0785, "step": 2688 }, { "epoch": 0.5, "learning_rate": 0.00010629334971146028, "loss": 1.018, "step": 2689 }, { "epoch": 0.5, "learning_rate": 0.0001062337748688083, "loss": 1.0723, "step": 2690 }, { "epoch": 0.5, "learning_rate": 0.00010617419780497452, "loss": 0.9661, "step": 2691 }, { "epoch": 0.5, "learning_rate": 0.00010611461854118714, "loss": 1.0781, "step": 2692 }, { "epoch": 0.5, "learning_rate": 0.00010605503709867499, "loss": 1.1653, "step": 2693 }, { "epoch": 0.5, "learning_rate": 0.00010599545349866782, "loss": 1.1712, "step": 2694 }, { "epoch": 0.5, "learning_rate": 0.00010593586776239614, "loss": 1.1704, "step": 2695 }, { "epoch": 0.5, "learning_rate": 0.00010587627991109112, "loss": 1.1162, "step": 2696 }, { "epoch": 0.5, "learning_rate": 0.00010581668996598482, "loss": 1.0818, "step": 2697 }, { "epoch": 0.5, "learning_rate": 0.00010575709794830994, "loss": 1.029, "step": 2698 }, { "epoch": 0.5, "learning_rate": 0.00010569750387929998, "loss": 1.0339, "step": 2699 }, { "epoch": 0.5, "learning_rate": 0.00010563790778018912, "loss": 1.0263, "step": 2700 }, { "epoch": 0.5, "learning_rate": 0.00010557830967221227, "loss": 1.1458, "step": 2701 }, { "epoch": 0.5, "learning_rate": 0.00010551870957660513, "loss": 1.0737, "step": 2702 }, { "epoch": 0.5, "learning_rate": 0.00010545910751460401, "loss": 1.2301, "step": 2703 }, { "epoch": 0.5, "learning_rate": 0.00010539950350744598, "loss": 1.173, "step": 2704 }, { "epoch": 0.5, "learning_rate": 0.0001053398975763688, "loss": 0.988, "step": 2705 }, { "epoch": 0.5, "learning_rate": 0.00010528028974261091, "loss": 1.0139, "step": 2706 }, { "epoch": 0.5, "learning_rate": 0.00010522068002741136, "loss": 1.0149, "step": 2707 }, { "epoch": 0.5, "learning_rate": 0.00010516106845200999, "loss": 1.0829, "step": 2708 }, { "epoch": 0.5, "learning_rate": 0.00010510145503764726, "loss": 1.0717, "step": 2709 }, { "epoch": 0.5, "learning_rate": 0.00010504183980556419, "loss": 0.9663, "step": 2710 }, { "epoch": 0.5, "learning_rate": 0.00010498222277700261, "loss": 1.0867, "step": 2711 }, { "epoch": 0.5, "learning_rate": 0.00010492260397320483, "loss": 1.027, "step": 2712 }, { "epoch": 0.5, "learning_rate": 0.00010486298341541393, "loss": 1.1236, "step": 2713 }, { "epoch": 0.5, "learning_rate": 0.00010480336112487357, "loss": 1.0276, "step": 2714 }, { "epoch": 0.5, "learning_rate": 0.00010474373712282793, "loss": 1.0468, "step": 2715 }, { "epoch": 0.5, "learning_rate": 0.00010468411143052194, "loss": 0.9707, "step": 2716 }, { "epoch": 0.5, "learning_rate": 0.00010462448406920108, "loss": 1.0679, "step": 2717 }, { "epoch": 0.5, "learning_rate": 0.00010456485506011138, "loss": 0.988, "step": 2718 }, { "epoch": 0.5, "learning_rate": 0.00010450522442449952, "loss": 1.08, "step": 2719 }, { "epoch": 0.5, "learning_rate": 0.00010444559218361277, "loss": 1.1992, "step": 2720 }, { "epoch": 0.5, "learning_rate": 0.00010438595835869885, "loss": 1.0758, "step": 2721 }, { "epoch": 0.5, "learning_rate": 0.00010432632297100621, "loss": 1.1668, "step": 2722 }, { "epoch": 0.5, "learning_rate": 0.00010426668604178376, "loss": 1.0161, "step": 2723 }, { "epoch": 0.5, "learning_rate": 0.00010420704759228093, "loss": 1.1059, "step": 2724 }, { "epoch": 0.5, "learning_rate": 0.00010414740764374782, "loss": 1.0634, "step": 2725 }, { "epoch": 0.5, "learning_rate": 0.00010408776621743493, "loss": 1.0601, "step": 2726 }, { "epoch": 0.5, "learning_rate": 0.00010402812333459333, "loss": 0.9154, "step": 2727 }, { "epoch": 0.5, "learning_rate": 0.00010396847901647468, "loss": 1.1093, "step": 2728 }, { "epoch": 0.5, "learning_rate": 0.00010390883328433103, "loss": 1.0559, "step": 2729 }, { "epoch": 0.5, "learning_rate": 0.00010384918615941502, "loss": 1.0959, "step": 2730 }, { "epoch": 0.5, "learning_rate": 0.00010378953766297976, "loss": 1.109, "step": 2731 }, { "epoch": 0.5, "learning_rate": 0.00010372988781627884, "loss": 0.9126, "step": 2732 }, { "epoch": 0.5, "learning_rate": 0.00010367023664056638, "loss": 1.0876, "step": 2733 }, { "epoch": 0.5, "learning_rate": 0.00010361058415709687, "loss": 1.1759, "step": 2734 }, { "epoch": 0.5, "learning_rate": 0.00010355093038712538, "loss": 1.1467, "step": 2735 }, { "epoch": 0.5, "learning_rate": 0.00010349127535190737, "loss": 1.0835, "step": 2736 }, { "epoch": 0.5, "learning_rate": 0.00010343161907269876, "loss": 1.0929, "step": 2737 }, { "epoch": 0.5, "learning_rate": 0.00010337196157075593, "loss": 1.0994, "step": 2738 }, { "epoch": 0.5, "learning_rate": 0.00010331230286733573, "loss": 1.1397, "step": 2739 }, { "epoch": 0.5, "learning_rate": 0.00010325264298369532, "loss": 0.9584, "step": 2740 }, { "epoch": 0.51, "learning_rate": 0.00010319298194109241, "loss": 1.0389, "step": 2741 }, { "epoch": 0.51, "learning_rate": 0.00010313331976078505, "loss": 0.9806, "step": 2742 }, { "epoch": 0.51, "learning_rate": 0.00010307365646403171, "loss": 1.0645, "step": 2743 }, { "epoch": 0.51, "learning_rate": 0.0001030139920720913, "loss": 0.963, "step": 2744 }, { "epoch": 0.51, "learning_rate": 0.00010295432660622304, "loss": 0.9625, "step": 2745 }, { "epoch": 0.51, "learning_rate": 0.0001028946600876866, "loss": 0.9485, "step": 2746 }, { "epoch": 0.51, "learning_rate": 0.000102834992537742, "loss": 1.0781, "step": 2747 }, { "epoch": 0.51, "learning_rate": 0.00010277532397764959, "loss": 1.028, "step": 2748 }, { "epoch": 0.51, "learning_rate": 0.0001027156544286702, "loss": 1.031, "step": 2749 }, { "epoch": 0.51, "learning_rate": 0.00010265598391206486, "loss": 1.206, "step": 2750 }, { "epoch": 0.51, "learning_rate": 0.00010259631244909502, "loss": 1.039, "step": 2751 }, { "epoch": 0.51, "learning_rate": 0.0001025366400610225, "loss": 0.959, "step": 2752 }, { "epoch": 0.51, "learning_rate": 0.00010247696676910939, "loss": 0.9852, "step": 2753 }, { "epoch": 0.51, "learning_rate": 0.00010241729259461812, "loss": 1.0005, "step": 2754 }, { "epoch": 0.51, "learning_rate": 0.00010235761755881148, "loss": 1.0043, "step": 2755 }, { "epoch": 0.51, "learning_rate": 0.00010229794168295245, "loss": 1.0287, "step": 2756 }, { "epoch": 0.51, "learning_rate": 0.00010223826498830444, "loss": 1.2265, "step": 2757 }, { "epoch": 0.51, "learning_rate": 0.0001021785874961311, "loss": 1.143, "step": 2758 }, { "epoch": 0.51, "learning_rate": 0.00010211890922769631, "loss": 1.072, "step": 2759 }, { "epoch": 0.51, "learning_rate": 0.00010205923020426434, "loss": 1.0647, "step": 2760 }, { "epoch": 0.51, "learning_rate": 0.00010199955044709965, "loss": 1.1892, "step": 2761 }, { "epoch": 0.51, "learning_rate": 0.00010193986997746697, "loss": 1.0735, "step": 2762 }, { "epoch": 0.51, "learning_rate": 0.00010188018881663126, "loss": 1.1114, "step": 2763 }, { "epoch": 0.51, "learning_rate": 0.00010182050698585781, "loss": 1.063, "step": 2764 }, { "epoch": 0.51, "learning_rate": 0.00010176082450641207, "loss": 1.1066, "step": 2765 }, { "epoch": 0.51, "learning_rate": 0.00010170114139955975, "loss": 0.9187, "step": 2766 }, { "epoch": 0.51, "learning_rate": 0.00010164145768656679, "loss": 1.3366, "step": 2767 }, { "epoch": 0.51, "learning_rate": 0.00010158177338869932, "loss": 1.2262, "step": 2768 }, { "epoch": 0.51, "learning_rate": 0.00010152208852722374, "loss": 1.1433, "step": 2769 }, { "epoch": 0.51, "learning_rate": 0.00010146240312340656, "loss": 1.1368, "step": 2770 }, { "epoch": 0.51, "learning_rate": 0.00010140271719851453, "loss": 1.0929, "step": 2771 }, { "epoch": 0.51, "learning_rate": 0.00010134303077381464, "loss": 1.0655, "step": 2772 }, { "epoch": 0.51, "learning_rate": 0.00010128334387057394, "loss": 1.1459, "step": 2773 }, { "epoch": 0.51, "learning_rate": 0.00010122365651005977, "loss": 1.3252, "step": 2774 }, { "epoch": 0.51, "learning_rate": 0.0001011639687135395, "loss": 1.1494, "step": 2775 }, { "epoch": 0.51, "learning_rate": 0.0001011042805022808, "loss": 0.9305, "step": 2776 }, { "epoch": 0.51, "learning_rate": 0.00010104459189755142, "loss": 1.2907, "step": 2777 }, { "epoch": 0.51, "learning_rate": 0.0001009849029206192, "loss": 1.0338, "step": 2778 }, { "epoch": 0.51, "learning_rate": 0.0001009252135927522, "loss": 1.1016, "step": 2779 }, { "epoch": 0.51, "learning_rate": 0.00010086552393521859, "loss": 1.1942, "step": 2780 }, { "epoch": 0.51, "learning_rate": 0.0001008058339692866, "loss": 1.0571, "step": 2781 }, { "epoch": 0.51, "learning_rate": 0.00010074614371622459, "loss": 1.1761, "step": 2782 }, { "epoch": 0.51, "learning_rate": 0.0001006864531973011, "loss": 0.9925, "step": 2783 }, { "epoch": 0.51, "learning_rate": 0.00010062676243378465, "loss": 1.14, "step": 2784 }, { "epoch": 0.51, "learning_rate": 0.0001005670714469439, "loss": 1.125, "step": 2785 }, { "epoch": 0.51, "learning_rate": 0.00010050738025804764, "loss": 1.1554, "step": 2786 }, { "epoch": 0.51, "learning_rate": 0.00010044768888836462, "loss": 1.1202, "step": 2787 }, { "epoch": 0.51, "learning_rate": 0.00010038799735916374, "loss": 1.0583, "step": 2788 }, { "epoch": 0.51, "learning_rate": 0.00010032830569171396, "loss": 1.3135, "step": 2789 }, { "epoch": 0.51, "learning_rate": 0.00010026861390728422, "loss": 1.0833, "step": 2790 }, { "epoch": 0.51, "learning_rate": 0.00010020892202714356, "loss": 0.9715, "step": 2791 }, { "epoch": 0.51, "learning_rate": 0.00010014923007256105, "loss": 1.0556, "step": 2792 }, { "epoch": 0.51, "learning_rate": 0.00010008953806480575, "loss": 0.9473, "step": 2793 }, { "epoch": 0.51, "learning_rate": 0.00010002984602514678, "loss": 0.9293, "step": 2794 }, { "epoch": 0.52, "learning_rate": 9.997015397485326e-05, "loss": 1.1272, "step": 2795 }, { "epoch": 0.52, "learning_rate": 9.991046193519428e-05, "loss": 1.062, "step": 2796 }, { "epoch": 0.52, "learning_rate": 9.985076992743897e-05, "loss": 1.1298, "step": 2797 }, { "epoch": 0.52, "learning_rate": 9.979107797285648e-05, "loss": 1.0184, "step": 2798 }, { "epoch": 0.52, "learning_rate": 9.97313860927158e-05, "loss": 1.2291, "step": 2799 }, { "epoch": 0.52, "learning_rate": 9.967169430828608e-05, "loss": 1.1021, "step": 2800 }, { "epoch": 0.52, "learning_rate": 9.961200264083627e-05, "loss": 1.0445, "step": 2801 }, { "epoch": 0.52, "learning_rate": 9.95523111116354e-05, "loss": 0.9849, "step": 2802 }, { "epoch": 0.52, "learning_rate": 9.949261974195238e-05, "loss": 1.0716, "step": 2803 }, { "epoch": 0.52, "learning_rate": 9.943292855305612e-05, "loss": 1.0842, "step": 2804 }, { "epoch": 0.52, "learning_rate": 9.937323756621537e-05, "loss": 1.0207, "step": 2805 }, { "epoch": 0.52, "learning_rate": 9.931354680269893e-05, "loss": 1.1115, "step": 2806 }, { "epoch": 0.52, "learning_rate": 9.925385628377543e-05, "loss": 1.0494, "step": 2807 }, { "epoch": 0.52, "learning_rate": 9.919416603071341e-05, "loss": 1.2316, "step": 2808 }, { "epoch": 0.52, "learning_rate": 9.913447606478142e-05, "loss": 1.0382, "step": 2809 }, { "epoch": 0.52, "learning_rate": 9.907478640724781e-05, "loss": 0.9761, "step": 2810 }, { "epoch": 0.52, "learning_rate": 9.901509707938084e-05, "loss": 1.2445, "step": 2811 }, { "epoch": 0.52, "learning_rate": 9.895540810244863e-05, "loss": 1.0628, "step": 2812 }, { "epoch": 0.52, "learning_rate": 9.889571949771922e-05, "loss": 1.2257, "step": 2813 }, { "epoch": 0.52, "learning_rate": 9.883603128646052e-05, "loss": 1.0955, "step": 2814 }, { "epoch": 0.52, "learning_rate": 9.877634348994027e-05, "loss": 1.0964, "step": 2815 }, { "epoch": 0.52, "learning_rate": 9.87166561294261e-05, "loss": 1.1278, "step": 2816 }, { "epoch": 0.52, "learning_rate": 9.86569692261854e-05, "loss": 1.1321, "step": 2817 }, { "epoch": 0.52, "learning_rate": 9.85972828014855e-05, "loss": 1.0114, "step": 2818 }, { "epoch": 0.52, "learning_rate": 9.853759687659346e-05, "loss": 0.9821, "step": 2819 }, { "epoch": 0.52, "learning_rate": 9.847791147277627e-05, "loss": 1.1945, "step": 2820 }, { "epoch": 0.52, "learning_rate": 9.841822661130067e-05, "loss": 1.074, "step": 2821 }, { "epoch": 0.52, "learning_rate": 9.835854231343325e-05, "loss": 1.2117, "step": 2822 }, { "epoch": 0.52, "learning_rate": 9.829885860044028e-05, "loss": 0.938, "step": 2823 }, { "epoch": 0.52, "learning_rate": 9.823917549358797e-05, "loss": 1.1441, "step": 2824 }, { "epoch": 0.52, "learning_rate": 9.817949301414222e-05, "loss": 1.082, "step": 2825 }, { "epoch": 0.52, "learning_rate": 9.811981118336875e-05, "loss": 1.0743, "step": 2826 }, { "epoch": 0.52, "learning_rate": 9.806013002253304e-05, "loss": 0.9569, "step": 2827 }, { "epoch": 0.52, "learning_rate": 9.800044955290038e-05, "loss": 1.1873, "step": 2828 }, { "epoch": 0.52, "learning_rate": 9.794076979573567e-05, "loss": 1.0217, "step": 2829 }, { "epoch": 0.52, "learning_rate": 9.78810907723037e-05, "loss": 1.1538, "step": 2830 }, { "epoch": 0.52, "learning_rate": 9.782141250386892e-05, "loss": 1.0231, "step": 2831 }, { "epoch": 0.52, "learning_rate": 9.776173501169557e-05, "loss": 0.9982, "step": 2832 }, { "epoch": 0.52, "learning_rate": 9.77020583170476e-05, "loss": 1.0248, "step": 2833 }, { "epoch": 0.52, "learning_rate": 9.764238244118858e-05, "loss": 0.9194, "step": 2834 }, { "epoch": 0.52, "learning_rate": 9.75827074053819e-05, "loss": 1.0945, "step": 2835 }, { "epoch": 0.52, "learning_rate": 9.752303323089062e-05, "loss": 1.0158, "step": 2836 }, { "epoch": 0.52, "learning_rate": 9.746335993897751e-05, "loss": 1.1441, "step": 2837 }, { "epoch": 0.52, "learning_rate": 9.740368755090498e-05, "loss": 1.2054, "step": 2838 }, { "epoch": 0.52, "learning_rate": 9.734401608793519e-05, "loss": 1.0142, "step": 2839 }, { "epoch": 0.52, "learning_rate": 9.728434557132984e-05, "loss": 1.0601, "step": 2840 }, { "epoch": 0.52, "learning_rate": 9.722467602235042e-05, "loss": 1.12, "step": 2841 }, { "epoch": 0.52, "learning_rate": 9.716500746225802e-05, "loss": 0.9628, "step": 2842 }, { "epoch": 0.52, "learning_rate": 9.710533991231341e-05, "loss": 1.2167, "step": 2843 }, { "epoch": 0.52, "learning_rate": 9.704567339377701e-05, "loss": 1.1419, "step": 2844 }, { "epoch": 0.52, "learning_rate": 9.698600792790875e-05, "loss": 1.2225, "step": 2845 }, { "epoch": 0.52, "learning_rate": 9.69263435359683e-05, "loss": 1.0641, "step": 2846 }, { "epoch": 0.52, "learning_rate": 9.686668023921496e-05, "loss": 1.1021, "step": 2847 }, { "epoch": 0.52, "learning_rate": 9.680701805890761e-05, "loss": 0.9891, "step": 2848 }, { "epoch": 0.52, "learning_rate": 9.674735701630469e-05, "loss": 0.9184, "step": 2849 }, { "epoch": 0.53, "learning_rate": 9.668769713266432e-05, "loss": 1.1512, "step": 2850 }, { "epoch": 0.53, "learning_rate": 9.662803842924408e-05, "loss": 1.1046, "step": 2851 }, { "epoch": 0.53, "learning_rate": 9.656838092730126e-05, "loss": 1.234, "step": 2852 }, { "epoch": 0.53, "learning_rate": 9.650872464809265e-05, "loss": 1.0837, "step": 2853 }, { "epoch": 0.53, "learning_rate": 9.644906961287463e-05, "loss": 1.0177, "step": 2854 }, { "epoch": 0.53, "learning_rate": 9.638941584290318e-05, "loss": 1.1131, "step": 2855 }, { "epoch": 0.53, "learning_rate": 9.632976335943367e-05, "loss": 1.0589, "step": 2856 }, { "epoch": 0.53, "learning_rate": 9.627011218372117e-05, "loss": 0.9089, "step": 2857 }, { "epoch": 0.53, "learning_rate": 9.621046233702026e-05, "loss": 0.9306, "step": 2858 }, { "epoch": 0.53, "learning_rate": 9.615081384058499e-05, "loss": 1.1509, "step": 2859 }, { "epoch": 0.53, "learning_rate": 9.609116671566898e-05, "loss": 1.0072, "step": 2860 }, { "epoch": 0.53, "learning_rate": 9.603152098352537e-05, "loss": 1.0286, "step": 2861 }, { "epoch": 0.53, "learning_rate": 9.597187666540668e-05, "loss": 1.1474, "step": 2862 }, { "epoch": 0.53, "learning_rate": 9.59122337825651e-05, "loss": 1.2406, "step": 2863 }, { "epoch": 0.53, "learning_rate": 9.58525923562522e-05, "loss": 1.1913, "step": 2864 }, { "epoch": 0.53, "learning_rate": 9.579295240771906e-05, "loss": 1.0838, "step": 2865 }, { "epoch": 0.53, "learning_rate": 9.57333139582163e-05, "loss": 1.0707, "step": 2866 }, { "epoch": 0.53, "learning_rate": 9.567367702899382e-05, "loss": 1.1379, "step": 2867 }, { "epoch": 0.53, "learning_rate": 9.561404164130116e-05, "loss": 1.106, "step": 2868 }, { "epoch": 0.53, "learning_rate": 9.555440781638727e-05, "loss": 1.185, "step": 2869 }, { "epoch": 0.53, "learning_rate": 9.549477557550047e-05, "loss": 1.2044, "step": 2870 }, { "epoch": 0.53, "learning_rate": 9.543514493988862e-05, "loss": 1.0697, "step": 2871 }, { "epoch": 0.53, "learning_rate": 9.537551593079897e-05, "loss": 1.0939, "step": 2872 }, { "epoch": 0.53, "learning_rate": 9.531588856947808e-05, "loss": 1.1777, "step": 2873 }, { "epoch": 0.53, "learning_rate": 9.525626287717208e-05, "loss": 1.1276, "step": 2874 }, { "epoch": 0.53, "learning_rate": 9.519663887512647e-05, "loss": 1.0928, "step": 2875 }, { "epoch": 0.53, "learning_rate": 9.513701658458606e-05, "loss": 1.1275, "step": 2876 }, { "epoch": 0.53, "learning_rate": 9.507739602679519e-05, "loss": 1.0419, "step": 2877 }, { "epoch": 0.53, "learning_rate": 9.501777722299744e-05, "loss": 1.1474, "step": 2878 }, { "epoch": 0.53, "learning_rate": 9.495816019443584e-05, "loss": 1.2016, "step": 2879 }, { "epoch": 0.53, "learning_rate": 9.489854496235278e-05, "loss": 1.0701, "step": 2880 }, { "epoch": 0.53, "learning_rate": 9.483893154799001e-05, "loss": 1.0122, "step": 2881 }, { "epoch": 0.53, "learning_rate": 9.477931997258864e-05, "loss": 1.016, "step": 2882 }, { "epoch": 0.53, "learning_rate": 9.471971025738914e-05, "loss": 1.1688, "step": 2883 }, { "epoch": 0.53, "learning_rate": 9.466010242363123e-05, "loss": 1.0263, "step": 2884 }, { "epoch": 0.53, "learning_rate": 9.460049649255403e-05, "loss": 1.1505, "step": 2885 }, { "epoch": 0.53, "learning_rate": 9.4540892485396e-05, "loss": 1.0371, "step": 2886 }, { "epoch": 0.53, "learning_rate": 9.448129042339488e-05, "loss": 1.0257, "step": 2887 }, { "epoch": 0.53, "learning_rate": 9.442169032778777e-05, "loss": 1.0664, "step": 2888 }, { "epoch": 0.53, "learning_rate": 9.436209221981093e-05, "loss": 1.0914, "step": 2889 }, { "epoch": 0.53, "learning_rate": 9.430249612070006e-05, "loss": 1.0356, "step": 2890 }, { "epoch": 0.53, "learning_rate": 9.424290205169007e-05, "loss": 1.1543, "step": 2891 }, { "epoch": 0.53, "learning_rate": 9.418331003401519e-05, "loss": 1.0792, "step": 2892 }, { "epoch": 0.53, "learning_rate": 9.412372008890886e-05, "loss": 1.0847, "step": 2893 }, { "epoch": 0.53, "learning_rate": 9.406413223760391e-05, "loss": 1.0578, "step": 2894 }, { "epoch": 0.53, "learning_rate": 9.40045465013322e-05, "loss": 1.0352, "step": 2895 }, { "epoch": 0.53, "learning_rate": 9.394496290132503e-05, "loss": 1.1335, "step": 2896 }, { "epoch": 0.53, "learning_rate": 9.388538145881289e-05, "loss": 1.1344, "step": 2897 }, { "epoch": 0.53, "learning_rate": 9.382580219502547e-05, "loss": 1.0438, "step": 2898 }, { "epoch": 0.53, "learning_rate": 9.376622513119173e-05, "loss": 1.1124, "step": 2899 }, { "epoch": 0.53, "learning_rate": 9.370665028853976e-05, "loss": 1.0339, "step": 2900 }, { "epoch": 0.53, "learning_rate": 9.364707768829694e-05, "loss": 1.0601, "step": 2901 }, { "epoch": 0.53, "learning_rate": 9.358750735168983e-05, "loss": 1.1725, "step": 2902 }, { "epoch": 0.53, "learning_rate": 9.35279392999442e-05, "loss": 1.1453, "step": 2903 }, { "epoch": 0.54, "learning_rate": 9.346837355428494e-05, "loss": 0.9737, "step": 2904 }, { "epoch": 0.54, "learning_rate": 9.340881013593625e-05, "loss": 1.1115, "step": 2905 }, { "epoch": 0.54, "learning_rate": 9.334924906612131e-05, "loss": 1.0433, "step": 2906 }, { "epoch": 0.54, "learning_rate": 9.328969036606262e-05, "loss": 1.121, "step": 2907 }, { "epoch": 0.54, "learning_rate": 9.323013405698178e-05, "loss": 1.0781, "step": 2908 }, { "epoch": 0.54, "learning_rate": 9.317058016009954e-05, "loss": 1.0102, "step": 2909 }, { "epoch": 0.54, "learning_rate": 9.311102869663584e-05, "loss": 1.1634, "step": 2910 }, { "epoch": 0.54, "learning_rate": 9.305147968780962e-05, "loss": 1.0994, "step": 2911 }, { "epoch": 0.54, "learning_rate": 9.299193315483906e-05, "loss": 1.1435, "step": 2912 }, { "epoch": 0.54, "learning_rate": 9.293238911894144e-05, "loss": 1.2202, "step": 2913 }, { "epoch": 0.54, "learning_rate": 9.287284760133313e-05, "loss": 0.9822, "step": 2914 }, { "epoch": 0.54, "learning_rate": 9.281330862322963e-05, "loss": 1.0963, "step": 2915 }, { "epoch": 0.54, "learning_rate": 9.275377220584554e-05, "loss": 1.1516, "step": 2916 }, { "epoch": 0.54, "learning_rate": 9.269423837039446e-05, "loss": 1.2182, "step": 2917 }, { "epoch": 0.54, "learning_rate": 9.263470713808915e-05, "loss": 1.1302, "step": 2918 }, { "epoch": 0.54, "learning_rate": 9.257517853014145e-05, "loss": 1.127, "step": 2919 }, { "epoch": 0.54, "learning_rate": 9.251565256776223e-05, "loss": 1.1655, "step": 2920 }, { "epoch": 0.54, "learning_rate": 9.245612927216145e-05, "loss": 1.0654, "step": 2921 }, { "epoch": 0.54, "learning_rate": 9.239660866454809e-05, "loss": 0.9546, "step": 2922 }, { "epoch": 0.54, "learning_rate": 9.233709076613013e-05, "loss": 1.1352, "step": 2923 }, { "epoch": 0.54, "learning_rate": 9.227757559811469e-05, "loss": 1.1256, "step": 2924 }, { "epoch": 0.54, "learning_rate": 9.221806318170783e-05, "loss": 1.1096, "step": 2925 }, { "epoch": 0.54, "learning_rate": 9.21585535381147e-05, "loss": 1.0317, "step": 2926 }, { "epoch": 0.54, "learning_rate": 9.209904668853943e-05, "loss": 1.0159, "step": 2927 }, { "epoch": 0.54, "learning_rate": 9.20395426541851e-05, "loss": 1.0464, "step": 2928 }, { "epoch": 0.54, "learning_rate": 9.198004145625386e-05, "loss": 1.118, "step": 2929 }, { "epoch": 0.54, "learning_rate": 9.192054311594682e-05, "loss": 1.1315, "step": 2930 }, { "epoch": 0.54, "learning_rate": 9.186104765446408e-05, "loss": 1.0516, "step": 2931 }, { "epoch": 0.54, "learning_rate": 9.180155509300473e-05, "loss": 1.1579, "step": 2932 }, { "epoch": 0.54, "learning_rate": 9.174206545276677e-05, "loss": 1.3097, "step": 2933 }, { "epoch": 0.54, "learning_rate": 9.168257875494724e-05, "loss": 1.0407, "step": 2934 }, { "epoch": 0.54, "learning_rate": 9.162309502074206e-05, "loss": 1.0698, "step": 2935 }, { "epoch": 0.54, "learning_rate": 9.156361427134611e-05, "loss": 1.0847, "step": 2936 }, { "epoch": 0.54, "learning_rate": 9.150413652795325e-05, "loss": 1.0272, "step": 2937 }, { "epoch": 0.54, "learning_rate": 9.144466181175622e-05, "loss": 0.8153, "step": 2938 }, { "epoch": 0.54, "learning_rate": 9.138519014394671e-05, "loss": 1.0665, "step": 2939 }, { "epoch": 0.54, "learning_rate": 9.132572154571532e-05, "loss": 0.9969, "step": 2940 }, { "epoch": 0.54, "learning_rate": 9.126625603825152e-05, "loss": 1.0735, "step": 2941 }, { "epoch": 0.54, "learning_rate": 9.120679364274372e-05, "loss": 1.0776, "step": 2942 }, { "epoch": 0.54, "learning_rate": 9.114733438037922e-05, "loss": 1.0154, "step": 2943 }, { "epoch": 0.54, "learning_rate": 9.108787827234418e-05, "loss": 1.0487, "step": 2944 }, { "epoch": 0.54, "learning_rate": 9.102842533982365e-05, "loss": 1.0509, "step": 2945 }, { "epoch": 0.54, "learning_rate": 9.096897560400158e-05, "loss": 1.1233, "step": 2946 }, { "epoch": 0.54, "learning_rate": 9.09095290860607e-05, "loss": 1.0819, "step": 2947 }, { "epoch": 0.54, "learning_rate": 9.085008580718266e-05, "loss": 1.0821, "step": 2948 }, { "epoch": 0.54, "learning_rate": 9.079064578854795e-05, "loss": 0.9752, "step": 2949 }, { "epoch": 0.54, "learning_rate": 9.073120905133588e-05, "loss": 1.1108, "step": 2950 }, { "epoch": 0.54, "learning_rate": 9.067177561672456e-05, "loss": 1.0505, "step": 2951 }, { "epoch": 0.54, "learning_rate": 9.061234550589103e-05, "loss": 1.2959, "step": 2952 }, { "epoch": 0.54, "learning_rate": 9.055291874001103e-05, "loss": 1.0732, "step": 2953 }, { "epoch": 0.54, "learning_rate": 9.049349534025917e-05, "loss": 1.021, "step": 2954 }, { "epoch": 0.54, "learning_rate": 9.043407532780882e-05, "loss": 1.0041, "step": 2955 }, { "epoch": 0.54, "learning_rate": 9.037465872383218e-05, "loss": 1.0968, "step": 2956 }, { "epoch": 0.54, "learning_rate": 9.031524554950023e-05, "loss": 1.1629, "step": 2957 }, { "epoch": 0.55, "learning_rate": 9.025583582598273e-05, "loss": 0.9102, "step": 2958 }, { "epoch": 0.55, "learning_rate": 9.019642957444819e-05, "loss": 0.892, "step": 2959 }, { "epoch": 0.55, "learning_rate": 9.013702681606389e-05, "loss": 0.9989, "step": 2960 }, { "epoch": 0.55, "learning_rate": 9.007762757199587e-05, "loss": 1.0452, "step": 2961 }, { "epoch": 0.55, "learning_rate": 9.001823186340892e-05, "loss": 1.0116, "step": 2962 }, { "epoch": 0.55, "learning_rate": 8.995883971146656e-05, "loss": 1.0462, "step": 2963 }, { "epoch": 0.55, "learning_rate": 8.989945113733108e-05, "loss": 1.1197, "step": 2964 }, { "epoch": 0.55, "learning_rate": 8.984006616216346e-05, "loss": 1.0009, "step": 2965 }, { "epoch": 0.55, "learning_rate": 8.978068480712335e-05, "loss": 1.1562, "step": 2966 }, { "epoch": 0.55, "learning_rate": 8.97213070933692e-05, "loss": 1.0241, "step": 2967 }, { "epoch": 0.55, "learning_rate": 8.966193304205813e-05, "loss": 1.0414, "step": 2968 }, { "epoch": 0.55, "learning_rate": 8.960256267434596e-05, "loss": 0.8943, "step": 2969 }, { "epoch": 0.55, "learning_rate": 8.954319601138718e-05, "loss": 0.9679, "step": 2970 }, { "epoch": 0.55, "learning_rate": 8.948383307433498e-05, "loss": 1.2716, "step": 2971 }, { "epoch": 0.55, "learning_rate": 8.942447388434115e-05, "loss": 0.9547, "step": 2972 }, { "epoch": 0.55, "learning_rate": 8.936511846255627e-05, "loss": 1.0394, "step": 2973 }, { "epoch": 0.55, "learning_rate": 8.930576683012949e-05, "loss": 1.0698, "step": 2974 }, { "epoch": 0.55, "learning_rate": 8.924641900820864e-05, "loss": 1.092, "step": 2975 }, { "epoch": 0.55, "learning_rate": 8.918707501794021e-05, "loss": 0.851, "step": 2976 }, { "epoch": 0.55, "learning_rate": 8.912773488046925e-05, "loss": 1.128, "step": 2977 }, { "epoch": 0.55, "learning_rate": 8.906839861693949e-05, "loss": 1.105, "step": 2978 }, { "epoch": 0.55, "learning_rate": 8.90090662484933e-05, "loss": 1.1814, "step": 2979 }, { "epoch": 0.55, "learning_rate": 8.894973779627163e-05, "loss": 1.0331, "step": 2980 }, { "epoch": 0.55, "learning_rate": 8.889041328141405e-05, "loss": 1.1446, "step": 2981 }, { "epoch": 0.55, "learning_rate": 8.883109272505877e-05, "loss": 1.1565, "step": 2982 }, { "epoch": 0.55, "learning_rate": 8.877177614834242e-05, "loss": 1.1029, "step": 2983 }, { "epoch": 0.55, "learning_rate": 8.871246357240041e-05, "loss": 1.0901, "step": 2984 }, { "epoch": 0.55, "learning_rate": 8.865315501836664e-05, "loss": 1.1169, "step": 2985 }, { "epoch": 0.55, "learning_rate": 8.859385050737356e-05, "loss": 1.0706, "step": 2986 }, { "epoch": 0.55, "learning_rate": 8.853455006055229e-05, "loss": 1.0915, "step": 2987 }, { "epoch": 0.55, "learning_rate": 8.847525369903229e-05, "loss": 1.1521, "step": 2988 }, { "epoch": 0.55, "learning_rate": 8.841596144394174e-05, "loss": 1.071, "step": 2989 }, { "epoch": 0.55, "learning_rate": 8.835667331640731e-05, "loss": 1.037, "step": 2990 }, { "epoch": 0.55, "learning_rate": 8.82973893375542e-05, "loss": 0.8792, "step": 2991 }, { "epoch": 0.55, "learning_rate": 8.823810952850612e-05, "loss": 1.1389, "step": 2992 }, { "epoch": 0.55, "learning_rate": 8.817883391038533e-05, "loss": 0.9715, "step": 2993 }, { "epoch": 0.55, "learning_rate": 8.811956250431253e-05, "loss": 1.0953, "step": 2994 }, { "epoch": 0.55, "learning_rate": 8.806029533140693e-05, "loss": 1.1094, "step": 2995 }, { "epoch": 0.55, "learning_rate": 8.800103241278634e-05, "loss": 1.1268, "step": 2996 }, { "epoch": 0.55, "learning_rate": 8.79417737695669e-05, "loss": 0.9715, "step": 2997 }, { "epoch": 0.55, "learning_rate": 8.788251942286337e-05, "loss": 1.0884, "step": 2998 }, { "epoch": 0.55, "learning_rate": 8.782326939378883e-05, "loss": 0.9897, "step": 2999 }, { "epoch": 0.55, "learning_rate": 8.776402370345494e-05, "loss": 0.9945, "step": 3000 }, { "epoch": 0.55, "learning_rate": 8.770478237297176e-05, "loss": 1.2906, "step": 3001 }, { "epoch": 0.55, "learning_rate": 8.764554542344783e-05, "loss": 1.1382, "step": 3002 }, { "epoch": 0.55, "learning_rate": 8.75863128759901e-05, "loss": 1.1728, "step": 3003 }, { "epoch": 0.55, "learning_rate": 8.752708475170399e-05, "loss": 1.0572, "step": 3004 }, { "epoch": 0.55, "learning_rate": 8.746786107169327e-05, "loss": 1.2175, "step": 3005 }, { "epoch": 0.55, "learning_rate": 8.740864185706016e-05, "loss": 1.0403, "step": 3006 }, { "epoch": 0.55, "learning_rate": 8.734942712890534e-05, "loss": 1.0308, "step": 3007 }, { "epoch": 0.55, "learning_rate": 8.729021690832785e-05, "loss": 1.0724, "step": 3008 }, { "epoch": 0.55, "learning_rate": 8.723101121642516e-05, "loss": 0.8647, "step": 3009 }, { "epoch": 0.55, "learning_rate": 8.717181007429299e-05, "loss": 1.0368, "step": 3010 }, { "epoch": 0.55, "learning_rate": 8.71126135030256e-05, "loss": 1.082, "step": 3011 }, { "epoch": 0.56, "learning_rate": 8.705342152371557e-05, "loss": 1.0906, "step": 3012 }, { "epoch": 0.56, "learning_rate": 8.699423415745383e-05, "loss": 1.1191, "step": 3013 }, { "epoch": 0.56, "learning_rate": 8.693505142532966e-05, "loss": 1.0953, "step": 3014 }, { "epoch": 0.56, "learning_rate": 8.687587334843078e-05, "loss": 1.2058, "step": 3015 }, { "epoch": 0.56, "learning_rate": 8.681669994784303e-05, "loss": 1.0772, "step": 3016 }, { "epoch": 0.56, "learning_rate": 8.67575312446508e-05, "loss": 0.9868, "step": 3017 }, { "epoch": 0.56, "learning_rate": 8.669836725993674e-05, "loss": 1.05, "step": 3018 }, { "epoch": 0.56, "learning_rate": 8.66392080147818e-05, "loss": 1.1397, "step": 3019 }, { "epoch": 0.56, "learning_rate": 8.65800535302653e-05, "loss": 1.1164, "step": 3020 }, { "epoch": 0.56, "learning_rate": 8.652090382746474e-05, "loss": 1.0751, "step": 3021 }, { "epoch": 0.56, "learning_rate": 8.6461758927456e-05, "loss": 1.0227, "step": 3022 }, { "epoch": 0.56, "learning_rate": 8.640261885131328e-05, "loss": 1.3356, "step": 3023 }, { "epoch": 0.56, "learning_rate": 8.634348362010902e-05, "loss": 1.0647, "step": 3024 }, { "epoch": 0.56, "learning_rate": 8.628435325491394e-05, "loss": 1.2844, "step": 3025 }, { "epoch": 0.56, "learning_rate": 8.622522777679699e-05, "loss": 1.21, "step": 3026 }, { "epoch": 0.56, "learning_rate": 8.61661072068254e-05, "loss": 1.0959, "step": 3027 }, { "epoch": 0.56, "learning_rate": 8.61069915660647e-05, "loss": 1.1979, "step": 3028 }, { "epoch": 0.56, "learning_rate": 8.60478808755786e-05, "loss": 1.0756, "step": 3029 }, { "epoch": 0.56, "learning_rate": 8.598877515642911e-05, "loss": 1.1997, "step": 3030 }, { "epoch": 0.56, "learning_rate": 8.592967442967644e-05, "loss": 1.0719, "step": 3031 }, { "epoch": 0.56, "learning_rate": 8.587057871637891e-05, "loss": 1.0853, "step": 3032 }, { "epoch": 0.56, "learning_rate": 8.581148803759324e-05, "loss": 1.1406, "step": 3033 }, { "epoch": 0.56, "learning_rate": 8.575240241437427e-05, "loss": 1.0996, "step": 3034 }, { "epoch": 0.56, "learning_rate": 8.5693321867775e-05, "loss": 1.1049, "step": 3035 }, { "epoch": 0.56, "learning_rate": 8.563424641884674e-05, "loss": 1.0336, "step": 3036 }, { "epoch": 0.56, "learning_rate": 8.55751760886388e-05, "loss": 1.0082, "step": 3037 }, { "epoch": 0.56, "learning_rate": 8.551611089819883e-05, "loss": 0.9259, "step": 3038 }, { "epoch": 0.56, "learning_rate": 8.545705086857256e-05, "loss": 0.9993, "step": 3039 }, { "epoch": 0.56, "learning_rate": 8.539799602080394e-05, "loss": 1.0362, "step": 3040 }, { "epoch": 0.56, "learning_rate": 8.533894637593503e-05, "loss": 1.0423, "step": 3041 }, { "epoch": 0.56, "learning_rate": 8.527990195500609e-05, "loss": 1.0589, "step": 3042 }, { "epoch": 0.56, "learning_rate": 8.522086277905541e-05, "loss": 1.0313, "step": 3043 }, { "epoch": 0.56, "learning_rate": 8.516182886911952e-05, "loss": 1.0829, "step": 3044 }, { "epoch": 0.56, "learning_rate": 8.510280024623301e-05, "loss": 0.8681, "step": 3045 }, { "epoch": 0.56, "learning_rate": 8.504377693142865e-05, "loss": 1.1583, "step": 3046 }, { "epoch": 0.56, "learning_rate": 8.498475894573728e-05, "loss": 1.2505, "step": 3047 }, { "epoch": 0.56, "learning_rate": 8.492574631018777e-05, "loss": 1.068, "step": 3048 }, { "epoch": 0.56, "learning_rate": 8.48667390458072e-05, "loss": 1.1559, "step": 3049 }, { "epoch": 0.56, "learning_rate": 8.480773717362069e-05, "loss": 0.9939, "step": 3050 }, { "epoch": 0.56, "learning_rate": 8.474874071465144e-05, "loss": 1.0152, "step": 3051 }, { "epoch": 0.56, "learning_rate": 8.46897496899207e-05, "loss": 1.2434, "step": 3052 }, { "epoch": 0.56, "learning_rate": 8.463076412044789e-05, "loss": 0.9436, "step": 3053 }, { "epoch": 0.56, "learning_rate": 8.457178402725025e-05, "loss": 1.2059, "step": 3054 }, { "epoch": 0.56, "learning_rate": 8.451280943134332e-05, "loss": 1.0541, "step": 3055 }, { "epoch": 0.56, "learning_rate": 8.445384035374055e-05, "loss": 0.9551, "step": 3056 }, { "epoch": 0.56, "learning_rate": 8.439487681545345e-05, "loss": 1.2341, "step": 3057 }, { "epoch": 0.56, "learning_rate": 8.433591883749162e-05, "loss": 1.0456, "step": 3058 }, { "epoch": 0.56, "learning_rate": 8.427696644086251e-05, "loss": 1.0568, "step": 3059 }, { "epoch": 0.56, "learning_rate": 8.421801964657175e-05, "loss": 1.0322, "step": 3060 }, { "epoch": 0.56, "learning_rate": 8.415907847562292e-05, "loss": 1.0706, "step": 3061 }, { "epoch": 0.56, "learning_rate": 8.410014294901757e-05, "loss": 1.07, "step": 3062 }, { "epoch": 0.56, "learning_rate": 8.404121308775525e-05, "loss": 1.1057, "step": 3063 }, { "epoch": 0.56, "learning_rate": 8.398228891283358e-05, "loss": 1.0576, "step": 3064 }, { "epoch": 0.56, "learning_rate": 8.392337044524797e-05, "loss": 1.0187, "step": 3065 }, { "epoch": 0.56, "learning_rate": 8.386445770599192e-05, "loss": 0.9772, "step": 3066 }, { "epoch": 0.57, "learning_rate": 8.380555071605689e-05, "loss": 1.1402, "step": 3067 }, { "epoch": 0.57, "learning_rate": 8.374664949643228e-05, "loss": 1.0866, "step": 3068 }, { "epoch": 0.57, "learning_rate": 8.368775406810543e-05, "loss": 0.97, "step": 3069 }, { "epoch": 0.57, "learning_rate": 8.362886445206159e-05, "loss": 0.9831, "step": 3070 }, { "epoch": 0.57, "learning_rate": 8.356998066928394e-05, "loss": 1.1968, "step": 3071 }, { "epoch": 0.57, "learning_rate": 8.351110274075363e-05, "loss": 0.9934, "step": 3072 }, { "epoch": 0.57, "learning_rate": 8.345223068744968e-05, "loss": 0.9784, "step": 3073 }, { "epoch": 0.57, "learning_rate": 8.339336453034905e-05, "loss": 1.0311, "step": 3074 }, { "epoch": 0.57, "learning_rate": 8.33345042904266e-05, "loss": 1.0327, "step": 3075 }, { "epoch": 0.57, "learning_rate": 8.327564998865503e-05, "loss": 1.0825, "step": 3076 }, { "epoch": 0.57, "learning_rate": 8.321680164600493e-05, "loss": 1.0557, "step": 3077 }, { "epoch": 0.57, "learning_rate": 8.315795928344482e-05, "loss": 1.0798, "step": 3078 }, { "epoch": 0.57, "learning_rate": 8.30991229219411e-05, "loss": 1.103, "step": 3079 }, { "epoch": 0.57, "learning_rate": 8.304029258245795e-05, "loss": 1.1546, "step": 3080 }, { "epoch": 0.57, "learning_rate": 8.298146828595748e-05, "loss": 0.949, "step": 3081 }, { "epoch": 0.57, "learning_rate": 8.292265005339958e-05, "loss": 1.0504, "step": 3082 }, { "epoch": 0.57, "learning_rate": 8.286383790574202e-05, "loss": 1.0933, "step": 3083 }, { "epoch": 0.57, "learning_rate": 8.280503186394042e-05, "loss": 1.1361, "step": 3084 }, { "epoch": 0.57, "learning_rate": 8.274623194894818e-05, "loss": 1.1062, "step": 3085 }, { "epoch": 0.57, "learning_rate": 8.268743818171657e-05, "loss": 1.0327, "step": 3086 }, { "epoch": 0.57, "learning_rate": 8.26286505831946e-05, "loss": 1.0668, "step": 3087 }, { "epoch": 0.57, "learning_rate": 8.256986917432914e-05, "loss": 0.9463, "step": 3088 }, { "epoch": 0.57, "learning_rate": 8.251109397606483e-05, "loss": 1.1653, "step": 3089 }, { "epoch": 0.57, "learning_rate": 8.245232500934408e-05, "loss": 1.1315, "step": 3090 }, { "epoch": 0.57, "learning_rate": 8.239356229510715e-05, "loss": 0.8717, "step": 3091 }, { "epoch": 0.57, "learning_rate": 8.233480585429196e-05, "loss": 1.1201, "step": 3092 }, { "epoch": 0.57, "learning_rate": 8.227605570783431e-05, "loss": 0.9175, "step": 3093 }, { "epoch": 0.57, "learning_rate": 8.22173118766677e-05, "loss": 1.0318, "step": 3094 }, { "epoch": 0.57, "learning_rate": 8.215857438172334e-05, "loss": 1.1616, "step": 3095 }, { "epoch": 0.57, "learning_rate": 8.209984324393026e-05, "loss": 1.0502, "step": 3096 }, { "epoch": 0.57, "learning_rate": 8.20411184842152e-05, "loss": 1.0412, "step": 3097 }, { "epoch": 0.57, "learning_rate": 8.198240012350261e-05, "loss": 1.134, "step": 3098 }, { "epoch": 0.57, "learning_rate": 8.192368818271465e-05, "loss": 1.0813, "step": 3099 }, { "epoch": 0.57, "learning_rate": 8.186498268277126e-05, "loss": 1.0565, "step": 3100 }, { "epoch": 0.57, "learning_rate": 8.180628364458998e-05, "loss": 1.0711, "step": 3101 }, { "epoch": 0.57, "learning_rate": 8.174759108908615e-05, "loss": 0.9517, "step": 3102 }, { "epoch": 0.57, "learning_rate": 8.168890503717271e-05, "loss": 1.0925, "step": 3103 }, { "epoch": 0.57, "learning_rate": 8.163022550976035e-05, "loss": 1.0291, "step": 3104 }, { "epoch": 0.57, "learning_rate": 8.157155252775742e-05, "loss": 1.0549, "step": 3105 }, { "epoch": 0.57, "learning_rate": 8.151288611206992e-05, "loss": 1.0484, "step": 3106 }, { "epoch": 0.57, "learning_rate": 8.145422628360153e-05, "loss": 0.9969, "step": 3107 }, { "epoch": 0.57, "learning_rate": 8.139557306325358e-05, "loss": 1.0648, "step": 3108 }, { "epoch": 0.57, "learning_rate": 8.1336926471925e-05, "loss": 1.229, "step": 3109 }, { "epoch": 0.57, "learning_rate": 8.127828653051243e-05, "loss": 1.1392, "step": 3110 }, { "epoch": 0.57, "learning_rate": 8.121965325991008e-05, "loss": 1.1594, "step": 3111 }, { "epoch": 0.57, "learning_rate": 8.116102668100986e-05, "loss": 1.2241, "step": 3112 }, { "epoch": 0.57, "learning_rate": 8.110240681470123e-05, "loss": 1.098, "step": 3113 }, { "epoch": 0.57, "learning_rate": 8.104379368187122e-05, "loss": 1.0304, "step": 3114 }, { "epoch": 0.57, "learning_rate": 8.098518730340456e-05, "loss": 1.0348, "step": 3115 }, { "epoch": 0.57, "learning_rate": 8.092658770018351e-05, "loss": 1.1278, "step": 3116 }, { "epoch": 0.57, "learning_rate": 8.086799489308797e-05, "loss": 1.118, "step": 3117 }, { "epoch": 0.57, "learning_rate": 8.080940890299536e-05, "loss": 0.9724, "step": 3118 }, { "epoch": 0.57, "learning_rate": 8.07508297507807e-05, "loss": 1.1754, "step": 3119 }, { "epoch": 0.57, "learning_rate": 8.069225745731654e-05, "loss": 1.2146, "step": 3120 }, { "epoch": 0.58, "learning_rate": 8.063369204347302e-05, "loss": 1.0773, "step": 3121 }, { "epoch": 0.58, "learning_rate": 8.057513353011785e-05, "loss": 1.1106, "step": 3122 }, { "epoch": 0.58, "learning_rate": 8.051658193811623e-05, "loss": 1.0805, "step": 3123 }, { "epoch": 0.58, "learning_rate": 8.045803728833097e-05, "loss": 1.0957, "step": 3124 }, { "epoch": 0.58, "learning_rate": 8.039949960162227e-05, "loss": 1.0093, "step": 3125 }, { "epoch": 0.58, "learning_rate": 8.034096889884797e-05, "loss": 1.0766, "step": 3126 }, { "epoch": 0.58, "learning_rate": 8.028244520086337e-05, "loss": 1.1036, "step": 3127 }, { "epoch": 0.58, "learning_rate": 8.022392852852132e-05, "loss": 1.151, "step": 3128 }, { "epoch": 0.58, "learning_rate": 8.01654189026721e-05, "loss": 1.043, "step": 3129 }, { "epoch": 0.58, "learning_rate": 8.01069163441636e-05, "loss": 1.0198, "step": 3130 }, { "epoch": 0.58, "learning_rate": 8.004842087384096e-05, "loss": 1.125, "step": 3131 }, { "epoch": 0.58, "learning_rate": 7.998993251254705e-05, "loss": 1.1284, "step": 3132 }, { "epoch": 0.58, "learning_rate": 7.993145128112205e-05, "loss": 1.0865, "step": 3133 }, { "epoch": 0.58, "learning_rate": 7.987297720040365e-05, "loss": 1.0917, "step": 3134 }, { "epoch": 0.58, "learning_rate": 7.981451029122706e-05, "loss": 1.0703, "step": 3135 }, { "epoch": 0.58, "learning_rate": 7.975605057442476e-05, "loss": 1.1425, "step": 3136 }, { "epoch": 0.58, "learning_rate": 7.96975980708268e-05, "loss": 1.0678, "step": 3137 }, { "epoch": 0.58, "learning_rate": 7.963915280126066e-05, "loss": 1.042, "step": 3138 }, { "epoch": 0.58, "learning_rate": 7.958071478655118e-05, "loss": 1.0609, "step": 3139 }, { "epoch": 0.58, "learning_rate": 7.952228404752067e-05, "loss": 1.1439, "step": 3140 }, { "epoch": 0.58, "learning_rate": 7.946386060498886e-05, "loss": 0.9923, "step": 3141 }, { "epoch": 0.58, "learning_rate": 7.940544447977276e-05, "loss": 0.873, "step": 3142 }, { "epoch": 0.58, "learning_rate": 7.934703569268691e-05, "loss": 1.1417, "step": 3143 }, { "epoch": 0.58, "learning_rate": 7.928863426454318e-05, "loss": 0.9101, "step": 3144 }, { "epoch": 0.58, "learning_rate": 7.923024021615082e-05, "loss": 1.0413, "step": 3145 }, { "epoch": 0.58, "learning_rate": 7.917185356831648e-05, "loss": 1.0664, "step": 3146 }, { "epoch": 0.58, "learning_rate": 7.91134743418441e-05, "loss": 1.0637, "step": 3147 }, { "epoch": 0.58, "learning_rate": 7.905510255753501e-05, "loss": 1.0523, "step": 3148 }, { "epoch": 0.58, "learning_rate": 7.899673823618793e-05, "loss": 1.2108, "step": 3149 }, { "epoch": 0.58, "learning_rate": 7.893838139859891e-05, "loss": 1.2172, "step": 3150 }, { "epoch": 0.58, "learning_rate": 7.888003206556126e-05, "loss": 1.0615, "step": 3151 }, { "epoch": 0.58, "learning_rate": 7.882169025786575e-05, "loss": 1.1811, "step": 3152 }, { "epoch": 0.58, "learning_rate": 7.87633559963003e-05, "loss": 0.9663, "step": 3153 }, { "epoch": 0.58, "learning_rate": 7.870502930165026e-05, "loss": 0.9339, "step": 3154 }, { "epoch": 0.58, "learning_rate": 7.864671019469826e-05, "loss": 0.9625, "step": 3155 }, { "epoch": 0.58, "learning_rate": 7.858839869622422e-05, "loss": 1.1203, "step": 3156 }, { "epoch": 0.58, "learning_rate": 7.853009482700539e-05, "loss": 1.036, "step": 3157 }, { "epoch": 0.58, "learning_rate": 7.847179860781616e-05, "loss": 1.0764, "step": 3158 }, { "epoch": 0.58, "learning_rate": 7.841351005942836e-05, "loss": 0.9742, "step": 3159 }, { "epoch": 0.58, "learning_rate": 7.8355229202611e-05, "loss": 1.1719, "step": 3160 }, { "epoch": 0.58, "learning_rate": 7.829695605813039e-05, "loss": 0.9191, "step": 3161 }, { "epoch": 0.58, "learning_rate": 7.823869064675007e-05, "loss": 1.0872, "step": 3162 }, { "epoch": 0.58, "learning_rate": 7.818043298923086e-05, "loss": 1.1261, "step": 3163 }, { "epoch": 0.58, "learning_rate": 7.81221831063307e-05, "loss": 1.0135, "step": 3164 }, { "epoch": 0.58, "learning_rate": 7.806394101880489e-05, "loss": 1.1118, "step": 3165 }, { "epoch": 0.58, "learning_rate": 7.80057067474059e-05, "loss": 1.0474, "step": 3166 }, { "epoch": 0.58, "learning_rate": 7.794748031288342e-05, "loss": 1.0444, "step": 3167 }, { "epoch": 0.58, "learning_rate": 7.788926173598441e-05, "loss": 0.9553, "step": 3168 }, { "epoch": 0.58, "learning_rate": 7.783105103745286e-05, "loss": 1.0953, "step": 3169 }, { "epoch": 0.58, "learning_rate": 7.777284823803012e-05, "loss": 1.1152, "step": 3170 }, { "epoch": 0.58, "learning_rate": 7.771465335845467e-05, "loss": 1.01, "step": 3171 }, { "epoch": 0.58, "learning_rate": 7.765646641946215e-05, "loss": 1.0378, "step": 3172 }, { "epoch": 0.58, "learning_rate": 7.759828744178539e-05, "loss": 1.0912, "step": 3173 }, { "epoch": 0.58, "learning_rate": 7.754011644615444e-05, "loss": 0.9812, "step": 3174 }, { "epoch": 0.59, "learning_rate": 7.748195345329634e-05, "loss": 1.0919, "step": 3175 }, { "epoch": 0.59, "learning_rate": 7.742379848393543e-05, "loss": 1.0144, "step": 3176 }, { "epoch": 0.59, "learning_rate": 7.736565155879314e-05, "loss": 0.9968, "step": 3177 }, { "epoch": 0.59, "learning_rate": 7.730751269858806e-05, "loss": 0.9268, "step": 3178 }, { "epoch": 0.59, "learning_rate": 7.72493819240359e-05, "loss": 1.0456, "step": 3179 }, { "epoch": 0.59, "learning_rate": 7.719125925584941e-05, "loss": 0.9859, "step": 3180 }, { "epoch": 0.59, "learning_rate": 7.713314471473855e-05, "loss": 1.0556, "step": 3181 }, { "epoch": 0.59, "learning_rate": 7.707503832141034e-05, "loss": 1.1325, "step": 3182 }, { "epoch": 0.59, "learning_rate": 7.701694009656892e-05, "loss": 1.1823, "step": 3183 }, { "epoch": 0.59, "learning_rate": 7.695885006091552e-05, "loss": 1.1268, "step": 3184 }, { "epoch": 0.59, "learning_rate": 7.690076823514844e-05, "loss": 0.9666, "step": 3185 }, { "epoch": 0.59, "learning_rate": 7.6842694639963e-05, "loss": 1.1899, "step": 3186 }, { "epoch": 0.59, "learning_rate": 7.678462929605167e-05, "loss": 1.1341, "step": 3187 }, { "epoch": 0.59, "learning_rate": 7.672657222410395e-05, "loss": 0.9933, "step": 3188 }, { "epoch": 0.59, "learning_rate": 7.666852344480641e-05, "loss": 1.0594, "step": 3189 }, { "epoch": 0.59, "learning_rate": 7.661048297884266e-05, "loss": 1.0309, "step": 3190 }, { "epoch": 0.59, "learning_rate": 7.655245084689326e-05, "loss": 1.0825, "step": 3191 }, { "epoch": 0.59, "learning_rate": 7.649442706963594e-05, "loss": 0.9914, "step": 3192 }, { "epoch": 0.59, "learning_rate": 7.643641166774538e-05, "loss": 1.0763, "step": 3193 }, { "epoch": 0.59, "learning_rate": 7.637840466189326e-05, "loss": 1.2137, "step": 3194 }, { "epoch": 0.59, "learning_rate": 7.632040607274832e-05, "loss": 1.1152, "step": 3195 }, { "epoch": 0.59, "learning_rate": 7.626241592097631e-05, "loss": 1.0769, "step": 3196 }, { "epoch": 0.59, "learning_rate": 7.620443422723985e-05, "loss": 1.1048, "step": 3197 }, { "epoch": 0.59, "learning_rate": 7.614646101219868e-05, "loss": 1.1338, "step": 3198 }, { "epoch": 0.59, "learning_rate": 7.608849629650947e-05, "loss": 1.0576, "step": 3199 }, { "epoch": 0.59, "learning_rate": 7.603054010082588e-05, "loss": 1.1562, "step": 3200 }, { "epoch": 0.59, "learning_rate": 7.597259244579853e-05, "loss": 1.1091, "step": 3201 }, { "epoch": 0.59, "learning_rate": 7.591465335207492e-05, "loss": 1.0862, "step": 3202 }, { "epoch": 0.59, "learning_rate": 7.585672284029962e-05, "loss": 1.0755, "step": 3203 }, { "epoch": 0.59, "learning_rate": 7.579880093111407e-05, "loss": 1.0118, "step": 3204 }, { "epoch": 0.59, "learning_rate": 7.574088764515665e-05, "loss": 1.0769, "step": 3205 }, { "epoch": 0.59, "learning_rate": 7.568298300306271e-05, "loss": 1.0703, "step": 3206 }, { "epoch": 0.59, "learning_rate": 7.562508702546452e-05, "loss": 1.0098, "step": 3207 }, { "epoch": 0.59, "learning_rate": 7.556719973299115e-05, "loss": 1.0671, "step": 3208 }, { "epoch": 0.59, "learning_rate": 7.550932114626869e-05, "loss": 1.1586, "step": 3209 }, { "epoch": 0.59, "learning_rate": 7.54514512859201e-05, "loss": 1.0645, "step": 3210 }, { "epoch": 0.59, "learning_rate": 7.539359017256522e-05, "loss": 1.0109, "step": 3211 }, { "epoch": 0.59, "learning_rate": 7.533573782682084e-05, "loss": 1.1552, "step": 3212 }, { "epoch": 0.59, "learning_rate": 7.527789426930046e-05, "loss": 1.0521, "step": 3213 }, { "epoch": 0.59, "learning_rate": 7.522005952061462e-05, "loss": 1.0229, "step": 3214 }, { "epoch": 0.59, "learning_rate": 7.516223360137065e-05, "loss": 1.0248, "step": 3215 }, { "epoch": 0.59, "learning_rate": 7.510441653217272e-05, "loss": 1.1504, "step": 3216 }, { "epoch": 0.59, "learning_rate": 7.504660833362187e-05, "loss": 0.99, "step": 3217 }, { "epoch": 0.59, "learning_rate": 7.498880902631604e-05, "loss": 0.9824, "step": 3218 }, { "epoch": 0.59, "learning_rate": 7.493101863084985e-05, "loss": 1.1499, "step": 3219 }, { "epoch": 0.59, "learning_rate": 7.487323716781485e-05, "loss": 1.1241, "step": 3220 }, { "epoch": 0.59, "learning_rate": 7.48154646577994e-05, "loss": 1.0919, "step": 3221 }, { "epoch": 0.59, "learning_rate": 7.475770112138866e-05, "loss": 1.0238, "step": 3222 }, { "epoch": 0.59, "learning_rate": 7.469994657916463e-05, "loss": 0.9617, "step": 3223 }, { "epoch": 0.59, "learning_rate": 7.464220105170603e-05, "loss": 0.9937, "step": 3224 }, { "epoch": 0.59, "learning_rate": 7.458446455958839e-05, "loss": 1.0941, "step": 3225 }, { "epoch": 0.59, "learning_rate": 7.452673712338404e-05, "loss": 1.0214, "step": 3226 }, { "epoch": 0.59, "learning_rate": 7.44690187636621e-05, "loss": 1.0803, "step": 3227 }, { "epoch": 0.59, "learning_rate": 7.441130950098841e-05, "loss": 1.0447, "step": 3228 }, { "epoch": 0.6, "learning_rate": 7.435360935592563e-05, "loss": 0.9425, "step": 3229 }, { "epoch": 0.6, "learning_rate": 7.429591834903314e-05, "loss": 1.0145, "step": 3230 }, { "epoch": 0.6, "learning_rate": 7.423823650086699e-05, "loss": 1.0266, "step": 3231 }, { "epoch": 0.6, "learning_rate": 7.418056383198007e-05, "loss": 1.122, "step": 3232 }, { "epoch": 0.6, "learning_rate": 7.412290036292197e-05, "loss": 0.9646, "step": 3233 }, { "epoch": 0.6, "learning_rate": 7.406524611423903e-05, "loss": 1.2385, "step": 3234 }, { "epoch": 0.6, "learning_rate": 7.40076011064742e-05, "loss": 1.0606, "step": 3235 }, { "epoch": 0.6, "learning_rate": 7.394996536016729e-05, "loss": 1.1265, "step": 3236 }, { "epoch": 0.6, "learning_rate": 7.389233889585465e-05, "loss": 1.0976, "step": 3237 }, { "epoch": 0.6, "learning_rate": 7.383472173406944e-05, "loss": 1.0727, "step": 3238 }, { "epoch": 0.6, "learning_rate": 7.377711389534145e-05, "loss": 0.9407, "step": 3239 }, { "epoch": 0.6, "learning_rate": 7.371951540019721e-05, "loss": 0.9825, "step": 3240 }, { "epoch": 0.6, "learning_rate": 7.366192626915981e-05, "loss": 1.0984, "step": 3241 }, { "epoch": 0.6, "learning_rate": 7.360434652274913e-05, "loss": 1.04, "step": 3242 }, { "epoch": 0.6, "learning_rate": 7.35467761814816e-05, "loss": 1.16, "step": 3243 }, { "epoch": 0.6, "learning_rate": 7.348921526587034e-05, "loss": 1.0811, "step": 3244 }, { "epoch": 0.6, "learning_rate": 7.343166379642517e-05, "loss": 1.0589, "step": 3245 }, { "epoch": 0.6, "learning_rate": 7.337412179365243e-05, "loss": 0.9829, "step": 3246 }, { "epoch": 0.6, "learning_rate": 7.331658927805516e-05, "loss": 1.2062, "step": 3247 }, { "epoch": 0.6, "learning_rate": 7.325906627013304e-05, "loss": 1.0018, "step": 3248 }, { "epoch": 0.6, "learning_rate": 7.32015527903823e-05, "loss": 0.8719, "step": 3249 }, { "epoch": 0.6, "learning_rate": 7.314404885929578e-05, "loss": 1.1392, "step": 3250 }, { "epoch": 0.6, "learning_rate": 7.3086554497363e-05, "loss": 0.9158, "step": 3251 }, { "epoch": 0.6, "learning_rate": 7.302906972506995e-05, "loss": 1.0797, "step": 3252 }, { "epoch": 0.6, "learning_rate": 7.29715945628993e-05, "loss": 1.08, "step": 3253 }, { "epoch": 0.6, "learning_rate": 7.291412903133026e-05, "loss": 1.0834, "step": 3254 }, { "epoch": 0.6, "learning_rate": 7.28566731508386e-05, "loss": 1.1424, "step": 3255 }, { "epoch": 0.6, "learning_rate": 7.279922694189666e-05, "loss": 1.1088, "step": 3256 }, { "epoch": 0.6, "learning_rate": 7.27417904249733e-05, "loss": 1.073, "step": 3257 }, { "epoch": 0.6, "learning_rate": 7.268436362053403e-05, "loss": 1.0728, "step": 3258 }, { "epoch": 0.6, "learning_rate": 7.262694654904077e-05, "loss": 0.9713, "step": 3259 }, { "epoch": 0.6, "learning_rate": 7.256953923095209e-05, "loss": 1.1026, "step": 3260 }, { "epoch": 0.6, "learning_rate": 7.251214168672298e-05, "loss": 1.028, "step": 3261 }, { "epoch": 0.6, "learning_rate": 7.245475393680499e-05, "loss": 1.1023, "step": 3262 }, { "epoch": 0.6, "learning_rate": 7.239737600164618e-05, "loss": 1.0245, "step": 3263 }, { "epoch": 0.6, "learning_rate": 7.234000790169114e-05, "loss": 0.9849, "step": 3264 }, { "epoch": 0.6, "learning_rate": 7.228264965738093e-05, "loss": 1.0377, "step": 3265 }, { "epoch": 0.6, "learning_rate": 7.222530128915313e-05, "loss": 1.1025, "step": 3266 }, { "epoch": 0.6, "learning_rate": 7.216796281744172e-05, "loss": 1.0672, "step": 3267 }, { "epoch": 0.6, "learning_rate": 7.211063426267721e-05, "loss": 1.0954, "step": 3268 }, { "epoch": 0.6, "learning_rate": 7.205331564528658e-05, "loss": 0.9527, "step": 3269 }, { "epoch": 0.6, "learning_rate": 7.199600698569327e-05, "loss": 1.0562, "step": 3270 }, { "epoch": 0.6, "learning_rate": 7.193870830431715e-05, "loss": 1.0602, "step": 3271 }, { "epoch": 0.6, "learning_rate": 7.188141962157461e-05, "loss": 1.0909, "step": 3272 }, { "epoch": 0.6, "learning_rate": 7.18241409578783e-05, "loss": 1.1358, "step": 3273 }, { "epoch": 0.6, "learning_rate": 7.17668723336375e-05, "loss": 0.9706, "step": 3274 }, { "epoch": 0.6, "learning_rate": 7.170961376925779e-05, "loss": 1.0028, "step": 3275 }, { "epoch": 0.6, "learning_rate": 7.16523652851412e-05, "loss": 1.1249, "step": 3276 }, { "epoch": 0.6, "learning_rate": 7.159512690168622e-05, "loss": 1.1601, "step": 3277 }, { "epoch": 0.6, "learning_rate": 7.153789863928769e-05, "loss": 1.0714, "step": 3278 }, { "epoch": 0.6, "learning_rate": 7.148068051833676e-05, "loss": 1.2068, "step": 3279 }, { "epoch": 0.6, "learning_rate": 7.142347255922112e-05, "loss": 1.2019, "step": 3280 }, { "epoch": 0.6, "learning_rate": 7.136627478232476e-05, "loss": 1.1644, "step": 3281 }, { "epoch": 0.6, "learning_rate": 7.130908720802805e-05, "loss": 1.0604, "step": 3282 }, { "epoch": 0.6, "learning_rate": 7.125190985670777e-05, "loss": 1.0324, "step": 3283 }, { "epoch": 0.61, "learning_rate": 7.119474274873693e-05, "loss": 1.1188, "step": 3284 }, { "epoch": 0.61, "learning_rate": 7.113758590448502e-05, "loss": 1.0361, "step": 3285 }, { "epoch": 0.61, "learning_rate": 7.108043934431785e-05, "loss": 1.1456, "step": 3286 }, { "epoch": 0.61, "learning_rate": 7.102330308859753e-05, "loss": 1.0428, "step": 3287 }, { "epoch": 0.61, "learning_rate": 7.09661771576825e-05, "loss": 0.8901, "step": 3288 }, { "epoch": 0.61, "learning_rate": 7.090906157192758e-05, "loss": 1.1055, "step": 3289 }, { "epoch": 0.61, "learning_rate": 7.085195635168377e-05, "loss": 1.1463, "step": 3290 }, { "epoch": 0.61, "learning_rate": 7.079486151729854e-05, "loss": 1.0458, "step": 3291 }, { "epoch": 0.61, "learning_rate": 7.073777708911556e-05, "loss": 1.2224, "step": 3292 }, { "epoch": 0.61, "learning_rate": 7.068070308747479e-05, "loss": 1.2456, "step": 3293 }, { "epoch": 0.61, "learning_rate": 7.062363953271259e-05, "loss": 1.057, "step": 3294 }, { "epoch": 0.61, "learning_rate": 7.056658644516138e-05, "loss": 1.1197, "step": 3295 }, { "epoch": 0.61, "learning_rate": 7.050954384515005e-05, "loss": 0.8322, "step": 3296 }, { "epoch": 0.61, "learning_rate": 7.04525117530037e-05, "loss": 1.1374, "step": 3297 }, { "epoch": 0.61, "learning_rate": 7.03954901890436e-05, "loss": 1.1204, "step": 3298 }, { "epoch": 0.61, "learning_rate": 7.033847917358741e-05, "loss": 1.0501, "step": 3299 }, { "epoch": 0.61, "learning_rate": 7.028147872694897e-05, "loss": 1.1111, "step": 3300 }, { "epoch": 0.61, "learning_rate": 7.022448886943824e-05, "loss": 1.0899, "step": 3301 }, { "epoch": 0.61, "learning_rate": 7.016750962136158e-05, "loss": 1.0951, "step": 3302 }, { "epoch": 0.61, "learning_rate": 7.011054100302148e-05, "loss": 1.1859, "step": 3303 }, { "epoch": 0.61, "learning_rate": 7.005358303471667e-05, "loss": 1.0831, "step": 3304 }, { "epoch": 0.61, "learning_rate": 6.999663573674211e-05, "loss": 1.0105, "step": 3305 }, { "epoch": 0.61, "learning_rate": 6.993969912938887e-05, "loss": 1.1107, "step": 3306 }, { "epoch": 0.61, "learning_rate": 6.988277323294426e-05, "loss": 1.0756, "step": 3307 }, { "epoch": 0.61, "learning_rate": 6.982585806769181e-05, "loss": 1.0788, "step": 3308 }, { "epoch": 0.61, "learning_rate": 6.97689536539112e-05, "loss": 0.8683, "step": 3309 }, { "epoch": 0.61, "learning_rate": 6.971206001187824e-05, "loss": 0.9765, "step": 3310 }, { "epoch": 0.61, "learning_rate": 6.965517716186502e-05, "loss": 1.0245, "step": 3311 }, { "epoch": 0.61, "learning_rate": 6.95983051241396e-05, "loss": 1.0791, "step": 3312 }, { "epoch": 0.61, "learning_rate": 6.954144391896631e-05, "loss": 1.0522, "step": 3313 }, { "epoch": 0.61, "learning_rate": 6.948459356660563e-05, "loss": 1.063, "step": 3314 }, { "epoch": 0.61, "learning_rate": 6.942775408731413e-05, "loss": 0.9754, "step": 3315 }, { "epoch": 0.61, "learning_rate": 6.937092550134454e-05, "loss": 1.0771, "step": 3316 }, { "epoch": 0.61, "learning_rate": 6.931410782894562e-05, "loss": 1.0768, "step": 3317 }, { "epoch": 0.61, "learning_rate": 6.925730109036236e-05, "loss": 1.0612, "step": 3318 }, { "epoch": 0.61, "learning_rate": 6.920050530583578e-05, "loss": 1.2221, "step": 3319 }, { "epoch": 0.61, "learning_rate": 6.914372049560304e-05, "loss": 1.1532, "step": 3320 }, { "epoch": 0.61, "learning_rate": 6.908694667989735e-05, "loss": 0.9648, "step": 3321 }, { "epoch": 0.61, "learning_rate": 6.903018387894805e-05, "loss": 1.0437, "step": 3322 }, { "epoch": 0.61, "learning_rate": 6.897343211298047e-05, "loss": 1.0535, "step": 3323 }, { "epoch": 0.61, "learning_rate": 6.89166914022161e-05, "loss": 1.0438, "step": 3324 }, { "epoch": 0.61, "learning_rate": 6.885996176687244e-05, "loss": 1.0388, "step": 3325 }, { "epoch": 0.61, "learning_rate": 6.880324322716306e-05, "loss": 1.0791, "step": 3326 }, { "epoch": 0.61, "learning_rate": 6.874653580329764e-05, "loss": 1.0235, "step": 3327 }, { "epoch": 0.61, "learning_rate": 6.868983951548171e-05, "loss": 1.0271, "step": 3328 }, { "epoch": 0.61, "learning_rate": 6.863315438391705e-05, "loss": 0.9745, "step": 3329 }, { "epoch": 0.61, "learning_rate": 6.857648042880133e-05, "loss": 1.0911, "step": 3330 }, { "epoch": 0.61, "learning_rate": 6.85198176703283e-05, "loss": 1.0786, "step": 3331 }, { "epoch": 0.61, "learning_rate": 6.846316612868765e-05, "loss": 1.108, "step": 3332 }, { "epoch": 0.61, "learning_rate": 6.840652582406525e-05, "loss": 1.0025, "step": 3333 }, { "epoch": 0.61, "learning_rate": 6.83498967766427e-05, "loss": 1.0328, "step": 3334 }, { "epoch": 0.61, "learning_rate": 6.829327900659776e-05, "loss": 0.9122, "step": 3335 }, { "epoch": 0.61, "learning_rate": 6.823667253410417e-05, "loss": 1.031, "step": 3336 }, { "epoch": 0.61, "learning_rate": 6.81800773793316e-05, "loss": 1.1116, "step": 3337 }, { "epoch": 0.62, "learning_rate": 6.812349356244574e-05, "loss": 0.9476, "step": 3338 }, { "epoch": 0.62, "learning_rate": 6.806692110360812e-05, "loss": 1.0599, "step": 3339 }, { "epoch": 0.62, "learning_rate": 6.801036002297634e-05, "loss": 1.023, "step": 3340 }, { "epoch": 0.62, "learning_rate": 6.795381034070394e-05, "loss": 1.06, "step": 3341 }, { "epoch": 0.62, "learning_rate": 6.789727207694033e-05, "loss": 1.0454, "step": 3342 }, { "epoch": 0.62, "learning_rate": 6.78407452518309e-05, "loss": 0.9687, "step": 3343 }, { "epoch": 0.62, "learning_rate": 6.778422988551701e-05, "loss": 1.0091, "step": 3344 }, { "epoch": 0.62, "learning_rate": 6.772772599813577e-05, "loss": 1.0273, "step": 3345 }, { "epoch": 0.62, "learning_rate": 6.767123360982038e-05, "loss": 1.0343, "step": 3346 }, { "epoch": 0.62, "learning_rate": 6.761475274069986e-05, "loss": 1.0136, "step": 3347 }, { "epoch": 0.62, "learning_rate": 6.755828341089917e-05, "loss": 1.0556, "step": 3348 }, { "epoch": 0.62, "learning_rate": 6.750182564053911e-05, "loss": 1.0325, "step": 3349 }, { "epoch": 0.62, "learning_rate": 6.744537944973635e-05, "loss": 1.1886, "step": 3350 }, { "epoch": 0.62, "learning_rate": 6.738894485860348e-05, "loss": 1.005, "step": 3351 }, { "epoch": 0.62, "learning_rate": 6.733252188724892e-05, "loss": 0.9578, "step": 3352 }, { "epoch": 0.62, "learning_rate": 6.727611055577703e-05, "loss": 1.0764, "step": 3353 }, { "epoch": 0.62, "learning_rate": 6.721971088428789e-05, "loss": 1.0045, "step": 3354 }, { "epoch": 0.62, "learning_rate": 6.716332289287759e-05, "loss": 1.1417, "step": 3355 }, { "epoch": 0.62, "learning_rate": 6.710694660163787e-05, "loss": 1.1055, "step": 3356 }, { "epoch": 0.62, "learning_rate": 6.705058203065644e-05, "loss": 1.0566, "step": 3357 }, { "epoch": 0.62, "learning_rate": 6.699422920001677e-05, "loss": 1.0951, "step": 3358 }, { "epoch": 0.62, "learning_rate": 6.693788812979819e-05, "loss": 1.098, "step": 3359 }, { "epoch": 0.62, "learning_rate": 6.688155884007586e-05, "loss": 1.0005, "step": 3360 }, { "epoch": 0.62, "learning_rate": 6.682524135092058e-05, "loss": 1.1721, "step": 3361 }, { "epoch": 0.62, "learning_rate": 6.676893568239915e-05, "loss": 0.9804, "step": 3362 }, { "epoch": 0.62, "learning_rate": 6.671264185457407e-05, "loss": 1.1356, "step": 3363 }, { "epoch": 0.62, "learning_rate": 6.665635988750358e-05, "loss": 0.9285, "step": 3364 }, { "epoch": 0.62, "learning_rate": 6.66000898012418e-05, "loss": 1.0069, "step": 3365 }, { "epoch": 0.62, "learning_rate": 6.654383161583855e-05, "loss": 0.9576, "step": 3366 }, { "epoch": 0.62, "learning_rate": 6.648758535133934e-05, "loss": 1.074, "step": 3367 }, { "epoch": 0.62, "learning_rate": 6.643135102778555e-05, "loss": 1.1022, "step": 3368 }, { "epoch": 0.62, "learning_rate": 6.637512866521427e-05, "loss": 1.0849, "step": 3369 }, { "epoch": 0.62, "learning_rate": 6.631891828365833e-05, "loss": 1.0683, "step": 3370 }, { "epoch": 0.62, "learning_rate": 6.626271990314626e-05, "loss": 0.95, "step": 3371 }, { "epoch": 0.62, "learning_rate": 6.620653354370235e-05, "loss": 1.224, "step": 3372 }, { "epoch": 0.62, "learning_rate": 6.615035922534657e-05, "loss": 1.1282, "step": 3373 }, { "epoch": 0.62, "learning_rate": 6.609419696809462e-05, "loss": 1.0328, "step": 3374 }, { "epoch": 0.62, "learning_rate": 6.603804679195792e-05, "loss": 1.0329, "step": 3375 }, { "epoch": 0.62, "learning_rate": 6.598190871694356e-05, "loss": 1.1915, "step": 3376 }, { "epoch": 0.62, "learning_rate": 6.592578276305436e-05, "loss": 0.9226, "step": 3377 }, { "epoch": 0.62, "learning_rate": 6.586966895028876e-05, "loss": 1.0131, "step": 3378 }, { "epoch": 0.62, "learning_rate": 6.581356729864087e-05, "loss": 1.0751, "step": 3379 }, { "epoch": 0.62, "learning_rate": 6.575747782810053e-05, "loss": 1.2305, "step": 3380 }, { "epoch": 0.62, "learning_rate": 6.570140055865321e-05, "loss": 1.0623, "step": 3381 }, { "epoch": 0.62, "learning_rate": 6.564533551028005e-05, "loss": 1.1501, "step": 3382 }, { "epoch": 0.62, "learning_rate": 6.558928270295779e-05, "loss": 1.0603, "step": 3383 }, { "epoch": 0.62, "learning_rate": 6.553324215665883e-05, "loss": 1.1495, "step": 3384 }, { "epoch": 0.62, "learning_rate": 6.547721389135121e-05, "loss": 1.0949, "step": 3385 }, { "epoch": 0.62, "learning_rate": 6.542119792699861e-05, "loss": 1.1272, "step": 3386 }, { "epoch": 0.62, "learning_rate": 6.536519428356029e-05, "loss": 1.1546, "step": 3387 }, { "epoch": 0.62, "learning_rate": 6.530920298099115e-05, "loss": 1.0897, "step": 3388 }, { "epoch": 0.62, "learning_rate": 6.525322403924166e-05, "loss": 1.0005, "step": 3389 }, { "epoch": 0.62, "learning_rate": 6.519725747825795e-05, "loss": 0.9719, "step": 3390 }, { "epoch": 0.62, "learning_rate": 6.514130331798162e-05, "loss": 0.9895, "step": 3391 }, { "epoch": 0.63, "learning_rate": 6.508536157834996e-05, "loss": 0.964, "step": 3392 }, { "epoch": 0.63, "learning_rate": 6.502943227929586e-05, "loss": 0.9848, "step": 3393 }, { "epoch": 0.63, "learning_rate": 6.497351544074761e-05, "loss": 1.0791, "step": 3394 }, { "epoch": 0.63, "learning_rate": 6.491761108262923e-05, "loss": 1.1191, "step": 3395 }, { "epoch": 0.63, "learning_rate": 6.486171922486024e-05, "loss": 1.0247, "step": 3396 }, { "epoch": 0.63, "learning_rate": 6.480583988735565e-05, "loss": 1.0059, "step": 3397 }, { "epoch": 0.63, "learning_rate": 6.47499730900261e-05, "loss": 0.911, "step": 3398 }, { "epoch": 0.63, "learning_rate": 6.46941188527777e-05, "loss": 1.1389, "step": 3399 }, { "epoch": 0.63, "learning_rate": 6.46382771955121e-05, "loss": 0.9884, "step": 3400 }, { "epoch": 0.63, "learning_rate": 6.458244813812646e-05, "loss": 1.0169, "step": 3401 }, { "epoch": 0.63, "learning_rate": 6.45266317005135e-05, "loss": 1.1192, "step": 3402 }, { "epoch": 0.63, "learning_rate": 6.447082790256134e-05, "loss": 1.0254, "step": 3403 }, { "epoch": 0.63, "learning_rate": 6.44150367641537e-05, "loss": 1.012, "step": 3404 }, { "epoch": 0.63, "learning_rate": 6.435925830516973e-05, "loss": 1.1356, "step": 3405 }, { "epoch": 0.63, "learning_rate": 6.43034925454841e-05, "loss": 1.0235, "step": 3406 }, { "epoch": 0.63, "learning_rate": 6.424773950496692e-05, "loss": 1.1922, "step": 3407 }, { "epoch": 0.63, "learning_rate": 6.419199920348381e-05, "loss": 1.1175, "step": 3408 }, { "epoch": 0.63, "learning_rate": 6.413627166089579e-05, "loss": 1.047, "step": 3409 }, { "epoch": 0.63, "learning_rate": 6.40805568970594e-05, "loss": 1.1171, "step": 3410 }, { "epoch": 0.63, "learning_rate": 6.402485493182655e-05, "loss": 1.0446, "step": 3411 }, { "epoch": 0.63, "learning_rate": 6.396916578504467e-05, "loss": 0.9533, "step": 3412 }, { "epoch": 0.63, "learning_rate": 6.391348947655657e-05, "loss": 1.1548, "step": 3413 }, { "epoch": 0.63, "learning_rate": 6.385782602620056e-05, "loss": 0.8914, "step": 3414 }, { "epoch": 0.63, "learning_rate": 6.380217545381024e-05, "loss": 1.1058, "step": 3415 }, { "epoch": 0.63, "learning_rate": 6.374653777921471e-05, "loss": 1.0598, "step": 3416 }, { "epoch": 0.63, "learning_rate": 6.369091302223847e-05, "loss": 1.0398, "step": 3417 }, { "epoch": 0.63, "learning_rate": 6.363530120270141e-05, "loss": 1.0182, "step": 3418 }, { "epoch": 0.63, "learning_rate": 6.357970234041877e-05, "loss": 1.0212, "step": 3419 }, { "epoch": 0.63, "learning_rate": 6.352411645520126e-05, "loss": 1.0537, "step": 3420 }, { "epoch": 0.63, "learning_rate": 6.346854356685488e-05, "loss": 1.0713, "step": 3421 }, { "epoch": 0.63, "learning_rate": 6.341298369518103e-05, "loss": 0.9996, "step": 3422 }, { "epoch": 0.63, "learning_rate": 6.335743685997648e-05, "loss": 1.0466, "step": 3423 }, { "epoch": 0.63, "learning_rate": 6.330190308103336e-05, "loss": 1.0657, "step": 3424 }, { "epoch": 0.63, "learning_rate": 6.324638237813912e-05, "loss": 1.0552, "step": 3425 }, { "epoch": 0.63, "learning_rate": 6.319087477107663e-05, "loss": 1.0154, "step": 3426 }, { "epoch": 0.63, "learning_rate": 6.313538027962394e-05, "loss": 1.0338, "step": 3427 }, { "epoch": 0.63, "learning_rate": 6.307989892355455e-05, "loss": 1.0245, "step": 3428 }, { "epoch": 0.63, "learning_rate": 6.302443072263728e-05, "loss": 1.137, "step": 3429 }, { "epoch": 0.63, "learning_rate": 6.296897569663621e-05, "loss": 1.058, "step": 3430 }, { "epoch": 0.63, "learning_rate": 6.291353386531074e-05, "loss": 0.9731, "step": 3431 }, { "epoch": 0.63, "learning_rate": 6.285810524841563e-05, "loss": 1.0548, "step": 3432 }, { "epoch": 0.63, "learning_rate": 6.280268986570079e-05, "loss": 0.9733, "step": 3433 }, { "epoch": 0.63, "learning_rate": 6.274728773691154e-05, "loss": 1.0127, "step": 3434 }, { "epoch": 0.63, "learning_rate": 6.269189888178843e-05, "loss": 1.1619, "step": 3435 }, { "epoch": 0.63, "learning_rate": 6.263652332006734e-05, "loss": 0.9655, "step": 3436 }, { "epoch": 0.63, "learning_rate": 6.258116107147933e-05, "loss": 0.8997, "step": 3437 }, { "epoch": 0.63, "learning_rate": 6.252581215575071e-05, "loss": 1.1045, "step": 3438 }, { "epoch": 0.63, "learning_rate": 6.247047659260311e-05, "loss": 1.0026, "step": 3439 }, { "epoch": 0.63, "learning_rate": 6.241515440175338e-05, "loss": 1.0963, "step": 3440 }, { "epoch": 0.63, "learning_rate": 6.235984560291357e-05, "loss": 0.9906, "step": 3441 }, { "epoch": 0.63, "learning_rate": 6.230455021579102e-05, "loss": 1.0235, "step": 3442 }, { "epoch": 0.63, "learning_rate": 6.224926826008828e-05, "loss": 1.021, "step": 3443 }, { "epoch": 0.63, "learning_rate": 6.2193999755503e-05, "loss": 1.0702, "step": 3444 }, { "epoch": 0.63, "learning_rate": 6.213874472172815e-05, "loss": 1.0467, "step": 3445 }, { "epoch": 0.64, "learning_rate": 6.208350317845193e-05, "loss": 0.9736, "step": 3446 }, { "epoch": 0.64, "learning_rate": 6.202827514535765e-05, "loss": 1.0184, "step": 3447 }, { "epoch": 0.64, "learning_rate": 6.197306064212387e-05, "loss": 1.0381, "step": 3448 }, { "epoch": 0.64, "learning_rate": 6.191785968842422e-05, "loss": 1.0595, "step": 3449 }, { "epoch": 0.64, "learning_rate": 6.186267230392762e-05, "loss": 1.0411, "step": 3450 }, { "epoch": 0.64, "learning_rate": 6.180749850829812e-05, "loss": 1.0791, "step": 3451 }, { "epoch": 0.64, "learning_rate": 6.175233832119489e-05, "loss": 1.0251, "step": 3452 }, { "epoch": 0.64, "learning_rate": 6.169719176227234e-05, "loss": 1.1214, "step": 3453 }, { "epoch": 0.64, "learning_rate": 6.164205885117993e-05, "loss": 1.1554, "step": 3454 }, { "epoch": 0.64, "learning_rate": 6.158693960756228e-05, "loss": 1.0583, "step": 3455 }, { "epoch": 0.64, "learning_rate": 6.153183405105915e-05, "loss": 0.962, "step": 3456 }, { "epoch": 0.64, "learning_rate": 6.147674220130547e-05, "loss": 1.1157, "step": 3457 }, { "epoch": 0.64, "learning_rate": 6.142166407793119e-05, "loss": 1.107, "step": 3458 }, { "epoch": 0.64, "learning_rate": 6.136659970056149e-05, "loss": 0.9871, "step": 3459 }, { "epoch": 0.64, "learning_rate": 6.131154908881648e-05, "loss": 1.0503, "step": 3460 }, { "epoch": 0.64, "learning_rate": 6.125651226231155e-05, "loss": 1.0059, "step": 3461 }, { "epoch": 0.64, "learning_rate": 6.120148924065707e-05, "loss": 0.9635, "step": 3462 }, { "epoch": 0.64, "learning_rate": 6.114648004345853e-05, "loss": 1.1859, "step": 3463 }, { "epoch": 0.64, "learning_rate": 6.109148469031646e-05, "loss": 1.0064, "step": 3464 }, { "epoch": 0.64, "learning_rate": 6.103650320082655e-05, "loss": 1.0408, "step": 3465 }, { "epoch": 0.64, "learning_rate": 6.098153559457935e-05, "loss": 0.9965, "step": 3466 }, { "epoch": 0.64, "learning_rate": 6.092658189116068e-05, "loss": 0.9242, "step": 3467 }, { "epoch": 0.64, "learning_rate": 6.0871642110151305e-05, "loss": 0.9789, "step": 3468 }, { "epoch": 0.64, "learning_rate": 6.081671627112704e-05, "loss": 1.0913, "step": 3469 }, { "epoch": 0.64, "learning_rate": 6.0761804393658775e-05, "loss": 1.0653, "step": 3470 }, { "epoch": 0.64, "learning_rate": 6.070690649731231e-05, "loss": 1.1205, "step": 3471 }, { "epoch": 0.64, "learning_rate": 6.0652022601648575e-05, "loss": 1.0456, "step": 3472 }, { "epoch": 0.64, "learning_rate": 6.059715272622346e-05, "loss": 1.0305, "step": 3473 }, { "epoch": 0.64, "learning_rate": 6.0542296890587904e-05, "loss": 0.8801, "step": 3474 }, { "epoch": 0.64, "learning_rate": 6.0487455114287794e-05, "loss": 1.126, "step": 3475 }, { "epoch": 0.64, "learning_rate": 6.043262741686408e-05, "loss": 1.079, "step": 3476 }, { "epoch": 0.64, "learning_rate": 6.037781381785256e-05, "loss": 1.0989, "step": 3477 }, { "epoch": 0.64, "learning_rate": 6.032301433678411e-05, "loss": 1.1157, "step": 3478 }, { "epoch": 0.64, "learning_rate": 6.026822899318458e-05, "loss": 1.1857, "step": 3479 }, { "epoch": 0.64, "learning_rate": 6.021345780657477e-05, "loss": 1.0825, "step": 3480 }, { "epoch": 0.64, "learning_rate": 6.015870079647044e-05, "loss": 1.0835, "step": 3481 }, { "epoch": 0.64, "learning_rate": 6.01039579823822e-05, "loss": 0.9624, "step": 3482 }, { "epoch": 0.64, "learning_rate": 6.0049229383815755e-05, "loss": 0.9843, "step": 3483 }, { "epoch": 0.64, "learning_rate": 5.9994515020271644e-05, "loss": 0.986, "step": 3484 }, { "epoch": 0.64, "learning_rate": 5.993981491124541e-05, "loss": 1.1809, "step": 3485 }, { "epoch": 0.64, "learning_rate": 5.9885129076227456e-05, "loss": 1.0276, "step": 3486 }, { "epoch": 0.64, "learning_rate": 5.983045753470308e-05, "loss": 1.0632, "step": 3487 }, { "epoch": 0.64, "learning_rate": 5.977580030615254e-05, "loss": 1.1144, "step": 3488 }, { "epoch": 0.64, "learning_rate": 5.9721157410050976e-05, "loss": 1.0267, "step": 3489 }, { "epoch": 0.64, "learning_rate": 5.966652886586843e-05, "loss": 1.0009, "step": 3490 }, { "epoch": 0.64, "learning_rate": 5.961191469306984e-05, "loss": 1.0395, "step": 3491 }, { "epoch": 0.64, "learning_rate": 5.9557314911115006e-05, "loss": 0.9475, "step": 3492 }, { "epoch": 0.64, "learning_rate": 5.9502729539458535e-05, "loss": 1.0748, "step": 3493 }, { "epoch": 0.64, "learning_rate": 5.944815859755002e-05, "loss": 1.046, "step": 3494 }, { "epoch": 0.64, "learning_rate": 5.9393602104833824e-05, "loss": 1.0965, "step": 3495 }, { "epoch": 0.64, "learning_rate": 5.933906008074923e-05, "loss": 1.0057, "step": 3496 }, { "epoch": 0.64, "learning_rate": 5.928453254473034e-05, "loss": 0.9096, "step": 3497 }, { "epoch": 0.64, "learning_rate": 5.9230019516206034e-05, "loss": 1.1237, "step": 3498 }, { "epoch": 0.64, "learning_rate": 5.917552101460008e-05, "loss": 1.0928, "step": 3499 }, { "epoch": 0.64, "learning_rate": 5.912103705933107e-05, "loss": 0.9582, "step": 3500 }, { "epoch": 0.65, "learning_rate": 5.906656766981242e-05, "loss": 1.1097, "step": 3501 }, { "epoch": 0.65, "learning_rate": 5.901211286545234e-05, "loss": 1.0965, "step": 3502 }, { "epoch": 0.65, "learning_rate": 5.895767266565386e-05, "loss": 0.8389, "step": 3503 }, { "epoch": 0.65, "learning_rate": 5.8903247089814736e-05, "loss": 1.145, "step": 3504 }, { "epoch": 0.65, "learning_rate": 5.884883615732759e-05, "loss": 1.0364, "step": 3505 }, { "epoch": 0.65, "learning_rate": 5.879443988757982e-05, "loss": 1.0596, "step": 3506 }, { "epoch": 0.65, "learning_rate": 5.874005829995357e-05, "loss": 1.0864, "step": 3507 }, { "epoch": 0.65, "learning_rate": 5.868569141382581e-05, "loss": 0.9602, "step": 3508 }, { "epoch": 0.65, "learning_rate": 5.863133924856814e-05, "loss": 0.9442, "step": 3509 }, { "epoch": 0.65, "learning_rate": 5.857700182354704e-05, "loss": 1.0791, "step": 3510 }, { "epoch": 0.65, "learning_rate": 5.852267915812373e-05, "loss": 1.0255, "step": 3511 }, { "epoch": 0.65, "learning_rate": 5.8468371271654096e-05, "loss": 1.0875, "step": 3512 }, { "epoch": 0.65, "learning_rate": 5.841407818348885e-05, "loss": 1.065, "step": 3513 }, { "epoch": 0.65, "learning_rate": 5.8359799912973365e-05, "loss": 1.0109, "step": 3514 }, { "epoch": 0.65, "learning_rate": 5.8305536479447765e-05, "loss": 1.134, "step": 3515 }, { "epoch": 0.65, "learning_rate": 5.825128790224681e-05, "loss": 1.0566, "step": 3516 }, { "epoch": 0.65, "learning_rate": 5.819705420070012e-05, "loss": 0.9655, "step": 3517 }, { "epoch": 0.65, "learning_rate": 5.814283539413185e-05, "loss": 0.9671, "step": 3518 }, { "epoch": 0.65, "learning_rate": 5.8088631501861034e-05, "loss": 1.1001, "step": 3519 }, { "epoch": 0.65, "learning_rate": 5.803444254320115e-05, "loss": 1.042, "step": 3520 }, { "epoch": 0.65, "learning_rate": 5.798026853746059e-05, "loss": 1.0799, "step": 3521 }, { "epoch": 0.65, "learning_rate": 5.792610950394222e-05, "loss": 1.1452, "step": 3522 }, { "epoch": 0.65, "learning_rate": 5.7871965461943765e-05, "loss": 1.1161, "step": 3523 }, { "epoch": 0.65, "learning_rate": 5.781783643075743e-05, "loss": 1.1107, "step": 3524 }, { "epoch": 0.65, "learning_rate": 5.7763722429670274e-05, "loss": 1.0276, "step": 3525 }, { "epoch": 0.65, "learning_rate": 5.7709623477963694e-05, "loss": 0.9167, "step": 3526 }, { "epoch": 0.65, "learning_rate": 5.765553959491406e-05, "loss": 1.0777, "step": 3527 }, { "epoch": 0.65, "learning_rate": 5.760147079979212e-05, "loss": 1.0386, "step": 3528 }, { "epoch": 0.65, "learning_rate": 5.7547417111863444e-05, "loss": 1.0858, "step": 3529 }, { "epoch": 0.65, "learning_rate": 5.7493378550388064e-05, "loss": 0.9641, "step": 3530 }, { "epoch": 0.65, "learning_rate": 5.7439355134620696e-05, "loss": 1.0505, "step": 3531 }, { "epoch": 0.65, "learning_rate": 5.7385346883810596e-05, "loss": 1.1275, "step": 3532 }, { "epoch": 0.65, "learning_rate": 5.733135381720174e-05, "loss": 1.1082, "step": 3533 }, { "epoch": 0.65, "learning_rate": 5.7277375954032555e-05, "loss": 1.1007, "step": 3534 }, { "epoch": 0.65, "learning_rate": 5.722341331353618e-05, "loss": 1.1421, "step": 3535 }, { "epoch": 0.65, "learning_rate": 5.716946591494022e-05, "loss": 1.1765, "step": 3536 }, { "epoch": 0.65, "learning_rate": 5.711553377746691e-05, "loss": 0.9427, "step": 3537 }, { "epoch": 0.65, "learning_rate": 5.706161692033298e-05, "loss": 1.0214, "step": 3538 }, { "epoch": 0.65, "learning_rate": 5.700771536274985e-05, "loss": 1.0833, "step": 3539 }, { "epoch": 0.65, "learning_rate": 5.695382912392331e-05, "loss": 0.9884, "step": 3540 }, { "epoch": 0.65, "learning_rate": 5.6899958223053915e-05, "loss": 1.015, "step": 3541 }, { "epoch": 0.65, "learning_rate": 5.684610267933648e-05, "loss": 1.1201, "step": 3542 }, { "epoch": 0.65, "learning_rate": 5.67922625119606e-05, "loss": 1.1259, "step": 3543 }, { "epoch": 0.65, "learning_rate": 5.673843774011021e-05, "loss": 1.1783, "step": 3544 }, { "epoch": 0.65, "learning_rate": 5.66846283829639e-05, "loss": 1.139, "step": 3545 }, { "epoch": 0.65, "learning_rate": 5.663083445969464e-05, "loss": 1.1463, "step": 3546 }, { "epoch": 0.65, "learning_rate": 5.6577055989470076e-05, "loss": 0.9237, "step": 3547 }, { "epoch": 0.65, "learning_rate": 5.652329299145207e-05, "loss": 0.9538, "step": 3548 }, { "epoch": 0.65, "learning_rate": 5.646954548479726e-05, "loss": 1.1289, "step": 3549 }, { "epoch": 0.65, "learning_rate": 5.6415813488656565e-05, "loss": 1.0292, "step": 3550 }, { "epoch": 0.65, "learning_rate": 5.636209702217552e-05, "loss": 1.0741, "step": 3551 }, { "epoch": 0.65, "learning_rate": 5.630839610449403e-05, "loss": 1.0365, "step": 3552 }, { "epoch": 0.65, "learning_rate": 5.625471075474649e-05, "loss": 1.0808, "step": 3553 }, { "epoch": 0.65, "learning_rate": 5.6201040992061694e-05, "loss": 1.0858, "step": 3554 }, { "epoch": 0.66, "learning_rate": 5.614738683556302e-05, "loss": 1.1077, "step": 3555 }, { "epoch": 0.66, "learning_rate": 5.609374830436812e-05, "loss": 1.1215, "step": 3556 }, { "epoch": 0.66, "learning_rate": 5.604012541758924e-05, "loss": 1.0693, "step": 3557 }, { "epoch": 0.66, "learning_rate": 5.598651819433294e-05, "loss": 1.1014, "step": 3558 }, { "epoch": 0.66, "learning_rate": 5.5932926653700203e-05, "loss": 1.1457, "step": 3559 }, { "epoch": 0.66, "learning_rate": 5.5879350814786426e-05, "loss": 1.0132, "step": 3560 }, { "epoch": 0.66, "learning_rate": 5.582579069668151e-05, "loss": 1.0234, "step": 3561 }, { "epoch": 0.66, "learning_rate": 5.577224631846961e-05, "loss": 1.049, "step": 3562 }, { "epoch": 0.66, "learning_rate": 5.571871769922945e-05, "loss": 1.2182, "step": 3563 }, { "epoch": 0.66, "learning_rate": 5.566520485803388e-05, "loss": 0.9876, "step": 3564 }, { "epoch": 0.66, "learning_rate": 5.5611707813950396e-05, "loss": 1.059, "step": 3565 }, { "epoch": 0.66, "learning_rate": 5.5558226586040674e-05, "loss": 1.1683, "step": 3566 }, { "epoch": 0.66, "learning_rate": 5.55047611933609e-05, "loss": 1.0516, "step": 3567 }, { "epoch": 0.66, "learning_rate": 5.5451311654961456e-05, "loss": 1.033, "step": 3568 }, { "epoch": 0.66, "learning_rate": 5.5397877989887314e-05, "loss": 0.9374, "step": 3569 }, { "epoch": 0.66, "learning_rate": 5.5344460217177454e-05, "loss": 1.0167, "step": 3570 }, { "epoch": 0.66, "learning_rate": 5.529105835586552e-05, "loss": 0.9202, "step": 3571 }, { "epoch": 0.66, "learning_rate": 5.523767242497927e-05, "loss": 1.1173, "step": 3572 }, { "epoch": 0.66, "learning_rate": 5.518430244354093e-05, "loss": 1.0934, "step": 3573 }, { "epoch": 0.66, "learning_rate": 5.5130948430566934e-05, "loss": 1.0085, "step": 3574 }, { "epoch": 0.66, "learning_rate": 5.507761040506807e-05, "loss": 1.1286, "step": 3575 }, { "epoch": 0.66, "learning_rate": 5.50242883860494e-05, "loss": 1.0664, "step": 3576 }, { "epoch": 0.66, "learning_rate": 5.497098239251038e-05, "loss": 1.1402, "step": 3577 }, { "epoch": 0.66, "learning_rate": 5.4917692443444614e-05, "loss": 1.0671, "step": 3578 }, { "epoch": 0.66, "learning_rate": 5.4864418557840136e-05, "loss": 1.1735, "step": 3579 }, { "epoch": 0.66, "learning_rate": 5.4811160754679134e-05, "loss": 1.004, "step": 3580 }, { "epoch": 0.66, "learning_rate": 5.4757919052938134e-05, "loss": 1.1508, "step": 3581 }, { "epoch": 0.66, "learning_rate": 5.470469347158784e-05, "loss": 0.9533, "step": 3582 }, { "epoch": 0.66, "learning_rate": 5.465148402959339e-05, "loss": 0.9807, "step": 3583 }, { "epoch": 0.66, "learning_rate": 5.459829074591394e-05, "loss": 1.0402, "step": 3584 }, { "epoch": 0.66, "learning_rate": 5.454511363950314e-05, "loss": 1.0397, "step": 3585 }, { "epoch": 0.66, "learning_rate": 5.44919527293086e-05, "loss": 1.0945, "step": 3586 }, { "epoch": 0.66, "learning_rate": 5.443880803427239e-05, "loss": 1.0282, "step": 3587 }, { "epoch": 0.66, "learning_rate": 5.4385679573330675e-05, "loss": 0.9055, "step": 3588 }, { "epoch": 0.66, "learning_rate": 5.43325673654139e-05, "loss": 1.0827, "step": 3589 }, { "epoch": 0.66, "learning_rate": 5.427947142944666e-05, "loss": 1.1001, "step": 3590 }, { "epoch": 0.66, "learning_rate": 5.422639178434788e-05, "loss": 1.0502, "step": 3591 }, { "epoch": 0.66, "learning_rate": 5.4173328449030436e-05, "loss": 1.0318, "step": 3592 }, { "epoch": 0.66, "learning_rate": 5.4120281442401655e-05, "loss": 1.0241, "step": 3593 }, { "epoch": 0.66, "learning_rate": 5.4067250783362856e-05, "loss": 1.1269, "step": 3594 }, { "epoch": 0.66, "learning_rate": 5.401423649080969e-05, "loss": 1.0399, "step": 3595 }, { "epoch": 0.66, "learning_rate": 5.3961238583631855e-05, "loss": 0.9982, "step": 3596 }, { "epoch": 0.66, "learning_rate": 5.390825708071325e-05, "loss": 1.1135, "step": 3597 }, { "epoch": 0.66, "learning_rate": 5.38552920009319e-05, "loss": 1.0567, "step": 3598 }, { "epoch": 0.66, "learning_rate": 5.380234336316008e-05, "loss": 1.1103, "step": 3599 }, { "epoch": 0.66, "learning_rate": 5.374941118626406e-05, "loss": 1.0461, "step": 3600 }, { "epoch": 0.66, "learning_rate": 5.369649548910438e-05, "loss": 1.1307, "step": 3601 }, { "epoch": 0.66, "learning_rate": 5.364359629053566e-05, "loss": 1.1623, "step": 3602 }, { "epoch": 0.66, "learning_rate": 5.359071360940657e-05, "loss": 1.1503, "step": 3603 }, { "epoch": 0.66, "learning_rate": 5.3537847464559946e-05, "loss": 1.118, "step": 3604 }, { "epoch": 0.66, "learning_rate": 5.348499787483281e-05, "loss": 0.9265, "step": 3605 }, { "epoch": 0.66, "learning_rate": 5.3432164859056154e-05, "loss": 1.0312, "step": 3606 }, { "epoch": 0.66, "learning_rate": 5.337934843605519e-05, "loss": 1.0914, "step": 3607 }, { "epoch": 0.66, "learning_rate": 5.33265486246491e-05, "loss": 1.0004, "step": 3608 }, { "epoch": 0.67, "learning_rate": 5.3273765443651235e-05, "loss": 0.9467, "step": 3609 }, { "epoch": 0.67, "learning_rate": 5.3220998911868913e-05, "loss": 1.221, "step": 3610 }, { "epoch": 0.67, "learning_rate": 5.316824904810369e-05, "loss": 1.1405, "step": 3611 }, { "epoch": 0.67, "learning_rate": 5.3115515871151e-05, "loss": 1.0859, "step": 3612 }, { "epoch": 0.67, "learning_rate": 5.30627993998005e-05, "loss": 1.1149, "step": 3613 }, { "epoch": 0.67, "learning_rate": 5.301009965283576e-05, "loss": 0.976, "step": 3614 }, { "epoch": 0.67, "learning_rate": 5.2957416649034475e-05, "loss": 0.8858, "step": 3615 }, { "epoch": 0.67, "learning_rate": 5.290475040716827e-05, "loss": 0.9287, "step": 3616 }, { "epoch": 0.67, "learning_rate": 5.285210094600295e-05, "loss": 1.0725, "step": 3617 }, { "epoch": 0.67, "learning_rate": 5.279946828429823e-05, "loss": 0.9718, "step": 3618 }, { "epoch": 0.67, "learning_rate": 5.274685244080783e-05, "loss": 1.2187, "step": 3619 }, { "epoch": 0.67, "learning_rate": 5.2694253434279585e-05, "loss": 1.2505, "step": 3620 }, { "epoch": 0.67, "learning_rate": 5.264167128345523e-05, "loss": 1.0966, "step": 3621 }, { "epoch": 0.67, "learning_rate": 5.2589106007070486e-05, "loss": 1.0135, "step": 3622 }, { "epoch": 0.67, "learning_rate": 5.2536557623855185e-05, "loss": 1.1952, "step": 3623 }, { "epoch": 0.67, "learning_rate": 5.248402615253301e-05, "loss": 1.0967, "step": 3624 }, { "epoch": 0.67, "learning_rate": 5.2431511611821626e-05, "loss": 1.0524, "step": 3625 }, { "epoch": 0.67, "learning_rate": 5.237901402043278e-05, "loss": 1.1093, "step": 3626 }, { "epoch": 0.67, "learning_rate": 5.2326533397072076e-05, "loss": 1.0896, "step": 3627 }, { "epoch": 0.67, "learning_rate": 5.227406976043905e-05, "loss": 1.1008, "step": 3628 }, { "epoch": 0.67, "learning_rate": 5.22216231292273e-05, "loss": 0.9159, "step": 3629 }, { "epoch": 0.67, "learning_rate": 5.2169193522124284e-05, "loss": 1.0025, "step": 3630 }, { "epoch": 0.67, "learning_rate": 5.211678095781136e-05, "loss": 1.0894, "step": 3631 }, { "epoch": 0.67, "learning_rate": 5.206438545496395e-05, "loss": 1.1308, "step": 3632 }, { "epoch": 0.67, "learning_rate": 5.2012007032251256e-05, "loss": 1.0475, "step": 3633 }, { "epoch": 0.67, "learning_rate": 5.195964570833641e-05, "loss": 1.1223, "step": 3634 }, { "epoch": 0.67, "learning_rate": 5.190730150187658e-05, "loss": 1.0231, "step": 3635 }, { "epoch": 0.67, "learning_rate": 5.1854974431522696e-05, "loss": 1.0278, "step": 3636 }, { "epoch": 0.67, "learning_rate": 5.1802664515919595e-05, "loss": 1.0695, "step": 3637 }, { "epoch": 0.67, "learning_rate": 5.175037177370612e-05, "loss": 1.0605, "step": 3638 }, { "epoch": 0.67, "learning_rate": 5.169809622351486e-05, "loss": 0.9751, "step": 3639 }, { "epoch": 0.67, "learning_rate": 5.164583788397234e-05, "loss": 1.0427, "step": 3640 }, { "epoch": 0.67, "learning_rate": 5.159359677369889e-05, "loss": 0.9795, "step": 3641 }, { "epoch": 0.67, "learning_rate": 5.154137291130885e-05, "loss": 1.1236, "step": 3642 }, { "epoch": 0.67, "learning_rate": 5.148916631541023e-05, "loss": 1.0951, "step": 3643 }, { "epoch": 0.67, "learning_rate": 5.1436977004605046e-05, "loss": 1.0332, "step": 3644 }, { "epoch": 0.67, "learning_rate": 5.138480499748905e-05, "loss": 1.1324, "step": 3645 }, { "epoch": 0.67, "learning_rate": 5.133265031265189e-05, "loss": 0.993, "step": 3646 }, { "epoch": 0.67, "learning_rate": 5.128051296867693e-05, "loss": 1.0491, "step": 3647 }, { "epoch": 0.67, "learning_rate": 5.122839298414156e-05, "loss": 1.0626, "step": 3648 }, { "epoch": 0.67, "learning_rate": 5.1176290377616754e-05, "loss": 1.1801, "step": 3649 }, { "epoch": 0.67, "learning_rate": 5.11242051676675e-05, "loss": 0.9606, "step": 3650 }, { "epoch": 0.67, "learning_rate": 5.1072137372852455e-05, "loss": 1.0652, "step": 3651 }, { "epoch": 0.67, "learning_rate": 5.1020087011724095e-05, "loss": 0.9608, "step": 3652 }, { "epoch": 0.67, "learning_rate": 5.096805410282868e-05, "loss": 1.1207, "step": 3653 }, { "epoch": 0.67, "learning_rate": 5.091603866470631e-05, "loss": 1.1158, "step": 3654 }, { "epoch": 0.67, "learning_rate": 5.086404071589077e-05, "loss": 1.1527, "step": 3655 }, { "epoch": 0.67, "learning_rate": 5.0812060274909745e-05, "loss": 1.0182, "step": 3656 }, { "epoch": 0.67, "learning_rate": 5.076009736028453e-05, "loss": 1.0226, "step": 3657 }, { "epoch": 0.67, "learning_rate": 5.070815199053026e-05, "loss": 1.0385, "step": 3658 }, { "epoch": 0.67, "learning_rate": 5.065622418415577e-05, "loss": 1.0239, "step": 3659 }, { "epoch": 0.67, "learning_rate": 5.0604313959663726e-05, "loss": 1.0869, "step": 3660 }, { "epoch": 0.67, "learning_rate": 5.0552421335550426e-05, "loss": 0.9955, "step": 3661 }, { "epoch": 0.67, "learning_rate": 5.050054633030603e-05, "loss": 1.1062, "step": 3662 }, { "epoch": 0.67, "learning_rate": 5.0448688962414194e-05, "loss": 0.9827, "step": 3663 }, { "epoch": 0.68, "learning_rate": 5.0396849250352526e-05, "loss": 0.9786, "step": 3664 }, { "epoch": 0.68, "learning_rate": 5.03450272125922e-05, "loss": 0.8975, "step": 3665 }, { "epoch": 0.68, "learning_rate": 5.029322286759819e-05, "loss": 1.0682, "step": 3666 }, { "epoch": 0.68, "learning_rate": 5.0241436233829044e-05, "loss": 0.9856, "step": 3667 }, { "epoch": 0.68, "learning_rate": 5.018966732973719e-05, "loss": 0.9724, "step": 3668 }, { "epoch": 0.68, "learning_rate": 5.013791617376846e-05, "loss": 1.054, "step": 3669 }, { "epoch": 0.68, "learning_rate": 5.0086182784362625e-05, "loss": 1.0334, "step": 3670 }, { "epoch": 0.68, "learning_rate": 5.003446717995296e-05, "loss": 1.0648, "step": 3671 }, { "epoch": 0.68, "learning_rate": 4.998276937896653e-05, "loss": 1.0337, "step": 3672 }, { "epoch": 0.68, "learning_rate": 4.993108939982395e-05, "loss": 1.1476, "step": 3673 }, { "epoch": 0.68, "learning_rate": 4.987942726093954e-05, "loss": 0.9799, "step": 3674 }, { "epoch": 0.68, "learning_rate": 4.982778298072118e-05, "loss": 1.1485, "step": 3675 }, { "epoch": 0.68, "learning_rate": 4.977615657757056e-05, "loss": 0.941, "step": 3676 }, { "epoch": 0.68, "learning_rate": 4.972454806988281e-05, "loss": 0.9818, "step": 3677 }, { "epoch": 0.68, "learning_rate": 4.967295747604685e-05, "loss": 1.2106, "step": 3678 }, { "epoch": 0.68, "learning_rate": 4.9621384814445084e-05, "loss": 1.2198, "step": 3679 }, { "epoch": 0.68, "learning_rate": 4.956983010345357e-05, "loss": 1.0622, "step": 3680 }, { "epoch": 0.68, "learning_rate": 4.9518293361441966e-05, "loss": 0.9449, "step": 3681 }, { "epoch": 0.68, "learning_rate": 4.9466774606773605e-05, "loss": 1.1086, "step": 3682 }, { "epoch": 0.68, "learning_rate": 4.9415273857805255e-05, "loss": 1.076, "step": 3683 }, { "epoch": 0.68, "learning_rate": 4.9363791132887494e-05, "loss": 1.0701, "step": 3684 }, { "epoch": 0.68, "learning_rate": 4.9312326450364176e-05, "loss": 0.9382, "step": 3685 }, { "epoch": 0.68, "learning_rate": 4.9260879828573006e-05, "loss": 0.9631, "step": 3686 }, { "epoch": 0.68, "learning_rate": 4.920945128584507e-05, "loss": 1.2151, "step": 3687 }, { "epoch": 0.68, "learning_rate": 4.915804084050516e-05, "loss": 1.0814, "step": 3688 }, { "epoch": 0.68, "learning_rate": 4.9106648510871456e-05, "loss": 1.1505, "step": 3689 }, { "epoch": 0.68, "learning_rate": 4.905527431525589e-05, "loss": 1.0412, "step": 3690 }, { "epoch": 0.68, "learning_rate": 4.900391827196367e-05, "loss": 1.0187, "step": 3691 }, { "epoch": 0.68, "learning_rate": 4.8952580399293766e-05, "loss": 0.9987, "step": 3692 }, { "epoch": 0.68, "learning_rate": 4.890126071553853e-05, "loss": 1.1026, "step": 3693 }, { "epoch": 0.68, "learning_rate": 4.884995923898396e-05, "loss": 1.0813, "step": 3694 }, { "epoch": 0.68, "learning_rate": 4.879867598790946e-05, "loss": 0.8911, "step": 3695 }, { "epoch": 0.68, "learning_rate": 4.874741098058796e-05, "loss": 1.0059, "step": 3696 }, { "epoch": 0.68, "learning_rate": 4.869616423528588e-05, "loss": 1.0272, "step": 3697 }, { "epoch": 0.68, "learning_rate": 4.864493577026322e-05, "loss": 0.9421, "step": 3698 }, { "epoch": 0.68, "learning_rate": 4.859372560377334e-05, "loss": 1.0212, "step": 3699 }, { "epoch": 0.68, "learning_rate": 4.854253375406322e-05, "loss": 1.2175, "step": 3700 }, { "epoch": 0.68, "learning_rate": 4.849136023937321e-05, "loss": 1.1471, "step": 3701 }, { "epoch": 0.68, "learning_rate": 4.844020507793712e-05, "loss": 0.9595, "step": 3702 }, { "epoch": 0.68, "learning_rate": 4.8389068287982244e-05, "loss": 1.0711, "step": 3703 }, { "epoch": 0.68, "learning_rate": 4.833794988772941e-05, "loss": 1.0655, "step": 3704 }, { "epoch": 0.68, "learning_rate": 4.828684989539275e-05, "loss": 1.0401, "step": 3705 }, { "epoch": 0.68, "learning_rate": 4.823576832918001e-05, "loss": 0.9037, "step": 3706 }, { "epoch": 0.68, "learning_rate": 4.8184705207292144e-05, "loss": 1.0741, "step": 3707 }, { "epoch": 0.68, "learning_rate": 4.8133660547923754e-05, "loss": 1.1224, "step": 3708 }, { "epoch": 0.68, "learning_rate": 4.808263436926271e-05, "loss": 1.0348, "step": 3709 }, { "epoch": 0.68, "learning_rate": 4.803162668949044e-05, "loss": 0.9972, "step": 3710 }, { "epoch": 0.68, "learning_rate": 4.7980637526781635e-05, "loss": 1.022, "step": 3711 }, { "epoch": 0.68, "learning_rate": 4.792966689930447e-05, "loss": 1.1938, "step": 3712 }, { "epoch": 0.68, "learning_rate": 4.7878714825220464e-05, "loss": 1.0574, "step": 3713 }, { "epoch": 0.68, "learning_rate": 4.782778132268462e-05, "loss": 1.1707, "step": 3714 }, { "epoch": 0.68, "learning_rate": 4.7776866409845186e-05, "loss": 1.0393, "step": 3715 }, { "epoch": 0.68, "learning_rate": 4.772597010484395e-05, "loss": 1.0517, "step": 3716 }, { "epoch": 0.68, "learning_rate": 4.767509242581595e-05, "loss": 1.075, "step": 3717 }, { "epoch": 0.69, "learning_rate": 4.7624233390889606e-05, "loss": 1.0266, "step": 3718 }, { "epoch": 0.69, "learning_rate": 4.757339301818667e-05, "loss": 0.969, "step": 3719 }, { "epoch": 0.69, "learning_rate": 4.7522571325822376e-05, "loss": 1.1984, "step": 3720 }, { "epoch": 0.69, "learning_rate": 4.747176833190511e-05, "loss": 1.0484, "step": 3721 }, { "epoch": 0.69, "learning_rate": 4.742098405453682e-05, "loss": 1.0385, "step": 3722 }, { "epoch": 0.69, "learning_rate": 4.7370218511812515e-05, "loss": 1.1438, "step": 3723 }, { "epoch": 0.69, "learning_rate": 4.731947172182078e-05, "loss": 1.1881, "step": 3724 }, { "epoch": 0.69, "learning_rate": 4.726874370264333e-05, "loss": 0.9881, "step": 3725 }, { "epoch": 0.69, "learning_rate": 4.7218034472355344e-05, "loss": 0.9601, "step": 3726 }, { "epoch": 0.69, "learning_rate": 4.7167344049025175e-05, "loss": 0.9986, "step": 3727 }, { "epoch": 0.69, "learning_rate": 4.711667245071464e-05, "loss": 1.1857, "step": 3728 }, { "epoch": 0.69, "learning_rate": 4.706601969547859e-05, "loss": 0.9974, "step": 3729 }, { "epoch": 0.69, "learning_rate": 4.701538580136543e-05, "loss": 1.2085, "step": 3730 }, { "epoch": 0.69, "learning_rate": 4.696477078641667e-05, "loss": 1.0424, "step": 3731 }, { "epoch": 0.69, "learning_rate": 4.6914174668667197e-05, "loss": 1.0618, "step": 3732 }, { "epoch": 0.69, "learning_rate": 4.6863597466145116e-05, "loss": 1.0822, "step": 3733 }, { "epoch": 0.69, "learning_rate": 4.6813039196871776e-05, "loss": 1.146, "step": 3734 }, { "epoch": 0.69, "learning_rate": 4.6762499878861764e-05, "loss": 1.0412, "step": 3735 }, { "epoch": 0.69, "learning_rate": 4.671197953012303e-05, "loss": 1.1468, "step": 3736 }, { "epoch": 0.69, "learning_rate": 4.666147816865661e-05, "loss": 1.0772, "step": 3737 }, { "epoch": 0.69, "learning_rate": 4.661099581245694e-05, "loss": 1.0704, "step": 3738 }, { "epoch": 0.69, "learning_rate": 4.6560532479511535e-05, "loss": 1.1079, "step": 3739 }, { "epoch": 0.69, "learning_rate": 4.651008818780119e-05, "loss": 1.0371, "step": 3740 }, { "epoch": 0.69, "learning_rate": 4.64596629552999e-05, "loss": 1.1287, "step": 3741 }, { "epoch": 0.69, "learning_rate": 4.6409256799974944e-05, "loss": 1.1675, "step": 3742 }, { "epoch": 0.69, "learning_rate": 4.635886973978668e-05, "loss": 0.9779, "step": 3743 }, { "epoch": 0.69, "learning_rate": 4.63085017926888e-05, "loss": 1.1493, "step": 3744 }, { "epoch": 0.69, "learning_rate": 4.625815297662808e-05, "loss": 1.0728, "step": 3745 }, { "epoch": 0.69, "learning_rate": 4.6207823309544495e-05, "loss": 1.0302, "step": 3746 }, { "epoch": 0.69, "learning_rate": 4.6157512809371185e-05, "loss": 0.9689, "step": 3747 }, { "epoch": 0.69, "learning_rate": 4.610722149403458e-05, "loss": 1.0795, "step": 3748 }, { "epoch": 0.69, "learning_rate": 4.605694938145408e-05, "loss": 1.2143, "step": 3749 }, { "epoch": 0.69, "learning_rate": 4.600669648954246e-05, "loss": 1.0416, "step": 3750 }, { "epoch": 0.69, "learning_rate": 4.595646283620547e-05, "loss": 1.1468, "step": 3751 }, { "epoch": 0.69, "learning_rate": 4.590624843934207e-05, "loss": 1.0468, "step": 3752 }, { "epoch": 0.69, "learning_rate": 4.5856053316844317e-05, "loss": 1.0309, "step": 3753 }, { "epoch": 0.69, "learning_rate": 4.580587748659752e-05, "loss": 1.0099, "step": 3754 }, { "epoch": 0.69, "learning_rate": 4.575572096648001e-05, "loss": 1.0444, "step": 3755 }, { "epoch": 0.69, "learning_rate": 4.5705583774363204e-05, "loss": 1.0664, "step": 3756 }, { "epoch": 0.69, "learning_rate": 4.5655465928111776e-05, "loss": 0.9364, "step": 3757 }, { "epoch": 0.69, "learning_rate": 4.560536744558338e-05, "loss": 1.0876, "step": 3758 }, { "epoch": 0.69, "learning_rate": 4.5555288344628766e-05, "loss": 0.9594, "step": 3759 }, { "epoch": 0.69, "learning_rate": 4.550522864309191e-05, "loss": 1.1649, "step": 3760 }, { "epoch": 0.69, "learning_rate": 4.5455188358809755e-05, "loss": 1.0719, "step": 3761 }, { "epoch": 0.69, "learning_rate": 4.5405167509612304e-05, "loss": 1.0057, "step": 3762 }, { "epoch": 0.69, "learning_rate": 4.535516611332278e-05, "loss": 1.093, "step": 3763 }, { "epoch": 0.69, "learning_rate": 4.530518418775733e-05, "loss": 0.8441, "step": 3764 }, { "epoch": 0.69, "learning_rate": 4.525522175072521e-05, "loss": 1.124, "step": 3765 }, { "epoch": 0.69, "learning_rate": 4.5205278820028784e-05, "loss": 1.019, "step": 3766 }, { "epoch": 0.69, "learning_rate": 4.515535541346341e-05, "loss": 0.9888, "step": 3767 }, { "epoch": 0.69, "learning_rate": 4.510545154881744e-05, "loss": 1.0955, "step": 3768 }, { "epoch": 0.69, "learning_rate": 4.505556724387241e-05, "loss": 1.073, "step": 3769 }, { "epoch": 0.69, "learning_rate": 4.500570251640277e-05, "loss": 1.0448, "step": 3770 }, { "epoch": 0.69, "learning_rate": 4.4955857384175984e-05, "loss": 1.0078, "step": 3771 }, { "epoch": 0.7, "learning_rate": 4.490603186495264e-05, "loss": 1.0191, "step": 3772 }, { "epoch": 0.7, "learning_rate": 4.485622597648624e-05, "loss": 0.9919, "step": 3773 }, { "epoch": 0.7, "learning_rate": 4.4806439736523295e-05, "loss": 1.1418, "step": 3774 }, { "epoch": 0.7, "learning_rate": 4.475667316280341e-05, "loss": 1.0594, "step": 3775 }, { "epoch": 0.7, "learning_rate": 4.470692627305908e-05, "loss": 1.0845, "step": 3776 }, { "epoch": 0.7, "learning_rate": 4.465719908501581e-05, "loss": 1.1627, "step": 3777 }, { "epoch": 0.7, "learning_rate": 4.4607491616392094e-05, "loss": 1.0917, "step": 3778 }, { "epoch": 0.7, "learning_rate": 4.4557803884899444e-05, "loss": 1.0384, "step": 3779 }, { "epoch": 0.7, "learning_rate": 4.450813590824224e-05, "loss": 1.0792, "step": 3780 }, { "epoch": 0.7, "learning_rate": 4.445848770411795e-05, "loss": 0.9818, "step": 3781 }, { "epoch": 0.7, "learning_rate": 4.4408859290216895e-05, "loss": 1.1517, "step": 3782 }, { "epoch": 0.7, "learning_rate": 4.4359250684222383e-05, "loss": 1.1236, "step": 3783 }, { "epoch": 0.7, "learning_rate": 4.4309661903810616e-05, "loss": 1.0362, "step": 3784 }, { "epoch": 0.7, "learning_rate": 4.426009296665084e-05, "loss": 1.0079, "step": 3785 }, { "epoch": 0.7, "learning_rate": 4.421054389040511e-05, "loss": 1.025, "step": 3786 }, { "epoch": 0.7, "learning_rate": 4.416101469272853e-05, "loss": 1.0744, "step": 3787 }, { "epoch": 0.7, "learning_rate": 4.4111505391269e-05, "loss": 1.1129, "step": 3788 }, { "epoch": 0.7, "learning_rate": 4.406201600366739e-05, "loss": 1.0527, "step": 3789 }, { "epoch": 0.7, "learning_rate": 4.4012546547557434e-05, "loss": 1.112, "step": 3790 }, { "epoch": 0.7, "learning_rate": 4.396309704056587e-05, "loss": 0.9495, "step": 3791 }, { "epoch": 0.7, "learning_rate": 4.391366750031217e-05, "loss": 1.0017, "step": 3792 }, { "epoch": 0.7, "learning_rate": 4.3864257944408846e-05, "loss": 0.9168, "step": 3793 }, { "epoch": 0.7, "learning_rate": 4.381486839046121e-05, "loss": 1.131, "step": 3794 }, { "epoch": 0.7, "learning_rate": 4.376549885606742e-05, "loss": 0.8784, "step": 3795 }, { "epoch": 0.7, "learning_rate": 4.3716149358818526e-05, "loss": 1.0805, "step": 3796 }, { "epoch": 0.7, "learning_rate": 4.366681991629851e-05, "loss": 1.0103, "step": 3797 }, { "epoch": 0.7, "learning_rate": 4.3617510546084073e-05, "loss": 1.0732, "step": 3798 }, { "epoch": 0.7, "learning_rate": 4.356822126574496e-05, "loss": 1.185, "step": 3799 }, { "epoch": 0.7, "learning_rate": 4.351895209284347e-05, "loss": 0.9375, "step": 3800 }, { "epoch": 0.7, "learning_rate": 4.3469703044935026e-05, "loss": 1.1473, "step": 3801 }, { "epoch": 0.7, "learning_rate": 4.342047413956769e-05, "loss": 1.0082, "step": 3802 }, { "epoch": 0.7, "learning_rate": 4.337126539428248e-05, "loss": 1.1705, "step": 3803 }, { "epoch": 0.7, "learning_rate": 4.332207682661309e-05, "loss": 1.155, "step": 3804 }, { "epoch": 0.7, "learning_rate": 4.327290845408622e-05, "loss": 1.0326, "step": 3805 }, { "epoch": 0.7, "learning_rate": 4.3223760294221084e-05, "loss": 1.0362, "step": 3806 }, { "epoch": 0.7, "learning_rate": 4.317463236453e-05, "loss": 1.0711, "step": 3807 }, { "epoch": 0.7, "learning_rate": 4.312552468251785e-05, "loss": 0.9294, "step": 3808 }, { "epoch": 0.7, "learning_rate": 4.3076437265682476e-05, "loss": 1.0878, "step": 3809 }, { "epoch": 0.7, "learning_rate": 4.302737013151438e-05, "loss": 1.0243, "step": 3810 }, { "epoch": 0.7, "learning_rate": 4.297832329749687e-05, "loss": 1.0966, "step": 3811 }, { "epoch": 0.7, "learning_rate": 4.292929678110598e-05, "loss": 1.1337, "step": 3812 }, { "epoch": 0.7, "learning_rate": 4.2880290599810636e-05, "loss": 1.0846, "step": 3813 }, { "epoch": 0.7, "learning_rate": 4.283130477107234e-05, "loss": 1.0504, "step": 3814 }, { "epoch": 0.7, "learning_rate": 4.278233931234552e-05, "loss": 1.0709, "step": 3815 }, { "epoch": 0.7, "learning_rate": 4.2733394241077215e-05, "loss": 1.2105, "step": 3816 }, { "epoch": 0.7, "learning_rate": 4.268446957470725e-05, "loss": 0.9659, "step": 3817 }, { "epoch": 0.7, "learning_rate": 4.263556533066815e-05, "loss": 1.0757, "step": 3818 }, { "epoch": 0.7, "learning_rate": 4.258668152638522e-05, "loss": 1.0203, "step": 3819 }, { "epoch": 0.7, "learning_rate": 4.25378181792764e-05, "loss": 1.058, "step": 3820 }, { "epoch": 0.7, "learning_rate": 4.24889753067525e-05, "loss": 1.0799, "step": 3821 }, { "epoch": 0.7, "learning_rate": 4.244015292621677e-05, "loss": 1.1142, "step": 3822 }, { "epoch": 0.7, "learning_rate": 4.239135105506543e-05, "loss": 1.0489, "step": 3823 }, { "epoch": 0.7, "learning_rate": 4.2342569710687184e-05, "loss": 1.0256, "step": 3824 }, { "epoch": 0.7, "learning_rate": 4.2293808910463606e-05, "loss": 1.1547, "step": 3825 }, { "epoch": 0.71, "learning_rate": 4.224506867176877e-05, "loss": 1.0534, "step": 3826 }, { "epoch": 0.71, "learning_rate": 4.219634901196963e-05, "loss": 1.0022, "step": 3827 }, { "epoch": 0.71, "learning_rate": 4.2147649948425525e-05, "loss": 1.0289, "step": 3828 }, { "epoch": 0.71, "learning_rate": 4.2098971498488734e-05, "loss": 0.9958, "step": 3829 }, { "epoch": 0.71, "learning_rate": 4.2050313679504015e-05, "loss": 1.0834, "step": 3830 }, { "epoch": 0.71, "learning_rate": 4.2001676508808886e-05, "loss": 1.0184, "step": 3831 }, { "epoch": 0.71, "learning_rate": 4.195306000373344e-05, "loss": 1.0838, "step": 3832 }, { "epoch": 0.71, "learning_rate": 4.19044641816004e-05, "loss": 1.0752, "step": 3833 }, { "epoch": 0.71, "learning_rate": 4.1855889059725117e-05, "loss": 1.21, "step": 3834 }, { "epoch": 0.71, "learning_rate": 4.180733465541568e-05, "loss": 1.0523, "step": 3835 }, { "epoch": 0.71, "learning_rate": 4.17588009859726e-05, "loss": 1.0263, "step": 3836 }, { "epoch": 0.71, "learning_rate": 4.171028806868921e-05, "loss": 1.087, "step": 3837 }, { "epoch": 0.71, "learning_rate": 4.166179592085129e-05, "loss": 0.9691, "step": 3838 }, { "epoch": 0.71, "learning_rate": 4.161332455973729e-05, "loss": 1.0913, "step": 3839 }, { "epoch": 0.71, "learning_rate": 4.1564874002618194e-05, "loss": 1.1041, "step": 3840 }, { "epoch": 0.71, "learning_rate": 4.1516444266757694e-05, "loss": 1.0671, "step": 3841 }, { "epoch": 0.71, "learning_rate": 4.1468035369411904e-05, "loss": 0.9717, "step": 3842 }, { "epoch": 0.71, "learning_rate": 4.1419647327829724e-05, "loss": 1.0193, "step": 3843 }, { "epoch": 0.71, "learning_rate": 4.137128015925233e-05, "loss": 0.8805, "step": 3844 }, { "epoch": 0.71, "learning_rate": 4.132293388091375e-05, "loss": 1.0584, "step": 3845 }, { "epoch": 0.71, "learning_rate": 4.127460851004037e-05, "loss": 0.9652, "step": 3846 }, { "epoch": 0.71, "learning_rate": 4.122630406385126e-05, "loss": 0.9269, "step": 3847 }, { "epoch": 0.71, "learning_rate": 4.117802055955792e-05, "loss": 0.931, "step": 3848 }, { "epoch": 0.71, "learning_rate": 4.112975801436454e-05, "loss": 1.0675, "step": 3849 }, { "epoch": 0.71, "learning_rate": 4.10815164454676e-05, "loss": 1.009, "step": 3850 }, { "epoch": 0.71, "learning_rate": 4.103329587005635e-05, "loss": 1.0704, "step": 3851 }, { "epoch": 0.71, "learning_rate": 4.0985096305312407e-05, "loss": 1.0063, "step": 3852 }, { "epoch": 0.71, "learning_rate": 4.093691776841002e-05, "loss": 1.2321, "step": 3853 }, { "epoch": 0.71, "learning_rate": 4.088876027651585e-05, "loss": 1.0496, "step": 3854 }, { "epoch": 0.71, "learning_rate": 4.084062384678907e-05, "loss": 1.0637, "step": 3855 }, { "epoch": 0.71, "learning_rate": 4.0792508496381366e-05, "loss": 0.9315, "step": 3856 }, { "epoch": 0.71, "learning_rate": 4.0744414242436944e-05, "loss": 1.1495, "step": 3857 }, { "epoch": 0.71, "learning_rate": 4.0696341102092425e-05, "loss": 0.907, "step": 3858 }, { "epoch": 0.71, "learning_rate": 4.064828909247701e-05, "loss": 0.9803, "step": 3859 }, { "epoch": 0.71, "learning_rate": 4.060025823071226e-05, "loss": 1.0136, "step": 3860 }, { "epoch": 0.71, "learning_rate": 4.055224853391226e-05, "loss": 1.0859, "step": 3861 }, { "epoch": 0.71, "learning_rate": 4.05042600191835e-05, "loss": 1.0643, "step": 3862 }, { "epoch": 0.71, "learning_rate": 4.0456292703625044e-05, "loss": 1.2131, "step": 3863 }, { "epoch": 0.71, "learning_rate": 4.040834660432824e-05, "loss": 1.195, "step": 3864 }, { "epoch": 0.71, "learning_rate": 4.036042173837706e-05, "loss": 0.9884, "step": 3865 }, { "epoch": 0.71, "learning_rate": 4.031251812284769e-05, "loss": 0.9974, "step": 3866 }, { "epoch": 0.71, "learning_rate": 4.0264635774808945e-05, "loss": 1.0504, "step": 3867 }, { "epoch": 0.71, "learning_rate": 4.0216774711321925e-05, "loss": 1.0242, "step": 3868 }, { "epoch": 0.71, "learning_rate": 4.016893494944026e-05, "loss": 1.0735, "step": 3869 }, { "epoch": 0.71, "learning_rate": 4.0121116506209876e-05, "loss": 1.0981, "step": 3870 }, { "epoch": 0.71, "learning_rate": 4.007331939866925e-05, "loss": 0.9968, "step": 3871 }, { "epoch": 0.71, "learning_rate": 4.002554364384903e-05, "loss": 1.0719, "step": 3872 }, { "epoch": 0.71, "learning_rate": 3.99777892587725e-05, "loss": 0.9974, "step": 3873 }, { "epoch": 0.71, "learning_rate": 3.993005626045514e-05, "loss": 0.8797, "step": 3874 }, { "epoch": 0.71, "learning_rate": 3.9882344665904956e-05, "loss": 1.1302, "step": 3875 }, { "epoch": 0.71, "learning_rate": 3.983465449212225e-05, "loss": 1.0094, "step": 3876 }, { "epoch": 0.71, "learning_rate": 3.9786985756099674e-05, "loss": 0.9625, "step": 3877 }, { "epoch": 0.71, "learning_rate": 3.9739338474822253e-05, "loss": 1.1769, "step": 3878 }, { "epoch": 0.71, "learning_rate": 3.969171266526745e-05, "loss": 0.8846, "step": 3879 }, { "epoch": 0.71, "learning_rate": 3.964410834440494e-05, "loss": 1.0502, "step": 3880 }, { "epoch": 0.72, "learning_rate": 3.959652552919688e-05, "loss": 0.9822, "step": 3881 }, { "epoch": 0.72, "learning_rate": 3.954896423659767e-05, "loss": 1.0164, "step": 3882 }, { "epoch": 0.72, "learning_rate": 3.9501424483554074e-05, "loss": 0.9393, "step": 3883 }, { "epoch": 0.72, "learning_rate": 3.945390628700513e-05, "loss": 1.1615, "step": 3884 }, { "epoch": 0.72, "learning_rate": 3.940640966388231e-05, "loss": 1.0287, "step": 3885 }, { "epoch": 0.72, "learning_rate": 3.935893463110926e-05, "loss": 0.92, "step": 3886 }, { "epoch": 0.72, "learning_rate": 3.931148120560211e-05, "loss": 1.0319, "step": 3887 }, { "epoch": 0.72, "learning_rate": 3.926404940426904e-05, "loss": 0.9513, "step": 3888 }, { "epoch": 0.72, "learning_rate": 3.921663924401079e-05, "loss": 0.9749, "step": 3889 }, { "epoch": 0.72, "learning_rate": 3.916925074172019e-05, "loss": 1.0699, "step": 3890 }, { "epoch": 0.72, "learning_rate": 3.912188391428249e-05, "loss": 1.0881, "step": 3891 }, { "epoch": 0.72, "learning_rate": 3.90745387785751e-05, "loss": 0.989, "step": 3892 }, { "epoch": 0.72, "learning_rate": 3.9027215351467864e-05, "loss": 1.0426, "step": 3893 }, { "epoch": 0.72, "learning_rate": 3.897991364982263e-05, "loss": 1.0805, "step": 3894 }, { "epoch": 0.72, "learning_rate": 3.893263369049378e-05, "loss": 1.0165, "step": 3895 }, { "epoch": 0.72, "learning_rate": 3.888537549032776e-05, "loss": 1.0579, "step": 3896 }, { "epoch": 0.72, "learning_rate": 3.8838139066163394e-05, "loss": 1.067, "step": 3897 }, { "epoch": 0.72, "learning_rate": 3.8790924434831655e-05, "loss": 0.927, "step": 3898 }, { "epoch": 0.72, "learning_rate": 3.8743731613155785e-05, "loss": 1.1565, "step": 3899 }, { "epoch": 0.72, "learning_rate": 3.869656061795121e-05, "loss": 1.0128, "step": 3900 }, { "epoch": 0.72, "learning_rate": 3.8649411466025696e-05, "loss": 0.9476, "step": 3901 }, { "epoch": 0.72, "learning_rate": 3.860228417417907e-05, "loss": 0.9007, "step": 3902 }, { "epoch": 0.72, "learning_rate": 3.855517875920354e-05, "loss": 1.044, "step": 3903 }, { "epoch": 0.72, "learning_rate": 3.850809523788337e-05, "loss": 1.0058, "step": 3904 }, { "epoch": 0.72, "learning_rate": 3.846103362699509e-05, "loss": 0.9804, "step": 3905 }, { "epoch": 0.72, "learning_rate": 3.841399394330739e-05, "loss": 0.9624, "step": 3906 }, { "epoch": 0.72, "learning_rate": 3.8366976203581226e-05, "loss": 1.1228, "step": 3907 }, { "epoch": 0.72, "learning_rate": 3.8319980424569625e-05, "loss": 1.0128, "step": 3908 }, { "epoch": 0.72, "learning_rate": 3.827300662301791e-05, "loss": 1.0488, "step": 3909 }, { "epoch": 0.72, "learning_rate": 3.822605481566347e-05, "loss": 1.1641, "step": 3910 }, { "epoch": 0.72, "learning_rate": 3.81791250192359e-05, "loss": 1.1292, "step": 3911 }, { "epoch": 0.72, "learning_rate": 3.81322172504569e-05, "loss": 1.0038, "step": 3912 }, { "epoch": 0.72, "learning_rate": 3.808533152604047e-05, "loss": 1.0217, "step": 3913 }, { "epoch": 0.72, "learning_rate": 3.803846786269255e-05, "loss": 1.0242, "step": 3914 }, { "epoch": 0.72, "learning_rate": 3.7991626277111404e-05, "loss": 1.1609, "step": 3915 }, { "epoch": 0.72, "learning_rate": 3.7944806785987316e-05, "loss": 1.0146, "step": 3916 }, { "epoch": 0.72, "learning_rate": 3.7898009406002746e-05, "loss": 1.1297, "step": 3917 }, { "epoch": 0.72, "learning_rate": 3.7851234153832195e-05, "loss": 0.981, "step": 3918 }, { "epoch": 0.72, "learning_rate": 3.780448104614244e-05, "loss": 1.1294, "step": 3919 }, { "epoch": 0.72, "learning_rate": 3.775775009959221e-05, "loss": 0.9685, "step": 3920 }, { "epoch": 0.72, "learning_rate": 3.77110413308324e-05, "loss": 0.9539, "step": 3921 }, { "epoch": 0.72, "learning_rate": 3.7664354756506036e-05, "loss": 1.0355, "step": 3922 }, { "epoch": 0.72, "learning_rate": 3.761769039324818e-05, "loss": 1.0428, "step": 3923 }, { "epoch": 0.72, "learning_rate": 3.757104825768599e-05, "loss": 0.9707, "step": 3924 }, { "epoch": 0.72, "learning_rate": 3.752442836643876e-05, "loss": 1.0553, "step": 3925 }, { "epoch": 0.72, "learning_rate": 3.747783073611777e-05, "loss": 1.107, "step": 3926 }, { "epoch": 0.72, "learning_rate": 3.743125538332641e-05, "loss": 0.989, "step": 3927 }, { "epoch": 0.72, "learning_rate": 3.738470232466018e-05, "loss": 1.0431, "step": 3928 }, { "epoch": 0.72, "learning_rate": 3.733817157670656e-05, "loss": 1.0863, "step": 3929 }, { "epoch": 0.72, "learning_rate": 3.7291663156045085e-05, "loss": 1.0503, "step": 3930 }, { "epoch": 0.72, "learning_rate": 3.724517707924742e-05, "loss": 1.0187, "step": 3931 }, { "epoch": 0.72, "learning_rate": 3.719871336287718e-05, "loss": 1.1013, "step": 3932 }, { "epoch": 0.72, "learning_rate": 3.7152272023489996e-05, "loss": 0.986, "step": 3933 }, { "epoch": 0.72, "learning_rate": 3.710585307763366e-05, "loss": 1.1235, "step": 3934 }, { "epoch": 0.73, "learning_rate": 3.705945654184785e-05, "loss": 0.977, "step": 3935 }, { "epoch": 0.73, "learning_rate": 3.701308243266427e-05, "loss": 1.0366, "step": 3936 }, { "epoch": 0.73, "learning_rate": 3.696673076660674e-05, "loss": 1.0631, "step": 3937 }, { "epoch": 0.73, "learning_rate": 3.6920401560190975e-05, "loss": 1.0904, "step": 3938 }, { "epoch": 0.73, "learning_rate": 3.687409482992469e-05, "loss": 1.0082, "step": 3939 }, { "epoch": 0.73, "learning_rate": 3.68278105923077e-05, "loss": 1.1368, "step": 3940 }, { "epoch": 0.73, "learning_rate": 3.678154886383168e-05, "loss": 1.1725, "step": 3941 }, { "epoch": 0.73, "learning_rate": 3.673530966098036e-05, "loss": 1.0457, "step": 3942 }, { "epoch": 0.73, "learning_rate": 3.668909300022937e-05, "loss": 0.9711, "step": 3943 }, { "epoch": 0.73, "learning_rate": 3.664289889804643e-05, "loss": 1.0323, "step": 3944 }, { "epoch": 0.73, "learning_rate": 3.659672737089107e-05, "loss": 1.0008, "step": 3945 }, { "epoch": 0.73, "learning_rate": 3.6550578435214936e-05, "loss": 0.9662, "step": 3946 }, { "epoch": 0.73, "learning_rate": 3.650445210746151e-05, "loss": 1.1813, "step": 3947 }, { "epoch": 0.73, "learning_rate": 3.645834840406625e-05, "loss": 1.0855, "step": 3948 }, { "epoch": 0.73, "learning_rate": 3.6412267341456505e-05, "loss": 1.0014, "step": 3949 }, { "epoch": 0.73, "learning_rate": 3.6366208936051705e-05, "loss": 0.9827, "step": 3950 }, { "epoch": 0.73, "learning_rate": 3.6320173204263016e-05, "loss": 1.1334, "step": 3951 }, { "epoch": 0.73, "learning_rate": 3.62741601624937e-05, "loss": 1.1594, "step": 3952 }, { "epoch": 0.73, "learning_rate": 3.6228169827138815e-05, "loss": 0.97, "step": 3953 }, { "epoch": 0.73, "learning_rate": 3.618220221458536e-05, "loss": 1.1562, "step": 3954 }, { "epoch": 0.73, "learning_rate": 3.613625734121221e-05, "loss": 1.0364, "step": 3955 }, { "epoch": 0.73, "learning_rate": 3.609033522339025e-05, "loss": 0.9918, "step": 3956 }, { "epoch": 0.73, "learning_rate": 3.604443587748209e-05, "loss": 1.0952, "step": 3957 }, { "epoch": 0.73, "learning_rate": 3.599855931984243e-05, "loss": 1.2437, "step": 3958 }, { "epoch": 0.73, "learning_rate": 3.595270556681759e-05, "loss": 1.0011, "step": 3959 }, { "epoch": 0.73, "learning_rate": 3.5906874634746015e-05, "loss": 0.9523, "step": 3960 }, { "epoch": 0.73, "learning_rate": 3.5861066539957844e-05, "loss": 1.0206, "step": 3961 }, { "epoch": 0.73, "learning_rate": 3.58152812987752e-05, "loss": 0.9449, "step": 3962 }, { "epoch": 0.73, "learning_rate": 3.5769518927511966e-05, "loss": 1.0377, "step": 3963 }, { "epoch": 0.73, "learning_rate": 3.5723779442474014e-05, "loss": 1.0942, "step": 3964 }, { "epoch": 0.73, "learning_rate": 3.567806285995883e-05, "loss": 1.1359, "step": 3965 }, { "epoch": 0.73, "learning_rate": 3.563236919625595e-05, "loss": 1.1494, "step": 3966 }, { "epoch": 0.73, "learning_rate": 3.558669846764665e-05, "loss": 1.2153, "step": 3967 }, { "epoch": 0.73, "learning_rate": 3.55410506904041e-05, "loss": 1.007, "step": 3968 }, { "epoch": 0.73, "learning_rate": 3.54954258807932e-05, "loss": 1.1358, "step": 3969 }, { "epoch": 0.73, "learning_rate": 3.544982405507072e-05, "loss": 1.0323, "step": 3970 }, { "epoch": 0.73, "learning_rate": 3.540424522948521e-05, "loss": 0.9986, "step": 3971 }, { "epoch": 0.73, "learning_rate": 3.5358689420277115e-05, "loss": 1.0513, "step": 3972 }, { "epoch": 0.73, "learning_rate": 3.531315664367852e-05, "loss": 0.9682, "step": 3973 }, { "epoch": 0.73, "learning_rate": 3.526764691591349e-05, "loss": 1.1396, "step": 3974 }, { "epoch": 0.73, "learning_rate": 3.522216025319773e-05, "loss": 1.1432, "step": 3975 }, { "epoch": 0.73, "learning_rate": 3.5176696671738795e-05, "loss": 1.1428, "step": 3976 }, { "epoch": 0.73, "learning_rate": 3.513125618773595e-05, "loss": 1.1578, "step": 3977 }, { "epoch": 0.73, "learning_rate": 3.5085838817380356e-05, "loss": 0.878, "step": 3978 }, { "epoch": 0.73, "learning_rate": 3.504044457685479e-05, "loss": 1.1064, "step": 3979 }, { "epoch": 0.73, "learning_rate": 3.499507348233396e-05, "loss": 1.1296, "step": 3980 }, { "epoch": 0.73, "learning_rate": 3.49497255499841e-05, "loss": 1.0479, "step": 3981 }, { "epoch": 0.73, "learning_rate": 3.490440079596341e-05, "loss": 1.0391, "step": 3982 }, { "epoch": 0.73, "learning_rate": 3.485909923642168e-05, "loss": 1.0934, "step": 3983 }, { "epoch": 0.73, "learning_rate": 3.481382088750054e-05, "loss": 1.0864, "step": 3984 }, { "epoch": 0.73, "learning_rate": 3.476856576533326e-05, "loss": 1.0355, "step": 3985 }, { "epoch": 0.73, "learning_rate": 3.4723333886044964e-05, "loss": 0.9918, "step": 3986 }, { "epoch": 0.73, "learning_rate": 3.467812526575228e-05, "loss": 1.1025, "step": 3987 }, { "epoch": 0.73, "learning_rate": 3.463293992056378e-05, "loss": 1.0205, "step": 3988 }, { "epoch": 0.74, "learning_rate": 3.4587777866579566e-05, "loss": 1.0034, "step": 3989 }, { "epoch": 0.74, "learning_rate": 3.4542639119891575e-05, "loss": 1.012, "step": 3990 }, { "epoch": 0.74, "learning_rate": 3.4497523696583344e-05, "loss": 1.192, "step": 3991 }, { "epoch": 0.74, "learning_rate": 3.445243161273016e-05, "loss": 1.0713, "step": 3992 }, { "epoch": 0.74, "learning_rate": 3.4407362884398906e-05, "loss": 1.0404, "step": 3993 }, { "epoch": 0.74, "learning_rate": 3.436231752764829e-05, "loss": 1.0716, "step": 3994 }, { "epoch": 0.74, "learning_rate": 3.431729555852853e-05, "loss": 1.0939, "step": 3995 }, { "epoch": 0.74, "learning_rate": 3.4272296993081644e-05, "loss": 1.1702, "step": 3996 }, { "epoch": 0.74, "learning_rate": 3.422732184734124e-05, "loss": 1.0626, "step": 3997 }, { "epoch": 0.74, "learning_rate": 3.41823701373326e-05, "loss": 1.2381, "step": 3998 }, { "epoch": 0.74, "learning_rate": 3.413744187907261e-05, "loss": 1.1577, "step": 3999 }, { "epoch": 0.74, "learning_rate": 3.4092537088569885e-05, "loss": 1.0906, "step": 4000 }, { "epoch": 0.74, "learning_rate": 3.40476557818246e-05, "loss": 1.092, "step": 4001 }, { "epoch": 0.74, "learning_rate": 3.4002797974828695e-05, "loss": 1.0174, "step": 4002 }, { "epoch": 0.74, "learning_rate": 3.395796368356548e-05, "loss": 1.1608, "step": 4003 }, { "epoch": 0.74, "learning_rate": 3.391315292401017e-05, "loss": 0.9702, "step": 4004 }, { "epoch": 0.74, "learning_rate": 3.3868365712129405e-05, "loss": 1.0465, "step": 4005 }, { "epoch": 0.74, "learning_rate": 3.382360206388153e-05, "loss": 1.0149, "step": 4006 }, { "epoch": 0.74, "learning_rate": 3.377886199521644e-05, "loss": 1.0695, "step": 4007 }, { "epoch": 0.74, "learning_rate": 3.373414552207572e-05, "loss": 1.0129, "step": 4008 }, { "epoch": 0.74, "learning_rate": 3.368945266039237e-05, "loss": 1.0741, "step": 4009 }, { "epoch": 0.74, "learning_rate": 3.3644783426091176e-05, "loss": 1.1292, "step": 4010 }, { "epoch": 0.74, "learning_rate": 3.3600137835088344e-05, "loss": 1.0355, "step": 4011 }, { "epoch": 0.74, "learning_rate": 3.35555159032918e-05, "loss": 1.1726, "step": 4012 }, { "epoch": 0.74, "learning_rate": 3.351091764660093e-05, "loss": 1.0487, "step": 4013 }, { "epoch": 0.74, "learning_rate": 3.3466343080906716e-05, "loss": 1.0535, "step": 4014 }, { "epoch": 0.74, "learning_rate": 3.3421792222091684e-05, "loss": 0.9814, "step": 4015 }, { "epoch": 0.74, "learning_rate": 3.337726508602999e-05, "loss": 1.0846, "step": 4016 }, { "epoch": 0.74, "learning_rate": 3.333276168858722e-05, "loss": 1.0707, "step": 4017 }, { "epoch": 0.74, "learning_rate": 3.328828204562062e-05, "loss": 1.06, "step": 4018 }, { "epoch": 0.74, "learning_rate": 3.324382617297887e-05, "loss": 1.0774, "step": 4019 }, { "epoch": 0.74, "learning_rate": 3.319939408650225e-05, "loss": 1.0748, "step": 4020 }, { "epoch": 0.74, "learning_rate": 3.31549858020225e-05, "loss": 1.1114, "step": 4021 }, { "epoch": 0.74, "learning_rate": 3.311060133536297e-05, "loss": 1.016, "step": 4022 }, { "epoch": 0.74, "learning_rate": 3.306624070233842e-05, "loss": 1.1, "step": 4023 }, { "epoch": 0.74, "learning_rate": 3.3021903918755236e-05, "loss": 0.9695, "step": 4024 }, { "epoch": 0.74, "learning_rate": 3.297759100041113e-05, "loss": 1.0163, "step": 4025 }, { "epoch": 0.74, "learning_rate": 3.293330196309553e-05, "loss": 1.0662, "step": 4026 }, { "epoch": 0.74, "learning_rate": 3.288903682258917e-05, "loss": 1.149, "step": 4027 }, { "epoch": 0.74, "learning_rate": 3.284479559466439e-05, "loss": 1.043, "step": 4028 }, { "epoch": 0.74, "learning_rate": 3.280057829508492e-05, "loss": 1.1173, "step": 4029 }, { "epoch": 0.74, "learning_rate": 3.275638493960608e-05, "loss": 1.0237, "step": 4030 }, { "epoch": 0.74, "learning_rate": 3.2712215543974475e-05, "loss": 1.1468, "step": 4031 }, { "epoch": 0.74, "learning_rate": 3.2668070123928385e-05, "loss": 1.0747, "step": 4032 }, { "epoch": 0.74, "learning_rate": 3.262394869519736e-05, "loss": 1.0827, "step": 4033 }, { "epoch": 0.74, "learning_rate": 3.257985127350257e-05, "loss": 1.0594, "step": 4034 }, { "epoch": 0.74, "learning_rate": 3.253577787455651e-05, "loss": 0.97, "step": 4035 }, { "epoch": 0.74, "learning_rate": 3.249172851406313e-05, "loss": 1.0123, "step": 4036 }, { "epoch": 0.74, "learning_rate": 3.244770320771784e-05, "loss": 0.984, "step": 4037 }, { "epoch": 0.74, "learning_rate": 3.2403701971207536e-05, "loss": 1.0386, "step": 4038 }, { "epoch": 0.74, "learning_rate": 3.235972482021039e-05, "loss": 1.0793, "step": 4039 }, { "epoch": 0.74, "learning_rate": 3.2315771770396165e-05, "loss": 1.0344, "step": 4040 }, { "epoch": 0.74, "learning_rate": 3.227184283742591e-05, "loss": 1.0833, "step": 4041 }, { "epoch": 0.74, "learning_rate": 3.222793803695213e-05, "loss": 1.1566, "step": 4042 }, { "epoch": 0.75, "learning_rate": 3.218405738461868e-05, "loss": 1.1142, "step": 4043 }, { "epoch": 0.75, "learning_rate": 3.214020089606094e-05, "loss": 1.0425, "step": 4044 }, { "epoch": 0.75, "learning_rate": 3.209636858690551e-05, "loss": 0.9807, "step": 4045 }, { "epoch": 0.75, "learning_rate": 3.205256047277054e-05, "loss": 0.8906, "step": 4046 }, { "epoch": 0.75, "learning_rate": 3.200877656926543e-05, "loss": 1.0388, "step": 4047 }, { "epoch": 0.75, "learning_rate": 3.1965016891991e-05, "loss": 1.0546, "step": 4048 }, { "epoch": 0.75, "learning_rate": 3.192128145653943e-05, "loss": 1.1084, "step": 4049 }, { "epoch": 0.75, "learning_rate": 3.1877570278494315e-05, "loss": 1.1282, "step": 4050 }, { "epoch": 0.75, "learning_rate": 3.1833883373430515e-05, "loss": 1.0987, "step": 4051 }, { "epoch": 0.75, "learning_rate": 3.179022075691435e-05, "loss": 1.0876, "step": 4052 }, { "epoch": 0.75, "learning_rate": 3.174658244450337e-05, "loss": 1.0721, "step": 4053 }, { "epoch": 0.75, "learning_rate": 3.1702968451746554e-05, "loss": 1.1805, "step": 4054 }, { "epoch": 0.75, "learning_rate": 3.1659378794184126e-05, "loss": 1.0254, "step": 4055 }, { "epoch": 0.75, "learning_rate": 3.161581348734777e-05, "loss": 0.966, "step": 4056 }, { "epoch": 0.75, "learning_rate": 3.1572272546760383e-05, "loss": 0.9543, "step": 4057 }, { "epoch": 0.75, "learning_rate": 3.1528755987936186e-05, "loss": 1.0959, "step": 4058 }, { "epoch": 0.75, "learning_rate": 3.148526382638081e-05, "loss": 0.8725, "step": 4059 }, { "epoch": 0.75, "learning_rate": 3.144179607759108e-05, "loss": 1.1226, "step": 4060 }, { "epoch": 0.75, "learning_rate": 3.139835275705514e-05, "loss": 1.1106, "step": 4061 }, { "epoch": 0.75, "learning_rate": 3.135493388025253e-05, "loss": 0.9625, "step": 4062 }, { "epoch": 0.75, "learning_rate": 3.131153946265397e-05, "loss": 1.1089, "step": 4063 }, { "epoch": 0.75, "learning_rate": 3.126816951972147e-05, "loss": 1.0177, "step": 4064 }, { "epoch": 0.75, "learning_rate": 3.122482406690842e-05, "loss": 0.9722, "step": 4065 }, { "epoch": 0.75, "learning_rate": 3.118150311965937e-05, "loss": 1.0691, "step": 4066 }, { "epoch": 0.75, "learning_rate": 3.113820669341017e-05, "loss": 1.2196, "step": 4067 }, { "epoch": 0.75, "learning_rate": 3.1094934803587996e-05, "loss": 1.0085, "step": 4068 }, { "epoch": 0.75, "learning_rate": 3.10516874656112e-05, "loss": 1.1351, "step": 4069 }, { "epoch": 0.75, "learning_rate": 3.100846469488939e-05, "loss": 0.9401, "step": 4070 }, { "epoch": 0.75, "learning_rate": 3.096526650682352e-05, "loss": 1.0467, "step": 4071 }, { "epoch": 0.75, "learning_rate": 3.0922092916805665e-05, "loss": 1.077, "step": 4072 }, { "epoch": 0.75, "learning_rate": 3.087894394021917e-05, "loss": 0.9684, "step": 4073 }, { "epoch": 0.75, "learning_rate": 3.0835819592438674e-05, "loss": 1.1511, "step": 4074 }, { "epoch": 0.75, "learning_rate": 3.079271988882997e-05, "loss": 1.0491, "step": 4075 }, { "epoch": 0.75, "learning_rate": 3.074964484475004e-05, "loss": 0.9819, "step": 4076 }, { "epoch": 0.75, "learning_rate": 3.070659447554719e-05, "loss": 1.04, "step": 4077 }, { "epoch": 0.75, "learning_rate": 3.066356879656087e-05, "loss": 1.1105, "step": 4078 }, { "epoch": 0.75, "learning_rate": 3.06205678231217e-05, "loss": 0.8859, "step": 4079 }, { "epoch": 0.75, "learning_rate": 3.057759157055153e-05, "loss": 0.8826, "step": 4080 }, { "epoch": 0.75, "learning_rate": 3.053464005416347e-05, "loss": 1.1467, "step": 4081 }, { "epoch": 0.75, "learning_rate": 3.0491713289261657e-05, "loss": 0.8991, "step": 4082 }, { "epoch": 0.75, "learning_rate": 3.0448811291141577e-05, "loss": 1.0004, "step": 4083 }, { "epoch": 0.75, "learning_rate": 3.0405934075089802e-05, "loss": 1.109, "step": 4084 }, { "epoch": 0.75, "learning_rate": 3.0363081656384075e-05, "loss": 1.0037, "step": 4085 }, { "epoch": 0.75, "learning_rate": 3.032025405029327e-05, "loss": 1.0211, "step": 4086 }, { "epoch": 0.75, "learning_rate": 3.0277451272077562e-05, "loss": 1.1451, "step": 4087 }, { "epoch": 0.75, "learning_rate": 3.0234673336988094e-05, "loss": 1.1922, "step": 4088 }, { "epoch": 0.75, "learning_rate": 3.0191920260267325e-05, "loss": 1.0586, "step": 4089 }, { "epoch": 0.75, "learning_rate": 3.0149192057148734e-05, "loss": 1.0654, "step": 4090 }, { "epoch": 0.75, "learning_rate": 3.0106488742856996e-05, "loss": 0.9705, "step": 4091 }, { "epoch": 0.75, "learning_rate": 3.0063810332607865e-05, "loss": 1.0926, "step": 4092 }, { "epoch": 0.75, "learning_rate": 3.002115684160832e-05, "loss": 0.8925, "step": 4093 }, { "epoch": 0.75, "learning_rate": 2.997852828505634e-05, "loss": 0.9943, "step": 4094 }, { "epoch": 0.75, "learning_rate": 2.993592467814115e-05, "loss": 1.1917, "step": 4095 }, { "epoch": 0.75, "learning_rate": 2.989334603604297e-05, "loss": 1.0809, "step": 4096 }, { "epoch": 0.75, "learning_rate": 2.9850792373933178e-05, "loss": 1.068, "step": 4097 }, { "epoch": 0.76, "learning_rate": 2.9808263706974216e-05, "loss": 0.9799, "step": 4098 }, { "epoch": 0.76, "learning_rate": 2.9765760050319703e-05, "loss": 1.2214, "step": 4099 }, { "epoch": 0.76, "learning_rate": 2.9723281419114225e-05, "loss": 1.1272, "step": 4100 }, { "epoch": 0.76, "learning_rate": 2.9680827828493618e-05, "loss": 1.0547, "step": 4101 }, { "epoch": 0.76, "learning_rate": 2.9638399293584572e-05, "loss": 0.9541, "step": 4102 }, { "epoch": 0.76, "learning_rate": 2.9595995829505053e-05, "loss": 0.9971, "step": 4103 }, { "epoch": 0.76, "learning_rate": 2.955361745136397e-05, "loss": 0.9216, "step": 4104 }, { "epoch": 0.76, "learning_rate": 2.9511264174261377e-05, "loss": 1.1015, "step": 4105 }, { "epoch": 0.76, "learning_rate": 2.9468936013288284e-05, "loss": 1.1587, "step": 4106 }, { "epoch": 0.76, "learning_rate": 2.9426632983526924e-05, "loss": 1.0327, "step": 4107 }, { "epoch": 0.76, "learning_rate": 2.9384355100050322e-05, "loss": 1.0434, "step": 4108 }, { "epoch": 0.76, "learning_rate": 2.9342102377922774e-05, "loss": 1.1519, "step": 4109 }, { "epoch": 0.76, "learning_rate": 2.9299874832199458e-05, "loss": 1.1372, "step": 4110 }, { "epoch": 0.76, "learning_rate": 2.9257672477926722e-05, "loss": 1.1102, "step": 4111 }, { "epoch": 0.76, "learning_rate": 2.9215495330141806e-05, "loss": 0.972, "step": 4112 }, { "epoch": 0.76, "learning_rate": 2.9173343403873043e-05, "loss": 0.8765, "step": 4113 }, { "epoch": 0.76, "learning_rate": 2.913121671413971e-05, "loss": 1.0557, "step": 4114 }, { "epoch": 0.76, "learning_rate": 2.9089115275952218e-05, "loss": 0.9837, "step": 4115 }, { "epoch": 0.76, "learning_rate": 2.9047039104311824e-05, "loss": 0.9756, "step": 4116 }, { "epoch": 0.76, "learning_rate": 2.9004988214210927e-05, "loss": 1.0059, "step": 4117 }, { "epoch": 0.76, "learning_rate": 2.8962962620632827e-05, "loss": 1.1379, "step": 4118 }, { "epoch": 0.76, "learning_rate": 2.8920962338551838e-05, "loss": 0.9658, "step": 4119 }, { "epoch": 0.76, "learning_rate": 2.8878987382933198e-05, "loss": 1.1034, "step": 4120 }, { "epoch": 0.76, "learning_rate": 2.8837037768733265e-05, "loss": 1.0696, "step": 4121 }, { "epoch": 0.76, "learning_rate": 2.879511351089921e-05, "loss": 1.0247, "step": 4122 }, { "epoch": 0.76, "learning_rate": 2.8753214624369316e-05, "loss": 1.02, "step": 4123 }, { "epoch": 0.76, "learning_rate": 2.8711341124072633e-05, "loss": 1.0812, "step": 4124 }, { "epoch": 0.76, "learning_rate": 2.866949302492936e-05, "loss": 1.1022, "step": 4125 }, { "epoch": 0.76, "learning_rate": 2.8627670341850522e-05, "loss": 1.084, "step": 4126 }, { "epoch": 0.76, "learning_rate": 2.858587308973818e-05, "loss": 1.0563, "step": 4127 }, { "epoch": 0.76, "learning_rate": 2.8544101283485223e-05, "loss": 1.0257, "step": 4128 }, { "epoch": 0.76, "learning_rate": 2.850235493797563e-05, "loss": 1.0646, "step": 4129 }, { "epoch": 0.76, "learning_rate": 2.846063406808408e-05, "loss": 1.1565, "step": 4130 }, { "epoch": 0.76, "learning_rate": 2.841893868867641e-05, "loss": 1.1005, "step": 4131 }, { "epoch": 0.76, "learning_rate": 2.8377268814609203e-05, "loss": 1.0959, "step": 4132 }, { "epoch": 0.76, "learning_rate": 2.8335624460730083e-05, "loss": 0.9363, "step": 4133 }, { "epoch": 0.76, "learning_rate": 2.8294005641877486e-05, "loss": 0.9435, "step": 4134 }, { "epoch": 0.76, "learning_rate": 2.82524123728808e-05, "loss": 1.007, "step": 4135 }, { "epoch": 0.76, "learning_rate": 2.8210844668560244e-05, "loss": 0.9538, "step": 4136 }, { "epoch": 0.76, "learning_rate": 2.816930254372705e-05, "loss": 1.0036, "step": 4137 }, { "epoch": 0.76, "learning_rate": 2.8127786013183187e-05, "loss": 1.0368, "step": 4138 }, { "epoch": 0.76, "learning_rate": 2.808629509172165e-05, "loss": 1.1812, "step": 4139 }, { "epoch": 0.76, "learning_rate": 2.8044829794126215e-05, "loss": 1.1172, "step": 4140 }, { "epoch": 0.76, "learning_rate": 2.8003390135171537e-05, "loss": 1.2774, "step": 4141 }, { "epoch": 0.76, "learning_rate": 2.7961976129623134e-05, "loss": 1.0421, "step": 4142 }, { "epoch": 0.76, "learning_rate": 2.792058779223744e-05, "loss": 1.0281, "step": 4143 }, { "epoch": 0.76, "learning_rate": 2.7879225137761666e-05, "loss": 0.9621, "step": 4144 }, { "epoch": 0.76, "learning_rate": 2.783788818093399e-05, "loss": 1.0216, "step": 4145 }, { "epoch": 0.76, "learning_rate": 2.7796576936483233e-05, "loss": 1.0752, "step": 4146 }, { "epoch": 0.76, "learning_rate": 2.7755291419129247e-05, "loss": 1.1429, "step": 4147 }, { "epoch": 0.76, "learning_rate": 2.7714031643582607e-05, "loss": 1.0698, "step": 4148 }, { "epoch": 0.76, "learning_rate": 2.76727976245448e-05, "loss": 0.9992, "step": 4149 }, { "epoch": 0.76, "learning_rate": 2.7631589376708035e-05, "loss": 0.8825, "step": 4150 }, { "epoch": 0.76, "learning_rate": 2.7590406914755463e-05, "loss": 1.0994, "step": 4151 }, { "epoch": 0.77, "learning_rate": 2.754925025336088e-05, "loss": 1.0216, "step": 4152 }, { "epoch": 0.77, "learning_rate": 2.750811940718906e-05, "loss": 1.0787, "step": 4153 }, { "epoch": 0.77, "learning_rate": 2.746701439089544e-05, "loss": 1.0919, "step": 4154 }, { "epoch": 0.77, "learning_rate": 2.7425935219126387e-05, "loss": 0.9441, "step": 4155 }, { "epoch": 0.77, "learning_rate": 2.7384881906518957e-05, "loss": 0.9867, "step": 4156 }, { "epoch": 0.77, "learning_rate": 2.7343854467701014e-05, "loss": 0.9256, "step": 4157 }, { "epoch": 0.77, "learning_rate": 2.7302852917291187e-05, "loss": 1.0536, "step": 4158 }, { "epoch": 0.77, "learning_rate": 2.7261877269898972e-05, "loss": 1.1499, "step": 4159 }, { "epoch": 0.77, "learning_rate": 2.7220927540124507e-05, "loss": 1.0609, "step": 4160 }, { "epoch": 0.77, "learning_rate": 2.718000374255881e-05, "loss": 1.0383, "step": 4161 }, { "epoch": 0.77, "learning_rate": 2.7139105891783588e-05, "loss": 0.9685, "step": 4162 }, { "epoch": 0.77, "learning_rate": 2.7098234002371313e-05, "loss": 0.9403, "step": 4163 }, { "epoch": 0.77, "learning_rate": 2.705738808888518e-05, "loss": 1.116, "step": 4164 }, { "epoch": 0.77, "learning_rate": 2.701656816587924e-05, "loss": 1.1831, "step": 4165 }, { "epoch": 0.77, "learning_rate": 2.6975774247898133e-05, "loss": 1.2532, "step": 4166 }, { "epoch": 0.77, "learning_rate": 2.693500634947741e-05, "loss": 1.1639, "step": 4167 }, { "epoch": 0.77, "learning_rate": 2.6894264485143107e-05, "loss": 1.1007, "step": 4168 }, { "epoch": 0.77, "learning_rate": 2.6853548669412233e-05, "loss": 1.0552, "step": 4169 }, { "epoch": 0.77, "learning_rate": 2.6812858916792328e-05, "loss": 1.1204, "step": 4170 }, { "epoch": 0.77, "learning_rate": 2.6772195241781805e-05, "loss": 1.0344, "step": 4171 }, { "epoch": 0.77, "learning_rate": 2.6731557658869666e-05, "loss": 1.0666, "step": 4172 }, { "epoch": 0.77, "learning_rate": 2.669094618253565e-05, "loss": 1.2015, "step": 4173 }, { "epoch": 0.77, "learning_rate": 2.6650360827250166e-05, "loss": 1.0142, "step": 4174 }, { "epoch": 0.77, "learning_rate": 2.6609801607474415e-05, "loss": 1.0415, "step": 4175 }, { "epoch": 0.77, "learning_rate": 2.6569268537660163e-05, "loss": 0.9775, "step": 4176 }, { "epoch": 0.77, "learning_rate": 2.6528761632249965e-05, "loss": 0.9587, "step": 4177 }, { "epoch": 0.77, "learning_rate": 2.6488280905676965e-05, "loss": 0.955, "step": 4178 }, { "epoch": 0.77, "learning_rate": 2.6447826372365037e-05, "loss": 1.053, "step": 4179 }, { "epoch": 0.77, "learning_rate": 2.6407398046728648e-05, "loss": 1.1079, "step": 4180 }, { "epoch": 0.77, "learning_rate": 2.6366995943173057e-05, "loss": 1.0443, "step": 4181 }, { "epoch": 0.77, "learning_rate": 2.6326620076094033e-05, "loss": 1.0034, "step": 4182 }, { "epoch": 0.77, "learning_rate": 2.628627045987817e-05, "loss": 0.9529, "step": 4183 }, { "epoch": 0.77, "learning_rate": 2.6245947108902468e-05, "loss": 0.9531, "step": 4184 }, { "epoch": 0.77, "learning_rate": 2.62056500375348e-05, "loss": 1.0578, "step": 4185 }, { "epoch": 0.77, "learning_rate": 2.6165379260133516e-05, "loss": 1.1334, "step": 4186 }, { "epoch": 0.77, "learning_rate": 2.6125134791047733e-05, "loss": 0.933, "step": 4187 }, { "epoch": 0.77, "learning_rate": 2.6084916644617062e-05, "loss": 1.0597, "step": 4188 }, { "epoch": 0.77, "learning_rate": 2.6044724835171874e-05, "loss": 0.9727, "step": 4189 }, { "epoch": 0.77, "learning_rate": 2.6004559377032955e-05, "loss": 0.9509, "step": 4190 }, { "epoch": 0.77, "learning_rate": 2.5964420284511936e-05, "loss": 0.9726, "step": 4191 }, { "epoch": 0.77, "learning_rate": 2.5924307571910857e-05, "loss": 1.0849, "step": 4192 }, { "epoch": 0.77, "learning_rate": 2.588422125352251e-05, "loss": 1.0698, "step": 4193 }, { "epoch": 0.77, "learning_rate": 2.584416134363019e-05, "loss": 1.1152, "step": 4194 }, { "epoch": 0.77, "learning_rate": 2.5804127856507796e-05, "loss": 1.1313, "step": 4195 }, { "epoch": 0.77, "learning_rate": 2.5764120806419812e-05, "loss": 1.163, "step": 4196 }, { "epoch": 0.77, "learning_rate": 2.572414020762136e-05, "loss": 1.0474, "step": 4197 }, { "epoch": 0.77, "learning_rate": 2.568418607435803e-05, "loss": 1.0574, "step": 4198 }, { "epoch": 0.77, "learning_rate": 2.5644258420866107e-05, "loss": 1.0918, "step": 4199 }, { "epoch": 0.77, "learning_rate": 2.5604357261372335e-05, "loss": 0.9178, "step": 4200 }, { "epoch": 0.77, "learning_rate": 2.5564482610094088e-05, "loss": 1.0814, "step": 4201 }, { "epoch": 0.77, "learning_rate": 2.5524634481239197e-05, "loss": 1.0242, "step": 4202 }, { "epoch": 0.77, "learning_rate": 2.5484812889006205e-05, "loss": 0.8483, "step": 4203 }, { "epoch": 0.77, "learning_rate": 2.5445017847584028e-05, "loss": 1.094, "step": 4204 }, { "epoch": 0.77, "learning_rate": 2.5405249371152273e-05, "loss": 1.0415, "step": 4205 }, { "epoch": 0.78, "learning_rate": 2.5365507473880957e-05, "loss": 1.1736, "step": 4206 }, { "epoch": 0.78, "learning_rate": 2.5325792169930705e-05, "loss": 0.9231, "step": 4207 }, { "epoch": 0.78, "learning_rate": 2.5286103473452583e-05, "loss": 1.0479, "step": 4208 }, { "epoch": 0.78, "learning_rate": 2.5246441398588316e-05, "loss": 1.136, "step": 4209 }, { "epoch": 0.78, "learning_rate": 2.5206805959469982e-05, "loss": 0.9641, "step": 4210 }, { "epoch": 0.78, "learning_rate": 2.5167197170220314e-05, "loss": 1.0457, "step": 4211 }, { "epoch": 0.78, "learning_rate": 2.5127615044952446e-05, "loss": 0.9917, "step": 4212 }, { "epoch": 0.78, "learning_rate": 2.5088059597770062e-05, "loss": 1.1065, "step": 4213 }, { "epoch": 0.78, "learning_rate": 2.504853084276728e-05, "loss": 1.1554, "step": 4214 }, { "epoch": 0.78, "learning_rate": 2.500902879402881e-05, "loss": 1.0627, "step": 4215 }, { "epoch": 0.78, "learning_rate": 2.496955346562978e-05, "loss": 1.0449, "step": 4216 }, { "epoch": 0.78, "learning_rate": 2.4930104871635752e-05, "loss": 1.0086, "step": 4217 }, { "epoch": 0.78, "learning_rate": 2.4890683026102878e-05, "loss": 1.1206, "step": 4218 }, { "epoch": 0.78, "learning_rate": 2.4851287943077706e-05, "loss": 0.9568, "step": 4219 }, { "epoch": 0.78, "learning_rate": 2.4811919636597214e-05, "loss": 1.1997, "step": 4220 }, { "epoch": 0.78, "learning_rate": 2.4772578120688938e-05, "loss": 1.0837, "step": 4221 }, { "epoch": 0.78, "learning_rate": 2.4733263409370798e-05, "loss": 1.0015, "step": 4222 }, { "epoch": 0.78, "learning_rate": 2.4693975516651147e-05, "loss": 1.2284, "step": 4223 }, { "epoch": 0.78, "learning_rate": 2.4654714456528873e-05, "loss": 0.9929, "step": 4224 }, { "epoch": 0.78, "learning_rate": 2.4615480242993217e-05, "loss": 1.0995, "step": 4225 }, { "epoch": 0.78, "learning_rate": 2.4576272890023844e-05, "loss": 1.0943, "step": 4226 }, { "epoch": 0.78, "learning_rate": 2.4537092411590945e-05, "loss": 0.9893, "step": 4227 }, { "epoch": 0.78, "learning_rate": 2.4497938821655063e-05, "loss": 0.9152, "step": 4228 }, { "epoch": 0.78, "learning_rate": 2.4458812134167132e-05, "loss": 0.9463, "step": 4229 }, { "epoch": 0.78, "learning_rate": 2.4419712363068593e-05, "loss": 1.022, "step": 4230 }, { "epoch": 0.78, "learning_rate": 2.4380639522291237e-05, "loss": 1.0611, "step": 4231 }, { "epoch": 0.78, "learning_rate": 2.434159362575722e-05, "loss": 1.0753, "step": 4232 }, { "epoch": 0.78, "learning_rate": 2.430257468737921e-05, "loss": 1.2267, "step": 4233 }, { "epoch": 0.78, "learning_rate": 2.4263582721060174e-05, "loss": 1.0685, "step": 4234 }, { "epoch": 0.78, "learning_rate": 2.4224617740693478e-05, "loss": 1.1054, "step": 4235 }, { "epoch": 0.78, "learning_rate": 2.418567976016294e-05, "loss": 0.9869, "step": 4236 }, { "epoch": 0.78, "learning_rate": 2.414676879334269e-05, "loss": 0.9831, "step": 4237 }, { "epoch": 0.78, "learning_rate": 2.410788485409724e-05, "loss": 0.9453, "step": 4238 }, { "epoch": 0.78, "learning_rate": 2.4069027956281475e-05, "loss": 1.0521, "step": 4239 }, { "epoch": 0.78, "learning_rate": 2.40301981137407e-05, "loss": 1.0006, "step": 4240 }, { "epoch": 0.78, "learning_rate": 2.3991395340310484e-05, "loss": 1.0337, "step": 4241 }, { "epoch": 0.78, "learning_rate": 2.3952619649816864e-05, "loss": 1.0513, "step": 4242 }, { "epoch": 0.78, "learning_rate": 2.391387105607612e-05, "loss": 1.0222, "step": 4243 }, { "epoch": 0.78, "learning_rate": 2.3875149572894927e-05, "loss": 1.0898, "step": 4244 }, { "epoch": 0.78, "learning_rate": 2.3836455214070276e-05, "loss": 1.2076, "step": 4245 }, { "epoch": 0.78, "learning_rate": 2.3797787993389563e-05, "loss": 1.072, "step": 4246 }, { "epoch": 0.78, "learning_rate": 2.375914792463041e-05, "loss": 1.0827, "step": 4247 }, { "epoch": 0.78, "learning_rate": 2.3720535021560865e-05, "loss": 1.2161, "step": 4248 }, { "epoch": 0.78, "learning_rate": 2.3681949297939233e-05, "loss": 1.1771, "step": 4249 }, { "epoch": 0.78, "learning_rate": 2.364339076751414e-05, "loss": 1.1198, "step": 4250 }, { "epoch": 0.78, "learning_rate": 2.360485944402452e-05, "loss": 0.9669, "step": 4251 }, { "epoch": 0.78, "learning_rate": 2.3566355341199663e-05, "loss": 0.9865, "step": 4252 }, { "epoch": 0.78, "learning_rate": 2.3527878472759078e-05, "loss": 1.0277, "step": 4253 }, { "epoch": 0.78, "learning_rate": 2.3489428852412676e-05, "loss": 1.0774, "step": 4254 }, { "epoch": 0.78, "learning_rate": 2.345100649386056e-05, "loss": 1.0797, "step": 4255 }, { "epoch": 0.78, "learning_rate": 2.341261141079316e-05, "loss": 1.1496, "step": 4256 }, { "epoch": 0.78, "learning_rate": 2.3374243616891166e-05, "loss": 0.9637, "step": 4257 }, { "epoch": 0.78, "learning_rate": 2.333590312582561e-05, "loss": 1.0705, "step": 4258 }, { "epoch": 0.78, "learning_rate": 2.3297589951257694e-05, "loss": 0.9571, "step": 4259 }, { "epoch": 0.79, "learning_rate": 2.325930410683903e-05, "loss": 1.0527, "step": 4260 }, { "epoch": 0.79, "learning_rate": 2.322104560621129e-05, "loss": 1.0015, "step": 4261 }, { "epoch": 0.79, "learning_rate": 2.3182814463006596e-05, "loss": 0.9935, "step": 4262 }, { "epoch": 0.79, "learning_rate": 2.3144610690847203e-05, "loss": 1.1139, "step": 4263 }, { "epoch": 0.79, "learning_rate": 2.3106434303345683e-05, "loss": 1.0745, "step": 4264 }, { "epoch": 0.79, "learning_rate": 2.3068285314104787e-05, "loss": 1.0423, "step": 4265 }, { "epoch": 0.79, "learning_rate": 2.3030163736717613e-05, "loss": 1.1899, "step": 4266 }, { "epoch": 0.79, "learning_rate": 2.2992069584767306e-05, "loss": 1.1618, "step": 4267 }, { "epoch": 0.79, "learning_rate": 2.2954002871827417e-05, "loss": 0.9704, "step": 4268 }, { "epoch": 0.79, "learning_rate": 2.291596361146162e-05, "loss": 0.9811, "step": 4269 }, { "epoch": 0.79, "learning_rate": 2.2877951817223896e-05, "loss": 1.0886, "step": 4270 }, { "epoch": 0.79, "learning_rate": 2.2839967502658334e-05, "loss": 1.0077, "step": 4271 }, { "epoch": 0.79, "learning_rate": 2.280201068129929e-05, "loss": 1.0526, "step": 4272 }, { "epoch": 0.79, "learning_rate": 2.2764081366671285e-05, "loss": 1.0151, "step": 4273 }, { "epoch": 0.79, "learning_rate": 2.272617957228913e-05, "loss": 0.9174, "step": 4274 }, { "epoch": 0.79, "learning_rate": 2.268830531165771e-05, "loss": 1.1103, "step": 4275 }, { "epoch": 0.79, "learning_rate": 2.2650458598272206e-05, "loss": 1.0966, "step": 4276 }, { "epoch": 0.79, "learning_rate": 2.2612639445617912e-05, "loss": 1.0794, "step": 4277 }, { "epoch": 0.79, "learning_rate": 2.2574847867170334e-05, "loss": 0.926, "step": 4278 }, { "epoch": 0.79, "learning_rate": 2.2537083876395105e-05, "loss": 1.0749, "step": 4279 }, { "epoch": 0.79, "learning_rate": 2.2499347486748112e-05, "loss": 1.0593, "step": 4280 }, { "epoch": 0.79, "learning_rate": 2.2461638711675337e-05, "loss": 1.126, "step": 4281 }, { "epoch": 0.79, "learning_rate": 2.2423957564613006e-05, "loss": 0.974, "step": 4282 }, { "epoch": 0.79, "learning_rate": 2.238630405898734e-05, "loss": 1.01, "step": 4283 }, { "epoch": 0.79, "learning_rate": 2.2348678208214903e-05, "loss": 1.1236, "step": 4284 }, { "epoch": 0.79, "learning_rate": 2.2311080025702235e-05, "loss": 1.1597, "step": 4285 }, { "epoch": 0.79, "learning_rate": 2.2273509524846192e-05, "loss": 1.0216, "step": 4286 }, { "epoch": 0.79, "learning_rate": 2.2235966719033586e-05, "loss": 0.9831, "step": 4287 }, { "epoch": 0.79, "learning_rate": 2.2198451621641546e-05, "loss": 0.9835, "step": 4288 }, { "epoch": 0.79, "learning_rate": 2.216096424603711e-05, "loss": 1.0662, "step": 4289 }, { "epoch": 0.79, "learning_rate": 2.212350460557765e-05, "loss": 1.02, "step": 4290 }, { "epoch": 0.79, "learning_rate": 2.2086072713610505e-05, "loss": 1.0598, "step": 4291 }, { "epoch": 0.79, "learning_rate": 2.2048668583473232e-05, "loss": 0.9518, "step": 4292 }, { "epoch": 0.79, "learning_rate": 2.201129222849344e-05, "loss": 1.0161, "step": 4293 }, { "epoch": 0.79, "learning_rate": 2.197394366198884e-05, "loss": 1.0594, "step": 4294 }, { "epoch": 0.79, "learning_rate": 2.193662289726721e-05, "loss": 1.0837, "step": 4295 }, { "epoch": 0.79, "learning_rate": 2.1899329947626544e-05, "loss": 0.9358, "step": 4296 }, { "epoch": 0.79, "learning_rate": 2.186206482635479e-05, "loss": 0.9252, "step": 4297 }, { "epoch": 0.79, "learning_rate": 2.1824827546730066e-05, "loss": 1.1037, "step": 4298 }, { "epoch": 0.79, "learning_rate": 2.1787618122020538e-05, "loss": 1.0142, "step": 4299 }, { "epoch": 0.79, "learning_rate": 2.1750436565484433e-05, "loss": 0.9608, "step": 4300 }, { "epoch": 0.79, "learning_rate": 2.1713282890370034e-05, "loss": 1.0145, "step": 4301 }, { "epoch": 0.79, "learning_rate": 2.1676157109915786e-05, "loss": 1.1094, "step": 4302 }, { "epoch": 0.79, "learning_rate": 2.163905923735007e-05, "loss": 1.1201, "step": 4303 }, { "epoch": 0.79, "learning_rate": 2.1601989285891456e-05, "loss": 0.9428, "step": 4304 }, { "epoch": 0.79, "learning_rate": 2.156494726874838e-05, "loss": 1.0486, "step": 4305 }, { "epoch": 0.79, "learning_rate": 2.152793319911952e-05, "loss": 1.0903, "step": 4306 }, { "epoch": 0.79, "learning_rate": 2.1490947090193457e-05, "loss": 1.0175, "step": 4307 }, { "epoch": 0.79, "learning_rate": 2.145398895514892e-05, "loss": 1.0166, "step": 4308 }, { "epoch": 0.79, "learning_rate": 2.1417058807154555e-05, "loss": 1.0921, "step": 4309 }, { "epoch": 0.79, "learning_rate": 2.1380156659369178e-05, "loss": 1.0396, "step": 4310 }, { "epoch": 0.79, "learning_rate": 2.1343282524941422e-05, "loss": 1.0813, "step": 4311 }, { "epoch": 0.79, "learning_rate": 2.1306436417010168e-05, "loss": 1.103, "step": 4312 }, { "epoch": 0.79, "learning_rate": 2.1269618348704125e-05, "loss": 1.0381, "step": 4313 }, { "epoch": 0.79, "learning_rate": 2.1232828333142152e-05, "loss": 1.0716, "step": 4314 }, { "epoch": 0.8, "learning_rate": 2.1196066383433024e-05, "loss": 0.9613, "step": 4315 }, { "epoch": 0.8, "learning_rate": 2.1159332512675534e-05, "loss": 0.9834, "step": 4316 }, { "epoch": 0.8, "learning_rate": 2.112262673395847e-05, "loss": 1.0399, "step": 4317 }, { "epoch": 0.8, "learning_rate": 2.1085949060360654e-05, "loss": 0.9393, "step": 4318 }, { "epoch": 0.8, "learning_rate": 2.1049299504950803e-05, "loss": 1.1215, "step": 4319 }, { "epoch": 0.8, "learning_rate": 2.101267808078774e-05, "loss": 1.1244, "step": 4320 }, { "epoch": 0.8, "learning_rate": 2.097608480092016e-05, "loss": 1.0467, "step": 4321 }, { "epoch": 0.8, "learning_rate": 2.0939519678386753e-05, "loss": 1.1789, "step": 4322 }, { "epoch": 0.8, "learning_rate": 2.090298272621617e-05, "loss": 0.8569, "step": 4323 }, { "epoch": 0.8, "learning_rate": 2.086647395742709e-05, "loss": 1.111, "step": 4324 }, { "epoch": 0.8, "learning_rate": 2.082999338502806e-05, "loss": 1.0271, "step": 4325 }, { "epoch": 0.8, "learning_rate": 2.0793541022017692e-05, "loss": 1.1245, "step": 4326 }, { "epoch": 0.8, "learning_rate": 2.0757116881384374e-05, "loss": 1.1011, "step": 4327 }, { "epoch": 0.8, "learning_rate": 2.072072097610661e-05, "loss": 1.1047, "step": 4328 }, { "epoch": 0.8, "learning_rate": 2.0684353319152737e-05, "loss": 0.9069, "step": 4329 }, { "epoch": 0.8, "learning_rate": 2.0648013923481115e-05, "loss": 0.9888, "step": 4330 }, { "epoch": 0.8, "learning_rate": 2.0611702802039912e-05, "loss": 1.1764, "step": 4331 }, { "epoch": 0.8, "learning_rate": 2.0575419967767385e-05, "loss": 1.1261, "step": 4332 }, { "epoch": 0.8, "learning_rate": 2.0539165433591513e-05, "loss": 0.959, "step": 4333 }, { "epoch": 0.8, "learning_rate": 2.050293921243036e-05, "loss": 1.0731, "step": 4334 }, { "epoch": 0.8, "learning_rate": 2.0466741317191816e-05, "loss": 1.2158, "step": 4335 }, { "epoch": 0.8, "learning_rate": 2.0430571760773742e-05, "loss": 0.9563, "step": 4336 }, { "epoch": 0.8, "learning_rate": 2.0394430556063816e-05, "loss": 0.9941, "step": 4337 }, { "epoch": 0.8, "learning_rate": 2.035831771593968e-05, "loss": 1.0781, "step": 4338 }, { "epoch": 0.8, "learning_rate": 2.0322233253268818e-05, "loss": 0.932, "step": 4339 }, { "epoch": 0.8, "learning_rate": 2.0286177180908673e-05, "loss": 1.0347, "step": 4340 }, { "epoch": 0.8, "learning_rate": 2.0250149511706483e-05, "loss": 1.1249, "step": 4341 }, { "epoch": 0.8, "learning_rate": 2.0214150258499487e-05, "loss": 0.9769, "step": 4342 }, { "epoch": 0.8, "learning_rate": 2.0178179434114674e-05, "loss": 1.049, "step": 4343 }, { "epoch": 0.8, "learning_rate": 2.0142237051368963e-05, "loss": 1.0503, "step": 4344 }, { "epoch": 0.8, "learning_rate": 2.0106323123069104e-05, "loss": 1.0249, "step": 4345 }, { "epoch": 0.8, "learning_rate": 2.0070437662011798e-05, "loss": 1.0416, "step": 4346 }, { "epoch": 0.8, "learning_rate": 2.0034580680983473e-05, "loss": 1.0017, "step": 4347 }, { "epoch": 0.8, "learning_rate": 1.999875219276054e-05, "loss": 0.8471, "step": 4348 }, { "epoch": 0.8, "learning_rate": 1.9962952210109166e-05, "loss": 1.064, "step": 4349 }, { "epoch": 0.8, "learning_rate": 1.992718074578539e-05, "loss": 1.1487, "step": 4350 }, { "epoch": 0.8, "learning_rate": 1.9891437812535052e-05, "loss": 0.9764, "step": 4351 }, { "epoch": 0.8, "learning_rate": 1.985572342309393e-05, "loss": 1.065, "step": 4352 }, { "epoch": 0.8, "learning_rate": 1.98200375901875e-05, "loss": 0.9313, "step": 4353 }, { "epoch": 0.8, "learning_rate": 1.9784380326531183e-05, "loss": 0.9909, "step": 4354 }, { "epoch": 0.8, "learning_rate": 1.9748751644830142e-05, "loss": 1.0422, "step": 4355 }, { "epoch": 0.8, "learning_rate": 1.9713151557779374e-05, "loss": 0.9686, "step": 4356 }, { "epoch": 0.8, "learning_rate": 1.9677580078063662e-05, "loss": 1.0079, "step": 4357 }, { "epoch": 0.8, "learning_rate": 1.9642037218357688e-05, "loss": 1.0109, "step": 4358 }, { "epoch": 0.8, "learning_rate": 1.960652299132585e-05, "loss": 0.8878, "step": 4359 }, { "epoch": 0.8, "learning_rate": 1.957103740962232e-05, "loss": 1.1917, "step": 4360 }, { "epoch": 0.8, "learning_rate": 1.9535580485891182e-05, "loss": 0.9527, "step": 4361 }, { "epoch": 0.8, "learning_rate": 1.95001522327662e-05, "loss": 1.0612, "step": 4362 }, { "epoch": 0.8, "learning_rate": 1.9464752662870945e-05, "loss": 1.0277, "step": 4363 }, { "epoch": 0.8, "learning_rate": 1.9429381788818824e-05, "loss": 1.0843, "step": 4364 }, { "epoch": 0.8, "learning_rate": 1.9394039623212945e-05, "loss": 1.0931, "step": 4365 }, { "epoch": 0.8, "learning_rate": 1.9358726178646224e-05, "loss": 1.0744, "step": 4366 }, { "epoch": 0.8, "learning_rate": 1.9323441467701352e-05, "loss": 1.2035, "step": 4367 }, { "epoch": 0.8, "learning_rate": 1.9288185502950775e-05, "loss": 1.0413, "step": 4368 }, { "epoch": 0.81, "learning_rate": 1.9252958296956648e-05, "loss": 0.9793, "step": 4369 }, { "epoch": 0.81, "learning_rate": 1.9217759862270977e-05, "loss": 1.0231, "step": 4370 }, { "epoch": 0.81, "learning_rate": 1.9182590211435423e-05, "loss": 0.9696, "step": 4371 }, { "epoch": 0.81, "learning_rate": 1.914744935698143e-05, "loss": 1.0175, "step": 4372 }, { "epoch": 0.81, "learning_rate": 1.9112337311430194e-05, "loss": 1.0878, "step": 4373 }, { "epoch": 0.81, "learning_rate": 1.907725408729263e-05, "loss": 1.0714, "step": 4374 }, { "epoch": 0.81, "learning_rate": 1.904219969706935e-05, "loss": 1.0205, "step": 4375 }, { "epoch": 0.81, "learning_rate": 1.9007174153250797e-05, "loss": 0.9849, "step": 4376 }, { "epoch": 0.81, "learning_rate": 1.897217746831701e-05, "loss": 1.0911, "step": 4377 }, { "epoch": 0.81, "learning_rate": 1.8937209654737796e-05, "loss": 0.9977, "step": 4378 }, { "epoch": 0.81, "learning_rate": 1.8902270724972726e-05, "loss": 0.9294, "step": 4379 }, { "epoch": 0.81, "learning_rate": 1.8867360691471002e-05, "loss": 0.9557, "step": 4380 }, { "epoch": 0.81, "learning_rate": 1.883247956667157e-05, "loss": 1.0257, "step": 4381 }, { "epoch": 0.81, "learning_rate": 1.8797627363003022e-05, "loss": 1.062, "step": 4382 }, { "epoch": 0.81, "learning_rate": 1.8762804092883766e-05, "loss": 1.0262, "step": 4383 }, { "epoch": 0.81, "learning_rate": 1.8728009768721765e-05, "loss": 1.0319, "step": 4384 }, { "epoch": 0.81, "learning_rate": 1.869324440291477e-05, "loss": 0.9651, "step": 4385 }, { "epoch": 0.81, "learning_rate": 1.8658508007850138e-05, "loss": 0.9219, "step": 4386 }, { "epoch": 0.81, "learning_rate": 1.862380059590495e-05, "loss": 0.8583, "step": 4387 }, { "epoch": 0.81, "learning_rate": 1.8589122179445917e-05, "loss": 1.224, "step": 4388 }, { "epoch": 0.81, "learning_rate": 1.85544727708295e-05, "loss": 0.9836, "step": 4389 }, { "epoch": 0.81, "learning_rate": 1.8519852382401715e-05, "loss": 1.0502, "step": 4390 }, { "epoch": 0.81, "learning_rate": 1.8485261026498356e-05, "loss": 1.1903, "step": 4391 }, { "epoch": 0.81, "learning_rate": 1.845069871544477e-05, "loss": 0.8998, "step": 4392 }, { "epoch": 0.81, "learning_rate": 1.841616546155601e-05, "loss": 1.0304, "step": 4393 }, { "epoch": 0.81, "learning_rate": 1.838166127713672e-05, "loss": 1.0982, "step": 4394 }, { "epoch": 0.81, "learning_rate": 1.834718617448129e-05, "loss": 1.04, "step": 4395 }, { "epoch": 0.81, "learning_rate": 1.831274016587362e-05, "loss": 1.0524, "step": 4396 }, { "epoch": 0.81, "learning_rate": 1.8278323263587404e-05, "loss": 1.0445, "step": 4397 }, { "epoch": 0.81, "learning_rate": 1.8243935479885753e-05, "loss": 1.1769, "step": 4398 }, { "epoch": 0.81, "learning_rate": 1.82095768270216e-05, "loss": 1.1642, "step": 4399 }, { "epoch": 0.81, "learning_rate": 1.8175247317237366e-05, "loss": 1.1054, "step": 4400 }, { "epoch": 0.81, "learning_rate": 1.8140946962765194e-05, "loss": 1.031, "step": 4401 }, { "epoch": 0.81, "learning_rate": 1.810667577582672e-05, "loss": 1.1262, "step": 4402 }, { "epoch": 0.81, "learning_rate": 1.8072433768633333e-05, "loss": 0.9884, "step": 4403 }, { "epoch": 0.81, "learning_rate": 1.8038220953385853e-05, "loss": 1.1774, "step": 4404 }, { "epoch": 0.81, "learning_rate": 1.800403734227485e-05, "loss": 1.1101, "step": 4405 }, { "epoch": 0.81, "learning_rate": 1.7969882947480375e-05, "loss": 1.0773, "step": 4406 }, { "epoch": 0.81, "learning_rate": 1.793575778117218e-05, "loss": 1.0814, "step": 4407 }, { "epoch": 0.81, "learning_rate": 1.790166185550951e-05, "loss": 1.0072, "step": 4408 }, { "epoch": 0.81, "learning_rate": 1.7867595182641226e-05, "loss": 1.0209, "step": 4409 }, { "epoch": 0.81, "learning_rate": 1.7833557774705733e-05, "loss": 0.9441, "step": 4410 }, { "epoch": 0.81, "learning_rate": 1.7799549643831104e-05, "loss": 1.0124, "step": 4411 }, { "epoch": 0.81, "learning_rate": 1.7765570802134844e-05, "loss": 1.0395, "step": 4412 }, { "epoch": 0.81, "learning_rate": 1.7731621261724164e-05, "loss": 1.014, "step": 4413 }, { "epoch": 0.81, "learning_rate": 1.7697701034695724e-05, "loss": 1.0051, "step": 4414 }, { "epoch": 0.81, "learning_rate": 1.7663810133135784e-05, "loss": 1.1149, "step": 4415 }, { "epoch": 0.81, "learning_rate": 1.7629948569120126e-05, "loss": 0.9619, "step": 4416 }, { "epoch": 0.81, "learning_rate": 1.7596116354714155e-05, "loss": 1.0735, "step": 4417 }, { "epoch": 0.81, "learning_rate": 1.7562313501972692e-05, "loss": 0.9269, "step": 4418 }, { "epoch": 0.81, "learning_rate": 1.7528540022940288e-05, "loss": 1.0432, "step": 4419 }, { "epoch": 0.81, "learning_rate": 1.7494795929650766e-05, "loss": 0.9364, "step": 4420 }, { "epoch": 0.81, "learning_rate": 1.746108123412773e-05, "loss": 1.1141, "step": 4421 }, { "epoch": 0.81, "learning_rate": 1.7427395948384117e-05, "loss": 0.9393, "step": 4422 }, { "epoch": 0.82, "learning_rate": 1.739374008442256e-05, "loss": 1.0148, "step": 4423 }, { "epoch": 0.82, "learning_rate": 1.7360113654235034e-05, "loss": 1.001, "step": 4424 }, { "epoch": 0.82, "learning_rate": 1.7326516669803193e-05, "loss": 1.0554, "step": 4425 }, { "epoch": 0.82, "learning_rate": 1.7292949143098026e-05, "loss": 1.0275, "step": 4426 }, { "epoch": 0.82, "learning_rate": 1.725941108608019e-05, "loss": 1.0603, "step": 4427 }, { "epoch": 0.82, "learning_rate": 1.7225902510699697e-05, "loss": 1.0528, "step": 4428 }, { "epoch": 0.82, "learning_rate": 1.7192423428896198e-05, "loss": 1.0913, "step": 4429 }, { "epoch": 0.82, "learning_rate": 1.7158973852598725e-05, "loss": 1.1288, "step": 4430 }, { "epoch": 0.82, "learning_rate": 1.7125553793725836e-05, "loss": 1.0908, "step": 4431 }, { "epoch": 0.82, "learning_rate": 1.7092163264185545e-05, "loss": 0.9931, "step": 4432 }, { "epoch": 0.82, "learning_rate": 1.7058802275875408e-05, "loss": 0.9747, "step": 4433 }, { "epoch": 0.82, "learning_rate": 1.7025470840682378e-05, "loss": 1.0124, "step": 4434 }, { "epoch": 0.82, "learning_rate": 1.699216897048297e-05, "loss": 0.9803, "step": 4435 }, { "epoch": 0.82, "learning_rate": 1.6958896677143065e-05, "loss": 1.1025, "step": 4436 }, { "epoch": 0.82, "learning_rate": 1.692565397251805e-05, "loss": 0.9993, "step": 4437 }, { "epoch": 0.82, "learning_rate": 1.6892440868452764e-05, "loss": 0.928, "step": 4438 }, { "epoch": 0.82, "learning_rate": 1.6859257376781545e-05, "loss": 0.9333, "step": 4439 }, { "epoch": 0.82, "learning_rate": 1.6826103509328083e-05, "loss": 0.9449, "step": 4440 }, { "epoch": 0.82, "learning_rate": 1.679297927790565e-05, "loss": 0.9677, "step": 4441 }, { "epoch": 0.82, "learning_rate": 1.6759884694316774e-05, "loss": 0.9757, "step": 4442 }, { "epoch": 0.82, "learning_rate": 1.6726819770353585e-05, "loss": 0.9625, "step": 4443 }, { "epoch": 0.82, "learning_rate": 1.669378451779755e-05, "loss": 1.0362, "step": 4444 }, { "epoch": 0.82, "learning_rate": 1.666077894841964e-05, "loss": 1.0755, "step": 4445 }, { "epoch": 0.82, "learning_rate": 1.662780307398014e-05, "loss": 0.9698, "step": 4446 }, { "epoch": 0.82, "learning_rate": 1.6594856906228918e-05, "loss": 0.9715, "step": 4447 }, { "epoch": 0.82, "learning_rate": 1.6561940456905033e-05, "loss": 1.0494, "step": 4448 }, { "epoch": 0.82, "learning_rate": 1.6529053737737164e-05, "loss": 1.0156, "step": 4449 }, { "epoch": 0.82, "learning_rate": 1.649619676044327e-05, "loss": 0.9169, "step": 4450 }, { "epoch": 0.82, "learning_rate": 1.64633695367308e-05, "loss": 1.0282, "step": 4451 }, { "epoch": 0.82, "learning_rate": 1.6430572078296525e-05, "loss": 0.9224, "step": 4452 }, { "epoch": 0.82, "learning_rate": 1.6397804396826643e-05, "loss": 1.0302, "step": 4453 }, { "epoch": 0.82, "learning_rate": 1.636506650399673e-05, "loss": 1.1553, "step": 4454 }, { "epoch": 0.82, "learning_rate": 1.6332358411471792e-05, "loss": 0.8628, "step": 4455 }, { "epoch": 0.82, "learning_rate": 1.6299680130906138e-05, "loss": 0.9887, "step": 4456 }, { "epoch": 0.82, "learning_rate": 1.6267031673943543e-05, "loss": 1.043, "step": 4457 }, { "epoch": 0.82, "learning_rate": 1.62344130522171e-05, "loss": 1.1193, "step": 4458 }, { "epoch": 0.82, "learning_rate": 1.6201824277349277e-05, "loss": 0.9825, "step": 4459 }, { "epoch": 0.82, "learning_rate": 1.616926536095189e-05, "loss": 0.9799, "step": 4460 }, { "epoch": 0.82, "learning_rate": 1.613673631462619e-05, "loss": 1.1937, "step": 4461 }, { "epoch": 0.82, "learning_rate": 1.6104237149962686e-05, "loss": 1.0572, "step": 4462 }, { "epoch": 0.82, "learning_rate": 1.6071767878541354e-05, "loss": 1.1303, "step": 4463 }, { "epoch": 0.82, "learning_rate": 1.6039328511931362e-05, "loss": 1.0735, "step": 4464 }, { "epoch": 0.82, "learning_rate": 1.6006919061691384e-05, "loss": 1.0581, "step": 4465 }, { "epoch": 0.82, "learning_rate": 1.5974539539369328e-05, "loss": 0.996, "step": 4466 }, { "epoch": 0.82, "learning_rate": 1.5942189956502497e-05, "loss": 1.1149, "step": 4467 }, { "epoch": 0.82, "learning_rate": 1.5909870324617472e-05, "loss": 0.9089, "step": 4468 }, { "epoch": 0.82, "learning_rate": 1.587758065523025e-05, "loss": 0.9684, "step": 4469 }, { "epoch": 0.82, "learning_rate": 1.5845320959846023e-05, "loss": 0.9839, "step": 4470 }, { "epoch": 0.82, "learning_rate": 1.5813091249959434e-05, "loss": 0.9506, "step": 4471 }, { "epoch": 0.82, "learning_rate": 1.578089153705433e-05, "loss": 1.07, "step": 4472 }, { "epoch": 0.82, "learning_rate": 1.5748721832603973e-05, "loss": 1.0524, "step": 4473 }, { "epoch": 0.82, "learning_rate": 1.571658214807087e-05, "loss": 1.0164, "step": 4474 }, { "epoch": 0.82, "learning_rate": 1.568447249490682e-05, "loss": 1.0846, "step": 4475 }, { "epoch": 0.82, "learning_rate": 1.5652392884552947e-05, "loss": 1.0392, "step": 4476 }, { "epoch": 0.82, "learning_rate": 1.5620343328439703e-05, "loss": 1.0695, "step": 4477 }, { "epoch": 0.83, "learning_rate": 1.558832383798674e-05, "loss": 1.0107, "step": 4478 }, { "epoch": 0.83, "learning_rate": 1.5556334424603114e-05, "loss": 1.1112, "step": 4479 }, { "epoch": 0.83, "learning_rate": 1.5524375099687072e-05, "loss": 0.8183, "step": 4480 }, { "epoch": 0.83, "learning_rate": 1.549244587462618e-05, "loss": 1.0433, "step": 4481 }, { "epoch": 0.83, "learning_rate": 1.5460546760797236e-05, "loss": 0.8989, "step": 4482 }, { "epoch": 0.83, "learning_rate": 1.54286777695664e-05, "loss": 1.0855, "step": 4483 }, { "epoch": 0.83, "learning_rate": 1.5396838912289e-05, "loss": 1.1564, "step": 4484 }, { "epoch": 0.83, "learning_rate": 1.5365030200309727e-05, "loss": 1.0072, "step": 4485 }, { "epoch": 0.83, "learning_rate": 1.5333251644962376e-05, "loss": 1.0334, "step": 4486 }, { "epoch": 0.83, "learning_rate": 1.5301503257570184e-05, "loss": 1.1311, "step": 4487 }, { "epoch": 0.83, "learning_rate": 1.5269785049445484e-05, "loss": 1.0115, "step": 4488 }, { "epoch": 0.83, "learning_rate": 1.5238097031889987e-05, "loss": 0.979, "step": 4489 }, { "epoch": 0.83, "learning_rate": 1.5206439216194513e-05, "loss": 1.0048, "step": 4490 }, { "epoch": 0.83, "learning_rate": 1.5174811613639262e-05, "loss": 0.9804, "step": 4491 }, { "epoch": 0.83, "learning_rate": 1.5143214235493497e-05, "loss": 1.0668, "step": 4492 }, { "epoch": 0.83, "learning_rate": 1.5111647093015901e-05, "loss": 1.1071, "step": 4493 }, { "epoch": 0.83, "learning_rate": 1.5080110197454223e-05, "loss": 1.0793, "step": 4494 }, { "epoch": 0.83, "learning_rate": 1.5048603560045549e-05, "loss": 1.0212, "step": 4495 }, { "epoch": 0.83, "learning_rate": 1.5017127192016134e-05, "loss": 0.966, "step": 4496 }, { "epoch": 0.83, "learning_rate": 1.4985681104581428e-05, "loss": 1.1211, "step": 4497 }, { "epoch": 0.83, "learning_rate": 1.4954265308946114e-05, "loss": 0.9684, "step": 4498 }, { "epoch": 0.83, "learning_rate": 1.4922879816304113e-05, "loss": 0.9678, "step": 4499 }, { "epoch": 0.83, "learning_rate": 1.4891524637838473e-05, "loss": 1.0865, "step": 4500 }, { "epoch": 0.83, "learning_rate": 1.486019978472154e-05, "loss": 0.9401, "step": 4501 }, { "epoch": 0.83, "learning_rate": 1.482890526811478e-05, "loss": 1.1594, "step": 4502 }, { "epoch": 0.83, "learning_rate": 1.4797641099168858e-05, "loss": 0.991, "step": 4503 }, { "epoch": 0.83, "learning_rate": 1.4766407289023609e-05, "loss": 0.9629, "step": 4504 }, { "epoch": 0.83, "learning_rate": 1.4735203848808155e-05, "loss": 1.1314, "step": 4505 }, { "epoch": 0.83, "learning_rate": 1.4704030789640643e-05, "loss": 1.1077, "step": 4506 }, { "epoch": 0.83, "learning_rate": 1.4672888122628536e-05, "loss": 1.1079, "step": 4507 }, { "epoch": 0.83, "learning_rate": 1.4641775858868379e-05, "loss": 1.0705, "step": 4508 }, { "epoch": 0.83, "learning_rate": 1.4610694009445914e-05, "loss": 1.0043, "step": 4509 }, { "epoch": 0.83, "learning_rate": 1.4579642585436015e-05, "loss": 1.1998, "step": 4510 }, { "epoch": 0.83, "learning_rate": 1.4548621597902779e-05, "loss": 1.1254, "step": 4511 }, { "epoch": 0.83, "learning_rate": 1.4517631057899394e-05, "loss": 1.0985, "step": 4512 }, { "epoch": 0.83, "learning_rate": 1.4486670976468253e-05, "loss": 1.0778, "step": 4513 }, { "epoch": 0.83, "learning_rate": 1.4455741364640862e-05, "loss": 0.9953, "step": 4514 }, { "epoch": 0.83, "learning_rate": 1.4424842233437863e-05, "loss": 1.2096, "step": 4515 }, { "epoch": 0.83, "learning_rate": 1.4393973593869037e-05, "loss": 0.801, "step": 4516 }, { "epoch": 0.83, "learning_rate": 1.4363135456933364e-05, "loss": 1.0456, "step": 4517 }, { "epoch": 0.83, "learning_rate": 1.4332327833618875e-05, "loss": 1.0594, "step": 4518 }, { "epoch": 0.83, "learning_rate": 1.4301550734902736e-05, "loss": 1.0472, "step": 4519 }, { "epoch": 0.83, "learning_rate": 1.4270804171751317e-05, "loss": 0.9422, "step": 4520 }, { "epoch": 0.83, "learning_rate": 1.4240088155120023e-05, "loss": 1.0536, "step": 4521 }, { "epoch": 0.83, "learning_rate": 1.4209402695953388e-05, "loss": 1.1285, "step": 4522 }, { "epoch": 0.83, "learning_rate": 1.417874780518511e-05, "loss": 1.0615, "step": 4523 }, { "epoch": 0.83, "learning_rate": 1.414812349373793e-05, "loss": 1.0922, "step": 4524 }, { "epoch": 0.83, "learning_rate": 1.4117529772523707e-05, "loss": 0.9844, "step": 4525 }, { "epoch": 0.83, "learning_rate": 1.4086966652443468e-05, "loss": 0.8987, "step": 4526 }, { "epoch": 0.83, "learning_rate": 1.4056434144387253e-05, "loss": 1.121, "step": 4527 }, { "epoch": 0.83, "learning_rate": 1.4025932259234198e-05, "loss": 0.9663, "step": 4528 }, { "epoch": 0.83, "learning_rate": 1.3995461007852605e-05, "loss": 1.0, "step": 4529 }, { "epoch": 0.83, "learning_rate": 1.3965020401099781e-05, "loss": 1.0075, "step": 4530 }, { "epoch": 0.83, "learning_rate": 1.3934610449822128e-05, "loss": 0.9671, "step": 4531 }, { "epoch": 0.84, "learning_rate": 1.3904231164855186e-05, "loss": 1.1104, "step": 4532 }, { "epoch": 0.84, "learning_rate": 1.3873882557023488e-05, "loss": 0.9814, "step": 4533 }, { "epoch": 0.84, "learning_rate": 1.384356463714066e-05, "loss": 1.1833, "step": 4534 }, { "epoch": 0.84, "learning_rate": 1.3813277416009452e-05, "loss": 1.2036, "step": 4535 }, { "epoch": 0.84, "learning_rate": 1.3783020904421584e-05, "loss": 0.9681, "step": 4536 }, { "epoch": 0.84, "learning_rate": 1.3752795113157879e-05, "loss": 0.9157, "step": 4537 }, { "epoch": 0.84, "learning_rate": 1.3722600052988243e-05, "loss": 0.9981, "step": 4538 }, { "epoch": 0.84, "learning_rate": 1.3692435734671583e-05, "loss": 1.1123, "step": 4539 }, { "epoch": 0.84, "learning_rate": 1.3662302168955877e-05, "loss": 1.1197, "step": 4540 }, { "epoch": 0.84, "learning_rate": 1.3632199366578103e-05, "loss": 1.1978, "step": 4541 }, { "epoch": 0.84, "learning_rate": 1.360212733826436e-05, "loss": 1.0417, "step": 4542 }, { "epoch": 0.84, "learning_rate": 1.3572086094729709e-05, "loss": 1.079, "step": 4543 }, { "epoch": 0.84, "learning_rate": 1.3542075646678286e-05, "loss": 1.0722, "step": 4544 }, { "epoch": 0.84, "learning_rate": 1.3512096004803232e-05, "loss": 1.0898, "step": 4545 }, { "epoch": 0.84, "learning_rate": 1.348214717978672e-05, "loss": 0.9351, "step": 4546 }, { "epoch": 0.84, "learning_rate": 1.3452229182299892e-05, "loss": 1.0912, "step": 4547 }, { "epoch": 0.84, "learning_rate": 1.3422342023003021e-05, "loss": 0.9938, "step": 4548 }, { "epoch": 0.84, "learning_rate": 1.3392485712545266e-05, "loss": 0.8847, "step": 4549 }, { "epoch": 0.84, "learning_rate": 1.3362660261564908e-05, "loss": 1.0752, "step": 4550 }, { "epoch": 0.84, "learning_rate": 1.3332865680689132e-05, "loss": 0.9761, "step": 4551 }, { "epoch": 0.84, "learning_rate": 1.3303101980534184e-05, "loss": 1.1541, "step": 4552 }, { "epoch": 0.84, "learning_rate": 1.3273369171705263e-05, "loss": 1.0358, "step": 4553 }, { "epoch": 0.84, "learning_rate": 1.3243667264796633e-05, "loss": 1.1174, "step": 4554 }, { "epoch": 0.84, "learning_rate": 1.3213996270391448e-05, "loss": 1.0134, "step": 4555 }, { "epoch": 0.84, "learning_rate": 1.3184356199061965e-05, "loss": 1.0756, "step": 4556 }, { "epoch": 0.84, "learning_rate": 1.315474706136931e-05, "loss": 0.9262, "step": 4557 }, { "epoch": 0.84, "learning_rate": 1.3125168867863668e-05, "loss": 0.9913, "step": 4558 }, { "epoch": 0.84, "learning_rate": 1.3095621629084109e-05, "loss": 0.9021, "step": 4559 }, { "epoch": 0.84, "learning_rate": 1.306610535555881e-05, "loss": 0.9479, "step": 4560 }, { "epoch": 0.84, "learning_rate": 1.303662005780476e-05, "loss": 1.1232, "step": 4561 }, { "epoch": 0.84, "learning_rate": 1.3007165746328064e-05, "loss": 0.8643, "step": 4562 }, { "epoch": 0.84, "learning_rate": 1.297774243162363e-05, "loss": 1.0685, "step": 4563 }, { "epoch": 0.84, "learning_rate": 1.2948350124175456e-05, "loss": 0.9495, "step": 4564 }, { "epoch": 0.84, "learning_rate": 1.2918988834456392e-05, "loss": 1.0438, "step": 4565 }, { "epoch": 0.84, "learning_rate": 1.288965857292832e-05, "loss": 1.0367, "step": 4566 }, { "epoch": 0.84, "learning_rate": 1.2860359350041984e-05, "loss": 0.9278, "step": 4567 }, { "epoch": 0.84, "learning_rate": 1.2831091176237175e-05, "loss": 1.1255, "step": 4568 }, { "epoch": 0.84, "learning_rate": 1.2801854061942475e-05, "loss": 1.0607, "step": 4569 }, { "epoch": 0.84, "learning_rate": 1.2772648017575539e-05, "loss": 1.0585, "step": 4570 }, { "epoch": 0.84, "learning_rate": 1.2743473053542842e-05, "loss": 1.1849, "step": 4571 }, { "epoch": 0.84, "learning_rate": 1.2714329180239892e-05, "loss": 1.1153, "step": 4572 }, { "epoch": 0.84, "learning_rate": 1.2685216408051026e-05, "loss": 0.8771, "step": 4573 }, { "epoch": 0.84, "learning_rate": 1.2656134747349557e-05, "loss": 1.0368, "step": 4574 }, { "epoch": 0.84, "learning_rate": 1.2627084208497642e-05, "loss": 1.0738, "step": 4575 }, { "epoch": 0.84, "learning_rate": 1.2598064801846454e-05, "loss": 0.9217, "step": 4576 }, { "epoch": 0.84, "learning_rate": 1.2569076537735969e-05, "loss": 1.0381, "step": 4577 }, { "epoch": 0.84, "learning_rate": 1.2540119426495156e-05, "loss": 1.1304, "step": 4578 }, { "epoch": 0.84, "learning_rate": 1.251119347844183e-05, "loss": 1.1615, "step": 4579 }, { "epoch": 0.84, "learning_rate": 1.24822987038827e-05, "loss": 1.1841, "step": 4580 }, { "epoch": 0.84, "learning_rate": 1.2453435113113377e-05, "loss": 1.0395, "step": 4581 }, { "epoch": 0.84, "learning_rate": 1.2424602716418398e-05, "loss": 1.0047, "step": 4582 }, { "epoch": 0.84, "learning_rate": 1.2395801524071104e-05, "loss": 0.9166, "step": 4583 }, { "epoch": 0.84, "learning_rate": 1.2367031546333829e-05, "loss": 1.0242, "step": 4584 }, { "epoch": 0.84, "learning_rate": 1.233829279345765e-05, "loss": 1.0986, "step": 4585 }, { "epoch": 0.85, "learning_rate": 1.2309585275682645e-05, "loss": 1.1261, "step": 4586 }, { "epoch": 0.85, "learning_rate": 1.2280909003237662e-05, "loss": 1.0559, "step": 4587 }, { "epoch": 0.85, "learning_rate": 1.2252263986340495e-05, "loss": 1.0344, "step": 4588 }, { "epoch": 0.85, "learning_rate": 1.2223650235197747e-05, "loss": 1.1415, "step": 4589 }, { "epoch": 0.85, "learning_rate": 1.2195067760004953e-05, "loss": 0.9941, "step": 4590 }, { "epoch": 0.85, "learning_rate": 1.2166516570946363e-05, "loss": 0.8509, "step": 4591 }, { "epoch": 0.85, "learning_rate": 1.213799667819524e-05, "loss": 1.0725, "step": 4592 }, { "epoch": 0.85, "learning_rate": 1.2109508091913568e-05, "loss": 1.0883, "step": 4593 }, { "epoch": 0.85, "learning_rate": 1.2081050822252294e-05, "loss": 1.1977, "step": 4594 }, { "epoch": 0.85, "learning_rate": 1.2052624879351104e-05, "loss": 1.065, "step": 4595 }, { "epoch": 0.85, "learning_rate": 1.2024230273338567e-05, "loss": 0.9742, "step": 4596 }, { "epoch": 0.85, "learning_rate": 1.1995867014332063e-05, "loss": 0.9542, "step": 4597 }, { "epoch": 0.85, "learning_rate": 1.1967535112437855e-05, "loss": 1.0219, "step": 4598 }, { "epoch": 0.85, "learning_rate": 1.1939234577750979e-05, "loss": 0.9596, "step": 4599 }, { "epoch": 0.85, "learning_rate": 1.1910965420355324e-05, "loss": 1.0266, "step": 4600 }, { "epoch": 0.85, "learning_rate": 1.18827276503236e-05, "loss": 1.0557, "step": 4601 }, { "epoch": 0.85, "learning_rate": 1.185452127771729e-05, "loss": 1.0123, "step": 4602 }, { "epoch": 0.85, "learning_rate": 1.1826346312586733e-05, "loss": 0.9636, "step": 4603 }, { "epoch": 0.85, "learning_rate": 1.179820276497109e-05, "loss": 0.9397, "step": 4604 }, { "epoch": 0.85, "learning_rate": 1.1770090644898257e-05, "loss": 1.0683, "step": 4605 }, { "epoch": 0.85, "learning_rate": 1.1742009962385048e-05, "loss": 1.0341, "step": 4606 }, { "epoch": 0.85, "learning_rate": 1.1713960727436924e-05, "loss": 1.0332, "step": 4607 }, { "epoch": 0.85, "learning_rate": 1.168594295004829e-05, "loss": 1.1497, "step": 4608 }, { "epoch": 0.85, "learning_rate": 1.1657956640202217e-05, "loss": 0.984, "step": 4609 }, { "epoch": 0.85, "learning_rate": 1.1630001807870684e-05, "loss": 0.9103, "step": 4610 }, { "epoch": 0.85, "learning_rate": 1.1602078463014332e-05, "loss": 1.0393, "step": 4611 }, { "epoch": 0.85, "learning_rate": 1.1574186615582706e-05, "loss": 1.0112, "step": 4612 }, { "epoch": 0.85, "learning_rate": 1.1546326275513996e-05, "loss": 1.1627, "step": 4613 }, { "epoch": 0.85, "learning_rate": 1.1518497452735277e-05, "loss": 1.0268, "step": 4614 }, { "epoch": 0.85, "learning_rate": 1.1490700157162327e-05, "loss": 1.0734, "step": 4615 }, { "epoch": 0.85, "learning_rate": 1.1462934398699743e-05, "loss": 0.9365, "step": 4616 }, { "epoch": 0.85, "learning_rate": 1.1435200187240836e-05, "loss": 1.1132, "step": 4617 }, { "epoch": 0.85, "learning_rate": 1.140749753266771e-05, "loss": 0.9044, "step": 4618 }, { "epoch": 0.85, "learning_rate": 1.1379826444851172e-05, "loss": 1.1082, "step": 4619 }, { "epoch": 0.85, "learning_rate": 1.1352186933650866e-05, "loss": 1.0253, "step": 4620 }, { "epoch": 0.85, "learning_rate": 1.1324579008915104e-05, "loss": 0.9594, "step": 4621 }, { "epoch": 0.85, "learning_rate": 1.1297002680481017e-05, "loss": 1.1322, "step": 4622 }, { "epoch": 0.85, "learning_rate": 1.1269457958174412e-05, "loss": 1.0074, "step": 4623 }, { "epoch": 0.85, "learning_rate": 1.1241944851809883e-05, "loss": 0.8713, "step": 4624 }, { "epoch": 0.85, "learning_rate": 1.1214463371190686e-05, "loss": 0.998, "step": 4625 }, { "epoch": 0.85, "learning_rate": 1.1187013526108925e-05, "loss": 1.0185, "step": 4626 }, { "epoch": 0.85, "learning_rate": 1.1159595326345318e-05, "loss": 1.015, "step": 4627 }, { "epoch": 0.85, "learning_rate": 1.1132208781669417e-05, "loss": 1.0936, "step": 4628 }, { "epoch": 0.85, "learning_rate": 1.110485390183934e-05, "loss": 1.0891, "step": 4629 }, { "epoch": 0.85, "learning_rate": 1.10775306966021e-05, "loss": 1.0305, "step": 4630 }, { "epoch": 0.85, "learning_rate": 1.1050239175693288e-05, "loss": 0.9738, "step": 4631 }, { "epoch": 0.85, "learning_rate": 1.1022979348837292e-05, "loss": 0.9847, "step": 4632 }, { "epoch": 0.85, "learning_rate": 1.0995751225747164e-05, "loss": 1.1188, "step": 4633 }, { "epoch": 0.85, "learning_rate": 1.0968554816124653e-05, "loss": 1.0713, "step": 4634 }, { "epoch": 0.85, "learning_rate": 1.0941390129660211e-05, "loss": 0.865, "step": 4635 }, { "epoch": 0.85, "learning_rate": 1.0914257176033049e-05, "loss": 1.1334, "step": 4636 }, { "epoch": 0.85, "learning_rate": 1.0887155964910967e-05, "loss": 1.0182, "step": 4637 }, { "epoch": 0.85, "learning_rate": 1.0860086505950551e-05, "loss": 1.2367, "step": 4638 }, { "epoch": 0.85, "learning_rate": 1.0833048808797008e-05, "loss": 0.9966, "step": 4639 }, { "epoch": 0.86, "learning_rate": 1.0806042883084255e-05, "loss": 1.0524, "step": 4640 }, { "epoch": 0.86, "learning_rate": 1.077906873843486e-05, "loss": 1.0199, "step": 4641 }, { "epoch": 0.86, "learning_rate": 1.0752126384460138e-05, "loss": 1.0378, "step": 4642 }, { "epoch": 0.86, "learning_rate": 1.0725215830759982e-05, "loss": 1.0858, "step": 4643 }, { "epoch": 0.86, "learning_rate": 1.0698337086923038e-05, "loss": 1.0623, "step": 4644 }, { "epoch": 0.86, "learning_rate": 1.0671490162526577e-05, "loss": 1.0272, "step": 4645 }, { "epoch": 0.86, "learning_rate": 1.0644675067136522e-05, "loss": 1.035, "step": 4646 }, { "epoch": 0.86, "learning_rate": 1.0617891810307456e-05, "loss": 0.9289, "step": 4647 }, { "epoch": 0.86, "learning_rate": 1.0591140401582666e-05, "loss": 1.1101, "step": 4648 }, { "epoch": 0.86, "learning_rate": 1.0564420850494017e-05, "loss": 1.037, "step": 4649 }, { "epoch": 0.86, "learning_rate": 1.0537733166562103e-05, "loss": 1.063, "step": 4650 }, { "epoch": 0.86, "learning_rate": 1.0511077359296096e-05, "loss": 1.1041, "step": 4651 }, { "epoch": 0.86, "learning_rate": 1.0484453438193843e-05, "loss": 0.9665, "step": 4652 }, { "epoch": 0.86, "learning_rate": 1.045786141274181e-05, "loss": 1.0114, "step": 4653 }, { "epoch": 0.86, "learning_rate": 1.043130129241513e-05, "loss": 1.1139, "step": 4654 }, { "epoch": 0.86, "learning_rate": 1.0404773086677532e-05, "loss": 1.0696, "step": 4655 }, { "epoch": 0.86, "learning_rate": 1.0378276804981391e-05, "loss": 0.9746, "step": 4656 }, { "epoch": 0.86, "learning_rate": 1.0351812456767718e-05, "loss": 1.2518, "step": 4657 }, { "epoch": 0.86, "learning_rate": 1.0325380051466126e-05, "loss": 0.9931, "step": 4658 }, { "epoch": 0.86, "learning_rate": 1.029897959849484e-05, "loss": 1.1292, "step": 4659 }, { "epoch": 0.86, "learning_rate": 1.0272611107260743e-05, "loss": 1.032, "step": 4660 }, { "epoch": 0.86, "learning_rate": 1.0246274587159278e-05, "loss": 0.9811, "step": 4661 }, { "epoch": 0.86, "learning_rate": 1.0219970047574512e-05, "loss": 1.0694, "step": 4662 }, { "epoch": 0.86, "learning_rate": 1.019369749787914e-05, "loss": 1.1086, "step": 4663 }, { "epoch": 0.86, "learning_rate": 1.016745694743444e-05, "loss": 1.0258, "step": 4664 }, { "epoch": 0.86, "learning_rate": 1.0141248405590275e-05, "loss": 0.9675, "step": 4665 }, { "epoch": 0.86, "learning_rate": 1.0115071881685134e-05, "loss": 1.0151, "step": 4666 }, { "epoch": 0.86, "learning_rate": 1.0088927385046077e-05, "loss": 1.0817, "step": 4667 }, { "epoch": 0.86, "learning_rate": 1.006281492498874e-05, "loss": 1.0605, "step": 4668 }, { "epoch": 0.86, "learning_rate": 1.0036734510817391e-05, "loss": 0.8491, "step": 4669 }, { "epoch": 0.86, "learning_rate": 1.0010686151824832e-05, "loss": 0.9971, "step": 4670 }, { "epoch": 0.86, "learning_rate": 9.98466985729245e-06, "loss": 1.0131, "step": 4671 }, { "epoch": 0.86, "learning_rate": 9.958685636490239e-06, "loss": 1.0003, "step": 4672 }, { "epoch": 0.86, "learning_rate": 9.932733498676727e-06, "loss": 0.9945, "step": 4673 }, { "epoch": 0.86, "learning_rate": 9.906813453099018e-06, "loss": 0.8785, "step": 4674 }, { "epoch": 0.86, "learning_rate": 9.880925508992822e-06, "loss": 0.9034, "step": 4675 }, { "epoch": 0.86, "learning_rate": 9.855069675582362e-06, "loss": 0.984, "step": 4676 }, { "epoch": 0.86, "learning_rate": 9.829245962080425e-06, "loss": 1.0232, "step": 4677 }, { "epoch": 0.86, "learning_rate": 9.803454377688359e-06, "loss": 1.0664, "step": 4678 }, { "epoch": 0.86, "learning_rate": 9.777694931596093e-06, "loss": 0.9944, "step": 4679 }, { "epoch": 0.86, "learning_rate": 9.751967632982062e-06, "loss": 1.1076, "step": 4680 }, { "epoch": 0.86, "learning_rate": 9.726272491013277e-06, "loss": 0.9715, "step": 4681 }, { "epoch": 0.86, "learning_rate": 9.700609514845282e-06, "loss": 1.1089, "step": 4682 }, { "epoch": 0.86, "learning_rate": 9.67497871362214e-06, "loss": 1.0718, "step": 4683 }, { "epoch": 0.86, "learning_rate": 9.649380096476468e-06, "loss": 1.0851, "step": 4684 }, { "epoch": 0.86, "learning_rate": 9.623813672529435e-06, "loss": 1.1166, "step": 4685 }, { "epoch": 0.86, "learning_rate": 9.59827945089068e-06, "loss": 1.1786, "step": 4686 }, { "epoch": 0.86, "learning_rate": 9.572777440658453e-06, "loss": 1.0801, "step": 4687 }, { "epoch": 0.86, "learning_rate": 9.547307650919456e-06, "loss": 0.9772, "step": 4688 }, { "epoch": 0.86, "learning_rate": 9.52187009074893e-06, "loss": 1.0519, "step": 4689 }, { "epoch": 0.86, "learning_rate": 9.496464769210622e-06, "loss": 1.0809, "step": 4690 }, { "epoch": 0.86, "learning_rate": 9.471091695356848e-06, "loss": 0.9877, "step": 4691 }, { "epoch": 0.86, "learning_rate": 9.445750878228354e-06, "loss": 1.0358, "step": 4692 }, { "epoch": 0.86, "learning_rate": 9.420442326854451e-06, "loss": 1.0345, "step": 4693 }, { "epoch": 0.86, "learning_rate": 9.395166050252935e-06, "loss": 0.9962, "step": 4694 }, { "epoch": 0.87, "learning_rate": 9.36992205743008e-06, "loss": 1.0648, "step": 4695 }, { "epoch": 0.87, "learning_rate": 9.344710357380671e-06, "loss": 1.1577, "step": 4696 }, { "epoch": 0.87, "learning_rate": 9.31953095908803e-06, "loss": 0.9051, "step": 4697 }, { "epoch": 0.87, "learning_rate": 9.294383871523882e-06, "loss": 1.0647, "step": 4698 }, { "epoch": 0.87, "learning_rate": 9.269269103648558e-06, "loss": 1.0467, "step": 4699 }, { "epoch": 0.87, "learning_rate": 9.244186664410715e-06, "loss": 0.9278, "step": 4700 }, { "epoch": 0.87, "learning_rate": 9.219136562747655e-06, "loss": 1.2336, "step": 4701 }, { "epoch": 0.87, "learning_rate": 9.194118807585029e-06, "loss": 0.9838, "step": 4702 }, { "epoch": 0.87, "learning_rate": 9.169133407837049e-06, "loss": 1.1839, "step": 4703 }, { "epoch": 0.87, "learning_rate": 9.144180372406341e-06, "loss": 0.9244, "step": 4704 }, { "epoch": 0.87, "learning_rate": 9.11925971018408e-06, "loss": 1.0398, "step": 4705 }, { "epoch": 0.87, "learning_rate": 9.094371430049764e-06, "loss": 0.8928, "step": 4706 }, { "epoch": 0.87, "learning_rate": 9.069515540871499e-06, "loss": 1.0282, "step": 4707 }, { "epoch": 0.87, "learning_rate": 9.044692051505744e-06, "loss": 1.0233, "step": 4708 }, { "epoch": 0.87, "learning_rate": 9.019900970797502e-06, "loss": 1.1084, "step": 4709 }, { "epoch": 0.87, "learning_rate": 8.995142307580162e-06, "loss": 0.9822, "step": 4710 }, { "epoch": 0.87, "learning_rate": 8.970416070675603e-06, "loss": 1.2158, "step": 4711 }, { "epoch": 0.87, "learning_rate": 8.945722268894085e-06, "loss": 1.1041, "step": 4712 }, { "epoch": 0.87, "learning_rate": 8.921060911034407e-06, "loss": 0.9794, "step": 4713 }, { "epoch": 0.87, "learning_rate": 8.896432005883726e-06, "loss": 1.0527, "step": 4714 }, { "epoch": 0.87, "learning_rate": 8.87183556221769e-06, "loss": 0.9834, "step": 4715 }, { "epoch": 0.87, "learning_rate": 8.847271588800354e-06, "loss": 1.1198, "step": 4716 }, { "epoch": 0.87, "learning_rate": 8.822740094384208e-06, "loss": 1.0374, "step": 4717 }, { "epoch": 0.87, "learning_rate": 8.798241087710147e-06, "loss": 1.0747, "step": 4718 }, { "epoch": 0.87, "learning_rate": 8.773774577507542e-06, "loss": 1.095, "step": 4719 }, { "epoch": 0.87, "learning_rate": 8.749340572494125e-06, "loss": 1.0043, "step": 4720 }, { "epoch": 0.87, "learning_rate": 8.724939081376127e-06, "loss": 1.0078, "step": 4721 }, { "epoch": 0.87, "learning_rate": 8.700570112848083e-06, "loss": 1.0954, "step": 4722 }, { "epoch": 0.87, "learning_rate": 8.676233675593037e-06, "loss": 1.0853, "step": 4723 }, { "epoch": 0.87, "learning_rate": 8.651929778282387e-06, "loss": 0.9911, "step": 4724 }, { "epoch": 0.87, "learning_rate": 8.627658429575968e-06, "loss": 1.1089, "step": 4725 }, { "epoch": 0.87, "learning_rate": 8.60341963812199e-06, "loss": 0.9184, "step": 4726 }, { "epoch": 0.87, "learning_rate": 8.579213412557107e-06, "loss": 1.0355, "step": 4727 }, { "epoch": 0.87, "learning_rate": 8.555039761506277e-06, "loss": 1.0945, "step": 4728 }, { "epoch": 0.87, "learning_rate": 8.530898693582979e-06, "loss": 1.0322, "step": 4729 }, { "epoch": 0.87, "learning_rate": 8.50679021738896e-06, "loss": 0.9545, "step": 4730 }, { "epoch": 0.87, "learning_rate": 8.482714341514453e-06, "loss": 0.9698, "step": 4731 }, { "epoch": 0.87, "learning_rate": 8.458671074538017e-06, "loss": 0.9969, "step": 4732 }, { "epoch": 0.87, "learning_rate": 8.434660425026597e-06, "loss": 1.0389, "step": 4733 }, { "epoch": 0.87, "learning_rate": 8.41068240153552e-06, "loss": 1.0619, "step": 4734 }, { "epoch": 0.87, "learning_rate": 8.386737012608526e-06, "loss": 1.0933, "step": 4735 }, { "epoch": 0.87, "learning_rate": 8.362824266777658e-06, "loss": 1.005, "step": 4736 }, { "epoch": 0.87, "learning_rate": 8.33894417256339e-06, "loss": 1.021, "step": 4737 }, { "epoch": 0.87, "learning_rate": 8.315096738474537e-06, "loss": 1.0459, "step": 4738 }, { "epoch": 0.87, "learning_rate": 8.291281973008259e-06, "loss": 1.0402, "step": 4739 }, { "epoch": 0.87, "learning_rate": 8.267499884650076e-06, "loss": 1.1364, "step": 4740 }, { "epoch": 0.87, "learning_rate": 8.243750481873924e-06, "loss": 1.0265, "step": 4741 }, { "epoch": 0.87, "learning_rate": 8.220033773142023e-06, "loss": 1.0815, "step": 4742 }, { "epoch": 0.87, "learning_rate": 8.196349766904998e-06, "loss": 1.048, "step": 4743 }, { "epoch": 0.87, "learning_rate": 8.172698471601748e-06, "loss": 1.0478, "step": 4744 }, { "epoch": 0.87, "learning_rate": 8.149079895659595e-06, "loss": 1.1301, "step": 4745 }, { "epoch": 0.87, "learning_rate": 8.125494047494153e-06, "loss": 0.9221, "step": 4746 }, { "epoch": 0.87, "learning_rate": 8.101940935509412e-06, "loss": 1.0357, "step": 4747 }, { "epoch": 0.87, "learning_rate": 8.078420568097656e-06, "loss": 0.9894, "step": 4748 }, { "epoch": 0.88, "learning_rate": 8.054932953639571e-06, "loss": 1.1266, "step": 4749 }, { "epoch": 0.88, "learning_rate": 8.031478100504053e-06, "loss": 1.0003, "step": 4750 }, { "epoch": 0.88, "learning_rate": 8.008056017048449e-06, "loss": 0.9573, "step": 4751 }, { "epoch": 0.88, "learning_rate": 7.984666711618337e-06, "loss": 1.0365, "step": 4752 }, { "epoch": 0.88, "learning_rate": 7.961310192547711e-06, "loss": 1.0618, "step": 4753 }, { "epoch": 0.88, "learning_rate": 7.937986468158787e-06, "loss": 1.0658, "step": 4754 }, { "epoch": 0.88, "learning_rate": 7.914695546762141e-06, "loss": 0.9107, "step": 4755 }, { "epoch": 0.88, "learning_rate": 7.891437436656646e-06, "loss": 0.9436, "step": 4756 }, { "epoch": 0.88, "learning_rate": 7.86821214612954e-06, "loss": 1.0595, "step": 4757 }, { "epoch": 0.88, "learning_rate": 7.845019683456256e-06, "loss": 0.9853, "step": 4758 }, { "epoch": 0.88, "learning_rate": 7.821860056900654e-06, "loss": 1.0533, "step": 4759 }, { "epoch": 0.88, "learning_rate": 7.79873327471482e-06, "loss": 0.9643, "step": 4760 }, { "epoch": 0.88, "learning_rate": 7.77563934513913e-06, "loss": 1.1397, "step": 4761 }, { "epoch": 0.88, "learning_rate": 7.752578276402278e-06, "loss": 0.9702, "step": 4762 }, { "epoch": 0.88, "learning_rate": 7.72955007672127e-06, "loss": 1.1225, "step": 4763 }, { "epoch": 0.88, "learning_rate": 7.706554754301365e-06, "loss": 1.0067, "step": 4764 }, { "epoch": 0.88, "learning_rate": 7.683592317336142e-06, "loss": 1.0052, "step": 4765 }, { "epoch": 0.88, "learning_rate": 7.660662774007398e-06, "loss": 0.9517, "step": 4766 }, { "epoch": 0.88, "learning_rate": 7.63776613248528e-06, "loss": 1.0414, "step": 4767 }, { "epoch": 0.88, "learning_rate": 7.614902400928181e-06, "loss": 0.9552, "step": 4768 }, { "epoch": 0.88, "learning_rate": 7.592071587482774e-06, "loss": 1.0475, "step": 4769 }, { "epoch": 0.88, "learning_rate": 7.569273700283974e-06, "loss": 1.035, "step": 4770 }, { "epoch": 0.88, "learning_rate": 7.54650874745505e-06, "loss": 1.0387, "step": 4771 }, { "epoch": 0.88, "learning_rate": 7.523776737107402e-06, "loss": 1.0715, "step": 4772 }, { "epoch": 0.88, "learning_rate": 7.5010776773408176e-06, "loss": 1.0864, "step": 4773 }, { "epoch": 0.88, "learning_rate": 7.478411576243249e-06, "loss": 0.9291, "step": 4774 }, { "epoch": 0.88, "learning_rate": 7.455778441890993e-06, "loss": 1.1339, "step": 4775 }, { "epoch": 0.88, "learning_rate": 7.4331782823485295e-06, "loss": 1.1303, "step": 4776 }, { "epoch": 0.88, "learning_rate": 7.410611105668619e-06, "loss": 1.001, "step": 4777 }, { "epoch": 0.88, "learning_rate": 7.38807691989225e-06, "loss": 1.1075, "step": 4778 }, { "epoch": 0.88, "learning_rate": 7.365575733048691e-06, "loss": 1.0812, "step": 4779 }, { "epoch": 0.88, "learning_rate": 7.343107553155404e-06, "loss": 0.9834, "step": 4780 }, { "epoch": 0.88, "learning_rate": 7.320672388218164e-06, "loss": 0.9214, "step": 4781 }, { "epoch": 0.88, "learning_rate": 7.2982702462308986e-06, "loss": 0.9703, "step": 4782 }, { "epoch": 0.88, "learning_rate": 7.2759011351758225e-06, "loss": 1.0552, "step": 4783 }, { "epoch": 0.88, "learning_rate": 7.253565063023349e-06, "loss": 1.0245, "step": 4784 }, { "epoch": 0.88, "learning_rate": 7.231262037732156e-06, "loss": 1.0414, "step": 4785 }, { "epoch": 0.88, "learning_rate": 7.208992067249099e-06, "loss": 1.0061, "step": 4786 }, { "epoch": 0.88, "learning_rate": 7.1867551595093396e-06, "loss": 1.1557, "step": 4787 }, { "epoch": 0.88, "learning_rate": 7.16455132243612e-06, "loss": 1.0241, "step": 4788 }, { "epoch": 0.88, "learning_rate": 7.142380563941042e-06, "loss": 1.0607, "step": 4789 }, { "epoch": 0.88, "learning_rate": 7.120242891923823e-06, "loss": 0.8884, "step": 4790 }, { "epoch": 0.88, "learning_rate": 7.098138314272451e-06, "loss": 1.0622, "step": 4791 }, { "epoch": 0.88, "learning_rate": 7.076066838863083e-06, "loss": 0.9898, "step": 4792 }, { "epoch": 0.88, "learning_rate": 7.054028473560126e-06, "loss": 0.9227, "step": 4793 }, { "epoch": 0.88, "learning_rate": 7.032023226216111e-06, "loss": 1.1182, "step": 4794 }, { "epoch": 0.88, "learning_rate": 7.010051104671866e-06, "loss": 1.099, "step": 4795 }, { "epoch": 0.88, "learning_rate": 6.988112116756318e-06, "loss": 1.1078, "step": 4796 }, { "epoch": 0.88, "learning_rate": 6.9662062702866906e-06, "loss": 1.0841, "step": 4797 }, { "epoch": 0.88, "learning_rate": 6.944333573068307e-06, "loss": 0.957, "step": 4798 }, { "epoch": 0.88, "learning_rate": 6.922494032894744e-06, "loss": 1.0137, "step": 4799 }, { "epoch": 0.88, "learning_rate": 6.900687657547699e-06, "loss": 1.0479, "step": 4800 }, { "epoch": 0.88, "learning_rate": 6.878914454797136e-06, "loss": 1.2114, "step": 4801 }, { "epoch": 0.88, "learning_rate": 6.857174432401115e-06, "loss": 1.0997, "step": 4802 }, { "epoch": 0.89, "learning_rate": 6.835467598105949e-06, "loss": 1.0122, "step": 4803 }, { "epoch": 0.89, "learning_rate": 6.813793959646064e-06, "loss": 0.9939, "step": 4804 }, { "epoch": 0.89, "learning_rate": 6.792153524744093e-06, "loss": 1.1166, "step": 4805 }, { "epoch": 0.89, "learning_rate": 6.770546301110803e-06, "loss": 0.9556, "step": 4806 }, { "epoch": 0.89, "learning_rate": 6.748972296445199e-06, "loss": 1.0816, "step": 4807 }, { "epoch": 0.89, "learning_rate": 6.727431518434357e-06, "loss": 0.9823, "step": 4808 }, { "epoch": 0.89, "learning_rate": 6.705923974753592e-06, "loss": 1.1842, "step": 4809 }, { "epoch": 0.89, "learning_rate": 6.684449673066329e-06, "loss": 1.0574, "step": 4810 }, { "epoch": 0.89, "learning_rate": 6.6630086210241695e-06, "loss": 1.1266, "step": 4811 }, { "epoch": 0.89, "learning_rate": 6.641600826266847e-06, "loss": 1.1338, "step": 4812 }, { "epoch": 0.89, "learning_rate": 6.620226296422294e-06, "loss": 0.8496, "step": 4813 }, { "epoch": 0.89, "learning_rate": 6.598885039106517e-06, "loss": 1.0678, "step": 4814 }, { "epoch": 0.89, "learning_rate": 6.577577061923734e-06, "loss": 1.1457, "step": 4815 }, { "epoch": 0.89, "learning_rate": 6.556302372466272e-06, "loss": 0.9809, "step": 4816 }, { "epoch": 0.89, "learning_rate": 6.535060978314611e-06, "loss": 0.9536, "step": 4817 }, { "epoch": 0.89, "learning_rate": 6.51385288703732e-06, "loss": 1.0758, "step": 4818 }, { "epoch": 0.89, "learning_rate": 6.492678106191186e-06, "loss": 1.0997, "step": 4819 }, { "epoch": 0.89, "learning_rate": 6.4715366433210634e-06, "loss": 1.0429, "step": 4820 }, { "epoch": 0.89, "learning_rate": 6.450428505959938e-06, "loss": 0.9315, "step": 4821 }, { "epoch": 0.89, "learning_rate": 6.4293537016289706e-06, "loss": 1.0223, "step": 4822 }, { "epoch": 0.89, "learning_rate": 6.408312237837399e-06, "loss": 1.066, "step": 4823 }, { "epoch": 0.89, "learning_rate": 6.3873041220825605e-06, "loss": 0.9284, "step": 4824 }, { "epoch": 0.89, "learning_rate": 6.366329361849988e-06, "loss": 1.0708, "step": 4825 }, { "epoch": 0.89, "learning_rate": 6.345387964613259e-06, "loss": 1.0789, "step": 4826 }, { "epoch": 0.89, "learning_rate": 6.324479937834093e-06, "loss": 0.8968, "step": 4827 }, { "epoch": 0.89, "learning_rate": 6.303605288962322e-06, "loss": 1.0849, "step": 4828 }, { "epoch": 0.89, "learning_rate": 6.282764025435872e-06, "loss": 1.0564, "step": 4829 }, { "epoch": 0.89, "learning_rate": 6.261956154680759e-06, "loss": 1.0453, "step": 4830 }, { "epoch": 0.89, "learning_rate": 6.2411816841111634e-06, "loss": 0.8927, "step": 4831 }, { "epoch": 0.89, "learning_rate": 6.2204406211292865e-06, "loss": 0.8511, "step": 4832 }, { "epoch": 0.89, "learning_rate": 6.199732973125472e-06, "loss": 0.9009, "step": 4833 }, { "epoch": 0.89, "learning_rate": 6.17905874747815e-06, "loss": 1.1948, "step": 4834 }, { "epoch": 0.89, "learning_rate": 6.158417951553841e-06, "loss": 0.9857, "step": 4835 }, { "epoch": 0.89, "learning_rate": 6.137810592707127e-06, "loss": 0.9717, "step": 4836 }, { "epoch": 0.89, "learning_rate": 6.1172366782807354e-06, "loss": 1.0111, "step": 4837 }, { "epoch": 0.89, "learning_rate": 6.096696215605424e-06, "loss": 0.9294, "step": 4838 }, { "epoch": 0.89, "learning_rate": 6.076189212000039e-06, "loss": 0.9643, "step": 4839 }, { "epoch": 0.89, "learning_rate": 6.055715674771534e-06, "loss": 1.0298, "step": 4840 }, { "epoch": 0.89, "learning_rate": 6.035275611214919e-06, "loss": 0.9972, "step": 4841 }, { "epoch": 0.89, "learning_rate": 6.0148690286132685e-06, "loss": 1.0546, "step": 4842 }, { "epoch": 0.89, "learning_rate": 5.9944959342377205e-06, "loss": 0.8769, "step": 4843 }, { "epoch": 0.89, "learning_rate": 5.974156335347514e-06, "loss": 1.1754, "step": 4844 }, { "epoch": 0.89, "learning_rate": 5.953850239189929e-06, "loss": 1.1393, "step": 4845 }, { "epoch": 0.89, "learning_rate": 5.933577653000321e-06, "loss": 0.9175, "step": 4846 }, { "epoch": 0.89, "learning_rate": 5.913338584002093e-06, "loss": 1.1957, "step": 4847 }, { "epoch": 0.89, "learning_rate": 5.893133039406718e-06, "loss": 0.9721, "step": 4848 }, { "epoch": 0.89, "learning_rate": 5.872961026413693e-06, "loss": 1.1667, "step": 4849 }, { "epoch": 0.89, "learning_rate": 5.852822552210613e-06, "loss": 0.9334, "step": 4850 }, { "epoch": 0.89, "learning_rate": 5.832717623973072e-06, "loss": 0.9277, "step": 4851 }, { "epoch": 0.89, "learning_rate": 5.8126462488647835e-06, "loss": 1.0404, "step": 4852 }, { "epoch": 0.89, "learning_rate": 5.792608434037439e-06, "loss": 0.9302, "step": 4853 }, { "epoch": 0.89, "learning_rate": 5.772604186630792e-06, "loss": 1.1543, "step": 4854 }, { "epoch": 0.89, "learning_rate": 5.752633513772621e-06, "loss": 0.9499, "step": 4855 }, { "epoch": 0.89, "learning_rate": 5.732696422578787e-06, "loss": 1.0225, "step": 4856 }, { "epoch": 0.9, "learning_rate": 5.712792920153132e-06, "loss": 1.1013, "step": 4857 }, { "epoch": 0.9, "learning_rate": 5.692923013587603e-06, "loss": 1.0933, "step": 4858 }, { "epoch": 0.9, "learning_rate": 5.67308670996205e-06, "loss": 1.075, "step": 4859 }, { "epoch": 0.9, "learning_rate": 5.653284016344495e-06, "loss": 1.0015, "step": 4860 }, { "epoch": 0.9, "learning_rate": 5.633514939790862e-06, "loss": 0.9839, "step": 4861 }, { "epoch": 0.9, "learning_rate": 5.613779487345205e-06, "loss": 1.1029, "step": 4862 }, { "epoch": 0.9, "learning_rate": 5.59407766603951e-06, "loss": 1.2004, "step": 4863 }, { "epoch": 0.9, "learning_rate": 5.574409482893839e-06, "loss": 0.9935, "step": 4864 }, { "epoch": 0.9, "learning_rate": 5.554774944916197e-06, "loss": 1.1967, "step": 4865 }, { "epoch": 0.9, "learning_rate": 5.535174059102699e-06, "loss": 0.9469, "step": 4866 }, { "epoch": 0.9, "learning_rate": 5.51560683243737e-06, "loss": 1.0762, "step": 4867 }, { "epoch": 0.9, "learning_rate": 5.496073271892332e-06, "loss": 1.0356, "step": 4868 }, { "epoch": 0.9, "learning_rate": 5.476573384427652e-06, "loss": 1.1464, "step": 4869 }, { "epoch": 0.9, "learning_rate": 5.457107176991405e-06, "loss": 1.0146, "step": 4870 }, { "epoch": 0.9, "learning_rate": 5.437674656519665e-06, "loss": 0.9422, "step": 4871 }, { "epoch": 0.9, "learning_rate": 5.418275829936537e-06, "loss": 0.9499, "step": 4872 }, { "epoch": 0.9, "learning_rate": 5.398910704154081e-06, "loss": 1.1457, "step": 4873 }, { "epoch": 0.9, "learning_rate": 5.379579286072378e-06, "loss": 1.0515, "step": 4874 }, { "epoch": 0.9, "learning_rate": 5.3602815825794735e-06, "loss": 1.137, "step": 4875 }, { "epoch": 0.9, "learning_rate": 5.341017600551423e-06, "loss": 1.0178, "step": 4876 }, { "epoch": 0.9, "learning_rate": 5.321787346852236e-06, "loss": 1.1029, "step": 4877 }, { "epoch": 0.9, "learning_rate": 5.302590828333931e-06, "loss": 1.1026, "step": 4878 }, { "epoch": 0.9, "learning_rate": 5.283428051836503e-06, "loss": 0.9775, "step": 4879 }, { "epoch": 0.9, "learning_rate": 5.264299024187935e-06, "loss": 0.9229, "step": 4880 }, { "epoch": 0.9, "learning_rate": 5.24520375220412e-06, "loss": 1.1426, "step": 4881 }, { "epoch": 0.9, "learning_rate": 5.226142242689014e-06, "loss": 0.9737, "step": 4882 }, { "epoch": 0.9, "learning_rate": 5.207114502434485e-06, "loss": 1.0245, "step": 4883 }, { "epoch": 0.9, "learning_rate": 5.188120538220398e-06, "loss": 0.9771, "step": 4884 }, { "epoch": 0.9, "learning_rate": 5.16916035681454e-06, "loss": 1.1169, "step": 4885 }, { "epoch": 0.9, "learning_rate": 5.150233964972751e-06, "loss": 0.9878, "step": 4886 }, { "epoch": 0.9, "learning_rate": 5.131341369438703e-06, "loss": 1.1039, "step": 4887 }, { "epoch": 0.9, "learning_rate": 5.1124825769441335e-06, "loss": 1.1156, "step": 4888 }, { "epoch": 0.9, "learning_rate": 5.093657594208679e-06, "loss": 1.0329, "step": 4889 }, { "epoch": 0.9, "learning_rate": 5.0748664279399615e-06, "loss": 1.0656, "step": 4890 }, { "epoch": 0.9, "learning_rate": 5.056109084833527e-06, "loss": 0.9193, "step": 4891 }, { "epoch": 0.9, "learning_rate": 5.037385571572884e-06, "loss": 1.0395, "step": 4892 }, { "epoch": 0.9, "learning_rate": 5.018695894829473e-06, "loss": 1.0846, "step": 4893 }, { "epoch": 0.9, "learning_rate": 5.0000400612627114e-06, "loss": 1.0082, "step": 4894 }, { "epoch": 0.9, "learning_rate": 4.981418077519906e-06, "loss": 1.019, "step": 4895 }, { "epoch": 0.9, "learning_rate": 4.962829950236369e-06, "loss": 1.0009, "step": 4896 }, { "epoch": 0.9, "learning_rate": 4.944275686035294e-06, "loss": 1.0646, "step": 4897 }, { "epoch": 0.9, "learning_rate": 4.925755291527823e-06, "loss": 1.0564, "step": 4898 }, { "epoch": 0.9, "learning_rate": 4.907268773313023e-06, "loss": 0.9627, "step": 4899 }, { "epoch": 0.9, "learning_rate": 4.888816137977925e-06, "loss": 1.0615, "step": 4900 }, { "epoch": 0.9, "learning_rate": 4.870397392097437e-06, "loss": 1.0694, "step": 4901 }, { "epoch": 0.9, "learning_rate": 4.852012542234463e-06, "loss": 1.0935, "step": 4902 }, { "epoch": 0.9, "learning_rate": 4.833661594939732e-06, "loss": 0.9253, "step": 4903 }, { "epoch": 0.9, "learning_rate": 4.815344556751966e-06, "loss": 1.0579, "step": 4904 }, { "epoch": 0.9, "learning_rate": 4.79706143419778e-06, "loss": 0.964, "step": 4905 }, { "epoch": 0.9, "learning_rate": 4.778812233791719e-06, "loss": 1.0971, "step": 4906 }, { "epoch": 0.9, "learning_rate": 4.760596962036212e-06, "loss": 0.9408, "step": 4907 }, { "epoch": 0.9, "learning_rate": 4.742415625421659e-06, "loss": 1.0083, "step": 4908 }, { "epoch": 0.9, "learning_rate": 4.724268230426265e-06, "loss": 1.1376, "step": 4909 }, { "epoch": 0.9, "learning_rate": 4.706154783516248e-06, "loss": 0.9574, "step": 4910 }, { "epoch": 0.9, "learning_rate": 4.688075291145655e-06, "loss": 1.0774, "step": 4911 }, { "epoch": 0.91, "learning_rate": 4.6700297597565e-06, "loss": 1.0707, "step": 4912 }, { "epoch": 0.91, "learning_rate": 4.652018195778629e-06, "loss": 1.1203, "step": 4913 }, { "epoch": 0.91, "learning_rate": 4.63404060562983e-06, "loss": 1.0849, "step": 4914 }, { "epoch": 0.91, "learning_rate": 4.616096995715746e-06, "loss": 1.1866, "step": 4915 }, { "epoch": 0.91, "learning_rate": 4.598187372429974e-06, "loss": 1.066, "step": 4916 }, { "epoch": 0.91, "learning_rate": 4.580311742153942e-06, "loss": 0.9979, "step": 4917 }, { "epoch": 0.91, "learning_rate": 4.5624701112570004e-06, "loss": 1.2255, "step": 4918 }, { "epoch": 0.91, "learning_rate": 4.544662486096374e-06, "loss": 1.0717, "step": 4919 }, { "epoch": 0.91, "learning_rate": 4.5268888730171766e-06, "loss": 1.0307, "step": 4920 }, { "epoch": 0.91, "learning_rate": 4.509149278352365e-06, "loss": 1.0415, "step": 4921 }, { "epoch": 0.91, "learning_rate": 4.491443708422838e-06, "loss": 0.9421, "step": 4922 }, { "epoch": 0.91, "learning_rate": 4.473772169537316e-06, "loss": 1.0673, "step": 4923 }, { "epoch": 0.91, "learning_rate": 4.456134667992462e-06, "loss": 1.1034, "step": 4924 }, { "epoch": 0.91, "learning_rate": 4.438531210072716e-06, "loss": 0.9741, "step": 4925 }, { "epoch": 0.91, "learning_rate": 4.420961802050461e-06, "loss": 0.9651, "step": 4926 }, { "epoch": 0.91, "learning_rate": 4.40342645018591e-06, "loss": 1.1342, "step": 4927 }, { "epoch": 0.91, "learning_rate": 4.385925160727178e-06, "loss": 0.8748, "step": 4928 }, { "epoch": 0.91, "learning_rate": 4.368457939910209e-06, "loss": 1.089, "step": 4929 }, { "epoch": 0.91, "learning_rate": 4.351024793958835e-06, "loss": 1.1076, "step": 4930 }, { "epoch": 0.91, "learning_rate": 4.33362572908469e-06, "loss": 0.834, "step": 4931 }, { "epoch": 0.91, "learning_rate": 4.316260751487356e-06, "loss": 1.0357, "step": 4932 }, { "epoch": 0.91, "learning_rate": 4.298929867354174e-06, "loss": 0.9568, "step": 4933 }, { "epoch": 0.91, "learning_rate": 4.2816330828604145e-06, "loss": 1.0821, "step": 4934 }, { "epoch": 0.91, "learning_rate": 4.26437040416916e-06, "loss": 0.9993, "step": 4935 }, { "epoch": 0.91, "learning_rate": 4.247141837431345e-06, "loss": 1.1782, "step": 4936 }, { "epoch": 0.91, "learning_rate": 4.229947388785738e-06, "loss": 0.9449, "step": 4937 }, { "epoch": 0.91, "learning_rate": 4.21278706435898e-06, "loss": 0.9613, "step": 4938 }, { "epoch": 0.91, "learning_rate": 4.195660870265516e-06, "loss": 1.1223, "step": 4939 }, { "epoch": 0.91, "learning_rate": 4.178568812607675e-06, "loss": 0.9731, "step": 4940 }, { "epoch": 0.91, "learning_rate": 4.161510897475584e-06, "loss": 1.1278, "step": 4941 }, { "epoch": 0.91, "learning_rate": 4.144487130947217e-06, "loss": 1.0262, "step": 4942 }, { "epoch": 0.91, "learning_rate": 4.127497519088375e-06, "loss": 0.991, "step": 4943 }, { "epoch": 0.91, "learning_rate": 4.1105420679527275e-06, "loss": 1.1687, "step": 4944 }, { "epoch": 0.91, "learning_rate": 4.093620783581698e-06, "loss": 0.9358, "step": 4945 }, { "epoch": 0.91, "learning_rate": 4.076733672004618e-06, "loss": 1.0212, "step": 4946 }, { "epoch": 0.91, "learning_rate": 4.059880739238575e-06, "loss": 0.9704, "step": 4947 }, { "epoch": 0.91, "learning_rate": 4.043061991288522e-06, "loss": 0.9809, "step": 4948 }, { "epoch": 0.91, "learning_rate": 4.0262774341471965e-06, "loss": 0.9211, "step": 4949 }, { "epoch": 0.91, "learning_rate": 4.009527073795205e-06, "loss": 1.0064, "step": 4950 }, { "epoch": 0.91, "learning_rate": 3.992810916200895e-06, "loss": 1.0527, "step": 4951 }, { "epoch": 0.91, "learning_rate": 3.9761289673205135e-06, "loss": 1.1365, "step": 4952 }, { "epoch": 0.91, "learning_rate": 3.95948123309805e-06, "loss": 1.1378, "step": 4953 }, { "epoch": 0.91, "learning_rate": 3.942867719465326e-06, "loss": 0.9176, "step": 4954 }, { "epoch": 0.91, "learning_rate": 3.926288432341963e-06, "loss": 0.8842, "step": 4955 }, { "epoch": 0.91, "learning_rate": 3.909743377635422e-06, "loss": 1.0314, "step": 4956 }, { "epoch": 0.91, "learning_rate": 3.8932325612409115e-06, "loss": 0.9519, "step": 4957 }, { "epoch": 0.91, "learning_rate": 3.876755989041469e-06, "loss": 1.0299, "step": 4958 }, { "epoch": 0.91, "learning_rate": 3.860313666907955e-06, "loss": 1.1314, "step": 4959 }, { "epoch": 0.91, "learning_rate": 3.843905600698982e-06, "loss": 0.9898, "step": 4960 }, { "epoch": 0.91, "learning_rate": 3.827531796260975e-06, "loss": 1.1234, "step": 4961 }, { "epoch": 0.91, "learning_rate": 3.8111922594281536e-06, "loss": 0.9447, "step": 4962 }, { "epoch": 0.91, "learning_rate": 3.7948869960225287e-06, "loss": 0.9384, "step": 4963 }, { "epoch": 0.91, "learning_rate": 3.778616011853886e-06, "loss": 1.0825, "step": 4964 }, { "epoch": 0.91, "learning_rate": 3.7623793127198104e-06, "loss": 1.0317, "step": 4965 }, { "epoch": 0.92, "learning_rate": 3.746176904405685e-06, "loss": 1.1119, "step": 4966 }, { "epoch": 0.92, "learning_rate": 3.730008792684614e-06, "loss": 0.9345, "step": 4967 }, { "epoch": 0.92, "learning_rate": 3.7138749833175668e-06, "loss": 1.0001, "step": 4968 }, { "epoch": 0.92, "learning_rate": 3.697775482053223e-06, "loss": 1.0604, "step": 4969 }, { "epoch": 0.92, "learning_rate": 3.6817102946280602e-06, "loss": 1.0697, "step": 4970 }, { "epoch": 0.92, "learning_rate": 3.665679426766355e-06, "loss": 0.9351, "step": 4971 }, { "epoch": 0.92, "learning_rate": 3.6496828841801056e-06, "loss": 1.0119, "step": 4972 }, { "epoch": 0.92, "learning_rate": 3.6337206725691075e-06, "loss": 1.0869, "step": 4973 }, { "epoch": 0.92, "learning_rate": 3.6177927976209335e-06, "loss": 1.1185, "step": 4974 }, { "epoch": 0.92, "learning_rate": 3.6018992650109106e-06, "loss": 1.0276, "step": 4975 }, { "epoch": 0.92, "learning_rate": 3.5860400804021198e-06, "loss": 1.0494, "step": 4976 }, { "epoch": 0.92, "learning_rate": 3.570215249445419e-06, "loss": 0.9874, "step": 4977 }, { "epoch": 0.92, "learning_rate": 3.554424777779408e-06, "loss": 0.9786, "step": 4978 }, { "epoch": 0.92, "learning_rate": 3.538668671030465e-06, "loss": 0.9983, "step": 4979 }, { "epoch": 0.92, "learning_rate": 3.522946934812699e-06, "loss": 1.0492, "step": 4980 }, { "epoch": 0.92, "learning_rate": 3.5072595747280057e-06, "loss": 1.0014, "step": 4981 }, { "epoch": 0.92, "learning_rate": 3.4916065963659927e-06, "loss": 0.9907, "step": 4982 }, { "epoch": 0.92, "learning_rate": 3.4759880053040538e-06, "loss": 1.0092, "step": 4983 }, { "epoch": 0.92, "learning_rate": 3.460403807107304e-06, "loss": 1.1522, "step": 4984 }, { "epoch": 0.92, "learning_rate": 3.4448540073286238e-06, "loss": 1.0787, "step": 4985 }, { "epoch": 0.92, "learning_rate": 3.4293386115085924e-06, "loss": 1.0132, "step": 4986 }, { "epoch": 0.92, "learning_rate": 3.4138576251756094e-06, "loss": 1.0235, "step": 4987 }, { "epoch": 0.92, "learning_rate": 3.398411053845729e-06, "loss": 1.0728, "step": 4988 }, { "epoch": 0.92, "learning_rate": 3.382998903022816e-06, "loss": 0.9455, "step": 4989 }, { "epoch": 0.92, "learning_rate": 3.36762117819841e-06, "loss": 0.9505, "step": 4990 }, { "epoch": 0.92, "learning_rate": 3.3522778848518287e-06, "loss": 1.1041, "step": 4991 }, { "epoch": 0.92, "learning_rate": 3.3369690284500878e-06, "loss": 1.0331, "step": 4992 }, { "epoch": 0.92, "learning_rate": 3.321694614447968e-06, "loss": 1.1049, "step": 4993 }, { "epoch": 0.92, "learning_rate": 3.3064546482879377e-06, "loss": 1.0309, "step": 4994 }, { "epoch": 0.92, "learning_rate": 3.2912491354002205e-06, "loss": 1.0865, "step": 4995 }, { "epoch": 0.92, "learning_rate": 3.2760780812027715e-06, "loss": 0.9954, "step": 4996 }, { "epoch": 0.92, "learning_rate": 3.2609414911012215e-06, "loss": 0.8798, "step": 4997 }, { "epoch": 0.92, "learning_rate": 3.2458393704889568e-06, "loss": 0.944, "step": 4998 }, { "epoch": 0.92, "learning_rate": 3.230771724747106e-06, "loss": 0.9404, "step": 4999 }, { "epoch": 0.92, "learning_rate": 3.215738559244441e-06, "loss": 0.9397, "step": 5000 }, { "epoch": 0.92, "learning_rate": 3.200739879337522e-06, "loss": 1.0854, "step": 5001 }, { "epoch": 0.92, "learning_rate": 3.1857756903705737e-06, "loss": 0.8861, "step": 5002 }, { "epoch": 0.92, "learning_rate": 3.1708459976755533e-06, "loss": 1.0665, "step": 5003 }, { "epoch": 0.92, "learning_rate": 3.1559508065721055e-06, "loss": 1.2204, "step": 5004 }, { "epoch": 0.92, "learning_rate": 3.141090122367629e-06, "loss": 1.0093, "step": 5005 }, { "epoch": 0.92, "learning_rate": 3.1262639503571666e-06, "loss": 1.1574, "step": 5006 }, { "epoch": 0.92, "learning_rate": 3.1114722958235144e-06, "loss": 1.0905, "step": 5007 }, { "epoch": 0.92, "learning_rate": 3.096715164037123e-06, "loss": 1.0552, "step": 5008 }, { "epoch": 0.92, "learning_rate": 3.081992560256186e-06, "loss": 1.046, "step": 5009 }, { "epoch": 0.92, "learning_rate": 3.067304489726552e-06, "loss": 0.9794, "step": 5010 }, { "epoch": 0.92, "learning_rate": 3.052650957681813e-06, "loss": 1.0901, "step": 5011 }, { "epoch": 0.92, "learning_rate": 3.0380319693432247e-06, "loss": 0.9851, "step": 5012 }, { "epoch": 0.92, "learning_rate": 3.0234475299197317e-06, "loss": 1.0659, "step": 5013 }, { "epoch": 0.92, "learning_rate": 3.0088976446079554e-06, "loss": 1.0343, "step": 5014 }, { "epoch": 0.92, "learning_rate": 2.994382318592259e-06, "loss": 1.0513, "step": 5015 }, { "epoch": 0.92, "learning_rate": 2.97990155704464e-06, "loss": 1.125, "step": 5016 }, { "epoch": 0.92, "learning_rate": 2.965455365124803e-06, "loss": 1.0893, "step": 5017 }, { "epoch": 0.92, "learning_rate": 2.9510437479801312e-06, "loss": 1.0242, "step": 5018 }, { "epoch": 0.92, "learning_rate": 2.9366667107456837e-06, "loss": 0.972, "step": 5019 }, { "epoch": 0.93, "learning_rate": 2.922324258544196e-06, "loss": 0.8534, "step": 5020 }, { "epoch": 0.93, "learning_rate": 2.9080163964861017e-06, "loss": 1.1137, "step": 5021 }, { "epoch": 0.93, "learning_rate": 2.8937431296694686e-06, "loss": 0.9354, "step": 5022 }, { "epoch": 0.93, "learning_rate": 2.879504463180094e-06, "loss": 1.0093, "step": 5023 }, { "epoch": 0.93, "learning_rate": 2.8653004020913753e-06, "loss": 1.1358, "step": 5024 }, { "epoch": 0.93, "learning_rate": 2.8511309514644644e-06, "loss": 1.009, "step": 5025 }, { "epoch": 0.93, "learning_rate": 2.836996116348101e-06, "loss": 1.1425, "step": 5026 }, { "epoch": 0.93, "learning_rate": 2.822895901778744e-06, "loss": 1.134, "step": 5027 }, { "epoch": 0.93, "learning_rate": 2.808830312780486e-06, "loss": 1.1167, "step": 5028 }, { "epoch": 0.93, "learning_rate": 2.7947993543651295e-06, "loss": 0.9548, "step": 5029 }, { "epoch": 0.93, "learning_rate": 2.7808030315320534e-06, "loss": 1.0448, "step": 5030 }, { "epoch": 0.93, "learning_rate": 2.7668413492683674e-06, "loss": 1.1272, "step": 5031 }, { "epoch": 0.93, "learning_rate": 2.7529143125488157e-06, "loss": 1.0735, "step": 5032 }, { "epoch": 0.93, "learning_rate": 2.7390219263358054e-06, "loss": 1.074, "step": 5033 }, { "epoch": 0.93, "learning_rate": 2.7251641955793773e-06, "loss": 1.0486, "step": 5034 }, { "epoch": 0.93, "learning_rate": 2.711341125217237e-06, "loss": 0.974, "step": 5035 }, { "epoch": 0.93, "learning_rate": 2.6975527201747342e-06, "loss": 0.9585, "step": 5036 }, { "epoch": 0.93, "learning_rate": 2.683798985364894e-06, "loss": 0.958, "step": 5037 }, { "epoch": 0.93, "learning_rate": 2.67007992568834e-06, "loss": 0.9595, "step": 5038 }, { "epoch": 0.93, "learning_rate": 2.6563955460333856e-06, "loss": 0.9893, "step": 5039 }, { "epoch": 0.93, "learning_rate": 2.642745851275963e-06, "loss": 0.9941, "step": 5040 }, { "epoch": 0.93, "learning_rate": 2.629130846279648e-06, "loss": 0.9844, "step": 5041 }, { "epoch": 0.93, "learning_rate": 2.61555053589565e-06, "loss": 0.989, "step": 5042 }, { "epoch": 0.93, "learning_rate": 2.602004924962842e-06, "loss": 0.8637, "step": 5043 }, { "epoch": 0.93, "learning_rate": 2.5884940183076966e-06, "loss": 0.9583, "step": 5044 }, { "epoch": 0.93, "learning_rate": 2.5750178207443744e-06, "loss": 1.0576, "step": 5045 }, { "epoch": 0.93, "learning_rate": 2.5615763370745895e-06, "loss": 1.1179, "step": 5046 }, { "epoch": 0.93, "learning_rate": 2.5481695720877663e-06, "loss": 0.9202, "step": 5047 }, { "epoch": 0.93, "learning_rate": 2.534797530560895e-06, "loss": 1.0009, "step": 5048 }, { "epoch": 0.93, "learning_rate": 2.5214602172586533e-06, "loss": 0.9725, "step": 5049 }, { "epoch": 0.93, "learning_rate": 2.508157636933284e-06, "loss": 1.0315, "step": 5050 }, { "epoch": 0.93, "learning_rate": 2.4948897943247284e-06, "loss": 1.0805, "step": 5051 }, { "epoch": 0.93, "learning_rate": 2.48165669416045e-06, "loss": 1.1904, "step": 5052 }, { "epoch": 0.93, "learning_rate": 2.4684583411556217e-06, "loss": 1.056, "step": 5053 }, { "epoch": 0.93, "learning_rate": 2.4552947400129922e-06, "loss": 1.0818, "step": 5054 }, { "epoch": 0.93, "learning_rate": 2.442165895422954e-06, "loss": 0.9361, "step": 5055 }, { "epoch": 0.93, "learning_rate": 2.429071812063488e-06, "loss": 1.0871, "step": 5056 }, { "epoch": 0.93, "learning_rate": 2.416012494600195e-06, "loss": 1.041, "step": 5057 }, { "epoch": 0.93, "learning_rate": 2.402987947686286e-06, "loss": 1.0832, "step": 5058 }, { "epoch": 0.93, "learning_rate": 2.3899981759626155e-06, "loss": 1.0158, "step": 5059 }, { "epoch": 0.93, "learning_rate": 2.3770431840576036e-06, "loss": 1.0792, "step": 5060 }, { "epoch": 0.93, "learning_rate": 2.364122976587313e-06, "loss": 0.9434, "step": 5061 }, { "epoch": 0.93, "learning_rate": 2.3512375581553837e-06, "loss": 1.0997, "step": 5062 }, { "epoch": 0.93, "learning_rate": 2.338386933353065e-06, "loss": 0.9037, "step": 5063 }, { "epoch": 0.93, "learning_rate": 2.325571106759228e-06, "loss": 1.0683, "step": 5064 }, { "epoch": 0.93, "learning_rate": 2.3127900829403306e-06, "loss": 1.2546, "step": 5065 }, { "epoch": 0.93, "learning_rate": 2.3000438664504188e-06, "loss": 1.1524, "step": 5066 }, { "epoch": 0.93, "learning_rate": 2.287332461831182e-06, "loss": 1.0515, "step": 5067 }, { "epoch": 0.93, "learning_rate": 2.2746558736118305e-06, "loss": 0.9686, "step": 5068 }, { "epoch": 0.93, "learning_rate": 2.2620141063092405e-06, "loss": 0.982, "step": 5069 }, { "epoch": 0.93, "learning_rate": 2.24940716442783e-06, "loss": 1.15, "step": 5070 }, { "epoch": 0.93, "learning_rate": 2.2368350524596513e-06, "loss": 1.0269, "step": 5071 }, { "epoch": 0.93, "learning_rate": 2.22429777488431e-06, "loss": 1.0972, "step": 5072 }, { "epoch": 0.93, "learning_rate": 2.2117953361690425e-06, "loss": 1.1244, "step": 5073 }, { "epoch": 0.93, "learning_rate": 2.1993277407685974e-06, "loss": 0.9457, "step": 5074 }, { "epoch": 0.94, "learning_rate": 2.186894993125388e-06, "loss": 1.1747, "step": 5075 }, { "epoch": 0.94, "learning_rate": 2.174497097669359e-06, "loss": 1.0749, "step": 5076 }, { "epoch": 0.94, "learning_rate": 2.1621340588180772e-06, "loss": 1.0131, "step": 5077 }, { "epoch": 0.94, "learning_rate": 2.1498058809766517e-06, "loss": 1.097, "step": 5078 }, { "epoch": 0.94, "learning_rate": 2.1375125685377915e-06, "loss": 1.0021, "step": 5079 }, { "epoch": 0.94, "learning_rate": 2.1252541258817703e-06, "loss": 0.9703, "step": 5080 }, { "epoch": 0.94, "learning_rate": 2.1130305573764495e-06, "loss": 0.915, "step": 5081 }, { "epoch": 0.94, "learning_rate": 2.100841867377257e-06, "loss": 1.0245, "step": 5082 }, { "epoch": 0.94, "learning_rate": 2.0886880602272063e-06, "loss": 1.0241, "step": 5083 }, { "epoch": 0.94, "learning_rate": 2.0765691402568454e-06, "loss": 1.0136, "step": 5084 }, { "epoch": 0.94, "learning_rate": 2.064485111784342e-06, "loss": 0.9785, "step": 5085 }, { "epoch": 0.94, "learning_rate": 2.0524359791153746e-06, "loss": 0.8881, "step": 5086 }, { "epoch": 0.94, "learning_rate": 2.040421746543253e-06, "loss": 1.1735, "step": 5087 }, { "epoch": 0.94, "learning_rate": 2.028442418348797e-06, "loss": 0.9708, "step": 5088 }, { "epoch": 0.94, "learning_rate": 2.016497998800426e-06, "loss": 1.0513, "step": 5089 }, { "epoch": 0.94, "learning_rate": 2.0045884921540804e-06, "loss": 1.0155, "step": 5090 }, { "epoch": 0.94, "learning_rate": 1.992713902653309e-06, "loss": 1.0217, "step": 5091 }, { "epoch": 0.94, "learning_rate": 1.980874234529173e-06, "loss": 1.0257, "step": 5092 }, { "epoch": 0.94, "learning_rate": 1.9690694920003416e-06, "loss": 1.0843, "step": 5093 }, { "epoch": 0.94, "learning_rate": 1.9572996792729836e-06, "loss": 1.1021, "step": 5094 }, { "epoch": 0.94, "learning_rate": 1.945564800540867e-06, "loss": 0.9888, "step": 5095 }, { "epoch": 0.94, "learning_rate": 1.9338648599852792e-06, "loss": 1.1364, "step": 5096 }, { "epoch": 0.94, "learning_rate": 1.922199861775087e-06, "loss": 1.0785, "step": 5097 }, { "epoch": 0.94, "learning_rate": 1.910569810066687e-06, "loss": 1.1279, "step": 5098 }, { "epoch": 0.94, "learning_rate": 1.898974709004031e-06, "loss": 1.079, "step": 5099 }, { "epoch": 0.94, "learning_rate": 1.8874145627186257e-06, "loss": 1.1237, "step": 5100 }, { "epoch": 0.94, "learning_rate": 1.8758893753295092e-06, "loss": 0.9964, "step": 5101 }, { "epoch": 0.94, "learning_rate": 1.8643991509432523e-06, "loss": 1.0053, "step": 5102 }, { "epoch": 0.94, "learning_rate": 1.8529438936540021e-06, "loss": 1.2121, "step": 5103 }, { "epoch": 0.94, "learning_rate": 1.8415236075434162e-06, "loss": 0.9558, "step": 5104 }, { "epoch": 0.94, "learning_rate": 1.8301382966807278e-06, "loss": 1.0373, "step": 5105 }, { "epoch": 0.94, "learning_rate": 1.8187879651226592e-06, "loss": 1.0714, "step": 5106 }, { "epoch": 0.94, "learning_rate": 1.807472616913497e-06, "loss": 0.9992, "step": 5107 }, { "epoch": 0.94, "learning_rate": 1.7961922560850609e-06, "loss": 1.0671, "step": 5108 }, { "epoch": 0.94, "learning_rate": 1.784946886656702e-06, "loss": 1.099, "step": 5109 }, { "epoch": 0.94, "learning_rate": 1.7737365126353045e-06, "loss": 1.104, "step": 5110 }, { "epoch": 0.94, "learning_rate": 1.762561138015284e-06, "loss": 1.0635, "step": 5111 }, { "epoch": 0.94, "learning_rate": 1.7514207667785997e-06, "loss": 1.2269, "step": 5112 }, { "epoch": 0.94, "learning_rate": 1.7403154028946877e-06, "loss": 1.0717, "step": 5113 }, { "epoch": 0.94, "learning_rate": 1.7292450503205715e-06, "loss": 1.0419, "step": 5114 }, { "epoch": 0.94, "learning_rate": 1.7182097130007734e-06, "loss": 1.1348, "step": 5115 }, { "epoch": 0.94, "learning_rate": 1.7072093948673263e-06, "loss": 0.9566, "step": 5116 }, { "epoch": 0.94, "learning_rate": 1.6962440998397944e-06, "loss": 1.0854, "step": 5117 }, { "epoch": 0.94, "learning_rate": 1.685313831825297e-06, "loss": 1.1503, "step": 5118 }, { "epoch": 0.94, "learning_rate": 1.6744185947184077e-06, "loss": 1.0449, "step": 5119 }, { "epoch": 0.94, "learning_rate": 1.6635583924012765e-06, "loss": 0.9624, "step": 5120 }, { "epoch": 0.94, "learning_rate": 1.6527332287435305e-06, "loss": 1.102, "step": 5121 }, { "epoch": 0.94, "learning_rate": 1.6419431076023506e-06, "loss": 1.1206, "step": 5122 }, { "epoch": 0.94, "learning_rate": 1.6311880328223728e-06, "loss": 1.0008, "step": 5123 }, { "epoch": 0.94, "learning_rate": 1.6204680082358203e-06, "loss": 0.9782, "step": 5124 }, { "epoch": 0.94, "learning_rate": 1.6097830376623713e-06, "loss": 0.9916, "step": 5125 }, { "epoch": 0.94, "learning_rate": 1.5991331249092246e-06, "loss": 0.9742, "step": 5126 }, { "epoch": 0.94, "learning_rate": 1.5885182737711003e-06, "loss": 1.0689, "step": 5127 }, { "epoch": 0.94, "learning_rate": 1.5779384880302283e-06, "loss": 1.011, "step": 5128 }, { "epoch": 0.95, "learning_rate": 1.567393771456316e-06, "loss": 0.8579, "step": 5129 }, { "epoch": 0.95, "learning_rate": 1.5568841278066237e-06, "loss": 1.0213, "step": 5130 }, { "epoch": 0.95, "learning_rate": 1.5464095608258566e-06, "loss": 1.0488, "step": 5131 }, { "epoch": 0.95, "learning_rate": 1.5359700742462512e-06, "loss": 0.9747, "step": 5132 }, { "epoch": 0.95, "learning_rate": 1.5255656717875655e-06, "loss": 1.1616, "step": 5133 }, { "epoch": 0.95, "learning_rate": 1.5151963571570227e-06, "loss": 1.0773, "step": 5134 }, { "epoch": 0.95, "learning_rate": 1.5048621340493562e-06, "loss": 1.0582, "step": 5135 }, { "epoch": 0.95, "learning_rate": 1.4945630061467985e-06, "loss": 0.9717, "step": 5136 }, { "epoch": 0.95, "learning_rate": 1.4842989771190584e-06, "loss": 1.0646, "step": 5137 }, { "epoch": 0.95, "learning_rate": 1.4740700506233773e-06, "loss": 1.0619, "step": 5138 }, { "epoch": 0.95, "learning_rate": 1.4638762303044506e-06, "loss": 0.9262, "step": 5139 }, { "epoch": 0.95, "learning_rate": 1.4537175197944842e-06, "loss": 1.0414, "step": 5140 }, { "epoch": 0.95, "learning_rate": 1.4435939227131713e-06, "loss": 0.916, "step": 5141 }, { "epoch": 0.95, "learning_rate": 1.433505442667682e-06, "loss": 1.0426, "step": 5142 }, { "epoch": 0.95, "learning_rate": 1.4234520832527076e-06, "loss": 1.0092, "step": 5143 }, { "epoch": 0.95, "learning_rate": 1.4134338480503829e-06, "loss": 1.1784, "step": 5144 }, { "epoch": 0.95, "learning_rate": 1.403450740630341e-06, "loss": 1.0948, "step": 5145 }, { "epoch": 0.95, "learning_rate": 1.3935027645497146e-06, "loss": 0.9849, "step": 5146 }, { "epoch": 0.95, "learning_rate": 1.3835899233531013e-06, "loss": 1.1134, "step": 5147 }, { "epoch": 0.95, "learning_rate": 1.3737122205725982e-06, "loss": 0.9621, "step": 5148 }, { "epoch": 0.95, "learning_rate": 1.3638696597277679e-06, "loss": 1.0486, "step": 5149 }, { "epoch": 0.95, "learning_rate": 1.354062244325638e-06, "loss": 0.9866, "step": 5150 }, { "epoch": 0.95, "learning_rate": 1.3442899778607353e-06, "loss": 1.0551, "step": 5151 }, { "epoch": 0.95, "learning_rate": 1.3345528638150751e-06, "loss": 1.046, "step": 5152 }, { "epoch": 0.95, "learning_rate": 1.3248509056580926e-06, "loss": 1.1261, "step": 5153 }, { "epoch": 0.95, "learning_rate": 1.3151841068467675e-06, "loss": 0.877, "step": 5154 }, { "epoch": 0.95, "learning_rate": 1.3055524708254885e-06, "loss": 1.0071, "step": 5155 }, { "epoch": 0.95, "learning_rate": 1.2959560010261662e-06, "loss": 1.1287, "step": 5156 }, { "epoch": 0.95, "learning_rate": 1.2863947008681433e-06, "loss": 1.1942, "step": 5157 }, { "epoch": 0.95, "learning_rate": 1.2768685737582497e-06, "loss": 1.0991, "step": 5158 }, { "epoch": 0.95, "learning_rate": 1.2673776230907708e-06, "loss": 0.9308, "step": 5159 }, { "epoch": 0.95, "learning_rate": 1.25792185224749e-06, "loss": 1.0487, "step": 5160 }, { "epoch": 0.95, "learning_rate": 1.2485012645976014e-06, "loss": 1.007, "step": 5161 }, { "epoch": 0.95, "learning_rate": 1.2391158634978194e-06, "loss": 0.9491, "step": 5162 }, { "epoch": 0.95, "learning_rate": 1.2297656522922695e-06, "loss": 1.0582, "step": 5163 }, { "epoch": 0.95, "learning_rate": 1.2204506343125866e-06, "loss": 0.8614, "step": 5164 }, { "epoch": 0.95, "learning_rate": 1.2111708128778266e-06, "loss": 1.0631, "step": 5165 }, { "epoch": 0.95, "learning_rate": 1.2019261912945334e-06, "loss": 0.9551, "step": 5166 }, { "epoch": 0.95, "learning_rate": 1.1927167728566835e-06, "loss": 1.135, "step": 5167 }, { "epoch": 0.95, "learning_rate": 1.1835425608457296e-06, "loss": 1.0322, "step": 5168 }, { "epoch": 0.95, "learning_rate": 1.174403558530568e-06, "loss": 1.0969, "step": 5169 }, { "epoch": 0.95, "learning_rate": 1.1652997691675605e-06, "loss": 1.024, "step": 5170 }, { "epoch": 0.95, "learning_rate": 1.1562311960005234e-06, "loss": 0.9994, "step": 5171 }, { "epoch": 0.95, "learning_rate": 1.1471978422606943e-06, "loss": 1.0268, "step": 5172 }, { "epoch": 0.95, "learning_rate": 1.1381997111667874e-06, "loss": 0.9774, "step": 5173 }, { "epoch": 0.95, "learning_rate": 1.1292368059249936e-06, "loss": 1.0989, "step": 5174 }, { "epoch": 0.95, "learning_rate": 1.1203091297288804e-06, "loss": 0.9584, "step": 5175 }, { "epoch": 0.95, "learning_rate": 1.1114166857595365e-06, "loss": 0.9829, "step": 5176 }, { "epoch": 0.95, "learning_rate": 1.1025594771854497e-06, "loss": 1.0671, "step": 5177 }, { "epoch": 0.95, "learning_rate": 1.0937375071625844e-06, "loss": 1.0405, "step": 5178 }, { "epoch": 0.95, "learning_rate": 1.084950778834304e-06, "loss": 0.9367, "step": 5179 }, { "epoch": 0.95, "learning_rate": 1.0761992953314703e-06, "loss": 1.0577, "step": 5180 }, { "epoch": 0.95, "learning_rate": 1.067483059772334e-06, "loss": 1.0058, "step": 5181 }, { "epoch": 0.95, "learning_rate": 1.058802075262655e-06, "loss": 1.0667, "step": 5182 }, { "epoch": 0.96, "learning_rate": 1.0501563448955365e-06, "loss": 1.0962, "step": 5183 }, { "epoch": 0.96, "learning_rate": 1.0415458717516146e-06, "loss": 0.9875, "step": 5184 }, { "epoch": 0.96, "learning_rate": 1.032970658898913e-06, "loss": 1.0256, "step": 5185 }, { "epoch": 0.96, "learning_rate": 1.0244307093928873e-06, "loss": 1.2216, "step": 5186 }, { "epoch": 0.96, "learning_rate": 1.0159260262764591e-06, "loss": 0.8934, "step": 5187 }, { "epoch": 0.96, "learning_rate": 1.0074566125799601e-06, "loss": 1.1313, "step": 5188 }, { "epoch": 0.96, "learning_rate": 9.990224713211538e-07, "loss": 0.9783, "step": 5189 }, { "epoch": 0.96, "learning_rate": 9.906236055052586e-07, "loss": 1.1634, "step": 5190 }, { "epoch": 0.96, "learning_rate": 9.82260018124881e-07, "loss": 1.0454, "step": 5191 }, { "epoch": 0.96, "learning_rate": 9.739317121601254e-07, "loss": 0.9535, "step": 5192 }, { "epoch": 0.96, "learning_rate": 9.656386905784632e-07, "loss": 1.044, "step": 5193 }, { "epoch": 0.96, "learning_rate": 9.573809563348079e-07, "loss": 1.1615, "step": 5194 }, { "epoch": 0.96, "learning_rate": 9.491585123715063e-07, "loss": 0.9886, "step": 5195 }, { "epoch": 0.96, "learning_rate": 9.40971361618348e-07, "loss": 1.1225, "step": 5196 }, { "epoch": 0.96, "learning_rate": 9.328195069925217e-07, "loss": 1.0857, "step": 5197 }, { "epoch": 0.96, "learning_rate": 9.247029513986482e-07, "loss": 1.051, "step": 5198 }, { "epoch": 0.96, "learning_rate": 9.16621697728759e-07, "loss": 1.1147, "step": 5199 }, { "epoch": 0.96, "learning_rate": 9.085757488623392e-07, "loss": 0.9855, "step": 5200 }, { "epoch": 0.96, "learning_rate": 9.005651076662624e-07, "loss": 1.063, "step": 5201 }, { "epoch": 0.96, "learning_rate": 8.925897769948344e-07, "loss": 1.0425, "step": 5202 }, { "epoch": 0.96, "learning_rate": 8.846497596897707e-07, "loss": 1.0558, "step": 5203 }, { "epoch": 0.96, "learning_rate": 8.767450585802306e-07, "loss": 1.1295, "step": 5204 }, { "epoch": 0.96, "learning_rate": 8.688756764827388e-07, "loss": 0.9102, "step": 5205 }, { "epoch": 0.96, "learning_rate": 8.610416162013079e-07, "loss": 0.987, "step": 5206 }, { "epoch": 0.96, "learning_rate": 8.532428805272829e-07, "loss": 0.9096, "step": 5207 }, { "epoch": 0.96, "learning_rate": 8.454794722394965e-07, "loss": 0.9223, "step": 5208 }, { "epoch": 0.96, "learning_rate": 8.377513941041359e-07, "loss": 1.073, "step": 5209 }, { "epoch": 0.96, "learning_rate": 8.300586488748541e-07, "loss": 1.1019, "step": 5210 }, { "epoch": 0.96, "learning_rate": 8.224012392926472e-07, "loss": 1.1485, "step": 5211 }, { "epoch": 0.96, "learning_rate": 8.147791680859773e-07, "loss": 1.0627, "step": 5212 }, { "epoch": 0.96, "learning_rate": 8.071924379706941e-07, "loss": 1.0806, "step": 5213 }, { "epoch": 0.96, "learning_rate": 7.996410516500685e-07, "loss": 1.1852, "step": 5214 }, { "epoch": 0.96, "learning_rate": 7.921250118147483e-07, "loss": 1.164, "step": 5215 }, { "epoch": 0.96, "learning_rate": 7.846443211428023e-07, "loss": 1.0857, "step": 5216 }, { "epoch": 0.96, "learning_rate": 7.771989822997206e-07, "loss": 1.1174, "step": 5217 }, { "epoch": 0.96, "learning_rate": 7.69788997938381e-07, "loss": 1.0712, "step": 5218 }, { "epoch": 0.96, "learning_rate": 7.624143706990494e-07, "loss": 0.9711, "step": 5219 }, { "epoch": 0.96, "learning_rate": 7.550751032094239e-07, "loss": 0.9736, "step": 5220 }, { "epoch": 0.96, "learning_rate": 7.477711980845903e-07, "loss": 0.9721, "step": 5221 }, { "epoch": 0.96, "learning_rate": 7.405026579270224e-07, "loss": 1.0776, "step": 5222 }, { "epoch": 0.96, "learning_rate": 7.332694853266042e-07, "loss": 1.1264, "step": 5223 }, { "epoch": 0.96, "learning_rate": 7.260716828606295e-07, "loss": 0.9307, "step": 5224 }, { "epoch": 0.96, "learning_rate": 7.18909253093758e-07, "loss": 1.0327, "step": 5225 }, { "epoch": 0.96, "learning_rate": 7.117821985780926e-07, "loss": 1.0005, "step": 5226 }, { "epoch": 0.96, "learning_rate": 7.046905218530686e-07, "loss": 1.1192, "step": 5227 }, { "epoch": 0.96, "learning_rate": 6.976342254455759e-07, "loss": 1.113, "step": 5228 }, { "epoch": 0.96, "learning_rate": 6.906133118698588e-07, "loss": 0.988, "step": 5229 }, { "epoch": 0.96, "learning_rate": 6.836277836275828e-07, "loss": 0.9951, "step": 5230 }, { "epoch": 0.96, "learning_rate": 6.766776432077682e-07, "loss": 1.0957, "step": 5231 }, { "epoch": 0.96, "learning_rate": 6.697628930868671e-07, "loss": 0.9468, "step": 5232 }, { "epoch": 0.96, "learning_rate": 6.628835357286978e-07, "loss": 1.0724, "step": 5233 }, { "epoch": 0.96, "learning_rate": 6.560395735844549e-07, "loss": 0.9666, "step": 5234 }, { "epoch": 0.96, "learning_rate": 6.492310090927656e-07, "loss": 1.06, "step": 5235 }, { "epoch": 0.96, "learning_rate": 6.424578446796003e-07, "loss": 1.0939, "step": 5236 }, { "epoch": 0.97, "learning_rate": 6.357200827583398e-07, "loss": 0.915, "step": 5237 }, { "epoch": 0.97, "learning_rate": 6.290177257297414e-07, "loss": 1.0998, "step": 5238 }, { "epoch": 0.97, "learning_rate": 6.223507759819392e-07, "loss": 1.0462, "step": 5239 }, { "epoch": 0.97, "learning_rate": 6.157192358904774e-07, "loss": 1.049, "step": 5240 }, { "epoch": 0.97, "learning_rate": 6.091231078182547e-07, "loss": 1.0948, "step": 5241 }, { "epoch": 0.97, "learning_rate": 6.0256239411558e-07, "loss": 0.9886, "step": 5242 }, { "epoch": 0.97, "learning_rate": 5.960370971201168e-07, "loss": 0.9929, "step": 5243 }, { "epoch": 0.97, "learning_rate": 5.895472191569274e-07, "loss": 0.946, "step": 5244 }, { "epoch": 0.97, "learning_rate": 5.830927625384286e-07, "loss": 0.9967, "step": 5245 }, { "epoch": 0.97, "learning_rate": 5.766737295644586e-07, "loss": 1.0084, "step": 5246 }, { "epoch": 0.97, "learning_rate": 5.702901225221879e-07, "loss": 1.0104, "step": 5247 }, { "epoch": 0.97, "learning_rate": 5.63941943686197e-07, "loss": 1.0968, "step": 5248 }, { "epoch": 0.97, "learning_rate": 5.576291953184321e-07, "loss": 1.0859, "step": 5249 }, { "epoch": 0.97, "learning_rate": 5.51351879668216e-07, "loss": 1.0982, "step": 5250 }, { "epoch": 0.97, "learning_rate": 5.451099989722375e-07, "loss": 1.0144, "step": 5251 }, { "epoch": 0.97, "learning_rate": 5.38903555454573e-07, "loss": 1.0441, "step": 5252 }, { "epoch": 0.97, "learning_rate": 5.327325513266534e-07, "loss": 1.0524, "step": 5253 }, { "epoch": 0.97, "learning_rate": 5.26596988787309e-07, "loss": 1.0329, "step": 5254 }, { "epoch": 0.97, "learning_rate": 5.204968700227242e-07, "loss": 1.0064, "step": 5255 }, { "epoch": 0.97, "learning_rate": 5.144321972064603e-07, "loss": 1.0005, "step": 5256 }, { "epoch": 0.97, "learning_rate": 5.084029724994332e-07, "loss": 1.005, "step": 5257 }, { "epoch": 0.97, "learning_rate": 5.024091980499468e-07, "loss": 1.0384, "step": 5258 }, { "epoch": 0.97, "learning_rate": 4.964508759936704e-07, "loss": 1.0281, "step": 5259 }, { "epoch": 0.97, "learning_rate": 4.905280084536279e-07, "loss": 1.0187, "step": 5260 }, { "epoch": 0.97, "learning_rate": 4.846405975402424e-07, "loss": 1.1859, "step": 5261 }, { "epoch": 0.97, "learning_rate": 4.78788645351258e-07, "loss": 1.0185, "step": 5262 }, { "epoch": 0.97, "learning_rate": 4.7297215397181795e-07, "loss": 1.0528, "step": 5263 }, { "epoch": 0.97, "learning_rate": 4.671911254744199e-07, "loss": 0.9939, "step": 5264 }, { "epoch": 0.97, "learning_rate": 4.614455619189273e-07, "loss": 0.9172, "step": 5265 }, { "epoch": 0.97, "learning_rate": 4.557354653525581e-07, "loss": 0.9775, "step": 5266 }, { "epoch": 0.97, "learning_rate": 4.5006083780991804e-07, "loss": 1.0874, "step": 5267 }, { "epoch": 0.97, "learning_rate": 4.4442168131293427e-07, "loss": 1.1488, "step": 5268 }, { "epoch": 0.97, "learning_rate": 4.388179978709217e-07, "loss": 1.0492, "step": 5269 }, { "epoch": 0.97, "learning_rate": 4.3324978948057205e-07, "loss": 1.0389, "step": 5270 }, { "epoch": 0.97, "learning_rate": 4.2771705812588714e-07, "loss": 0.9855, "step": 5271 }, { "epoch": 0.97, "learning_rate": 4.222198057782789e-07, "loss": 1.0513, "step": 5272 }, { "epoch": 0.97, "learning_rate": 4.167580343964916e-07, "loss": 0.9866, "step": 5273 }, { "epoch": 0.97, "learning_rate": 4.113317459266242e-07, "loss": 1.01, "step": 5274 }, { "epoch": 0.97, "learning_rate": 4.0594094230214096e-07, "loss": 1.0887, "step": 5275 }, { "epoch": 0.97, "learning_rate": 4.0058562544387224e-07, "loss": 0.9036, "step": 5276 }, { "epoch": 0.97, "learning_rate": 3.952657972599805e-07, "loss": 0.9251, "step": 5277 }, { "epoch": 0.97, "learning_rate": 3.8998145964599386e-07, "loss": 1.0559, "step": 5278 }, { "epoch": 0.97, "learning_rate": 3.8473261448480623e-07, "loss": 0.9023, "step": 5279 }, { "epoch": 0.97, "learning_rate": 3.795192636466549e-07, "loss": 0.9923, "step": 5280 }, { "epoch": 0.97, "learning_rate": 3.7434140898912063e-07, "loss": 1.0616, "step": 5281 }, { "epoch": 0.97, "learning_rate": 3.691990523571498e-07, "loss": 0.9445, "step": 5282 }, { "epoch": 0.97, "learning_rate": 3.640921955830434e-07, "loss": 1.1118, "step": 5283 }, { "epoch": 0.97, "learning_rate": 3.590208404864348e-07, "loss": 1.0692, "step": 5284 }, { "epoch": 0.97, "learning_rate": 3.539849888743341e-07, "loss": 1.0155, "step": 5285 }, { "epoch": 0.97, "learning_rate": 3.4898464254107257e-07, "loss": 0.9275, "step": 5286 }, { "epoch": 0.97, "learning_rate": 3.4401980326835836e-07, "loss": 1.1003, "step": 5287 }, { "epoch": 0.97, "learning_rate": 3.390904728252098e-07, "loss": 1.0116, "step": 5288 }, { "epoch": 0.97, "learning_rate": 3.3419665296804404e-07, "loss": 1.0448, "step": 5289 }, { "epoch": 0.97, "learning_rate": 3.293383454405774e-07, "loss": 0.9217, "step": 5290 }, { "epoch": 0.97, "learning_rate": 3.245155519739029e-07, "loss": 1.0638, "step": 5291 }, { "epoch": 0.98, "learning_rate": 3.197282742864571e-07, "loss": 1.0037, "step": 5292 }, { "epoch": 0.98, "learning_rate": 3.1497651408399776e-07, "loss": 1.1328, "step": 5293 }, { "epoch": 0.98, "learning_rate": 3.1026027305964823e-07, "loss": 1.0167, "step": 5294 }, { "epoch": 0.98, "learning_rate": 3.0557955289387543e-07, "loss": 0.9294, "step": 5295 }, { "epoch": 0.98, "learning_rate": 3.009343552544897e-07, "loss": 1.1092, "step": 5296 }, { "epoch": 0.98, "learning_rate": 2.9632468179664474e-07, "loss": 0.9322, "step": 5297 }, { "epoch": 0.98, "learning_rate": 2.917505341628157e-07, "loss": 0.9785, "step": 5298 }, { "epoch": 0.98, "learning_rate": 2.872119139828433e-07, "loss": 1.0896, "step": 5299 }, { "epoch": 0.98, "learning_rate": 2.8270882287390055e-07, "loss": 1.02, "step": 5300 }, { "epoch": 0.98, "learning_rate": 2.78241262440504e-07, "loss": 1.1043, "step": 5301 }, { "epoch": 0.98, "learning_rate": 2.7380923427451355e-07, "loss": 0.9679, "step": 5302 }, { "epoch": 0.98, "learning_rate": 2.694127399551216e-07, "loss": 1.1346, "step": 5303 }, { "epoch": 0.98, "learning_rate": 2.6505178104885285e-07, "loss": 0.9599, "step": 5304 }, { "epoch": 0.98, "learning_rate": 2.607263591095754e-07, "loss": 0.9885, "step": 5305 }, { "epoch": 0.98, "learning_rate": 2.5643647567851205e-07, "loss": 0.9751, "step": 5306 }, { "epoch": 0.98, "learning_rate": 2.5218213228420664e-07, "loss": 1.0236, "step": 5307 }, { "epoch": 0.98, "learning_rate": 2.479633304425355e-07, "loss": 0.9244, "step": 5308 }, { "epoch": 0.98, "learning_rate": 2.4378007165671843e-07, "loss": 1.121, "step": 5309 }, { "epoch": 0.98, "learning_rate": 2.396323574173076e-07, "loss": 1.1664, "step": 5310 }, { "epoch": 0.98, "learning_rate": 2.3552018920219855e-07, "loss": 1.0638, "step": 5311 }, { "epoch": 0.98, "learning_rate": 2.314435684766081e-07, "loss": 0.9875, "step": 5312 }, { "epoch": 0.98, "learning_rate": 2.2740249669309655e-07, "loss": 0.9688, "step": 5313 }, { "epoch": 0.98, "learning_rate": 2.2339697529155656e-07, "loss": 1.0544, "step": 5314 }, { "epoch": 0.98, "learning_rate": 2.1942700569921314e-07, "loss": 0.9698, "step": 5315 }, { "epoch": 0.98, "learning_rate": 2.1549258933061255e-07, "loss": 0.99, "step": 5316 }, { "epoch": 0.98, "learning_rate": 2.115937275876445e-07, "loss": 1.1787, "step": 5317 }, { "epoch": 0.98, "learning_rate": 2.0773042185954218e-07, "loss": 0.9862, "step": 5318 }, { "epoch": 0.98, "learning_rate": 2.0390267352284888e-07, "loss": 1.1718, "step": 5319 }, { "epoch": 0.98, "learning_rate": 2.0011048394142917e-07, "loss": 0.8551, "step": 5320 }, { "epoch": 0.98, "learning_rate": 1.9635385446652442e-07, "loss": 1.0126, "step": 5321 }, { "epoch": 0.98, "learning_rate": 1.9263278643664175e-07, "loss": 1.009, "step": 5322 }, { "epoch": 0.98, "learning_rate": 1.88947281177676e-07, "loss": 1.1009, "step": 5323 }, { "epoch": 0.98, "learning_rate": 1.8529734000281017e-07, "loss": 0.9817, "step": 5324 }, { "epoch": 0.98, "learning_rate": 1.8168296421258169e-07, "loss": 0.9613, "step": 5325 }, { "epoch": 0.98, "learning_rate": 1.7810415509483812e-07, "loss": 0.9484, "step": 5326 }, { "epoch": 0.98, "learning_rate": 1.7456091392474838e-07, "loss": 1.0356, "step": 5327 }, { "epoch": 0.98, "learning_rate": 1.7105324196482475e-07, "loss": 1.0704, "step": 5328 }, { "epoch": 0.98, "learning_rate": 1.675811404649119e-07, "loss": 1.0032, "step": 5329 }, { "epoch": 0.98, "learning_rate": 1.6414461066216468e-07, "loss": 1.26, "step": 5330 }, { "epoch": 0.98, "learning_rate": 1.6074365378105915e-07, "loss": 0.9054, "step": 5331 }, { "epoch": 0.98, "learning_rate": 1.5737827103340375e-07, "loss": 0.989, "step": 5332 }, { "epoch": 0.98, "learning_rate": 1.5404846361833926e-07, "loss": 1.1637, "step": 5333 }, { "epoch": 0.98, "learning_rate": 1.5075423272231658e-07, "loss": 0.9529, "step": 5334 }, { "epoch": 0.98, "learning_rate": 1.4749557951911906e-07, "loss": 1.0949, "step": 5335 }, { "epoch": 0.98, "learning_rate": 1.4427250516985124e-07, "loss": 1.0249, "step": 5336 }, { "epoch": 0.98, "learning_rate": 1.4108501082295e-07, "loss": 0.9198, "step": 5337 }, { "epoch": 0.98, "learning_rate": 1.3793309761414018e-07, "loss": 1.018, "step": 5338 }, { "epoch": 0.98, "learning_rate": 1.3481676666651234e-07, "loss": 0.9858, "step": 5339 }, { "epoch": 0.98, "learning_rate": 1.3173601909045597e-07, "loss": 0.941, "step": 5340 }, { "epoch": 0.98, "learning_rate": 1.2869085598368191e-07, "loss": 1.0448, "step": 5341 }, { "epoch": 0.98, "learning_rate": 1.2568127843122223e-07, "loss": 0.932, "step": 5342 }, { "epoch": 0.98, "learning_rate": 1.2270728750544137e-07, "loss": 1.0394, "step": 5343 }, { "epoch": 0.98, "learning_rate": 1.1976888426600275e-07, "loss": 1.0399, "step": 5344 }, { "epoch": 0.98, "learning_rate": 1.1686606975991333e-07, "loss": 1.0216, "step": 5345 }, { "epoch": 0.99, "learning_rate": 1.1399884502146796e-07, "loss": 1.2395, "step": 5346 }, { "epoch": 0.99, "learning_rate": 1.111672110723272e-07, "loss": 1.0419, "step": 5347 }, { "epoch": 0.99, "learning_rate": 1.083711689214062e-07, "loss": 1.0036, "step": 5348 }, { "epoch": 0.99, "learning_rate": 1.0561071956500801e-07, "loss": 0.983, "step": 5349 }, { "epoch": 0.99, "learning_rate": 1.0288586398670142e-07, "loss": 1.0732, "step": 5350 }, { "epoch": 0.99, "learning_rate": 1.0019660315738755e-07, "loss": 0.9421, "step": 5351 }, { "epoch": 0.99, "learning_rate": 9.75429380352999e-08, "loss": 1.0221, "step": 5352 }, { "epoch": 0.99, "learning_rate": 9.492486956597102e-08, "loss": 1.0799, "step": 5353 }, { "epoch": 0.99, "learning_rate": 9.23423986822547e-08, "loss": 1.11, "step": 5354 }, { "epoch": 0.99, "learning_rate": 8.979552630433708e-08, "loss": 0.998, "step": 5355 }, { "epoch": 0.99, "learning_rate": 8.728425333967005e-08, "loss": 0.9062, "step": 5356 }, { "epoch": 0.99, "learning_rate": 8.480858068309339e-08, "loss": 0.9493, "step": 5357 }, { "epoch": 0.99, "learning_rate": 8.236850921670148e-08, "loss": 0.9495, "step": 5358 }, { "epoch": 0.99, "learning_rate": 7.996403980993217e-08, "loss": 0.9217, "step": 5359 }, { "epoch": 0.99, "learning_rate": 7.759517331952238e-08, "loss": 1.0693, "step": 5360 }, { "epoch": 0.99, "learning_rate": 7.526191058955245e-08, "loss": 1.0943, "step": 5361 }, { "epoch": 0.99, "learning_rate": 7.296425245137961e-08, "loss": 1.1406, "step": 5362 }, { "epoch": 0.99, "learning_rate": 7.070219972370451e-08, "loss": 0.9957, "step": 5363 }, { "epoch": 0.99, "learning_rate": 6.847575321251576e-08, "loss": 1.1293, "step": 5364 }, { "epoch": 0.99, "learning_rate": 6.628491371114543e-08, "loss": 1.1278, "step": 5365 }, { "epoch": 0.99, "learning_rate": 6.412968200019131e-08, "loss": 1.0167, "step": 5366 }, { "epoch": 0.99, "learning_rate": 6.201005884762801e-08, "loss": 0.9231, "step": 5367 }, { "epoch": 0.99, "learning_rate": 5.99260450086736e-08, "loss": 1.0466, "step": 5368 }, { "epoch": 0.99, "learning_rate": 5.787764122592298e-08, "loss": 1.0542, "step": 5369 }, { "epoch": 0.99, "learning_rate": 5.5864848229225666e-08, "loss": 1.0429, "step": 5370 }, { "epoch": 0.99, "learning_rate": 5.3887666735785716e-08, "loss": 1.0996, "step": 5371 }, { "epoch": 0.99, "learning_rate": 5.1946097450084054e-08, "loss": 1.0395, "step": 5372 }, { "epoch": 0.99, "learning_rate": 5.004014106394506e-08, "loss": 0.9695, "step": 5373 }, { "epoch": 0.99, "learning_rate": 4.816979825648105e-08, "loss": 0.9664, "step": 5374 }, { "epoch": 0.99, "learning_rate": 4.6335069694125597e-08, "loss": 0.9501, "step": 5375 }, { "epoch": 0.99, "learning_rate": 4.4535956030611336e-08, "loss": 1.0749, "step": 5376 }, { "epoch": 0.99, "learning_rate": 4.277245790698103e-08, "loss": 1.2564, "step": 5377 }, { "epoch": 0.99, "learning_rate": 4.1044575951620924e-08, "loss": 1.1097, "step": 5378 }, { "epoch": 0.99, "learning_rate": 3.935231078017187e-08, "loss": 0.9996, "step": 5379 }, { "epoch": 0.99, "learning_rate": 3.7695662995618216e-08, "loss": 1.0533, "step": 5380 }, { "epoch": 0.99, "learning_rate": 3.6074633188265536e-08, "loss": 1.0291, "step": 5381 }, { "epoch": 0.99, "learning_rate": 3.4489221935685156e-08, "loss": 1.0478, "step": 5382 }, { "epoch": 0.99, "learning_rate": 3.2939429802791856e-08, "loss": 1.0177, "step": 5383 }, { "epoch": 0.99, "learning_rate": 3.1425257341799464e-08, "loss": 0.9833, "step": 5384 }, { "epoch": 0.99, "learning_rate": 2.994670509223196e-08, "loss": 1.1364, "step": 5385 }, { "epoch": 0.99, "learning_rate": 2.8503773580912386e-08, "loss": 1.1385, "step": 5386 }, { "epoch": 0.99, "learning_rate": 2.7096463321973908e-08, "loss": 1.0164, "step": 5387 }, { "epoch": 0.99, "learning_rate": 2.5724774816870965e-08, "loss": 1.1719, "step": 5388 }, { "epoch": 0.99, "learning_rate": 2.438870855434594e-08, "loss": 1.1125, "step": 5389 }, { "epoch": 0.99, "learning_rate": 2.3088265010473565e-08, "loss": 1.001, "step": 5390 }, { "epoch": 0.99, "learning_rate": 2.1823444648605416e-08, "loss": 1.1435, "step": 5391 }, { "epoch": 0.99, "learning_rate": 2.0594247919414333e-08, "loss": 1.1409, "step": 5392 }, { "epoch": 0.99, "learning_rate": 1.9400675260883294e-08, "loss": 1.2421, "step": 5393 }, { "epoch": 0.99, "learning_rate": 1.8242727098305435e-08, "loss": 0.955, "step": 5394 }, { "epoch": 0.99, "learning_rate": 1.7120403844272937e-08, "loss": 1.0009, "step": 5395 }, { "epoch": 0.99, "learning_rate": 1.603370589867703e-08, "loss": 1.0508, "step": 5396 }, { "epoch": 0.99, "learning_rate": 1.4982633648730204e-08, "loss": 1.0657, "step": 5397 }, { "epoch": 0.99, "learning_rate": 1.3967187468932885e-08, "loss": 1.0816, "step": 5398 }, { "epoch": 0.99, "learning_rate": 1.2987367721128963e-08, "loss": 0.9815, "step": 5399 }, { "epoch": 1.0, "learning_rate": 1.2043174754405861e-08, "loss": 0.9972, "step": 5400 }, { "epoch": 1.0, "learning_rate": 1.1134608905227773e-08, "loss": 1.0804, "step": 5401 }, { "epoch": 1.0, "learning_rate": 1.0261670497313525e-08, "loss": 1.0546, "step": 5402 }, { "epoch": 1.0, "learning_rate": 9.4243598417032e-09, "loss": 1.1193, "step": 5403 }, { "epoch": 1.0, "learning_rate": 8.622677236735932e-09, "loss": 1.1438, "step": 5404 }, { "epoch": 1.0, "learning_rate": 7.856622968072103e-09, "loss": 1.0255, "step": 5405 }, { "epoch": 1.0, "learning_rate": 7.126197308671145e-09, "loss": 0.9139, "step": 5406 }, { "epoch": 1.0, "learning_rate": 6.4314005187804394e-09, "loss": 1.0861, "step": 5407 }, { "epoch": 1.0, "learning_rate": 5.7722328459797195e-09, "loss": 0.8922, "step": 5408 }, { "epoch": 1.0, "learning_rate": 5.148694525125564e-09, "loss": 1.083, "step": 5409 }, { "epoch": 1.0, "learning_rate": 4.5607857784069065e-09, "loss": 1.0029, "step": 5410 }, { "epoch": 1.0, "learning_rate": 4.0085068153006274e-09, "loss": 0.9107, "step": 5411 }, { "epoch": 1.0, "learning_rate": 3.491857832593759e-09, "loss": 0.9864, "step": 5412 }, { "epoch": 1.0, "learning_rate": 3.0108390143612774e-09, "loss": 1.0202, "step": 5413 }, { "epoch": 1.0, "learning_rate": 2.565450532010516e-09, "loss": 1.0615, "step": 5414 }, { "epoch": 1.0, "learning_rate": 2.1556925442367535e-09, "loss": 0.9911, "step": 5415 }, { "epoch": 1.0, "learning_rate": 1.7815651970343182e-09, "loss": 1.0924, "step": 5416 }, { "epoch": 1.0, "learning_rate": 1.443068623729893e-09, "loss": 1.1256, "step": 5417 }, { "epoch": 1.0, "learning_rate": 1.1402029449159024e-09, "loss": 1.0043, "step": 5418 }, { "epoch": 1.0, "learning_rate": 8.729682685171269e-10, "loss": 0.993, "step": 5419 }, { "epoch": 1.0, "learning_rate": 6.413646897462933e-10, "loss": 1.1428, "step": 5420 }, { "epoch": 1.0, "learning_rate": 4.453922911262787e-10, "loss": 1.002, "step": 5421 }, { "epoch": 1.0, "learning_rate": 2.8505114250121367e-10, "loss": 1.0557, "step": 5422 }, { "epoch": 1.0, "learning_rate": 1.6034130098097067e-10, "loss": 1.0573, "step": 5423 }, { "epoch": 1.0, "learning_rate": 7.126281101887955e-11, "loss": 0.9316, "step": 5424 }, { "epoch": 1.0, "learning_rate": 1.781570434511437e-11, "loss": 1.0129, "step": 5425 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.8958, "step": 5426 }, { "epoch": 1.0, "step": 5426, "total_flos": 4973007551987712.0, "train_loss": 1.1060255723842343, "train_runtime": 29434.1927, "train_samples_per_second": 23.599, "train_steps_per_second": 0.184 } ], "logging_steps": 1.0, "max_steps": 5426, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 4973007551987712.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }