diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,32586 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.999861795734095, + "eval_steps": 500, + "global_step": 5426, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.226993865030675e-06, + "loss": 2.9133, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 2.45398773006135e-06, + "loss": 3.055, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 3.680981595092025e-06, + "loss": 2.9751, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 4.9079754601227e-06, + "loss": 3.0182, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 6.134969325153374e-06, + "loss": 2.8012, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 7.36196319018405e-06, + "loss": 2.9888, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 8.588957055214725e-06, + "loss": 2.714, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 9.8159509202454e-06, + "loss": 2.3919, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 1.1042944785276074e-05, + "loss": 2.7006, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 1.2269938650306748e-05, + "loss": 2.2474, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 1.3496932515337424e-05, + "loss": 2.2551, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 1.47239263803681e-05, + "loss": 2.1976, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 1.5950920245398772e-05, + "loss": 2.1565, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 1.717791411042945e-05, + "loss": 2.1496, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 1.8404907975460123e-05, + "loss": 2.1257, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 1.96319018404908e-05, + "loss": 1.8707, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 2.085889570552147e-05, + "loss": 2.1615, + "step": 17 + }, + { + "epoch": 0.0, + "learning_rate": 2.208588957055215e-05, + "loss": 1.8606, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 2.3312883435582822e-05, + "loss": 2.0974, + "step": 19 + }, + { + "epoch": 0.0, + "learning_rate": 2.4539877300613496e-05, + "loss": 2.0825, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 2.5766871165644174e-05, + "loss": 1.9132, + "step": 21 + }, + { + "epoch": 0.0, + "learning_rate": 2.6993865030674848e-05, + "loss": 1.9175, + "step": 22 + }, + { + "epoch": 0.0, + "learning_rate": 2.822085889570552e-05, + "loss": 1.7998, + "step": 23 + }, + { + "epoch": 0.0, + "learning_rate": 2.94478527607362e-05, + "loss": 1.7691, + "step": 24 + }, + { + "epoch": 0.0, + "learning_rate": 3.067484662576687e-05, + "loss": 1.6433, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 3.1901840490797544e-05, + "loss": 1.6845, + "step": 26 + }, + { + "epoch": 0.0, + "learning_rate": 3.312883435582822e-05, + "loss": 1.8083, + "step": 27 + }, + { + "epoch": 0.01, + "learning_rate": 3.43558282208589e-05, + "loss": 1.784, + "step": 28 + }, + { + "epoch": 0.01, + "learning_rate": 3.558282208588957e-05, + "loss": 1.7445, + "step": 29 + }, + { + "epoch": 0.01, + "learning_rate": 3.6809815950920246e-05, + "loss": 1.7455, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 3.8036809815950924e-05, + "loss": 1.7485, + "step": 31 + }, + { + "epoch": 0.01, + "learning_rate": 3.92638036809816e-05, + "loss": 1.5979, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 4.049079754601227e-05, + "loss": 1.6039, + "step": 33 + }, + { + "epoch": 0.01, + "learning_rate": 4.171779141104294e-05, + "loss": 1.5644, + "step": 34 + }, + { + "epoch": 0.01, + "learning_rate": 4.2944785276073626e-05, + "loss": 1.6367, + "step": 35 + }, + { + "epoch": 0.01, + "learning_rate": 4.41717791411043e-05, + "loss": 1.651, + "step": 36 + }, + { + "epoch": 0.01, + "learning_rate": 4.539877300613497e-05, + "loss": 1.5372, + "step": 37 + }, + { + "epoch": 0.01, + "learning_rate": 4.6625766871165645e-05, + "loss": 1.7554, + "step": 38 + }, + { + "epoch": 0.01, + "learning_rate": 4.785276073619632e-05, + "loss": 1.5379, + "step": 39 + }, + { + "epoch": 0.01, + "learning_rate": 4.907975460122699e-05, + "loss": 1.583, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 5.030674846625767e-05, + "loss": 1.5597, + "step": 41 + }, + { + "epoch": 0.01, + "learning_rate": 5.153374233128835e-05, + "loss": 1.7612, + "step": 42 + }, + { + "epoch": 0.01, + "learning_rate": 5.276073619631902e-05, + "loss": 1.595, + "step": 43 + }, + { + "epoch": 0.01, + "learning_rate": 5.3987730061349695e-05, + "loss": 1.6658, + "step": 44 + }, + { + "epoch": 0.01, + "learning_rate": 5.521472392638037e-05, + "loss": 1.7242, + "step": 45 + }, + { + "epoch": 0.01, + "learning_rate": 5.644171779141104e-05, + "loss": 1.5264, + "step": 46 + }, + { + "epoch": 0.01, + "learning_rate": 5.766871165644172e-05, + "loss": 1.5155, + "step": 47 + }, + { + "epoch": 0.01, + "learning_rate": 5.88957055214724e-05, + "loss": 1.6741, + "step": 48 + }, + { + "epoch": 0.01, + "learning_rate": 6.012269938650307e-05, + "loss": 1.5787, + "step": 49 + }, + { + "epoch": 0.01, + "learning_rate": 6.134969325153375e-05, + "loss": 1.5553, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 6.257668711656443e-05, + "loss": 1.6424, + "step": 51 + }, + { + "epoch": 0.01, + "learning_rate": 6.380368098159509e-05, + "loss": 1.5311, + "step": 52 + }, + { + "epoch": 0.01, + "learning_rate": 6.503067484662577e-05, + "loss": 1.3966, + "step": 53 + }, + { + "epoch": 0.01, + "learning_rate": 6.625766871165644e-05, + "loss": 1.491, + "step": 54 + }, + { + "epoch": 0.01, + "learning_rate": 6.748466257668711e-05, + "loss": 1.4504, + "step": 55 + }, + { + "epoch": 0.01, + "learning_rate": 6.87116564417178e-05, + "loss": 1.6267, + "step": 56 + }, + { + "epoch": 0.01, + "learning_rate": 6.993865030674847e-05, + "loss": 1.4846, + "step": 57 + }, + { + "epoch": 0.01, + "learning_rate": 7.116564417177914e-05, + "loss": 1.4432, + "step": 58 + }, + { + "epoch": 0.01, + "learning_rate": 7.239263803680982e-05, + "loss": 1.5841, + "step": 59 + }, + { + "epoch": 0.01, + "learning_rate": 7.361963190184049e-05, + "loss": 1.5825, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 7.484662576687118e-05, + "loss": 1.5897, + "step": 61 + }, + { + "epoch": 0.01, + "learning_rate": 7.607361963190185e-05, + "loss": 1.4428, + "step": 62 + }, + { + "epoch": 0.01, + "learning_rate": 7.730061349693252e-05, + "loss": 1.4481, + "step": 63 + }, + { + "epoch": 0.01, + "learning_rate": 7.85276073619632e-05, + "loss": 1.5164, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 7.975460122699386e-05, + "loss": 1.5669, + "step": 65 + }, + { + "epoch": 0.01, + "learning_rate": 8.098159509202454e-05, + "loss": 1.5615, + "step": 66 + }, + { + "epoch": 0.01, + "learning_rate": 8.220858895705523e-05, + "loss": 1.4287, + "step": 67 + }, + { + "epoch": 0.01, + "learning_rate": 8.343558282208588e-05, + "loss": 1.5482, + "step": 68 + }, + { + "epoch": 0.01, + "learning_rate": 8.466257668711657e-05, + "loss": 1.4216, + "step": 69 + }, + { + "epoch": 0.01, + "learning_rate": 8.588957055214725e-05, + "loss": 1.5713, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 8.711656441717791e-05, + "loss": 1.3897, + "step": 71 + }, + { + "epoch": 0.01, + "learning_rate": 8.83435582822086e-05, + "loss": 1.4794, + "step": 72 + }, + { + "epoch": 0.01, + "learning_rate": 8.957055214723928e-05, + "loss": 1.4433, + "step": 73 + }, + { + "epoch": 0.01, + "learning_rate": 9.079754601226993e-05, + "loss": 1.3892, + "step": 74 + }, + { + "epoch": 0.01, + "learning_rate": 9.202453987730062e-05, + "loss": 1.494, + "step": 75 + }, + { + "epoch": 0.01, + "learning_rate": 9.325153374233129e-05, + "loss": 1.5278, + "step": 76 + }, + { + "epoch": 0.01, + "learning_rate": 9.447852760736196e-05, + "loss": 1.3703, + "step": 77 + }, + { + "epoch": 0.01, + "learning_rate": 9.570552147239264e-05, + "loss": 1.4735, + "step": 78 + }, + { + "epoch": 0.01, + "learning_rate": 9.693251533742331e-05, + "loss": 1.5073, + "step": 79 + }, + { + "epoch": 0.01, + "learning_rate": 9.815950920245399e-05, + "loss": 1.3267, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 9.938650306748467e-05, + "loss": 1.4909, + "step": 81 + }, + { + "epoch": 0.02, + "learning_rate": 0.00010061349693251534, + "loss": 1.2488, + "step": 82 + }, + { + "epoch": 0.02, + "learning_rate": 0.00010184049079754601, + "loss": 1.3247, + "step": 83 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001030674846625767, + "loss": 1.4525, + "step": 84 + }, + { + "epoch": 0.02, + "learning_rate": 0.00010429447852760737, + "loss": 1.4053, + "step": 85 + }, + { + "epoch": 0.02, + "learning_rate": 0.00010552147239263804, + "loss": 1.6705, + "step": 86 + }, + { + "epoch": 0.02, + "learning_rate": 0.00010674846625766872, + "loss": 1.5301, + "step": 87 + }, + { + "epoch": 0.02, + "learning_rate": 0.00010797546012269939, + "loss": 1.533, + "step": 88 + }, + { + "epoch": 0.02, + "learning_rate": 0.00010920245398773006, + "loss": 1.3868, + "step": 89 + }, + { + "epoch": 0.02, + "learning_rate": 0.00011042944785276075, + "loss": 1.3824, + "step": 90 + }, + { + "epoch": 0.02, + "learning_rate": 0.00011165644171779142, + "loss": 1.442, + "step": 91 + }, + { + "epoch": 0.02, + "learning_rate": 0.00011288343558282209, + "loss": 1.5594, + "step": 92 + }, + { + "epoch": 0.02, + "learning_rate": 0.00011411042944785277, + "loss": 1.4202, + "step": 93 + }, + { + "epoch": 0.02, + "learning_rate": 0.00011533742331288344, + "loss": 1.5025, + "step": 94 + }, + { + "epoch": 0.02, + "learning_rate": 0.00011656441717791411, + "loss": 1.5514, + "step": 95 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001177914110429448, + "loss": 1.5556, + "step": 96 + }, + { + "epoch": 0.02, + "learning_rate": 0.00011901840490797547, + "loss": 1.5445, + "step": 97 + }, + { + "epoch": 0.02, + "learning_rate": 0.00012024539877300614, + "loss": 1.5812, + "step": 98 + }, + { + "epoch": 0.02, + "learning_rate": 0.00012147239263803682, + "loss": 1.3252, + "step": 99 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001226993865030675, + "loss": 1.58, + "step": 100 + }, + { + "epoch": 0.02, + "learning_rate": 0.00012392638036809816, + "loss": 1.3717, + "step": 101 + }, + { + "epoch": 0.02, + "learning_rate": 0.00012515337423312886, + "loss": 1.337, + "step": 102 + }, + { + "epoch": 0.02, + "learning_rate": 0.00012638036809815953, + "loss": 1.3888, + "step": 103 + }, + { + "epoch": 0.02, + "learning_rate": 0.00012760736196319017, + "loss": 1.4114, + "step": 104 + }, + { + "epoch": 0.02, + "learning_rate": 0.00012883435582822084, + "loss": 1.4551, + "step": 105 + }, + { + "epoch": 0.02, + "learning_rate": 0.00013006134969325154, + "loss": 1.6447, + "step": 106 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001312883435582822, + "loss": 1.2774, + "step": 107 + }, + { + "epoch": 0.02, + "learning_rate": 0.00013251533742331288, + "loss": 1.3539, + "step": 108 + }, + { + "epoch": 0.02, + "learning_rate": 0.00013374233128834358, + "loss": 1.3999, + "step": 109 + }, + { + "epoch": 0.02, + "learning_rate": 0.00013496932515337422, + "loss": 1.5652, + "step": 110 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001361963190184049, + "loss": 1.3521, + "step": 111 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001374233128834356, + "loss": 1.3553, + "step": 112 + }, + { + "epoch": 0.02, + "learning_rate": 0.00013865030674846626, + "loss": 1.4753, + "step": 113 + }, + { + "epoch": 0.02, + "learning_rate": 0.00013987730061349693, + "loss": 1.5041, + "step": 114 + }, + { + "epoch": 0.02, + "learning_rate": 0.00014110429447852763, + "loss": 1.2509, + "step": 115 + }, + { + "epoch": 0.02, + "learning_rate": 0.00014233128834355828, + "loss": 1.5168, + "step": 116 + }, + { + "epoch": 0.02, + "learning_rate": 0.00014355828220858895, + "loss": 1.4064, + "step": 117 + }, + { + "epoch": 0.02, + "learning_rate": 0.00014478527607361964, + "loss": 1.4439, + "step": 118 + }, + { + "epoch": 0.02, + "learning_rate": 0.00014601226993865031, + "loss": 1.4274, + "step": 119 + }, + { + "epoch": 0.02, + "learning_rate": 0.00014723926380368098, + "loss": 1.5336, + "step": 120 + }, + { + "epoch": 0.02, + "learning_rate": 0.00014846625766871168, + "loss": 1.4122, + "step": 121 + }, + { + "epoch": 0.02, + "learning_rate": 0.00014969325153374235, + "loss": 1.3413, + "step": 122 + }, + { + "epoch": 0.02, + "learning_rate": 0.000150920245398773, + "loss": 1.3997, + "step": 123 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001521472392638037, + "loss": 1.417, + "step": 124 + }, + { + "epoch": 0.02, + "learning_rate": 0.00015337423312883436, + "loss": 1.5019, + "step": 125 + }, + { + "epoch": 0.02, + "learning_rate": 0.00015460122699386504, + "loss": 1.3009, + "step": 126 + }, + { + "epoch": 0.02, + "learning_rate": 0.00015582822085889573, + "loss": 1.4166, + "step": 127 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001570552147239264, + "loss": 1.3501, + "step": 128 + }, + { + "epoch": 0.02, + "learning_rate": 0.00015828220858895705, + "loss": 1.3895, + "step": 129 + }, + { + "epoch": 0.02, + "learning_rate": 0.00015950920245398772, + "loss": 1.3085, + "step": 130 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016073619631901842, + "loss": 1.3562, + "step": 131 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016196319018404909, + "loss": 1.3536, + "step": 132 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016319018404907976, + "loss": 1.4299, + "step": 133 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016441717791411045, + "loss": 1.3528, + "step": 134 + }, + { + "epoch": 0.02, + "learning_rate": 0.0001656441717791411, + "loss": 1.3283, + "step": 135 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016687116564417177, + "loss": 1.3906, + "step": 136 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016809815950920247, + "loss": 1.2988, + "step": 137 + }, + { + "epoch": 0.03, + "learning_rate": 0.00016932515337423314, + "loss": 1.4344, + "step": 138 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001705521472392638, + "loss": 1.328, + "step": 139 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001717791411042945, + "loss": 1.4753, + "step": 140 + }, + { + "epoch": 0.03, + "learning_rate": 0.00017300613496932518, + "loss": 1.4446, + "step": 141 + }, + { + "epoch": 0.03, + "learning_rate": 0.00017423312883435582, + "loss": 1.3035, + "step": 142 + }, + { + "epoch": 0.03, + "learning_rate": 0.00017546012269938652, + "loss": 1.336, + "step": 143 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001766871165644172, + "loss": 1.3953, + "step": 144 + }, + { + "epoch": 0.03, + "learning_rate": 0.00017791411042944786, + "loss": 1.3547, + "step": 145 + }, + { + "epoch": 0.03, + "learning_rate": 0.00017914110429447856, + "loss": 1.3653, + "step": 146 + }, + { + "epoch": 0.03, + "learning_rate": 0.00018036809815950923, + "loss": 1.4712, + "step": 147 + }, + { + "epoch": 0.03, + "learning_rate": 0.00018159509202453987, + "loss": 1.277, + "step": 148 + }, + { + "epoch": 0.03, + "learning_rate": 0.00018282208588957057, + "loss": 1.3809, + "step": 149 + }, + { + "epoch": 0.03, + "learning_rate": 0.00018404907975460124, + "loss": 1.353, + "step": 150 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001852760736196319, + "loss": 1.2671, + "step": 151 + }, + { + "epoch": 0.03, + "learning_rate": 0.00018650306748466258, + "loss": 1.329, + "step": 152 + }, + { + "epoch": 0.03, + "learning_rate": 0.00018773006134969328, + "loss": 1.3542, + "step": 153 + }, + { + "epoch": 0.03, + "learning_rate": 0.00018895705521472392, + "loss": 1.4236, + "step": 154 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001901840490797546, + "loss": 1.3466, + "step": 155 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001914110429447853, + "loss": 1.2041, + "step": 156 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019263803680981596, + "loss": 1.2636, + "step": 157 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019386503067484663, + "loss": 1.2953, + "step": 158 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019509202453987733, + "loss": 1.3976, + "step": 159 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019631901840490797, + "loss": 1.4372, + "step": 160 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019754601226993864, + "loss": 1.4172, + "step": 161 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019877300613496934, + "loss": 1.2351, + "step": 162 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002, + "loss": 1.4594, + "step": 163 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999998218429567, + "loss": 1.3737, + "step": 164 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999992873718898, + "loss": 1.395, + "step": 165 + }, + { + "epoch": 0.03, + "learning_rate": 0.000199999839658699, + "loss": 1.4466, + "step": 166 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999997149488575, + "loss": 1.3668, + "step": 167 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999955460770887, + "loss": 1.2873, + "step": 168 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999935863531025, + "loss": 1.255, + "step": 169 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999912703173148, + "loss": 1.2899, + "step": 170 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999885979705512, + "loss": 1.3567, + "step": 171 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999985569313763, + "loss": 1.3086, + "step": 172 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999821843480297, + "loss": 1.2711, + "step": 173 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999784430745575, + "loss": 1.4246, + "step": 174 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999743454946799, + "loss": 1.4704, + "step": 175 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999698916098562, + "loss": 1.571, + "step": 176 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999650814216742, + "loss": 1.2829, + "step": 177 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999959914931847, + "loss": 1.2935, + "step": 178 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999954392142216, + "loss": 1.4999, + "step": 179 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999948513054749, + "loss": 1.4001, + "step": 180 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999422776715404, + "loss": 1.4315, + "step": 181 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999356859948123, + "loss": 1.3782, + "step": 182 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999287380269133, + "loss": 1.3725, + "step": 183 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999214337703195, + "loss": 1.552, + "step": 184 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999137732276326, + "loss": 1.2235, + "step": 185 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019999057564015833, + "loss": 1.4118, + "step": 186 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999897383295027, + "loss": 1.4871, + "step": 187 + }, + { + "epoch": 0.03, + "learning_rate": 0.00019998886539109479, + "loss": 1.402, + "step": 188 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001999879568252456, + "loss": 1.4212, + "step": 189 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019998701263227887, + "loss": 1.2837, + "step": 190 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019998603281253107, + "loss": 1.4063, + "step": 191 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999850173663513, + "loss": 1.3891, + "step": 192 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019998396629410133, + "loss": 1.26, + "step": 193 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019998287959615573, + "loss": 1.2396, + "step": 194 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999817572729017, + "loss": 1.2441, + "step": 195 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019998059932473914, + "loss": 1.3385, + "step": 196 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999794057520806, + "loss": 1.2471, + "step": 197 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999781765553514, + "loss": 1.2956, + "step": 198 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019997691173498955, + "loss": 1.4159, + "step": 199 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019997561129144565, + "loss": 1.4816, + "step": 200 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019997427522518315, + "loss": 1.2821, + "step": 201 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019997290353667804, + "loss": 1.3105, + "step": 202 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999714962264191, + "loss": 1.4725, + "step": 203 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019997005329490776, + "loss": 1.4484, + "step": 204 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999685747426582, + "loss": 1.1717, + "step": 205 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019996706057019722, + "loss": 1.3503, + "step": 206 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019996551077806435, + "loss": 1.3575, + "step": 207 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019996392536681174, + "loss": 1.3482, + "step": 208 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999623043370044, + "loss": 1.5001, + "step": 209 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019996064768921983, + "loss": 1.156, + "step": 210 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999589554240484, + "loss": 1.3218, + "step": 211 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019995722754209303, + "loss": 1.2664, + "step": 212 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999554640439694, + "loss": 1.3725, + "step": 213 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019995366493030589, + "loss": 1.1872, + "step": 214 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019995183020174353, + "loss": 1.4838, + "step": 215 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019994995985893606, + "loss": 1.4477, + "step": 216 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999480539025499, + "loss": 1.4168, + "step": 217 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019994611233326422, + "loss": 1.6, + "step": 218 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999441351517708, + "loss": 1.2846, + "step": 219 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019994212235877406, + "loss": 1.5858, + "step": 220 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019994007395499134, + "loss": 1.3058, + "step": 221 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019993798994115237, + "loss": 1.3324, + "step": 222 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019993587031799984, + "loss": 1.3545, + "step": 223 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019993371508628886, + "loss": 1.281, + "step": 224 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019993152424678748, + "loss": 1.2807, + "step": 225 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999292978002763, + "loss": 1.3583, + "step": 226 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019992703574754864, + "loss": 1.2874, + "step": 227 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019992473808941045, + "loss": 1.3659, + "step": 228 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019992240482668048, + "loss": 1.3034, + "step": 229 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999200359601901, + "loss": 1.4119, + "step": 230 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999176314907833, + "loss": 1.3809, + "step": 231 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019991519141931692, + "loss": 1.3961, + "step": 232 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019991271574666035, + "loss": 1.3355, + "step": 233 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019991020447369568, + "loss": 1.4137, + "step": 234 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019990765760131776, + "loss": 1.3358, + "step": 235 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019990507513043405, + "loss": 1.4359, + "step": 236 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001999024570619647, + "loss": 1.3345, + "step": 237 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019989980339684261, + "loss": 1.3421, + "step": 238 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019989711413601332, + "loss": 1.2213, + "step": 239 + }, + { + "epoch": 0.04, + "learning_rate": 0.000199894389280435, + "loss": 1.426, + "step": 240 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001998916288310786, + "loss": 1.2883, + "step": 241 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001998888327889277, + "loss": 1.3506, + "step": 242 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019988600115497854, + "loss": 1.3957, + "step": 243 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001998831339302401, + "loss": 1.3449, + "step": 244 + }, + { + "epoch": 0.05, + "learning_rate": 0.000199880231115734, + "loss": 1.4988, + "step": 245 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019987729271249457, + "loss": 1.549, + "step": 246 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019987431872156878, + "loss": 1.4106, + "step": 247 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019987130914401633, + "loss": 1.3517, + "step": 248 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019986826398090955, + "loss": 1.4112, + "step": 249 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001998651832333335, + "loss": 1.2307, + "step": 250 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019986206690238587, + "loss": 1.4371, + "step": 251 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019985891498917706, + "loss": 1.3221, + "step": 252 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019985572749483014, + "loss": 1.3698, + "step": 253 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001998525044204809, + "loss": 1.3083, + "step": 254 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001998492457672777, + "loss": 1.3395, + "step": 255 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019984595153638165, + "loss": 1.3562, + "step": 256 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001998426217289666, + "loss": 1.3313, + "step": 257 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019983925634621893, + "loss": 1.4963, + "step": 258 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019983585538933785, + "loss": 1.5295, + "step": 259 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001998324188595351, + "loss": 1.3574, + "step": 260 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019982894675803518, + "loss": 1.3337, + "step": 261 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019982543908607524, + "loss": 1.3864, + "step": 262 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019982189584490519, + "loss": 1.2631, + "step": 263 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019981831703578742, + "loss": 1.3177, + "step": 264 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001998147026599972, + "loss": 1.463, + "step": 265 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019981105271882235, + "loss": 1.2233, + "step": 266 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019980736721356336, + "loss": 1.3262, + "step": 267 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019980364614553348, + "loss": 1.3905, + "step": 268 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019979988951605856, + "loss": 1.4483, + "step": 269 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019979609732647717, + "loss": 1.4095, + "step": 270 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019979226957814045, + "loss": 1.1959, + "step": 271 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019978840627241236, + "loss": 1.4526, + "step": 272 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001997845074106694, + "loss": 1.3379, + "step": 273 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019978057299430079, + "loss": 1.3522, + "step": 274 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019977660302470843, + "loss": 1.3332, + "step": 275 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001997725975033069, + "loss": 1.4385, + "step": 276 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019976855643152338, + "loss": 1.3786, + "step": 277 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019976447981079782, + "loss": 1.2158, + "step": 278 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001997603676425827, + "loss": 1.3663, + "step": 279 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019975621992834327, + "loss": 1.3057, + "step": 280 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019975203666955745, + "loss": 1.2422, + "step": 281 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001997478178677158, + "loss": 1.3122, + "step": 282 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019974356352432147, + "loss": 1.4501, + "step": 283 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019973927364089043, + "loss": 1.2732, + "step": 284 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001997349482189512, + "loss": 1.2324, + "step": 285 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001997305872600449, + "loss": 1.4399, + "step": 286 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001997261907657255, + "loss": 1.5414, + "step": 287 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001997217587375595, + "loss": 1.3618, + "step": 288 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001997172911771261, + "loss": 1.354, + "step": 289 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019971278808601716, + "loss": 1.2714, + "step": 290 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001997082494658372, + "loss": 1.3236, + "step": 291 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019970367531820337, + "loss": 1.4577, + "step": 292 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001996990656447455, + "loss": 1.4747, + "step": 293 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019969442044710613, + "loss": 1.4719, + "step": 294 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019968973972694037, + "loss": 1.3618, + "step": 295 + }, + { + "epoch": 0.05, + "learning_rate": 0.000199685023485916, + "loss": 1.1732, + "step": 296 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019968027172571356, + "loss": 1.4505, + "step": 297 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019967548444802612, + "loss": 1.3874, + "step": 298 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019967066165455942, + "loss": 1.2825, + "step": 299 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019966580334703196, + "loss": 1.3764, + "step": 300 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001996609095271748, + "loss": 1.197, + "step": 301 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019965598019673167, + "loss": 1.2907, + "step": 302 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019965101535745895, + "loss": 1.433, + "step": 303 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019964601501112566, + "loss": 1.4297, + "step": 304 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019964097915951357, + "loss": 1.2658, + "step": 305 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019963590780441696, + "loss": 1.3734, + "step": 306 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019963080094764286, + "loss": 1.3946, + "step": 307 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019962565859101087, + "loss": 1.2515, + "step": 308 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019962048073635336, + "loss": 1.3725, + "step": 309 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019961526738551522, + "loss": 1.5055, + "step": 310 + }, + { + "epoch": 0.06, + "learning_rate": 0.000199610018540354, + "loss": 1.2875, + "step": 311 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019960473420274004, + "loss": 1.3318, + "step": 312 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019959941437455614, + "loss": 1.3646, + "step": 313 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019959405905769785, + "loss": 1.322, + "step": 314 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001995886682540734, + "loss": 1.4699, + "step": 315 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001995832419656035, + "loss": 1.3175, + "step": 316 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019957778019422172, + "loss": 1.2536, + "step": 317 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001995722829418741, + "loss": 1.2587, + "step": 318 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019956675021051945, + "loss": 1.43, + "step": 319 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001995611820021291, + "loss": 1.4215, + "step": 320 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019955557831868707, + "loss": 1.2245, + "step": 321 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019954993916219008, + "loss": 1.3355, + "step": 322 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019954426453464746, + "loss": 1.3529, + "step": 323 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019953855443808108, + "loss": 1.2456, + "step": 324 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001995328088745256, + "loss": 1.2831, + "step": 325 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019952702784602818, + "loss": 1.2316, + "step": 326 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019952121135464874, + "loss": 1.4306, + "step": 327 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019951535940245977, + "loss": 1.2444, + "step": 328 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019950947199154638, + "loss": 1.3893, + "step": 329 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019950354912400635, + "loss": 1.3107, + "step": 330 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019949759080195006, + "loss": 1.35, + "step": 331 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001994915970275006, + "loss": 1.4179, + "step": 332 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019948556780279356, + "loss": 1.2205, + "step": 333 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001994795031299773, + "loss": 1.3611, + "step": 334 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019947340301121268, + "loss": 1.3528, + "step": 335 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019946726744867336, + "loss": 1.298, + "step": 336 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019946109644454544, + "loss": 1.281, + "step": 337 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019945489000102777, + "loss": 1.4903, + "step": 338 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019944864812033178, + "loss": 1.5107, + "step": 339 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001994423708046816, + "loss": 1.2177, + "step": 340 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001994360580563138, + "loss": 1.304, + "step": 341 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001994297098774778, + "loss": 1.4084, + "step": 342 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019942332627043556, + "loss": 1.2874, + "step": 343 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001994169072374616, + "loss": 1.1817, + "step": 344 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019941045278084308, + "loss": 1.367, + "step": 345 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019940396290287987, + "loss": 1.3391, + "step": 346 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019939743760588442, + "loss": 1.3201, + "step": 347 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019939087689218175, + "loss": 1.3155, + "step": 348 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019938428076410954, + "loss": 1.3715, + "step": 349 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019937764922401806, + "loss": 1.4434, + "step": 350 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001993709822742703, + "loss": 1.3175, + "step": 351 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019936427991724167, + "loss": 1.3147, + "step": 352 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019935754215532042, + "loss": 1.2638, + "step": 353 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019935076899090724, + "loss": 1.3556, + "step": 354 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019934396042641555, + "loss": 1.2999, + "step": 355 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001993371164642713, + "loss": 1.3627, + "step": 356 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019933023710691315, + "loss": 1.2443, + "step": 357 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019932332235679225, + "loss": 1.2654, + "step": 358 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019931637221637242, + "loss": 1.353, + "step": 359 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019930938668813016, + "loss": 1.2648, + "step": 360 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019930236577455444, + "loss": 1.2313, + "step": 361 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019929530947814693, + "loss": 1.2778, + "step": 362 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019928821780142194, + "loss": 1.4433, + "step": 363 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019928109074690624, + "loss": 1.3975, + "step": 364 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001992739283171394, + "loss": 1.2093, + "step": 365 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001992667305146734, + "loss": 1.4877, + "step": 366 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019925949734207299, + "loss": 1.2254, + "step": 367 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001992522288019154, + "loss": 1.2159, + "step": 368 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001992449248967906, + "loss": 1.2056, + "step": 369 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019923758562930094, + "loss": 1.308, + "step": 370 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019923021100206164, + "loss": 1.2907, + "step": 371 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001992228010177003, + "loss": 1.3554, + "step": 372 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001992153556788572, + "loss": 1.3918, + "step": 373 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019920787498818528, + "loss": 1.2498, + "step": 374 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019920035894834993, + "loss": 1.2315, + "step": 375 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019919280756202932, + "loss": 1.3674, + "step": 376 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019918522083191403, + "loss": 1.2367, + "step": 377 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019917759876070736, + "loss": 1.262, + "step": 378 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019916994135112516, + "loss": 1.2442, + "step": 379 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019916224860589587, + "loss": 1.3386, + "step": 380 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019915452052776051, + "loss": 1.3748, + "step": 381 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019914675711947273, + "loss": 1.2694, + "step": 382 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019913895838379872, + "loss": 1.3321, + "step": 383 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019913112432351726, + "loss": 1.32, + "step": 384 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019912325494141978, + "loss": 1.2755, + "step": 385 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019911535024031026, + "loss": 1.4258, + "step": 386 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019910741022300519, + "loss": 1.2682, + "step": 387 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019909943489233376, + "loss": 1.2658, + "step": 388 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019909142425113767, + "loss": 1.2079, + "step": 389 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019908337830227123, + "loss": 1.2834, + "step": 390 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019907529704860137, + "loss": 1.2887, + "step": 391 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019906718049300749, + "loss": 1.2534, + "step": 392 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019905902863838167, + "loss": 1.2917, + "step": 393 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001990508414876285, + "loss": 1.3415, + "step": 394 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001990426190436652, + "loss": 1.3396, + "step": 395 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019903436130942157, + "loss": 1.2498, + "step": 396 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001990260682878399, + "loss": 1.3885, + "step": 397 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019901773998187513, + "loss": 1.1937, + "step": 398 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019900937639449477, + "loss": 1.3459, + "step": 399 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019900097752867886, + "loss": 1.3642, + "step": 400 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019899254338742007, + "loss": 1.3218, + "step": 401 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019898407397372355, + "loss": 1.3259, + "step": 402 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001989755692906071, + "loss": 1.369, + "step": 403 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001989670293411011, + "loss": 1.5343, + "step": 404 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001989584541282484, + "loss": 1.3461, + "step": 405 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019894984365510449, + "loss": 1.2003, + "step": 406 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019894119792473734, + "loss": 1.2929, + "step": 407 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019893251694022768, + "loss": 1.3947, + "step": 408 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019892380070466855, + "loss": 1.2045, + "step": 409 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001989150492211657, + "loss": 1.3784, + "step": 410 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019890626249283744, + "loss": 1.1913, + "step": 411 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019889744052281456, + "loss": 1.2795, + "step": 412 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019888858331424048, + "loss": 1.2899, + "step": 413 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019887969087027113, + "loss": 1.1906, + "step": 414 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019887076319407502, + "loss": 1.3078, + "step": 415 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019886180028883321, + "loss": 1.3758, + "step": 416 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019885280215773933, + "loss": 1.438, + "step": 417 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001988437688039995, + "loss": 1.246, + "step": 418 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019883470023083244, + "loss": 1.4632, + "step": 419 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019882559644146945, + "loss": 1.3554, + "step": 420 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019881645743915428, + "loss": 1.2644, + "step": 421 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019880728322714333, + "loss": 1.4274, + "step": 422 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001987980738087055, + "loss": 1.5054, + "step": 423 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019878882918712218, + "loss": 1.3347, + "step": 424 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019877954936568744, + "loss": 1.2321, + "step": 425 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019877023434770774, + "loss": 1.1806, + "step": 426 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001987608841365022, + "loss": 1.411, + "step": 427 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001987514987354024, + "loss": 1.2936, + "step": 428 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019874207814775253, + "loss": 1.1896, + "step": 429 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019873262237690923, + "loss": 1.3398, + "step": 430 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019872313142624176, + "loss": 1.3694, + "step": 431 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019871360529913186, + "loss": 1.2843, + "step": 432 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019870404399897383, + "loss": 1.3487, + "step": 433 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019869444752917452, + "loss": 1.3347, + "step": 434 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019868481589315325, + "loss": 1.4285, + "step": 435 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019867514909434193, + "loss": 1.1552, + "step": 436 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019866544713618493, + "loss": 1.4068, + "step": 437 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019865571002213928, + "loss": 1.2267, + "step": 438 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019864593775567436, + "loss": 1.3524, + "step": 439 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019863613034027224, + "loss": 1.1046, + "step": 440 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001986262877794274, + "loss": 1.3049, + "step": 441 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001986164100766469, + "loss": 1.3798, + "step": 442 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001986064972354503, + "loss": 1.2179, + "step": 443 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019859654925936967, + "loss": 1.312, + "step": 444 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019858656615194965, + "loss": 1.3714, + "step": 445 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019857654791674732, + "loss": 1.61, + "step": 446 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019856649455733232, + "loss": 1.3262, + "step": 447 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019855640607728683, + "loss": 1.4056, + "step": 448 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019854628248020553, + "loss": 1.2483, + "step": 449 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019853612376969558, + "loss": 1.3335, + "step": 450 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019852592994937663, + "loss": 1.2708, + "step": 451 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019851570102288096, + "loss": 1.2169, + "step": 452 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001985054369938532, + "loss": 1.4164, + "step": 453 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019849513786595067, + "loss": 1.2357, + "step": 454 + }, + { + "epoch": 0.08, + "learning_rate": 0.000198484803642843, + "loss": 1.3957, + "step": 455 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019847443432821244, + "loss": 1.2552, + "step": 456 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019846402992575377, + "loss": 1.324, + "step": 457 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019845359043917416, + "loss": 1.3609, + "step": 458 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001984431158721934, + "loss": 1.1403, + "step": 459 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001984326062285437, + "loss": 1.4419, + "step": 460 + }, + { + "epoch": 0.08, + "learning_rate": 0.00019842206151196977, + "loss": 1.3143, + "step": 461 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001984114817262289, + "loss": 1.4272, + "step": 462 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019840086687509078, + "loss": 1.3789, + "step": 463 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019839021696233764, + "loss": 1.3452, + "step": 464 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001983795319917642, + "loss": 1.2198, + "step": 465 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001983688119671776, + "loss": 1.3062, + "step": 466 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019835805689239768, + "loss": 1.3758, + "step": 467 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019834726677125648, + "loss": 1.2731, + "step": 468 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019833644160759874, + "loss": 1.482, + "step": 469 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019832558140528162, + "loss": 1.3052, + "step": 470 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019831468616817471, + "loss": 1.2674, + "step": 471 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019830375590016022, + "loss": 1.1693, + "step": 472 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019829279060513272, + "loss": 1.2817, + "step": 473 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019828179028699925, + "loss": 1.3387, + "step": 474 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019827075494967944, + "loss": 1.2878, + "step": 475 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019825968459710532, + "loss": 1.3464, + "step": 476 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001982485792332214, + "loss": 1.2953, + "step": 477 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001982374388619847, + "loss": 1.4282, + "step": 478 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001982262634873647, + "loss": 1.3335, + "step": 479 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019821505311334333, + "loss": 1.2368, + "step": 480 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019820380774391495, + "loss": 1.2242, + "step": 481 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001981925273830865, + "loss": 1.3069, + "step": 482 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019818121203487736, + "loss": 1.1543, + "step": 483 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001981698617033193, + "loss": 1.4138, + "step": 484 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019815847639245658, + "loss": 1.2415, + "step": 485 + }, + { + "epoch": 0.09, + "learning_rate": 0.000198147056106346, + "loss": 1.2266, + "step": 486 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019813560084905676, + "loss": 1.2534, + "step": 487 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019812411062467052, + "loss": 1.3184, + "step": 488 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001981125854372814, + "loss": 1.3385, + "step": 489 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019810102529099597, + "loss": 1.2096, + "step": 490 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019808943018993332, + "loss": 1.2634, + "step": 491 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019807780013822492, + "loss": 1.3767, + "step": 492 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019806613514001474, + "loss": 1.4014, + "step": 493 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019805443519945915, + "loss": 1.3266, + "step": 494 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019804270032072703, + "loss": 1.3097, + "step": 495 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019803093050799967, + "loss": 1.3006, + "step": 496 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019801912576547083, + "loss": 1.3819, + "step": 497 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001980072860973467, + "loss": 1.373, + "step": 498 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019799541150784594, + "loss": 1.3092, + "step": 499 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001979835020011996, + "loss": 1.3512, + "step": 500 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001979715575816512, + "loss": 1.3284, + "step": 501 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019795957825345678, + "loss": 1.4825, + "step": 502 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019794756402088462, + "loss": 1.2665, + "step": 503 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001979355148882157, + "loss": 1.2601, + "step": 504 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019792343085974317, + "loss": 1.3904, + "step": 505 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001979113119397728, + "loss": 1.3562, + "step": 506 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019789915813262274, + "loss": 1.2801, + "step": 507 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019788696944262357, + "loss": 1.2923, + "step": 508 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019787474587411823, + "loss": 1.4362, + "step": 509 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001978624874314622, + "loss": 1.2619, + "step": 510 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019785019411902336, + "loss": 1.2265, + "step": 511 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019783786594118193, + "loss": 1.3166, + "step": 512 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019782550290233064, + "loss": 1.2106, + "step": 513 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019781310500687464, + "loss": 1.4332, + "step": 514 + }, + { + "epoch": 0.09, + "learning_rate": 0.00019780067225923142, + "loss": 1.3165, + "step": 515 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019778820466383098, + "loss": 1.2276, + "step": 516 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001977757022251157, + "loss": 1.331, + "step": 517 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019776316494754035, + "loss": 1.2565, + "step": 518 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019775059283557217, + "loss": 1.4366, + "step": 519 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001977379858936908, + "loss": 1.2748, + "step": 520 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001977253441263882, + "loss": 1.3397, + "step": 521 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019771266753816885, + "loss": 1.2467, + "step": 522 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019769995613354958, + "loss": 1.4536, + "step": 523 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001976872099170597, + "loss": 1.2744, + "step": 524 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001976744288932408, + "loss": 1.3113, + "step": 525 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019766161306664694, + "loss": 1.26, + "step": 526 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019764876244184466, + "loss": 1.2703, + "step": 527 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001976358770234127, + "loss": 1.3892, + "step": 528 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001976229568159424, + "loss": 1.2105, + "step": 529 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001976100018240374, + "loss": 1.2751, + "step": 530 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019759701205231372, + "loss": 1.3075, + "step": 531 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019758398750539984, + "loss": 1.1783, + "step": 532 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019757092818793655, + "loss": 1.2705, + "step": 533 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019755783410457705, + "loss": 1.3734, + "step": 534 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019754470525998704, + "loss": 1.2772, + "step": 535 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001975315416588444, + "loss": 1.1862, + "step": 536 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019751834330583956, + "loss": 1.0466, + "step": 537 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019750511020567532, + "loss": 1.342, + "step": 538 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019749184236306672, + "loss": 1.385, + "step": 539 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019747853978274135, + "loss": 1.3489, + "step": 540 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001974652024694391, + "loss": 1.2117, + "step": 541 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019745183042791227, + "loss": 1.3268, + "step": 542 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001974384236629254, + "loss": 1.2534, + "step": 543 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019742498217925565, + "loss": 1.2625, + "step": 544 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019741150598169232, + "loss": 1.2415, + "step": 545 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001973979950750372, + "loss": 1.1915, + "step": 546 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019738444946410436, + "loss": 1.1787, + "step": 547 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019737086915372037, + "loss": 1.288, + "step": 548 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019735725414872404, + "loss": 1.3228, + "step": 549 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001973436044539666, + "loss": 1.2734, + "step": 550 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019732992007431168, + "loss": 1.384, + "step": 551 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019731620101463512, + "loss": 1.341, + "step": 552 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019730244727982527, + "loss": 1.2446, + "step": 553 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019728865887478278, + "loss": 1.3869, + "step": 554 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019727483580442065, + "loss": 1.2327, + "step": 555 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001972609780736642, + "loss": 1.2164, + "step": 556 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019724708568745117, + "loss": 1.2732, + "step": 557 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019723315865073163, + "loss": 1.3267, + "step": 558 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019721919696846795, + "loss": 1.2916, + "step": 559 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001972052006456349, + "loss": 1.4113, + "step": 560 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001971911696872195, + "loss": 1.3024, + "step": 561 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019717710409822127, + "loss": 1.1392, + "step": 562 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019716300388365193, + "loss": 1.2854, + "step": 563 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019714886904853554, + "loss": 1.2892, + "step": 564 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019713469959790864, + "loss": 1.3296, + "step": 565 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019712049553681992, + "loss": 1.3822, + "step": 566 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019710625687033054, + "loss": 1.1701, + "step": 567 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019709198360351392, + "loss": 1.3691, + "step": 568 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001970776757414558, + "loss": 1.2264, + "step": 569 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019706333328925433, + "loss": 1.256, + "step": 570 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019704895625201986, + "loss": 1.4763, + "step": 571 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019703454463487521, + "loss": 1.1871, + "step": 572 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019702009844295537, + "loss": 1.2319, + "step": 573 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019700561768140775, + "loss": 1.3715, + "step": 574 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019699110235539205, + "loss": 1.246, + "step": 575 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019697655247008028, + "loss": 1.2041, + "step": 576 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019696196803065677, + "loss": 1.4268, + "step": 577 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019694734904231818, + "loss": 1.3034, + "step": 578 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019693269551027346, + "loss": 1.2512, + "step": 579 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019691800743974384, + "loss": 1.3925, + "step": 580 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019690328483596288, + "loss": 1.3427, + "step": 581 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001968885277041765, + "loss": 1.2747, + "step": 582 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019687373604964284, + "loss": 1.2887, + "step": 583 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001968589098776324, + "loss": 1.2217, + "step": 584 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001968440491934279, + "loss": 1.3337, + "step": 585 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019682915400232447, + "loss": 1.1409, + "step": 586 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019681422430962943, + "loss": 1.3202, + "step": 587 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019679926012066247, + "loss": 1.1211, + "step": 588 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019678426144075558, + "loss": 1.3302, + "step": 589 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001967692282752529, + "loss": 1.3204, + "step": 590 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019675416062951104, + "loss": 1.1841, + "step": 591 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001967390585088988, + "loss": 1.2738, + "step": 592 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019672392191879724, + "loss": 1.2795, + "step": 593 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001967087508645998, + "loss": 1.2633, + "step": 594 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019669354535171207, + "loss": 1.2653, + "step": 595 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019667830538555204, + "loss": 1.3595, + "step": 596 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019666303097154992, + "loss": 1.1885, + "step": 597 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019664772211514818, + "loss": 1.2292, + "step": 598 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001966323788218016, + "loss": 1.3721, + "step": 599 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001966170010969772, + "loss": 1.3337, + "step": 600 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001966015889461543, + "loss": 1.2437, + "step": 601 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001965861423748244, + "loss": 1.3784, + "step": 602 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019657066138849142, + "loss": 1.4655, + "step": 603 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001965551459926714, + "loss": 1.2058, + "step": 604 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001965395961928927, + "loss": 1.3213, + "step": 605 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019652401199469596, + "loss": 1.3299, + "step": 606 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019650839340363402, + "loss": 1.3567, + "step": 607 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019649274042527203, + "loss": 1.1036, + "step": 608 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001964770530651873, + "loss": 1.206, + "step": 609 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019646133132896957, + "loss": 1.1957, + "step": 610 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001964455752222206, + "loss": 1.2791, + "step": 611 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001964297847505546, + "loss": 1.1006, + "step": 612 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001964139599195979, + "loss": 1.449, + "step": 613 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001963981007349891, + "loss": 1.3149, + "step": 614 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019638220720237908, + "loss": 1.1924, + "step": 615 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001963662793274309, + "loss": 1.2912, + "step": 616 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001963503171158199, + "loss": 1.2306, + "step": 617 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019633432057323367, + "loss": 1.2028, + "step": 618 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019631828970537196, + "loss": 1.275, + "step": 619 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001963022245179468, + "loss": 1.1875, + "step": 620 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019628612501668244, + "loss": 1.2931, + "step": 621 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001962699912073154, + "loss": 1.3897, + "step": 622 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019625382309559433, + "loss": 1.3887, + "step": 623 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001962376206872802, + "loss": 1.342, + "step": 624 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001962213839881461, + "loss": 1.1804, + "step": 625 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019620511300397748, + "loss": 1.2132, + "step": 626 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019618880774057186, + "loss": 1.2959, + "step": 627 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019617246820373904, + "loss": 1.1326, + "step": 628 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019615609439930102, + "loss": 1.2853, + "step": 629 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019613968633309204, + "loss": 1.2793, + "step": 630 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019612324401095855, + "loss": 1.2479, + "step": 631 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019610676743875912, + "loss": 1.1642, + "step": 632 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001960902566223646, + "loss": 1.2391, + "step": 633 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019607371156765805, + "loss": 1.1625, + "step": 634 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001960571322805347, + "loss": 1.3174, + "step": 635 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019604051876690197, + "loss": 1.192, + "step": 636 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001960238710326795, + "loss": 1.135, + "step": 637 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019600718908379912, + "loss": 1.2268, + "step": 638 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019599047292620482, + "loss": 1.2769, + "step": 639 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001959737225658528, + "loss": 1.3075, + "step": 640 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019595693800871148, + "loss": 1.3346, + "step": 641 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019594011926076145, + "loss": 1.191, + "step": 642 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001959232663279954, + "loss": 1.184, + "step": 643 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019590637921641834, + "loss": 1.4132, + "step": 644 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001958894579320473, + "loss": 1.2753, + "step": 645 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019587250248091165, + "loss": 1.4092, + "step": 646 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019585551286905282, + "loss": 1.2548, + "step": 647 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019583848910252445, + "loss": 1.3317, + "step": 648 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019582143118739234, + "loss": 1.3346, + "step": 649 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001958043391297345, + "loss": 1.2209, + "step": 650 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019578721293564104, + "loss": 1.3885, + "step": 651 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019577005261121427, + "loss": 1.32, + "step": 652 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019575285816256867, + "loss": 1.1568, + "step": 653 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019573562959583084, + "loss": 1.2728, + "step": 654 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019571836691713957, + "loss": 1.1899, + "step": 655 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019570107013264581, + "loss": 1.373, + "step": 656 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019568373924851265, + "loss": 1.2485, + "step": 657 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019566637427091533, + "loss": 1.2093, + "step": 658 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019564897520604117, + "loss": 1.2889, + "step": 659 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001956315420600898, + "loss": 1.3359, + "step": 660 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019561407483927284, + "loss": 1.3748, + "step": 661 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019559657354981412, + "loss": 1.1845, + "step": 662 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019557903819794956, + "loss": 1.1793, + "step": 663 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019556146878992728, + "loss": 1.3056, + "step": 664 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019554386533200755, + "loss": 1.201, + "step": 665 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019552622783046268, + "loss": 1.2386, + "step": 666 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019550855629157717, + "loss": 1.2733, + "step": 667 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019549085072164765, + "loss": 1.4233, + "step": 668 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019547311112698282, + "loss": 1.198, + "step": 669 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019545533751390363, + "loss": 1.2365, + "step": 670 + }, + { + "epoch": 0.12, + "learning_rate": 0.000195437529888743, + "loss": 1.3078, + "step": 671 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019541968825784607, + "loss": 1.1676, + "step": 672 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019540181262757005, + "loss": 1.1725, + "step": 673 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019538390300428426, + "loss": 1.2591, + "step": 674 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019536595939437018, + "loss": 1.2647, + "step": 675 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019534798180422138, + "loss": 1.3211, + "step": 676 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001953299702402435, + "loss": 1.2914, + "step": 677 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019531192470885434, + "loss": 1.3499, + "step": 678 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019529384521648377, + "loss": 1.2062, + "step": 679 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019527573176957375, + "loss": 1.2547, + "step": 680 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019525758437457835, + "loss": 1.1982, + "step": 681 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001952394030379638, + "loss": 1.3225, + "step": 682 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019522118776620827, + "loss": 1.252, + "step": 683 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019520293856580223, + "loss": 1.3186, + "step": 684 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019518465544324806, + "loss": 1.4751, + "step": 685 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001951663384050603, + "loss": 1.217, + "step": 686 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019514798745776557, + "loss": 1.2782, + "step": 687 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019512960260790256, + "loss": 1.1716, + "step": 688 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001951111838620221, + "loss": 1.2439, + "step": 689 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019509273122668698, + "loss": 1.234, + "step": 690 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001950742447084722, + "loss": 1.3358, + "step": 691 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001950557243139647, + "loss": 1.3954, + "step": 692 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019503717004976364, + "loss": 1.2893, + "step": 693 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019501858192248007, + "loss": 1.1996, + "step": 694 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001949999599387373, + "loss": 1.2046, + "step": 695 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019498130410517054, + "loss": 1.2776, + "step": 696 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019496261442842713, + "loss": 1.2805, + "step": 697 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001949438909151665, + "loss": 1.3144, + "step": 698 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019492513357206005, + "loss": 1.3624, + "step": 699 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019490634240579134, + "loss": 1.3143, + "step": 700 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019488751742305588, + "loss": 1.1949, + "step": 701 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001948686586305613, + "loss": 1.2804, + "step": 702 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019484976603502727, + "loss": 1.3076, + "step": 703 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019483083964318544, + "loss": 1.2584, + "step": 704 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001948118794617796, + "loss": 1.2261, + "step": 705 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019479288549756555, + "loss": 1.2492, + "step": 706 + }, + { + "epoch": 0.13, + "learning_rate": 0.000194773857757311, + "loss": 1.2525, + "step": 707 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001947547962477959, + "loss": 1.2207, + "step": 708 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001947357009758121, + "loss": 1.2, + "step": 709 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001947165719481635, + "loss": 1.306, + "step": 710 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019469740917166608, + "loss": 1.3034, + "step": 711 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019467821265314777, + "loss": 1.3047, + "step": 712 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001946589823994486, + "loss": 1.2443, + "step": 713 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019463971841742056, + "loss": 1.4226, + "step": 714 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019462042071392762, + "loss": 1.3659, + "step": 715 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019460108929584592, + "loss": 1.2171, + "step": 716 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019458172417006347, + "loss": 1.2141, + "step": 717 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019456232534348033, + "loss": 1.2808, + "step": 718 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001945428928230086, + "loss": 1.3332, + "step": 719 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019452342661557237, + "loss": 1.3255, + "step": 720 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001945039267281077, + "loss": 1.3136, + "step": 721 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001944843931675626, + "loss": 1.2888, + "step": 722 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001944648259408973, + "loss": 1.2373, + "step": 723 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001944452250550838, + "loss": 1.228, + "step": 724 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019442559051710618, + "loss": 1.1468, + "step": 725 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001944059223339605, + "loss": 1.3023, + "step": 726 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001943862205126548, + "loss": 1.4126, + "step": 727 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019436648506020916, + "loss": 1.3094, + "step": 728 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019434671598365553, + "loss": 1.241, + "step": 729 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019432691329003795, + "loss": 1.2634, + "step": 730 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019430707698641242, + "loss": 1.2991, + "step": 731 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019428720707984686, + "loss": 1.2839, + "step": 732 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019426730357742123, + "loss": 1.3583, + "step": 733 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001942473664862274, + "loss": 1.1762, + "step": 734 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019422739581336922, + "loss": 1.0533, + "step": 735 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019420739156596258, + "loss": 1.1889, + "step": 736 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019418735375113523, + "loss": 1.3905, + "step": 737 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019416728237602694, + "loss": 1.3503, + "step": 738 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001941471774477894, + "loss": 1.2366, + "step": 739 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019412703897358634, + "loss": 1.2518, + "step": 740 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001941068669605933, + "loss": 1.2818, + "step": 741 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019408666141599792, + "loss": 1.3562, + "step": 742 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019406642234699967, + "loss": 1.3772, + "step": 743 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019404614976081008, + "loss": 1.3198, + "step": 744 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019402584366465248, + "loss": 1.3195, + "step": 745 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019400550406576227, + "loss": 1.415, + "step": 746 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019398513097138676, + "loss": 1.3455, + "step": 747 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019396472438878507, + "loss": 1.3489, + "step": 748 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019394428432522847, + "loss": 1.1975, + "step": 749 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019392381078799998, + "loss": 1.0715, + "step": 750 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019390330378439458, + "loss": 1.1901, + "step": 751 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001938827633217193, + "loss": 1.2514, + "step": 752 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019386218940729288, + "loss": 1.1846, + "step": 753 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001938415820484462, + "loss": 1.2421, + "step": 754 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019382094125252187, + "loss": 1.1659, + "step": 755 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019380026702687454, + "loss": 1.2843, + "step": 756 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019377955937887071, + "loss": 1.2954, + "step": 757 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019375881831588886, + "loss": 1.3021, + "step": 758 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019373804384531927, + "loss": 1.2438, + "step": 759 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019371723597456415, + "loss": 1.2876, + "step": 760 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019369639471103768, + "loss": 1.2641, + "step": 761 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019367552006216592, + "loss": 1.2703, + "step": 762 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019365461203538675, + "loss": 1.2585, + "step": 763 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019363367063815004, + "loss": 1.1882, + "step": 764 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019361269587791745, + "loss": 1.3171, + "step": 765 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019359168776216263, + "loss": 1.2472, + "step": 766 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019357064629837103, + "loss": 1.1021, + "step": 767 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019354957149404008, + "loss": 1.3433, + "step": 768 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019352846335667895, + "loss": 1.3782, + "step": 769 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019350732189380882, + "loss": 1.1554, + "step": 770 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001934861471129627, + "loss": 1.2893, + "step": 771 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001934649390216854, + "loss": 1.118, + "step": 772 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019344369762753374, + "loss": 1.3013, + "step": 773 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019342242293807628, + "loss": 1.3452, + "step": 774 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001934011149608935, + "loss": 1.4487, + "step": 775 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019337977370357772, + "loss": 1.3262, + "step": 776 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019335839917373316, + "loss": 1.2729, + "step": 777 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019333699137897585, + "loss": 1.2489, + "step": 778 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019331555032693366, + "loss": 1.3548, + "step": 779 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001932940760252464, + "loss": 1.3048, + "step": 780 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019327256848156564, + "loss": 1.2581, + "step": 781 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001932510277035548, + "loss": 1.1131, + "step": 782 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001932294536988892, + "loss": 1.3438, + "step": 783 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019320784647525595, + "loss": 1.3383, + "step": 784 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019318620604035394, + "loss": 1.3406, + "step": 785 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019316453240189406, + "loss": 1.4445, + "step": 786 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001931428255675989, + "loss": 1.2059, + "step": 787 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001931210855452029, + "loss": 1.1649, + "step": 788 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019309931234245232, + "loss": 1.2059, + "step": 789 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019307750596710528, + "loss": 1.2254, + "step": 790 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001930556664269317, + "loss": 1.3368, + "step": 791 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019303379372971334, + "loss": 1.277, + "step": 792 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001930118878832437, + "loss": 1.2679, + "step": 793 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019298994889532816, + "loss": 1.301, + "step": 794 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001929679767737839, + "loss": 1.1645, + "step": 795 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001929459715264399, + "loss": 1.2618, + "step": 796 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019292393316113694, + "loss": 1.1408, + "step": 797 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019290186168572756, + "loss": 1.3571, + "step": 798 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019287975710807618, + "loss": 1.3265, + "step": 799 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019285761943605898, + "loss": 1.305, + "step": 800 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019283544867756389, + "loss": 1.2608, + "step": 801 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001928132448404907, + "loss": 1.2952, + "step": 802 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019279100793275088, + "loss": 1.3283, + "step": 803 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019276873796226785, + "loss": 1.3343, + "step": 804 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019274643493697667, + "loss": 1.2268, + "step": 805 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001927240988648242, + "loss": 1.1538, + "step": 806 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019270172975376912, + "loss": 1.2076, + "step": 807 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019267932761178185, + "loss": 1.247, + "step": 808 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001926568924468446, + "loss": 1.2655, + "step": 809 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019263442426695134, + "loss": 1.3585, + "step": 810 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019261192308010777, + "loss": 1.3747, + "step": 811 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001925893888943314, + "loss": 1.2516, + "step": 812 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001925668217176515, + "loss": 1.2097, + "step": 813 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019254422155810902, + "loss": 1.2374, + "step": 814 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019252158842375676, + "loss": 1.3137, + "step": 815 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001924989223226592, + "loss": 1.2894, + "step": 816 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001924762232628926, + "loss": 1.1627, + "step": 817 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019245349125254497, + "loss": 1.2759, + "step": 818 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019243072629971603, + "loss": 1.2083, + "step": 819 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019240792841251724, + "loss": 1.2526, + "step": 820 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019238509759907183, + "loss": 1.2212, + "step": 821 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019236223386751475, + "loss": 1.1826, + "step": 822 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019233933722599263, + "loss": 1.367, + "step": 823 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019231640768266388, + "loss": 1.3257, + "step": 824 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019229344524569864, + "loss": 1.3204, + "step": 825 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019227044992327873, + "loss": 1.2533, + "step": 826 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019224742172359774, + "loss": 1.2269, + "step": 827 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019222436065486088, + "loss": 1.2668, + "step": 828 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001922012667252852, + "loss": 1.2467, + "step": 829 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019217813994309933, + "loss": 1.2551, + "step": 830 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019215498031654375, + "loss": 1.1791, + "step": 831 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019213178785387048, + "loss": 1.3202, + "step": 832 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019210856256334337, + "loss": 1.2334, + "step": 833 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001920853044532379, + "loss": 1.3027, + "step": 834 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019206201353184123, + "loss": 1.2659, + "step": 835 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001920386898074523, + "loss": 1.383, + "step": 836 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019201533328838167, + "loss": 1.2923, + "step": 837 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019199194398295157, + "loss": 1.2057, + "step": 838 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019196852189949594, + "loss": 1.3711, + "step": 839 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019194506704636047, + "loss": 1.2203, + "step": 840 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019192157943190234, + "loss": 1.2881, + "step": 841 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019189805906449057, + "loss": 1.1851, + "step": 842 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019187450595250586, + "loss": 1.2847, + "step": 843 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019185092010434043, + "loss": 1.2685, + "step": 844 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019182730152839827, + "loss": 1.2061, + "step": 845 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019180365023309504, + "loss": 1.3032, + "step": 846 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019177996622685799, + "loss": 1.2469, + "step": 847 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019175624951812606, + "loss": 1.1961, + "step": 848 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019173250011534994, + "loss": 1.3513, + "step": 849 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019170871802699177, + "loss": 1.2342, + "step": 850 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001916849032615255, + "loss": 1.2685, + "step": 851 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019166105582743662, + "loss": 1.2194, + "step": 852 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019163717573322236, + "loss": 1.1521, + "step": 853 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001916132629873915, + "loss": 1.2553, + "step": 854 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019158931759846447, + "loss": 1.3083, + "step": 855 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019156533957497343, + "loss": 1.3656, + "step": 856 + }, + { + "epoch": 0.16, + "learning_rate": 0.000191541328925462, + "loss": 1.2475, + "step": 857 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019151728565848557, + "loss": 1.2832, + "step": 858 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019149320978261106, + "loss": 1.3392, + "step": 859 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019146910130641705, + "loss": 1.2165, + "step": 860 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019144496023849373, + "loss": 1.3949, + "step": 861 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019142078658744292, + "loss": 1.2742, + "step": 862 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019139658036187803, + "loss": 1.3465, + "step": 863 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019137234157042405, + "loss": 1.2548, + "step": 864 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019134807022171764, + "loss": 1.4378, + "step": 865 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019132376632440695, + "loss": 1.3388, + "step": 866 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019129942988715192, + "loss": 1.3212, + "step": 867 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019127506091862388, + "loss": 1.1798, + "step": 868 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019125065942750586, + "loss": 1.3099, + "step": 869 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019122622542249248, + "loss": 1.213, + "step": 870 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019120175891228986, + "loss": 1.1397, + "step": 871 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001911772599056158, + "loss": 1.2269, + "step": 872 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019115272841119967, + "loss": 1.3249, + "step": 873 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001911281644377823, + "loss": 1.1296, + "step": 874 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001911035679941163, + "loss": 1.207, + "step": 875 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019107893908896562, + "loss": 1.3753, + "step": 876 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019105427773110593, + "loss": 1.1528, + "step": 877 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001910295839293244, + "loss": 1.3237, + "step": 878 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019100485769241984, + "loss": 1.2555, + "step": 879 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001909800990292025, + "loss": 1.2762, + "step": 880 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019095530794849426, + "loss": 1.2333, + "step": 881 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001909304844591285, + "loss": 1.2961, + "step": 882 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019090562856995024, + "loss": 1.2277, + "step": 883 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019088074028981595, + "loss": 1.1122, + "step": 884 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019085581962759366, + "loss": 1.1174, + "step": 885 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019083086659216295, + "loss": 1.1855, + "step": 886 + }, + { + "epoch": 0.16, + "learning_rate": 0.000190805881192415, + "loss": 1.271, + "step": 887 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019078086343725237, + "loss": 1.3975, + "step": 888 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001907558133355893, + "loss": 1.3431, + "step": 889 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019073073089635148, + "loss": 1.3291, + "step": 890 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019070561612847612, + "loss": 1.1924, + "step": 891 + }, + { + "epoch": 0.16, + "learning_rate": 0.000190680469040912, + "loss": 1.2245, + "step": 892 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019065528964261935, + "loss": 1.113, + "step": 893 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019063007794256994, + "loss": 1.1071, + "step": 894 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019060483394974707, + "loss": 1.3126, + "step": 895 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019057955767314556, + "loss": 1.184, + "step": 896 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019055424912177164, + "loss": 1.1464, + "step": 897 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019052890830464317, + "loss": 1.2027, + "step": 898 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001905035352307894, + "loss": 1.237, + "step": 899 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019047812990925108, + "loss": 1.3356, + "step": 900 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019045269234908057, + "loss": 1.272, + "step": 901 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019042722255934155, + "loss": 1.1261, + "step": 902 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019040172054910932, + "loss": 1.3207, + "step": 903 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001903761863274706, + "loss": 1.3079, + "step": 904 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019035061990352355, + "loss": 1.2828, + "step": 905 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019032502128637788, + "loss": 1.2624, + "step": 906 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019029939048515474, + "loss": 1.2862, + "step": 907 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019027372750898673, + "loss": 1.2695, + "step": 908 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019024803236701793, + "loss": 1.2618, + "step": 909 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001902223050684039, + "loss": 1.3428, + "step": 910 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019019654562231163, + "loss": 1.2756, + "step": 911 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001901707540379196, + "loss": 1.3188, + "step": 912 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019014493032441766, + "loss": 1.2962, + "step": 913 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019011907449100718, + "loss": 1.1797, + "step": 914 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019009318654690098, + "loss": 1.4172, + "step": 915 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001900672665013233, + "loss": 1.2961, + "step": 916 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019004131436350978, + "loss": 1.1933, + "step": 917 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019001533014270755, + "loss": 1.365, + "step": 918 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001899893138481752, + "loss": 1.1964, + "step": 919 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001899632654891826, + "loss": 1.1528, + "step": 920 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018993718507501125, + "loss": 1.3675, + "step": 921 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018991107261495394, + "loss": 1.2004, + "step": 922 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018988492811831487, + "loss": 1.1461, + "step": 923 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018985875159440973, + "loss": 1.2243, + "step": 924 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018983254305256558, + "loss": 1.4954, + "step": 925 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001898063025021209, + "loss": 1.3015, + "step": 926 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018978002995242548, + "loss": 1.2666, + "step": 927 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018975372541284074, + "loss": 1.2546, + "step": 928 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001897273888927393, + "loss": 1.1852, + "step": 929 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018970102040150516, + "loss": 1.2762, + "step": 930 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001896746199485339, + "loss": 1.2589, + "step": 931 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018964818754323227, + "loss": 1.3692, + "step": 932 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018962172319501863, + "loss": 1.1857, + "step": 933 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001895952269133225, + "loss": 1.2242, + "step": 934 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018956869870758488, + "loss": 1.3045, + "step": 935 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001895421385872582, + "loss": 1.2754, + "step": 936 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001895155465618062, + "loss": 1.2937, + "step": 937 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001894889226407039, + "loss": 1.238, + "step": 938 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001894622668334379, + "loss": 1.213, + "step": 939 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018943557914950597, + "loss": 1.2474, + "step": 940 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018940885959841736, + "loss": 1.0714, + "step": 941 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018938210818969254, + "loss": 1.2348, + "step": 942 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018935532493286349, + "loss": 1.1682, + "step": 943 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018932850983747343, + "loss": 1.2185, + "step": 944 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018930166291307695, + "loss": 1.2796, + "step": 945 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018927478416924003, + "loss": 1.2764, + "step": 946 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018924787361553988, + "loss": 1.1953, + "step": 947 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018922093126156516, + "loss": 1.129, + "step": 948 + }, + { + "epoch": 0.17, + "learning_rate": 0.00018919395711691577, + "loss": 1.1844, + "step": 949 + }, + { + "epoch": 0.18, + "learning_rate": 0.000189166951191203, + "loss": 1.3591, + "step": 950 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018913991349404948, + "loss": 1.2302, + "step": 951 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018911284403508904, + "loss": 1.2529, + "step": 952 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018908574282396697, + "loss": 1.2677, + "step": 953 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018905860987033978, + "loss": 1.2345, + "step": 954 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018903144518387537, + "loss": 1.1932, + "step": 955 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018900424877425287, + "loss": 1.2449, + "step": 956 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018897702065116275, + "loss": 1.3848, + "step": 957 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001889497608243067, + "loss": 1.1801, + "step": 958 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018892246930339793, + "loss": 1.248, + "step": 959 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018889514609816068, + "loss": 1.2639, + "step": 960 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018886779121833063, + "loss": 1.3458, + "step": 961 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018884040467365468, + "loss": 1.2077, + "step": 962 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001888129864738911, + "loss": 1.3178, + "step": 963 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018878553662880933, + "loss": 1.2055, + "step": 964 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018875805514819013, + "loss": 1.17, + "step": 965 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018873054204182559, + "loss": 1.1826, + "step": 966 + }, + { + "epoch": 0.18, + "learning_rate": 0.000188702997319519, + "loss": 1.3194, + "step": 967 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001886754209910849, + "loss": 1.1484, + "step": 968 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018864781306634915, + "loss": 1.2449, + "step": 969 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018862017355514884, + "loss": 1.252, + "step": 970 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018859250246733232, + "loss": 1.3488, + "step": 971 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001885647998127592, + "loss": 1.2597, + "step": 972 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018853706560130026, + "loss": 1.2552, + "step": 973 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018850929984283766, + "loss": 1.2241, + "step": 974 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018848150254726473, + "loss": 1.1664, + "step": 975 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018845367372448602, + "loss": 1.2568, + "step": 976 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018842581338441731, + "loss": 1.1739, + "step": 977 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018839792153698567, + "loss": 1.3899, + "step": 978 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018836999819212932, + "loss": 1.3008, + "step": 979 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018834204335979777, + "loss": 1.2617, + "step": 980 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018831405704995173, + "loss": 1.37, + "step": 981 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018828603927256309, + "loss": 1.3474, + "step": 982 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018825799003761498, + "loss": 1.2575, + "step": 983 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018822990935510173, + "loss": 1.4526, + "step": 984 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018820179723502893, + "loss": 1.2221, + "step": 985 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018817365368741328, + "loss": 1.2262, + "step": 986 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018814547872228273, + "loss": 1.267, + "step": 987 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001881172723496764, + "loss": 1.1593, + "step": 988 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018808903457964468, + "loss": 1.1345, + "step": 989 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018806076542224903, + "loss": 1.2518, + "step": 990 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018803246488756215, + "loss": 1.2693, + "step": 991 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018800413298566794, + "loss": 1.3409, + "step": 992 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018797576972666145, + "loss": 1.2885, + "step": 993 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001879473751206489, + "loss": 1.2941, + "step": 994 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018791894917774774, + "loss": 1.2552, + "step": 995 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018789049190808644, + "loss": 1.2884, + "step": 996 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018786200332180476, + "loss": 1.2446, + "step": 997 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018783348342905364, + "loss": 1.2551, + "step": 998 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018780493223999507, + "loss": 1.383, + "step": 999 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018777634976480226, + "loss": 1.2988, + "step": 1000 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018774773601365951, + "loss": 1.2892, + "step": 1001 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018771909099676238, + "loss": 1.3437, + "step": 1002 + }, + { + "epoch": 0.18, + "learning_rate": 0.00018769041472431737, + "loss": 1.2857, + "step": 1003 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018766170720654237, + "loss": 1.1264, + "step": 1004 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018763296845366621, + "loss": 1.279, + "step": 1005 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001876041984759289, + "loss": 1.4246, + "step": 1006 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018757539728358162, + "loss": 1.2674, + "step": 1007 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018754656488688664, + "loss": 1.3967, + "step": 1008 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018751770129611732, + "loss": 1.2218, + "step": 1009 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018748880652155818, + "loss": 1.2084, + "step": 1010 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018745988057350486, + "loss": 1.2892, + "step": 1011 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018743092346226403, + "loss": 1.2815, + "step": 1012 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018740193519815355, + "loss": 1.3005, + "step": 1013 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018737291579150239, + "loss": 1.2216, + "step": 1014 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018734386525265045, + "loss": 1.2859, + "step": 1015 + }, + { + "epoch": 0.19, + "learning_rate": 0.000187314783591949, + "loss": 1.2176, + "step": 1016 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018728567081976013, + "loss": 1.2058, + "step": 1017 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018725652694645716, + "loss": 1.231, + "step": 1018 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001872273519824245, + "loss": 1.3593, + "step": 1019 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018719814593805753, + "loss": 1.3171, + "step": 1020 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018716890882376284, + "loss": 1.2533, + "step": 1021 + }, + { + "epoch": 0.19, + "learning_rate": 0.000187139640649958, + "loss": 1.2249, + "step": 1022 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018711034142707172, + "loss": 1.1695, + "step": 1023 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001870810111655436, + "loss": 1.1618, + "step": 1024 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018705164987582458, + "loss": 1.2022, + "step": 1025 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001870222575683764, + "loss": 1.0753, + "step": 1026 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018699283425367195, + "loss": 1.1242, + "step": 1027 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018696337994219525, + "loss": 1.1503, + "step": 1028 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018693389464444123, + "loss": 1.2885, + "step": 1029 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001869043783709159, + "loss": 1.3526, + "step": 1030 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018687483113213635, + "loss": 1.2479, + "step": 1031 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001868452529386307, + "loss": 1.3107, + "step": 1032 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018681564380093802, + "loss": 1.3351, + "step": 1033 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018678600372960854, + "loss": 1.2709, + "step": 1034 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018675633273520338, + "loss": 1.148, + "step": 1035 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018672663082829473, + "loss": 1.2832, + "step": 1036 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018669689801946584, + "loss": 1.3468, + "step": 1037 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001866671343193109, + "loss": 1.0989, + "step": 1038 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001866373397384351, + "loss": 1.3311, + "step": 1039 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018660751428745473, + "loss": 1.2225, + "step": 1040 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018657765797699697, + "loss": 1.2158, + "step": 1041 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018654777081770013, + "loss": 1.3245, + "step": 1042 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001865178528202133, + "loss": 1.2162, + "step": 1043 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018648790399519678, + "loss": 1.3197, + "step": 1044 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018645792435332173, + "loss": 1.1523, + "step": 1045 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001864279139052703, + "loss": 1.2849, + "step": 1046 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018639787266173566, + "loss": 1.2873, + "step": 1047 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018636780063342192, + "loss": 1.2718, + "step": 1048 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018633769783104416, + "loss": 1.226, + "step": 1049 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018630756426532842, + "loss": 1.2631, + "step": 1050 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018627739994701175, + "loss": 1.1056, + "step": 1051 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018624720488684212, + "loss": 1.3471, + "step": 1052 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018621697909557844, + "loss": 1.1636, + "step": 1053 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018618672258399058, + "loss": 1.2094, + "step": 1054 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018615643536285933, + "loss": 1.2115, + "step": 1055 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001861261174429765, + "loss": 1.3, + "step": 1056 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018609576883514483, + "loss": 1.088, + "step": 1057 + }, + { + "epoch": 0.19, + "learning_rate": 0.00018606538955017788, + "loss": 1.1683, + "step": 1058 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018603497959890025, + "loss": 1.2028, + "step": 1059 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001860045389921474, + "loss": 1.2272, + "step": 1060 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001859740677407658, + "loss": 1.1537, + "step": 1061 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018594356585561276, + "loss": 1.2668, + "step": 1062 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018591303334755655, + "loss": 1.1853, + "step": 1063 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018588247022747632, + "loss": 1.182, + "step": 1064 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001858518765062621, + "loss": 1.1811, + "step": 1065 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001858212521948149, + "loss": 1.3762, + "step": 1066 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018579059730404663, + "loss": 1.2155, + "step": 1067 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018575991184488, + "loss": 1.1876, + "step": 1068 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018572919582824867, + "loss": 1.1471, + "step": 1069 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018569844926509728, + "loss": 1.2537, + "step": 1070 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018566767216638116, + "loss": 1.1901, + "step": 1071 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018563686454306665, + "loss": 1.2109, + "step": 1072 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018560602640613097, + "loss": 1.3883, + "step": 1073 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018557515776656218, + "loss": 1.2913, + "step": 1074 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018554425863535917, + "loss": 1.1764, + "step": 1075 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018551332902353175, + "loss": 1.1987, + "step": 1076 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018548236894210064, + "loss": 1.1131, + "step": 1077 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018545137840209725, + "loss": 1.2708, + "step": 1078 + }, + { + "epoch": 0.2, + "learning_rate": 0.000185420357414564, + "loss": 1.1919, + "step": 1079 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001853893059905541, + "loss": 1.218, + "step": 1080 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018535822414113165, + "loss": 1.1838, + "step": 1081 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018532711187737149, + "loss": 1.1812, + "step": 1082 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018529596921035936, + "loss": 1.301, + "step": 1083 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018526479615119188, + "loss": 1.0555, + "step": 1084 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001852335927109764, + "loss": 1.3771, + "step": 1085 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001852023589008312, + "loss": 1.235, + "step": 1086 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018517109473188527, + "loss": 1.1915, + "step": 1087 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018513980021527847, + "loss": 1.2075, + "step": 1088 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018510847536216154, + "loss": 1.1858, + "step": 1089 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001850771201836959, + "loss": 1.3074, + "step": 1090 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001850457346910539, + "loss": 1.1771, + "step": 1091 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001850143188954186, + "loss": 1.2522, + "step": 1092 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018498287280798389, + "loss": 1.212, + "step": 1093 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018495139643995447, + "loss": 1.3738, + "step": 1094 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001849198898025458, + "loss": 1.2066, + "step": 1095 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018488835290698412, + "loss": 1.2706, + "step": 1096 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018485678576450653, + "loss": 1.2891, + "step": 1097 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018482518838636077, + "loss": 1.2844, + "step": 1098 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001847935607838055, + "loss": 1.3723, + "step": 1099 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018476190296811003, + "loss": 1.2969, + "step": 1100 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018473021495055454, + "loss": 1.1631, + "step": 1101 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018469849674242986, + "loss": 1.2794, + "step": 1102 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018466674835503763, + "loss": 1.225, + "step": 1103 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001846349697996903, + "loss": 1.2541, + "step": 1104 + }, + { + "epoch": 0.2, + "learning_rate": 0.000184603161087711, + "loss": 1.2703, + "step": 1105 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018457132223043363, + "loss": 1.3169, + "step": 1106 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018453945323920276, + "loss": 1.0742, + "step": 1107 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018450755412537383, + "loss": 1.15, + "step": 1108 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018447562490031297, + "loss": 1.2721, + "step": 1109 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001844436655753969, + "loss": 1.2389, + "step": 1110 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018441167616201328, + "loss": 1.2704, + "step": 1111 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001843796566715603, + "loss": 1.243, + "step": 1112 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018434760711544707, + "loss": 1.1161, + "step": 1113 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001843155275050932, + "loss": 1.3412, + "step": 1114 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018428341785192915, + "loss": 1.368, + "step": 1115 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018425127816739604, + "loss": 1.2702, + "step": 1116 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001842191084629457, + "loss": 1.2876, + "step": 1117 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018418690875004058, + "loss": 1.3158, + "step": 1118 + }, + { + "epoch": 0.21, + "learning_rate": 0.000184154679040154, + "loss": 1.172, + "step": 1119 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001841224193447698, + "loss": 1.2542, + "step": 1120 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018409012967538253, + "loss": 1.2386, + "step": 1121 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018405781004349753, + "loss": 1.3701, + "step": 1122 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018402546046063068, + "loss": 1.2703, + "step": 1123 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018399308093830863, + "loss": 1.2645, + "step": 1124 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018396067148806865, + "loss": 1.2255, + "step": 1125 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001839282321214587, + "loss": 1.3434, + "step": 1126 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018389576285003732, + "loss": 1.2211, + "step": 1127 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018386326368537384, + "loss": 1.329, + "step": 1128 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018383073463904811, + "loss": 1.1497, + "step": 1129 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018379817572265075, + "loss": 1.2968, + "step": 1130 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018376558694778292, + "loss": 1.1448, + "step": 1131 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018373296832605647, + "loss": 1.3169, + "step": 1132 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018370031986909385, + "loss": 1.2524, + "step": 1133 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018366764158852825, + "loss": 1.2234, + "step": 1134 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001836349334960033, + "loss": 1.2797, + "step": 1135 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001836021956031734, + "loss": 1.2387, + "step": 1136 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018356942792170351, + "loss": 1.3, + "step": 1137 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018353663046326922, + "loss": 1.1838, + "step": 1138 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018350380323955674, + "loss": 1.243, + "step": 1139 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018347094626226283, + "loss": 1.3304, + "step": 1140 + }, + { + "epoch": 0.21, + "learning_rate": 0.000183438059543095, + "loss": 1.1256, + "step": 1141 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018340514309377114, + "loss": 1.3568, + "step": 1142 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018337219692601985, + "loss": 1.1598, + "step": 1143 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001833392210515804, + "loss": 1.3103, + "step": 1144 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018330621548220247, + "loss": 1.3106, + "step": 1145 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018327318022964644, + "loss": 1.1189, + "step": 1146 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018324011530568326, + "loss": 1.1275, + "step": 1147 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018320702072209438, + "loss": 1.3179, + "step": 1148 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018317389649067192, + "loss": 1.2611, + "step": 1149 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018314074262321846, + "loss": 1.1585, + "step": 1150 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018310755913154725, + "loss": 1.17, + "step": 1151 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018307434602748198, + "loss": 1.1732, + "step": 1152 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018304110332285694, + "loss": 1.2953, + "step": 1153 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018300783102951704, + "loss": 1.2144, + "step": 1154 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018297452915931763, + "loss": 1.1426, + "step": 1155 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001829411977241246, + "loss": 1.2511, + "step": 1156 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018290783673581445, + "loss": 1.2518, + "step": 1157 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018287444620627418, + "loss": 1.273, + "step": 1158 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001828410261474013, + "loss": 1.2094, + "step": 1159 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018280757657110382, + "loss": 1.2331, + "step": 1160 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001827740974893003, + "loss": 1.1651, + "step": 1161 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018274058891391986, + "loss": 1.1944, + "step": 1162 + }, + { + "epoch": 0.21, + "learning_rate": 0.000182707050856902, + "loss": 1.1929, + "step": 1163 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018267348333019686, + "loss": 1.2302, + "step": 1164 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018263988634576497, + "loss": 1.2613, + "step": 1165 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018260625991557748, + "loss": 1.2728, + "step": 1166 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001825726040516159, + "loss": 1.1563, + "step": 1167 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001825389187658723, + "loss": 1.1817, + "step": 1168 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018250520407034926, + "loss": 1.0805, + "step": 1169 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018247145997705975, + "loss": 1.3775, + "step": 1170 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018243768649802732, + "loss": 1.3108, + "step": 1171 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018240388364528586, + "loss": 1.1651, + "step": 1172 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018237005143087988, + "loss": 1.312, + "step": 1173 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018233618986686424, + "loss": 1.1943, + "step": 1174 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001823022989653043, + "loss": 1.2351, + "step": 1175 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018226837873827585, + "loss": 1.2237, + "step": 1176 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018223442919786516, + "loss": 1.1805, + "step": 1177 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001822004503561689, + "loss": 1.2117, + "step": 1178 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018216644222529427, + "loss": 1.1895, + "step": 1179 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001821324048173588, + "loss": 1.2199, + "step": 1180 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018209833814449052, + "loss": 1.3299, + "step": 1181 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018206424221882782, + "loss": 1.1756, + "step": 1182 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018203011705251963, + "loss": 1.214, + "step": 1183 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001819959626577252, + "loss": 1.2791, + "step": 1184 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018196177904661415, + "loss": 1.343, + "step": 1185 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001819275662313667, + "loss": 1.2274, + "step": 1186 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018189332422417325, + "loss": 1.1298, + "step": 1187 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018185905303723482, + "loss": 1.2298, + "step": 1188 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018182475268276263, + "loss": 1.3459, + "step": 1189 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018179042317297844, + "loss": 1.3471, + "step": 1190 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018175606452011425, + "loss": 1.1728, + "step": 1191 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018172167673641263, + "loss": 1.2208, + "step": 1192 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018168725983412637, + "loss": 1.1564, + "step": 1193 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018165281382551872, + "loss": 1.2529, + "step": 1194 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001816183387228633, + "loss": 1.2068, + "step": 1195 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018158383453844402, + "loss": 1.2711, + "step": 1196 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018154930128455523, + "loss": 1.2721, + "step": 1197 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018151473897350164, + "loss": 1.2419, + "step": 1198 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018148014761759827, + "loss": 1.3283, + "step": 1199 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001814455272291705, + "loss": 1.2221, + "step": 1200 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001814108778205541, + "loss": 1.341, + "step": 1201 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018137619940409506, + "loss": 1.3237, + "step": 1202 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018134149199214988, + "loss": 1.3324, + "step": 1203 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018130675559708525, + "loss": 1.1857, + "step": 1204 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018127199023127823, + "loss": 1.3635, + "step": 1205 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018123719590711624, + "loss": 1.28, + "step": 1206 + }, + { + "epoch": 0.22, + "learning_rate": 0.000181202372636997, + "loss": 1.2417, + "step": 1207 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018116752043332847, + "loss": 1.1998, + "step": 1208 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018113263930852902, + "loss": 1.2496, + "step": 1209 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001810977292750273, + "loss": 1.2237, + "step": 1210 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001810627903452622, + "loss": 1.2938, + "step": 1211 + }, + { + "epoch": 0.22, + "learning_rate": 0.000181027822531683, + "loss": 1.1901, + "step": 1212 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001809928258467492, + "loss": 1.2315, + "step": 1213 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018095780030293062, + "loss": 1.1738, + "step": 1214 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001809227459127074, + "loss": 1.1581, + "step": 1215 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018088766268856984, + "loss": 1.2853, + "step": 1216 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018085255064301858, + "loss": 1.2228, + "step": 1217 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001808174097885646, + "loss": 1.1582, + "step": 1218 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018078224013772903, + "loss": 1.3397, + "step": 1219 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018074704170304334, + "loss": 1.1083, + "step": 1220 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018071181449704927, + "loss": 1.2055, + "step": 1221 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018067655853229865, + "loss": 1.1628, + "step": 1222 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001806412738213538, + "loss": 1.1761, + "step": 1223 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018060596037678707, + "loss": 1.1952, + "step": 1224 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001805706182111812, + "loss": 1.2856, + "step": 1225 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018053524733712908, + "loss": 1.0367, + "step": 1226 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018049984776723384, + "loss": 1.1357, + "step": 1227 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018046441951410883, + "loss": 1.1493, + "step": 1228 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001804289625903777, + "loss": 1.2594, + "step": 1229 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001803934770086742, + "loss": 1.2872, + "step": 1230 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018035796278164233, + "loss": 1.2739, + "step": 1231 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018032241992193635, + "loss": 1.1775, + "step": 1232 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018028684844222066, + "loss": 1.2807, + "step": 1233 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018025124835516988, + "loss": 1.2762, + "step": 1234 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018021561967346882, + "loss": 1.2427, + "step": 1235 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018017996240981253, + "loss": 1.2255, + "step": 1236 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001801442765769061, + "loss": 1.2736, + "step": 1237 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018010856218746496, + "loss": 0.9902, + "step": 1238 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018007281925421463, + "loss": 1.1292, + "step": 1239 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018003704778989086, + "loss": 1.12, + "step": 1240 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001800012478072395, + "loss": 1.334, + "step": 1241 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017996541931901652, + "loss": 1.2327, + "step": 1242 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017992956233798825, + "loss": 1.1804, + "step": 1243 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001798936768769309, + "loss": 1.1307, + "step": 1244 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001798577629486311, + "loss": 1.1742, + "step": 1245 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017982182056588535, + "loss": 1.2913, + "step": 1246 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017978584974150054, + "loss": 1.1641, + "step": 1247 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017974985048829352, + "loss": 1.2154, + "step": 1248 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017971382281909134, + "loss": 1.1288, + "step": 1249 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001796777667467312, + "loss": 1.265, + "step": 1250 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017964168228406035, + "loss": 1.3487, + "step": 1251 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001796055694439362, + "loss": 1.206, + "step": 1252 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017956942823922625, + "loss": 1.199, + "step": 1253 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001795332586828082, + "loss": 1.1803, + "step": 1254 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017949706078756967, + "loss": 1.0641, + "step": 1255 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017946083456640851, + "loss": 1.1758, + "step": 1256 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017942458003223266, + "loss": 1.2883, + "step": 1257 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001793882971979601, + "loss": 1.3222, + "step": 1258 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001793519860765189, + "loss": 1.2017, + "step": 1259 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017931564668084726, + "loss": 1.1392, + "step": 1260 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017927927902389342, + "loss": 1.0438, + "step": 1261 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017924288311861565, + "loss": 1.2606, + "step": 1262 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017920645897798236, + "loss": 1.1746, + "step": 1263 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017917000661497195, + "loss": 1.2884, + "step": 1264 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001791335260425729, + "loss": 1.1281, + "step": 1265 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017909701727378385, + "loss": 1.171, + "step": 1266 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017906048032161328, + "loss": 1.3053, + "step": 1267 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017902391519907987, + "loss": 1.2621, + "step": 1268 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001789873219192123, + "loss": 1.3831, + "step": 1269 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001789507004950492, + "loss": 1.3112, + "step": 1270 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017891405093963938, + "loss": 1.2407, + "step": 1271 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017887737326604156, + "loss": 1.2135, + "step": 1272 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001788406674873245, + "loss": 1.3374, + "step": 1273 + }, + { + "epoch": 0.23, + "learning_rate": 0.000178803933616567, + "loss": 1.2058, + "step": 1274 + }, + { + "epoch": 0.23, + "learning_rate": 0.00017876717166685787, + "loss": 1.2151, + "step": 1275 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017873038165129587, + "loss": 1.2119, + "step": 1276 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017869356358298988, + "loss": 1.4366, + "step": 1277 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017865671747505858, + "loss": 1.1781, + "step": 1278 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017861984334063084, + "loss": 1.1427, + "step": 1279 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017858294119284544, + "loss": 1.1903, + "step": 1280 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001785460110448511, + "loss": 1.2264, + "step": 1281 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017850905290980656, + "loss": 1.2178, + "step": 1282 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001784720668008805, + "loss": 1.2567, + "step": 1283 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017843505273125164, + "loss": 1.2121, + "step": 1284 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001783980107141086, + "loss": 1.3689, + "step": 1285 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017836094076264996, + "loss": 1.2936, + "step": 1286 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017832384289008423, + "loss": 1.2547, + "step": 1287 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017828671710963, + "loss": 1.0993, + "step": 1288 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001782495634345156, + "loss": 1.1734, + "step": 1289 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017821238187797948, + "loss": 1.2159, + "step": 1290 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017817517245326996, + "loss": 1.3679, + "step": 1291 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017813793517364523, + "loss": 1.2212, + "step": 1292 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017810067005237347, + "loss": 1.2879, + "step": 1293 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001780633771027328, + "loss": 1.3256, + "step": 1294 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017802605633801118, + "loss": 1.1026, + "step": 1295 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001779887077715066, + "loss": 1.2733, + "step": 1296 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017795133141652677, + "loss": 1.1758, + "step": 1297 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001779139272863895, + "loss": 1.2666, + "step": 1298 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017787649539442236, + "loss": 1.318, + "step": 1299 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001778390357539629, + "loss": 1.1325, + "step": 1300 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017780154837835848, + "loss": 1.3479, + "step": 1301 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017776403328096643, + "loss": 1.2396, + "step": 1302 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017772649047515383, + "loss": 1.3131, + "step": 1303 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017768891997429776, + "loss": 1.209, + "step": 1304 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017765132179178514, + "loss": 1.3803, + "step": 1305 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017761369594101266, + "loss": 1.2069, + "step": 1306 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017757604243538703, + "loss": 1.3159, + "step": 1307 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017753836128832465, + "loss": 1.3416, + "step": 1308 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001775006525132519, + "loss": 1.1981, + "step": 1309 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001774629161236049, + "loss": 1.1581, + "step": 1310 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001774251521328297, + "loss": 1.2708, + "step": 1311 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001773873605543821, + "loss": 1.1515, + "step": 1312 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001773495414017278, + "loss": 1.2559, + "step": 1313 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001773116946883423, + "loss": 1.1202, + "step": 1314 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001772738204277109, + "loss": 1.196, + "step": 1315 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017723591863332873, + "loss": 1.2173, + "step": 1316 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017719798931870075, + "loss": 1.2556, + "step": 1317 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001771600324973417, + "loss": 1.2175, + "step": 1318 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017712204818277614, + "loss": 1.1328, + "step": 1319 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017708403638853838, + "loss": 1.2068, + "step": 1320 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017704599712817262, + "loss": 1.2144, + "step": 1321 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017700793041523273, + "loss": 1.2606, + "step": 1322 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017696983626328244, + "loss": 1.132, + "step": 1323 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017693171468589523, + "loss": 1.2381, + "step": 1324 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017689356569665433, + "loss": 1.2181, + "step": 1325 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017685538930915283, + "loss": 1.3792, + "step": 1326 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017681718553699341, + "loss": 1.1832, + "step": 1327 + }, + { + "epoch": 0.24, + "learning_rate": 0.00017677895439378874, + "loss": 1.2582, + "step": 1328 + }, + { + "epoch": 0.24, + "learning_rate": 0.000176740695893161, + "loss": 1.3206, + "step": 1329 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017670241004874232, + "loss": 1.2381, + "step": 1330 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001766640968741744, + "loss": 1.2651, + "step": 1331 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017662575638310887, + "loss": 1.0618, + "step": 1332 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017658738858920687, + "loss": 1.1737, + "step": 1333 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017654899350613948, + "loss": 1.297, + "step": 1334 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017651057114758734, + "loss": 1.1929, + "step": 1335 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001764721215272409, + "loss": 1.2204, + "step": 1336 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017643364465880036, + "loss": 1.3038, + "step": 1337 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017639514055597548, + "loss": 1.2502, + "step": 1338 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001763566092324859, + "loss": 1.2261, + "step": 1339 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001763180507020608, + "loss": 1.0371, + "step": 1340 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017627946497843916, + "loss": 1.2524, + "step": 1341 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001762408520753696, + "loss": 1.2008, + "step": 1342 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017620221200661046, + "loss": 1.3663, + "step": 1343 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017616354478592973, + "loss": 1.2386, + "step": 1344 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001761248504271051, + "loss": 1.1968, + "step": 1345 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017608612894392392, + "loss": 1.1832, + "step": 1346 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017604738035018316, + "loss": 1.2211, + "step": 1347 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017600860465968952, + "loss": 1.216, + "step": 1348 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017596980188625932, + "loss": 1.2222, + "step": 1349 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017593097204371852, + "loss": 1.1297, + "step": 1350 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001758921151459028, + "loss": 1.1722, + "step": 1351 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017585323120665732, + "loss": 1.1123, + "step": 1352 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017581432023983706, + "loss": 1.2245, + "step": 1353 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017577538225930654, + "loss": 1.1896, + "step": 1354 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017573641727893985, + "loss": 1.1446, + "step": 1355 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001756974253126208, + "loss": 1.1731, + "step": 1356 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001756584063742428, + "loss": 1.1536, + "step": 1357 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017561936047770879, + "loss": 1.2709, + "step": 1358 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001755802876369314, + "loss": 1.1872, + "step": 1359 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001755411878658329, + "loss": 1.1587, + "step": 1360 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017550206117834498, + "loss": 1.1885, + "step": 1361 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017546290758840906, + "loss": 1.3322, + "step": 1362 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017542372710997617, + "loss": 1.1306, + "step": 1363 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001753845197570068, + "loss": 1.201, + "step": 1364 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017534528554347115, + "loss": 1.0762, + "step": 1365 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017530602448334888, + "loss": 1.2592, + "step": 1366 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017526673659062922, + "loss": 1.1118, + "step": 1367 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017522742187931108, + "loss": 1.2199, + "step": 1368 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017518808036340279, + "loss": 1.1314, + "step": 1369 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017514871205692232, + "loss": 1.2075, + "step": 1370 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017510931697389714, + "loss": 1.2119, + "step": 1371 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017506989512836425, + "loss": 1.229, + "step": 1372 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017503044653437025, + "loss": 1.1953, + "step": 1373 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001749909712059712, + "loss": 1.0281, + "step": 1374 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017495146915723272, + "loss": 1.1775, + "step": 1375 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017491194040222994, + "loss": 1.2288, + "step": 1376 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001748723849550476, + "loss": 1.2656, + "step": 1377 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001748328028297797, + "loss": 1.2991, + "step": 1378 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017479319404053003, + "loss": 1.3449, + "step": 1379 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017475355860141172, + "loss": 1.2125, + "step": 1380 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017471389652654745, + "loss": 1.3109, + "step": 1381 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017467420783006933, + "loss": 1.4411, + "step": 1382 + }, + { + "epoch": 0.25, + "learning_rate": 0.00017463449252611905, + "loss": 1.1428, + "step": 1383 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017459475062884776, + "loss": 1.1977, + "step": 1384 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017455498215241598, + "loss": 1.2016, + "step": 1385 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001745151871109938, + "loss": 1.1913, + "step": 1386 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001744753655187608, + "loss": 1.1875, + "step": 1387 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017443551738990595, + "loss": 1.1636, + "step": 1388 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001743956427386277, + "loss": 1.1997, + "step": 1389 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017435574157913394, + "loss": 1.2893, + "step": 1390 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017431581392564197, + "loss": 1.18, + "step": 1391 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017427585979237867, + "loss": 1.1265, + "step": 1392 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017423587919358023, + "loss": 1.1473, + "step": 1393 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017419587214349225, + "loss": 1.3176, + "step": 1394 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017415583865636983, + "loss": 1.1197, + "step": 1395 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001741157787464775, + "loss": 1.2416, + "step": 1396 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017407569242808917, + "loss": 1.2736, + "step": 1397 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001740355797154881, + "loss": 1.0422, + "step": 1398 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017399544062296707, + "loss": 1.1918, + "step": 1399 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017395527516482818, + "loss": 1.2197, + "step": 1400 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017391508335538294, + "loss": 1.1656, + "step": 1401 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017387486520895228, + "loss": 1.2237, + "step": 1402 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001738346207398665, + "loss": 1.3257, + "step": 1403 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017379434996246524, + "loss": 1.2784, + "step": 1404 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017375405289109757, + "loss": 1.2915, + "step": 1405 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017371372954012187, + "loss": 1.0518, + "step": 1406 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017367337992390595, + "loss": 1.2375, + "step": 1407 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017363300405682697, + "loss": 1.2821, + "step": 1408 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017359260195327135, + "loss": 1.1144, + "step": 1409 + }, + { + "epoch": 0.26, + "learning_rate": 0.000173552173627635, + "loss": 1.1843, + "step": 1410 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017351171909432304, + "loss": 1.3609, + "step": 1411 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017347123836775006, + "loss": 1.1684, + "step": 1412 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017343073146233983, + "loss": 1.1361, + "step": 1413 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001733901983925256, + "loss": 1.1992, + "step": 1414 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017334963917274985, + "loss": 1.0418, + "step": 1415 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001733090538174644, + "loss": 1.1364, + "step": 1416 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017326844234113038, + "loss": 1.2021, + "step": 1417 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017322780475821824, + "loss": 1.1642, + "step": 1418 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001731871410832077, + "loss": 1.2481, + "step": 1419 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001731464513305878, + "loss": 1.1703, + "step": 1420 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017310573551485692, + "loss": 1.1008, + "step": 1421 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017306499365052266, + "loss": 1.2727, + "step": 1422 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017302422575210186, + "loss": 1.2382, + "step": 1423 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017298343183412077, + "loss": 1.1945, + "step": 1424 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017294261191111484, + "loss": 1.2835, + "step": 1425 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017290176599762873, + "loss": 1.3709, + "step": 1426 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017286089410821641, + "loss": 1.2991, + "step": 1427 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017281999625744118, + "loss": 1.3137, + "step": 1428 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001727790724598755, + "loss": 1.1907, + "step": 1429 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017273812273010104, + "loss": 1.1794, + "step": 1430 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017269714708270884, + "loss": 1.1233, + "step": 1431 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017265614553229903, + "loss": 1.2167, + "step": 1432 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017261511809348105, + "loss": 1.2112, + "step": 1433 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001725740647808736, + "loss": 1.1812, + "step": 1434 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017253298560910457, + "loss": 1.1797, + "step": 1435 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017249188059281098, + "loss": 1.0878, + "step": 1436 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017245074974663914, + "loss": 1.191, + "step": 1437 + }, + { + "epoch": 0.26, + "learning_rate": 0.00017240959308524457, + "loss": 1.214, + "step": 1438 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017236841062329196, + "loss": 1.2253, + "step": 1439 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017232720237545524, + "loss": 1.1965, + "step": 1440 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001722859683564174, + "loss": 1.2632, + "step": 1441 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017224470858087076, + "loss": 1.2249, + "step": 1442 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017220342306351679, + "loss": 1.0468, + "step": 1443 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017216211181906606, + "loss": 1.276, + "step": 1444 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001721207748622383, + "loss": 1.2347, + "step": 1445 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017207941220776256, + "loss": 1.158, + "step": 1446 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017203802387037688, + "loss": 0.9859, + "step": 1447 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001719966098648285, + "loss": 1.1001, + "step": 1448 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017195517020587378, + "loss": 1.3223, + "step": 1449 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017191370490827836, + "loss": 1.1533, + "step": 1450 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017187221398681684, + "loss": 1.2613, + "step": 1451 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017183069745627298, + "loss": 1.1942, + "step": 1452 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017178915533143979, + "loss": 1.1766, + "step": 1453 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017174758762711925, + "loss": 1.1639, + "step": 1454 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017170599435812253, + "loss": 1.1171, + "step": 1455 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017166437553926993, + "loss": 1.2142, + "step": 1456 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017162273118539082, + "loss": 1.258, + "step": 1457 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001715810613113236, + "loss": 1.12, + "step": 1458 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017153936593191594, + "loss": 1.2543, + "step": 1459 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001714976450620244, + "loss": 1.1931, + "step": 1460 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017145589871651477, + "loss": 1.2134, + "step": 1461 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001714141269102618, + "loss": 1.1229, + "step": 1462 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017137232965814948, + "loss": 1.0308, + "step": 1463 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017133050697507068, + "loss": 1.1187, + "step": 1464 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017128865887592739, + "loss": 1.1222, + "step": 1465 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017124678537563072, + "loss": 1.2002, + "step": 1466 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001712048864891008, + "loss": 1.2559, + "step": 1467 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017116296223126676, + "loss": 1.1725, + "step": 1468 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001711210126170668, + "loss": 1.2989, + "step": 1469 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001710790376614482, + "loss": 1.2134, + "step": 1470 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017103703737936718, + "loss": 1.2327, + "step": 1471 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017099501178578907, + "loss": 0.9165, + "step": 1472 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017095296089568817, + "loss": 1.1552, + "step": 1473 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001709108847240478, + "loss": 1.329, + "step": 1474 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001708687832858603, + "loss": 1.1479, + "step": 1475 + }, + { + "epoch": 0.27, + "learning_rate": 0.000170826656596127, + "loss": 1.1453, + "step": 1476 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001707845046698582, + "loss": 1.2917, + "step": 1477 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017074232752207328, + "loss": 1.2175, + "step": 1478 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017070012516780055, + "loss": 1.0365, + "step": 1479 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017065789762207726, + "loss": 1.278, + "step": 1480 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017061564489994968, + "loss": 1.1648, + "step": 1481 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001705733670164731, + "loss": 1.3297, + "step": 1482 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001705310639867117, + "loss": 1.403, + "step": 1483 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017048873582573864, + "loss": 1.1626, + "step": 1484 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017044638254863604, + "loss": 1.1389, + "step": 1485 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017040400417049495, + "loss": 1.3136, + "step": 1486 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017036160070641544, + "loss": 1.3039, + "step": 1487 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001703191721715064, + "loss": 1.1937, + "step": 1488 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017027671858088576, + "loss": 1.0943, + "step": 1489 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001702342399496803, + "loss": 1.1182, + "step": 1490 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017019173629302577, + "loss": 1.2138, + "step": 1491 + }, + { + "epoch": 0.27, + "learning_rate": 0.00017014920762606687, + "loss": 1.1032, + "step": 1492 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017010665396395704, + "loss": 1.0061, + "step": 1493 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017006407532185887, + "loss": 1.2154, + "step": 1494 + }, + { + "epoch": 0.28, + "learning_rate": 0.00017002147171494365, + "loss": 1.2172, + "step": 1495 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001699788431583917, + "loss": 1.2804, + "step": 1496 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016993618966739215, + "loss": 1.3163, + "step": 1497 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016989351125714305, + "loss": 1.2119, + "step": 1498 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016985080794285128, + "loss": 1.2323, + "step": 1499 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001698080797397327, + "loss": 1.1655, + "step": 1500 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001697653266630119, + "loss": 1.1004, + "step": 1501 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016972254872792245, + "loss": 1.3874, + "step": 1502 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016967974594970673, + "loss": 1.2388, + "step": 1503 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016963691834361597, + "loss": 1.275, + "step": 1504 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016959406592491024, + "loss": 1.2606, + "step": 1505 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016955118870885843, + "loss": 1.3397, + "step": 1506 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016950828671073834, + "loss": 1.2047, + "step": 1507 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016946535994583656, + "loss": 1.1473, + "step": 1508 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016942240842944848, + "loss": 1.2658, + "step": 1509 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016937943217687832, + "loss": 1.0781, + "step": 1510 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016933643120343916, + "loss": 1.1663, + "step": 1511 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016929340552445282, + "loss": 1.1983, + "step": 1512 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016925035515524996, + "loss": 1.2973, + "step": 1513 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016920728011117007, + "loss": 1.2403, + "step": 1514 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016916418040756134, + "loss": 1.1292, + "step": 1515 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001691210560597808, + "loss": 1.2362, + "step": 1516 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016907790708319434, + "loss": 1.2901, + "step": 1517 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016903473349317652, + "loss": 1.2024, + "step": 1518 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016899153530511063, + "loss": 1.1289, + "step": 1519 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016894831253438884, + "loss": 1.2576, + "step": 1520 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016890506519641203, + "loss": 1.2019, + "step": 1521 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016886179330658985, + "loss": 1.2431, + "step": 1522 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016881849688034066, + "loss": 1.1562, + "step": 1523 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001687751759330916, + "loss": 1.2901, + "step": 1524 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016873183048027852, + "loss": 1.2413, + "step": 1525 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016868846053734606, + "loss": 1.1445, + "step": 1526 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001686450661197475, + "loss": 1.1726, + "step": 1527 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016860164724294486, + "loss": 1.2321, + "step": 1528 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016855820392240896, + "loss": 1.2365, + "step": 1529 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016851473617361922, + "loss": 1.3249, + "step": 1530 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016847124401206384, + "loss": 1.2852, + "step": 1531 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016842772745323967, + "loss": 1.2638, + "step": 1532 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016838418651265227, + "loss": 1.247, + "step": 1533 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016834062120581588, + "loss": 1.3268, + "step": 1534 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001682970315482535, + "loss": 1.1704, + "step": 1535 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016825341755549665, + "loss": 1.2463, + "step": 1536 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016820977924308567, + "loss": 1.2014, + "step": 1537 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016816611662656946, + "loss": 1.2113, + "step": 1538 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016812242972150568, + "loss": 1.2345, + "step": 1539 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001680787185434606, + "loss": 1.2886, + "step": 1540 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016803498310800902, + "loss": 1.2227, + "step": 1541 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001679912234307346, + "loss": 1.2133, + "step": 1542 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001679474395272295, + "loss": 1.2004, + "step": 1543 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016790363141309448, + "loss": 1.1993, + "step": 1544 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001678597991039391, + "loss": 1.1738, + "step": 1545 + }, + { + "epoch": 0.28, + "learning_rate": 0.00016781594261538132, + "loss": 1.2293, + "step": 1546 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001677720619630479, + "loss": 1.2308, + "step": 1547 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016772815716257412, + "loss": 1.2348, + "step": 1548 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016768422822960387, + "loss": 1.0569, + "step": 1549 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016764027517978961, + "loss": 1.2729, + "step": 1550 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016759629802879248, + "loss": 1.1908, + "step": 1551 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016755229679228217, + "loss": 1.1799, + "step": 1552 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016750827148593692, + "loss": 1.2052, + "step": 1553 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016746422212544353, + "loss": 1.1969, + "step": 1554 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016742014872649745, + "loss": 1.235, + "step": 1555 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016737605130480264, + "loss": 1.3365, + "step": 1556 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016733192987607165, + "loss": 1.2668, + "step": 1557 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016728778445602554, + "loss": 1.2502, + "step": 1558 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016724361506039397, + "loss": 1.174, + "step": 1559 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016719942170491508, + "loss": 1.1723, + "step": 1560 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016715520440533564, + "loss": 1.22, + "step": 1561 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016711096317741087, + "loss": 1.1611, + "step": 1562 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016706669803690449, + "loss": 1.1629, + "step": 1563 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016702240899958888, + "loss": 1.1538, + "step": 1564 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016697809608124482, + "loss": 1.2002, + "step": 1565 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001669337592976616, + "loss": 1.0641, + "step": 1566 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016688939866463706, + "loss": 1.3104, + "step": 1567 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001668450141979775, + "loss": 1.2248, + "step": 1568 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016680060591349775, + "loss": 1.3583, + "step": 1569 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016675617382702114, + "loss": 1.1808, + "step": 1570 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016671171795437938, + "loss": 1.1572, + "step": 1571 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016666723831141277, + "loss": 1.1311, + "step": 1572 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016662273491397004, + "loss": 1.257, + "step": 1573 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016657820777790834, + "loss": 1.0092, + "step": 1574 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016653365691909333, + "loss": 1.2538, + "step": 1575 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001664890823533991, + "loss": 1.1146, + "step": 1576 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016644448409670822, + "loss": 1.0686, + "step": 1577 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016639986216491167, + "loss": 1.1499, + "step": 1578 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016635521657390885, + "loss": 1.2007, + "step": 1579 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016631054733960764, + "loss": 1.1455, + "step": 1580 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001662658544779243, + "loss": 1.1934, + "step": 1581 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016622113800478355, + "loss": 1.1172, + "step": 1582 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016617639793611847, + "loss": 1.2024, + "step": 1583 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016613163428787063, + "loss": 1.21, + "step": 1584 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016608684707598986, + "loss": 1.2116, + "step": 1585 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016604203631643454, + "loss": 1.2464, + "step": 1586 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016599720202517136, + "loss": 1.1444, + "step": 1587 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001659523442181754, + "loss": 1.1333, + "step": 1588 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016590746291143013, + "loss": 1.17, + "step": 1589 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001658625581209274, + "loss": 1.3383, + "step": 1590 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016581762986266744, + "loss": 1.0725, + "step": 1591 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016577267815265877, + "loss": 1.2019, + "step": 1592 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016572770300691837, + "loss": 1.2065, + "step": 1593 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001656827044414715, + "loss": 1.1116, + "step": 1594 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016563768247235174, + "loss": 1.2632, + "step": 1595 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001655926371156011, + "loss": 1.1505, + "step": 1596 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001655475683872699, + "loss": 1.1906, + "step": 1597 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016550247630341667, + "loss": 1.2502, + "step": 1598 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016545736088010844, + "loss": 1.1832, + "step": 1599 + }, + { + "epoch": 0.29, + "learning_rate": 0.00016541222213342044, + "loss": 1.3324, + "step": 1600 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016536706007943625, + "loss": 1.2838, + "step": 1601 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016532187473424772, + "loss": 1.1723, + "step": 1602 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001652766661139551, + "loss": 1.1632, + "step": 1603 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016523143423466673, + "loss": 1.0828, + "step": 1604 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016518617911249946, + "loss": 1.2907, + "step": 1605 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016514090076357834, + "loss": 1.2458, + "step": 1606 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001650955992040366, + "loss": 1.3055, + "step": 1607 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016505027445001594, + "loss": 1.1279, + "step": 1608 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001650049265176661, + "loss": 1.2035, + "step": 1609 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001649595554231452, + "loss": 1.207, + "step": 1610 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016491416118261966, + "loss": 1.1223, + "step": 1611 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016486874381226407, + "loss": 1.2268, + "step": 1612 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016482330332826125, + "loss": 1.1599, + "step": 1613 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001647778397468023, + "loss": 1.2854, + "step": 1614 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016473235308408653, + "loss": 1.2155, + "step": 1615 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016468684335632148, + "loss": 1.3172, + "step": 1616 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016464131057972292, + "loss": 1.2471, + "step": 1617 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001645957547705148, + "loss": 1.1826, + "step": 1618 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016455017594492932, + "loss": 1.1543, + "step": 1619 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016450457411920684, + "loss": 1.1779, + "step": 1620 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016445894930959591, + "loss": 1.2497, + "step": 1621 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016441330153235334, + "loss": 1.2055, + "step": 1622 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016436763080374407, + "loss": 1.1475, + "step": 1623 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016432193714004117, + "loss": 1.2126, + "step": 1624 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016427622055752603, + "loss": 1.191, + "step": 1625 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016423048107248802, + "loss": 1.0712, + "step": 1626 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016418471870122483, + "loss": 1.1962, + "step": 1627 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016413893346004215, + "loss": 1.2409, + "step": 1628 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016409312536525401, + "loss": 1.2057, + "step": 1629 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016404729443318243, + "loss": 1.0753, + "step": 1630 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001640014406801576, + "loss": 1.1432, + "step": 1631 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001639555641225179, + "loss": 1.2125, + "step": 1632 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016390966477660977, + "loss": 1.1986, + "step": 1633 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016386374265878778, + "loss": 1.1974, + "step": 1634 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016381779778541468, + "loss": 1.2197, + "step": 1635 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001637718301728612, + "loss": 1.1791, + "step": 1636 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001637258398375063, + "loss": 1.165, + "step": 1637 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016367982679573698, + "loss": 1.1384, + "step": 1638 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001636337910639483, + "loss": 1.1868, + "step": 1639 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001635877326585435, + "loss": 1.2002, + "step": 1640 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001635416515959338, + "loss": 1.0799, + "step": 1641 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016349554789253851, + "loss": 1.1965, + "step": 1642 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016344942156478506, + "loss": 1.1711, + "step": 1643 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016340327262910894, + "loss": 1.1503, + "step": 1644 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001633571011019536, + "loss": 1.255, + "step": 1645 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016331090699977062, + "loss": 1.3944, + "step": 1646 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001632646903390197, + "loss": 1.1169, + "step": 1647 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016321845113616835, + "loss": 1.1392, + "step": 1648 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016317218940769233, + "loss": 1.326, + "step": 1649 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001631259051700753, + "loss": 1.3237, + "step": 1650 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016307959843980905, + "loss": 1.0904, + "step": 1651 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001630332692333933, + "loss": 1.1922, + "step": 1652 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016298691756733573, + "loss": 1.2966, + "step": 1653 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016294054345815216, + "loss": 1.2367, + "step": 1654 + }, + { + "epoch": 0.3, + "learning_rate": 0.00016289414692236635, + "loss": 1.2741, + "step": 1655 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016284772797651, + "loss": 1.2113, + "step": 1656 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016280128663712285, + "loss": 1.1456, + "step": 1657 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001627548229207526, + "loss": 1.2241, + "step": 1658 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016270833684395492, + "loss": 1.1227, + "step": 1659 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016266182842329349, + "loss": 1.1471, + "step": 1660 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016261529767533985, + "loss": 1.1612, + "step": 1661 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016256874461667358, + "loss": 1.2026, + "step": 1662 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016252216926388228, + "loss": 1.2526, + "step": 1663 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016247557163356127, + "loss": 1.3482, + "step": 1664 + }, + { + "epoch": 0.31, + "learning_rate": 0.000162428951742314, + "loss": 1.2425, + "step": 1665 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016238230960675183, + "loss": 1.1133, + "step": 1666 + }, + { + "epoch": 0.31, + "learning_rate": 0.000162335645243494, + "loss": 1.2609, + "step": 1667 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001622889586691676, + "loss": 1.2487, + "step": 1668 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016224224990040782, + "loss": 1.1242, + "step": 1669 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016219551895385758, + "loss": 1.1746, + "step": 1670 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016214876584616782, + "loss": 1.292, + "step": 1671 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001621019905939973, + "loss": 1.1323, + "step": 1672 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016205519321401267, + "loss": 1.2474, + "step": 1673 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016200837372288863, + "loss": 1.2454, + "step": 1674 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016196153213730746, + "loss": 1.2006, + "step": 1675 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016191466847395954, + "loss": 1.2506, + "step": 1676 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001618677827495431, + "loss": 1.258, + "step": 1677 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016182087498076416, + "loss": 1.1712, + "step": 1678 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016177394518433654, + "loss": 1.0774, + "step": 1679 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001617269933769821, + "loss": 1.2573, + "step": 1680 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016168001957543037, + "loss": 1.1031, + "step": 1681 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001616330237964188, + "loss": 1.1632, + "step": 1682 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016158600605669263, + "loss": 1.3699, + "step": 1683 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016153896637300497, + "loss": 1.1484, + "step": 1684 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016149190476211667, + "loss": 1.1303, + "step": 1685 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016144482124079649, + "loss": 1.1778, + "step": 1686 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016139771582582093, + "loss": 1.207, + "step": 1687 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016135058853397434, + "loss": 1.2598, + "step": 1688 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016130343938204882, + "loss": 1.153, + "step": 1689 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016125626838684425, + "loss": 1.2401, + "step": 1690 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016120907556516837, + "loss": 1.2112, + "step": 1691 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016116186093383662, + "loss": 1.2133, + "step": 1692 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016111462450967226, + "loss": 1.3208, + "step": 1693 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016106736630950628, + "loss": 1.2315, + "step": 1694 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001610200863501774, + "loss": 1.1142, + "step": 1695 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001609727846485322, + "loss": 1.159, + "step": 1696 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001609254612214249, + "loss": 1.2663, + "step": 1697 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016087811608571752, + "loss": 1.261, + "step": 1698 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001608307492582798, + "loss": 1.1375, + "step": 1699 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016078336075598925, + "loss": 1.1515, + "step": 1700 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016073595059573097, + "loss": 1.2158, + "step": 1701 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016068851879439793, + "loss": 1.0893, + "step": 1702 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016064106536889074, + "loss": 1.1549, + "step": 1703 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016059359033611772, + "loss": 1.1901, + "step": 1704 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016054609371299487, + "loss": 1.2144, + "step": 1705 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016049857551644596, + "loss": 1.2192, + "step": 1706 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016045103576340237, + "loss": 1.0749, + "step": 1707 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016040347447080313, + "loss": 1.1549, + "step": 1708 + }, + { + "epoch": 0.31, + "learning_rate": 0.00016035589165559508, + "loss": 1.1835, + "step": 1709 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016030828733473257, + "loss": 1.144, + "step": 1710 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016026066152517775, + "loss": 1.2309, + "step": 1711 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016021301424390036, + "loss": 1.2118, + "step": 1712 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001601653455078778, + "loss": 1.3033, + "step": 1713 + }, + { + "epoch": 0.32, + "learning_rate": 0.00016011765533409505, + "loss": 1.2568, + "step": 1714 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001600699437395449, + "loss": 1.2009, + "step": 1715 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001600222107412275, + "loss": 1.2614, + "step": 1716 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015997445635615099, + "loss": 1.3483, + "step": 1717 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001599266806013308, + "loss": 1.2348, + "step": 1718 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001598788834937901, + "loss": 1.0866, + "step": 1719 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015983106505055976, + "loss": 1.2245, + "step": 1720 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015978322528867808, + "loss": 1.1653, + "step": 1721 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001597353642251911, + "loss": 1.2378, + "step": 1722 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015968748187715235, + "loss": 1.2658, + "step": 1723 + }, + { + "epoch": 0.32, + "learning_rate": 0.000159639578261623, + "loss": 1.1331, + "step": 1724 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015959165339567174, + "loss": 1.3096, + "step": 1725 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015954370729637498, + "loss": 1.2427, + "step": 1726 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001594957399808165, + "loss": 1.193, + "step": 1727 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015944775146608777, + "loss": 1.1624, + "step": 1728 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015939974176928777, + "loss": 1.1353, + "step": 1729 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015935171090752302, + "loss": 1.2333, + "step": 1730 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015930365889790758, + "loss": 1.2503, + "step": 1731 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001592555857575631, + "loss": 1.198, + "step": 1732 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015920749150361867, + "loss": 1.2043, + "step": 1733 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015915937615321098, + "loss": 1.149, + "step": 1734 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015911123972348417, + "loss": 1.1039, + "step": 1735 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015906308223158998, + "loss": 1.089, + "step": 1736 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015901490369468762, + "loss": 1.2054, + "step": 1737 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015896670412994366, + "loss": 1.2139, + "step": 1738 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001589184835545324, + "loss": 1.1971, + "step": 1739 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015887024198563552, + "loss": 1.2483, + "step": 1740 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015882197944044207, + "loss": 1.2866, + "step": 1741 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015877369593614874, + "loss": 1.1243, + "step": 1742 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015872539148995964, + "loss": 1.2749, + "step": 1743 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015867706611908625, + "loss": 1.2976, + "step": 1744 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001586287198407477, + "loss": 1.114, + "step": 1745 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015858035267217032, + "loss": 1.107, + "step": 1746 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001585319646305881, + "loss": 1.015, + "step": 1747 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015848355573324234, + "loss": 1.1707, + "step": 1748 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001584351259973818, + "loss": 1.183, + "step": 1749 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015838667544026275, + "loss": 1.0872, + "step": 1750 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015833820407914874, + "loss": 1.202, + "step": 1751 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001582897119313108, + "loss": 1.1932, + "step": 1752 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001582411990140274, + "loss": 1.2403, + "step": 1753 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015819266534458434, + "loss": 1.149, + "step": 1754 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001581441109402749, + "loss": 1.3746, + "step": 1755 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015809553581839966, + "loss": 1.2211, + "step": 1756 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001580469399962666, + "loss": 1.0727, + "step": 1757 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015799832349119112, + "loss": 1.0982, + "step": 1758 + }, + { + "epoch": 0.32, + "learning_rate": 0.000157949686320496, + "loss": 1.1591, + "step": 1759 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015790102850151127, + "loss": 1.1572, + "step": 1760 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015785235005157447, + "loss": 1.185, + "step": 1761 + }, + { + "epoch": 0.32, + "learning_rate": 0.00015780365098803042, + "loss": 1.0971, + "step": 1762 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001577549313282312, + "loss": 1.1482, + "step": 1763 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015770619108953642, + "loss": 1.2177, + "step": 1764 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015765743028931285, + "loss": 1.136, + "step": 1765 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001576086489449346, + "loss": 1.2006, + "step": 1766 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015755984707378326, + "loss": 1.3344, + "step": 1767 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015751102469324755, + "loss": 1.1215, + "step": 1768 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015746218182072357, + "loss": 1.1343, + "step": 1769 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001574133184736148, + "loss": 1.0403, + "step": 1770 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001573644346693319, + "loss": 1.2373, + "step": 1771 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015731553042529277, + "loss": 1.2421, + "step": 1772 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001572666057589228, + "loss": 1.1018, + "step": 1773 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015721766068765448, + "loss": 1.2736, + "step": 1774 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015716869522892766, + "loss": 1.1442, + "step": 1775 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001571197094001894, + "loss": 1.2853, + "step": 1776 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015707070321889404, + "loss": 1.1599, + "step": 1777 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001570216767025032, + "loss": 1.2125, + "step": 1778 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015697262986848564, + "loss": 1.2547, + "step": 1779 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001569235627343175, + "loss": 1.2284, + "step": 1780 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015687447531748214, + "loss": 1.2231, + "step": 1781 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015682536763547003, + "loss": 1.1945, + "step": 1782 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015677623970577893, + "loss": 1.177, + "step": 1783 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001567270915459138, + "loss": 1.308, + "step": 1784 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001566779231733869, + "loss": 1.2833, + "step": 1785 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015662873460571753, + "loss": 1.1298, + "step": 1786 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001565795258604323, + "loss": 1.3203, + "step": 1787 + }, + { + "epoch": 0.33, + "learning_rate": 0.000156530296955065, + "loss": 1.1169, + "step": 1788 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015648104790715658, + "loss": 1.364, + "step": 1789 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015643177873425507, + "loss": 1.2208, + "step": 1790 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015638248945391592, + "loss": 1.232, + "step": 1791 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015633318008370151, + "loss": 1.2005, + "step": 1792 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015628385064118147, + "loss": 1.2992, + "step": 1793 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001562345011439326, + "loss": 1.216, + "step": 1794 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015618513160953882, + "loss": 1.1715, + "step": 1795 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015613574205559116, + "loss": 1.1994, + "step": 1796 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015608633249968783, + "loss": 1.1454, + "step": 1797 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015603690295943413, + "loss": 1.1363, + "step": 1798 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015598745345244257, + "loss": 1.1356, + "step": 1799 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015593798399633264, + "loss": 1.1786, + "step": 1800 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015588849460873103, + "loss": 1.2953, + "step": 1801 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015583898530727148, + "loss": 1.0148, + "step": 1802 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001557894561095949, + "loss": 1.0244, + "step": 1803 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015573990703334918, + "loss": 1.258, + "step": 1804 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001556903380961894, + "loss": 1.2378, + "step": 1805 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015564074931577766, + "loss": 1.1281, + "step": 1806 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001555911407097831, + "loss": 1.1018, + "step": 1807 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015554151229588205, + "loss": 1.132, + "step": 1808 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015549186409175776, + "loss": 1.1643, + "step": 1809 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015544219611510058, + "loss": 1.2201, + "step": 1810 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015539250838360794, + "loss": 1.3515, + "step": 1811 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001553428009149842, + "loss": 1.1936, + "step": 1812 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015529307372694095, + "loss": 1.2234, + "step": 1813 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015524332683719663, + "loss": 1.1958, + "step": 1814 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001551935602634767, + "loss": 1.2116, + "step": 1815 + }, + { + "epoch": 0.33, + "learning_rate": 0.00015514377402351377, + "loss": 1.1441, + "step": 1816 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001550939681350474, + "loss": 0.9673, + "step": 1817 + }, + { + "epoch": 0.34, + "learning_rate": 0.000155044142615824, + "loss": 1.242, + "step": 1818 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015499429748359726, + "loss": 1.184, + "step": 1819 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015494443275612763, + "loss": 1.1994, + "step": 1820 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015489454845118257, + "loss": 1.0545, + "step": 1821 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015484464458653664, + "loss": 1.0699, + "step": 1822 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015479472117997122, + "loss": 1.2067, + "step": 1823 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001547447782492748, + "loss": 1.3628, + "step": 1824 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015469481581224272, + "loss": 1.186, + "step": 1825 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015464483388667724, + "loss": 1.1259, + "step": 1826 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001545948324903877, + "loss": 1.1251, + "step": 1827 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015454481164119027, + "loss": 1.0714, + "step": 1828 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015449477135690812, + "loss": 1.2438, + "step": 1829 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015444471165537124, + "loss": 1.1487, + "step": 1830 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015439463255441667, + "loss": 1.1651, + "step": 1831 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015434453407188825, + "loss": 1.1959, + "step": 1832 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015429441622563683, + "loss": 1.1575, + "step": 1833 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015424427903352003, + "loss": 1.1268, + "step": 1834 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001541941225134025, + "loss": 1.0749, + "step": 1835 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001541439466831557, + "loss": 1.2632, + "step": 1836 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015409375156065798, + "loss": 1.1152, + "step": 1837 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015404353716379454, + "loss": 1.1925, + "step": 1838 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015399330351045754, + "loss": 1.4314, + "step": 1839 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015394305061854592, + "loss": 1.3242, + "step": 1840 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015389277850596546, + "loss": 1.2713, + "step": 1841 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015384248719062883, + "loss": 1.2198, + "step": 1842 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015379217669045554, + "loss": 1.2433, + "step": 1843 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015374184702337198, + "loss": 1.1114, + "step": 1844 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015369149820731122, + "loss": 1.2474, + "step": 1845 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015364113026021333, + "loss": 1.17, + "step": 1846 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001535907432000251, + "loss": 1.2861, + "step": 1847 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015354033704470012, + "loss": 1.2025, + "step": 1848 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015348991181219886, + "loss": 1.1277, + "step": 1849 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015343946752048853, + "loss": 1.0464, + "step": 1850 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015338900418754309, + "loss": 1.0817, + "step": 1851 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001533385218313434, + "loss": 1.3188, + "step": 1852 + }, + { + "epoch": 0.34, + "learning_rate": 0.000153288020469877, + "loss": 1.1344, + "step": 1853 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015323750012113827, + "loss": 1.1315, + "step": 1854 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015318696080312828, + "loss": 1.224, + "step": 1855 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015313640253385494, + "loss": 1.3144, + "step": 1856 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015308582533133283, + "loss": 1.2467, + "step": 1857 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015303522921358335, + "loss": 1.1719, + "step": 1858 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001529846141986346, + "loss": 1.1511, + "step": 1859 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015293398030452143, + "loss": 1.2501, + "step": 1860 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015288332754928542, + "loss": 1.0516, + "step": 1861 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015283265595097481, + "loss": 1.1917, + "step": 1862 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001527819655276447, + "loss": 1.0388, + "step": 1863 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001527312562973567, + "loss": 1.0899, + "step": 1864 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015268052827817927, + "loss": 1.2662, + "step": 1865 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015262978148818755, + "loss": 1.2239, + "step": 1866 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015257901594546326, + "loss": 1.239, + "step": 1867 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001525282316680949, + "loss": 1.1392, + "step": 1868 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015247742867417768, + "loss": 1.1724, + "step": 1869 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015242660698181336, + "loss": 1.1661, + "step": 1870 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015237576660911042, + "loss": 1.1015, + "step": 1871 + }, + { + "epoch": 0.34, + "learning_rate": 0.00015232490757418406, + "loss": 1.229, + "step": 1872 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015227402989515608, + "loss": 1.2928, + "step": 1873 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015222313359015483, + "loss": 1.1976, + "step": 1874 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015217221867731544, + "loss": 1.0774, + "step": 1875 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015212128517477956, + "loss": 1.1569, + "step": 1876 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015207033310069557, + "loss": 1.2913, + "step": 1877 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015201936247321841, + "loss": 1.0955, + "step": 1878 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015196837331050959, + "loss": 1.1342, + "step": 1879 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001519173656307373, + "loss": 1.1871, + "step": 1880 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001518663394520763, + "loss": 1.1208, + "step": 1881 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015181529479270787, + "loss": 1.2869, + "step": 1882 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015176423167082007, + "loss": 1.0946, + "step": 1883 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015171315010460728, + "loss": 1.213, + "step": 1884 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015166205011227061, + "loss": 1.18, + "step": 1885 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015161093171201778, + "loss": 1.3339, + "step": 1886 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001515597949220629, + "loss": 1.3212, + "step": 1887 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001515086397606268, + "loss": 1.0859, + "step": 1888 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001514574662459368, + "loss": 1.2132, + "step": 1889 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015140627439622667, + "loss": 1.198, + "step": 1890 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001513550642297368, + "loss": 1.1644, + "step": 1891 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015130383576471415, + "loss": 1.2114, + "step": 1892 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015125258901941207, + "loss": 1.2877, + "step": 1893 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015120132401209055, + "loss": 1.1249, + "step": 1894 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015115004076101608, + "loss": 1.0511, + "step": 1895 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015109873928446148, + "loss": 1.0507, + "step": 1896 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015104741960070625, + "loss": 1.224, + "step": 1897 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015099608172803634, + "loss": 1.1339, + "step": 1898 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015094472568474415, + "loss": 1.2023, + "step": 1899 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001508933514891285, + "loss": 1.2233, + "step": 1900 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015084195915949486, + "loss": 1.1744, + "step": 1901 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015079054871415494, + "loss": 1.1341, + "step": 1902 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015073912017142703, + "loss": 1.1534, + "step": 1903 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015068767354963584, + "loss": 1.1108, + "step": 1904 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015063620886711254, + "loss": 1.1263, + "step": 1905 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015058472614219473, + "loss": 1.1693, + "step": 1906 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015053322539322644, + "loss": 1.1911, + "step": 1907 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015048170663855802, + "loss": 1.1529, + "step": 1908 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015043016989654647, + "loss": 1.2447, + "step": 1909 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015037861518555493, + "loss": 1.1881, + "step": 1910 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015032704252395315, + "loss": 1.118, + "step": 1911 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015027545193011717, + "loss": 1.2974, + "step": 1912 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015022384342242945, + "loss": 1.1459, + "step": 1913 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001501722170192788, + "loss": 1.2728, + "step": 1914 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015012057273906048, + "loss": 1.2525, + "step": 1915 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015006891060017607, + "loss": 1.2262, + "step": 1916 + }, + { + "epoch": 0.35, + "learning_rate": 0.00015001723062103348, + "loss": 1.1223, + "step": 1917 + }, + { + "epoch": 0.35, + "learning_rate": 0.00014996553282004704, + "loss": 1.0908, + "step": 1918 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001499138172156374, + "loss": 1.0805, + "step": 1919 + }, + { + "epoch": 0.35, + "learning_rate": 0.00014986208382623156, + "loss": 1.2395, + "step": 1920 + }, + { + "epoch": 0.35, + "learning_rate": 0.00014981033267026284, + "loss": 1.1868, + "step": 1921 + }, + { + "epoch": 0.35, + "learning_rate": 0.00014975856376617093, + "loss": 1.1752, + "step": 1922 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001497067771324018, + "loss": 1.17, + "step": 1923 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001496549727874078, + "loss": 1.2026, + "step": 1924 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001496031507496475, + "loss": 1.2408, + "step": 1925 + }, + { + "epoch": 0.35, + "learning_rate": 0.00014955131103758582, + "loss": 1.0918, + "step": 1926 + }, + { + "epoch": 0.36, + "learning_rate": 0.000149499453669694, + "loss": 1.2437, + "step": 1927 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014944757866444956, + "loss": 1.1693, + "step": 1928 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001493956860403363, + "loss": 1.1655, + "step": 1929 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014934377581584424, + "loss": 1.198, + "step": 1930 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014929184800946976, + "loss": 1.1534, + "step": 1931 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001492399026397155, + "loss": 1.2215, + "step": 1932 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014918793972509026, + "loss": 1.2677, + "step": 1933 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001491359592841092, + "loss": 1.1819, + "step": 1934 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014908396133529373, + "loss": 1.2181, + "step": 1935 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014903194589717134, + "loss": 1.2934, + "step": 1936 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014897991298827595, + "loss": 1.1545, + "step": 1937 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014892786262714756, + "loss": 1.2464, + "step": 1938 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001488757948323325, + "loss": 1.2632, + "step": 1939 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014882370962238323, + "loss": 1.1012, + "step": 1940 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014877160701585847, + "loss": 1.1842, + "step": 1941 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014871948703132305, + "loss": 1.2288, + "step": 1942 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014866734968734815, + "loss": 1.0926, + "step": 1943 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014861519500251096, + "loss": 1.1781, + "step": 1944 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014856302299539494, + "loss": 1.1532, + "step": 1945 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001485108336845898, + "loss": 1.0978, + "step": 1946 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001484586270886912, + "loss": 1.2654, + "step": 1947 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014840640322630113, + "loss": 1.2137, + "step": 1948 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001483541621160277, + "loss": 1.0873, + "step": 1949 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014830190377648517, + "loss": 1.2027, + "step": 1950 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001482496282262939, + "loss": 1.1771, + "step": 1951 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014819733548408038, + "loss": 1.0771, + "step": 1952 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014814502556847732, + "loss": 1.1594, + "step": 1953 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014809269849812344, + "loss": 1.1016, + "step": 1954 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014804035429166358, + "loss": 1.1045, + "step": 1955 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014798799296774877, + "loss": 1.0824, + "step": 1956 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001479356145450361, + "loss": 1.139, + "step": 1957 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014788321904218865, + "loss": 1.1904, + "step": 1958 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014783080647787575, + "loss": 1.069, + "step": 1959 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014777837687077273, + "loss": 1.1481, + "step": 1960 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014772593023956097, + "loss": 1.2412, + "step": 1961 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014767346660292797, + "loss": 1.1434, + "step": 1962 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014762098597956725, + "loss": 1.11, + "step": 1963 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014756848838817836, + "loss": 1.2569, + "step": 1964 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014751597384746702, + "loss": 1.3318, + "step": 1965 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014746344237614482, + "loss": 1.1718, + "step": 1966 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014741089399292952, + "loss": 1.0122, + "step": 1967 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001473583287165448, + "loss": 1.1295, + "step": 1968 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014730574656572043, + "loss": 1.1801, + "step": 1969 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014725314755919218, + "loss": 1.118, + "step": 1970 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001472005317157018, + "loss": 1.1714, + "step": 1971 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014714789905399708, + "loss": 1.2643, + "step": 1972 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014709524959283175, + "loss": 1.152, + "step": 1973 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014704258335096557, + "loss": 1.2183, + "step": 1974 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014698990034716424, + "loss": 1.3027, + "step": 1975 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014693720060019954, + "loss": 1.2305, + "step": 1976 + }, + { + "epoch": 0.36, + "learning_rate": 0.000146884484128849, + "loss": 1.2388, + "step": 1977 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014683175095189633, + "loss": 1.0385, + "step": 1978 + }, + { + "epoch": 0.36, + "learning_rate": 0.00014677900108813112, + "loss": 1.0735, + "step": 1979 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001467262345563488, + "loss": 1.1125, + "step": 1980 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014667345137535094, + "loss": 1.0643, + "step": 1981 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014662065156394484, + "loss": 1.2534, + "step": 1982 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014656783514094385, + "loss": 1.1415, + "step": 1983 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014651500212516723, + "loss": 1.2329, + "step": 1984 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014646215253544007, + "loss": 1.0278, + "step": 1985 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001464092863905935, + "loss": 1.1046, + "step": 1986 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001463564037094644, + "loss": 1.097, + "step": 1987 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014630350451089562, + "loss": 1.3219, + "step": 1988 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014625058881373595, + "loss": 1.3264, + "step": 1989 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014619765663683996, + "loss": 1.262, + "step": 1990 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001461447079990681, + "loss": 1.2117, + "step": 1991 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001460917429192868, + "loss": 1.164, + "step": 1992 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001460387614163682, + "loss": 1.1638, + "step": 1993 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014598576350919034, + "loss": 1.2814, + "step": 1994 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014593274921663717, + "loss": 1.1794, + "step": 1995 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001458797185575984, + "loss": 1.1822, + "step": 1996 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001458266715509696, + "loss": 1.1143, + "step": 1997 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001457736082156522, + "loss": 1.1284, + "step": 1998 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014572052857055333, + "loss": 1.2232, + "step": 1999 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001456674326345861, + "loss": 1.1815, + "step": 2000 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014561432042666935, + "loss": 1.1345, + "step": 2001 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014556119196572763, + "loss": 1.0504, + "step": 2002 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014550804727069143, + "loss": 1.2016, + "step": 2003 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001454548863604969, + "loss": 1.2157, + "step": 2004 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014540170925408607, + "loss": 1.2171, + "step": 2005 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014534851597040665, + "loss": 1.2835, + "step": 2006 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014529530652841217, + "loss": 1.2192, + "step": 2007 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014524208094706192, + "loss": 1.1174, + "step": 2008 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001451888392453209, + "loss": 1.2283, + "step": 2009 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001451355814421599, + "loss": 1.2266, + "step": 2010 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014508230755655538, + "loss": 1.2042, + "step": 2011 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014502901760748965, + "loss": 1.1187, + "step": 2012 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014497571161395061, + "loss": 1.2553, + "step": 2013 + }, + { + "epoch": 0.37, + "learning_rate": 0.000144922389594932, + "loss": 1.1884, + "step": 2014 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001448690515694331, + "loss": 1.1478, + "step": 2015 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001448156975564591, + "loss": 1.151, + "step": 2016 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014476232757502076, + "loss": 1.2284, + "step": 2017 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001447089416441345, + "loss": 1.265, + "step": 2018 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014465553978282257, + "loss": 1.2493, + "step": 2019 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014460212201011276, + "loss": 1.1118, + "step": 2020 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014454868834503853, + "loss": 1.2118, + "step": 2021 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014449523880663914, + "loss": 1.1419, + "step": 2022 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014444177341395935, + "loss": 1.0892, + "step": 2023 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014438829218604962, + "loss": 1.0627, + "step": 2024 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014433479514196616, + "loss": 1.0854, + "step": 2025 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001442812823007706, + "loss": 1.1566, + "step": 2026 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014422775368153038, + "loss": 1.1685, + "step": 2027 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014417420930331852, + "loss": 1.3243, + "step": 2028 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014412064918521358, + "loss": 1.2063, + "step": 2029 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014406707334629982, + "loss": 1.2416, + "step": 2030 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001440134818056671, + "loss": 1.2353, + "step": 2031 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001439598745824108, + "loss": 1.1128, + "step": 2032 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014390625169563188, + "loss": 1.1628, + "step": 2033 + }, + { + "epoch": 0.37, + "learning_rate": 0.000143852613164437, + "loss": 1.2055, + "step": 2034 + }, + { + "epoch": 0.37, + "learning_rate": 0.00014379895900793832, + "loss": 1.1262, + "step": 2035 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014374528924525354, + "loss": 1.0998, + "step": 2036 + }, + { + "epoch": 0.38, + "learning_rate": 0.000143691603895506, + "loss": 1.0355, + "step": 2037 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001436379029778245, + "loss": 1.2082, + "step": 2038 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001435841865113434, + "loss": 1.1736, + "step": 2039 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014353045451520277, + "loss": 1.16, + "step": 2040 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014347670700854797, + "loss": 1.2142, + "step": 2041 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014342294401052997, + "loss": 1.1933, + "step": 2042 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014336916554030538, + "loss": 1.203, + "step": 2043 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001433153716170361, + "loss": 1.1386, + "step": 2044 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001432615622598898, + "loss": 1.2912, + "step": 2045 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014320773748803943, + "loss": 1.1502, + "step": 2046 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014315389732066352, + "loss": 1.2381, + "step": 2047 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014310004177694611, + "loss": 1.1459, + "step": 2048 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001430461708760767, + "loss": 1.1796, + "step": 2049 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014299228463725017, + "loss": 1.0897, + "step": 2050 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014293838307966703, + "loss": 1.1646, + "step": 2051 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014288446622253312, + "loss": 1.2261, + "step": 2052 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001428305340850598, + "loss": 1.3285, + "step": 2053 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014277658668646382, + "loss": 1.1648, + "step": 2054 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014272262404596746, + "loss": 1.0389, + "step": 2055 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014266864618279828, + "loss": 1.153, + "step": 2056 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001426146531161894, + "loss": 1.0344, + "step": 2057 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014256064486537935, + "loss": 1.1058, + "step": 2058 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014250662144961197, + "loss": 1.1649, + "step": 2059 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014245258288813655, + "loss": 1.3766, + "step": 2060 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014239852920020787, + "loss": 1.1357, + "step": 2061 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014234446040508597, + "loss": 1.1765, + "step": 2062 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014229037652203628, + "loss": 1.1883, + "step": 2063 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014223627757032977, + "loss": 1.3059, + "step": 2064 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014218216356924254, + "loss": 1.0891, + "step": 2065 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014212803453805624, + "loss": 1.0636, + "step": 2066 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014207389049605777, + "loss": 1.1118, + "step": 2067 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014201973146253945, + "loss": 1.1708, + "step": 2068 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014196555745679888, + "loss": 1.3085, + "step": 2069 + }, + { + "epoch": 0.38, + "learning_rate": 0.000141911368498139, + "loss": 1.1189, + "step": 2070 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014185716460586814, + "loss": 1.1788, + "step": 2071 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014180294579929992, + "loss": 1.1917, + "step": 2072 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001417487120977532, + "loss": 1.1913, + "step": 2073 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014169446352055228, + "loss": 1.0902, + "step": 2074 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014164020008702665, + "loss": 1.078, + "step": 2075 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014158592181651115, + "loss": 1.2039, + "step": 2076 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001415316287283459, + "loss": 1.2646, + "step": 2077 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001414773208418763, + "loss": 1.3399, + "step": 2078 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014142299817645297, + "loss": 1.0572, + "step": 2079 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001413686607514319, + "loss": 1.1533, + "step": 2080 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014131430858617424, + "loss": 1.2233, + "step": 2081 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014125994170004644, + "loss": 1.2068, + "step": 2082 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014120556011242022, + "loss": 1.1038, + "step": 2083 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014115116384267243, + "loss": 1.1185, + "step": 2084 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001410967529101853, + "loss": 1.2832, + "step": 2085 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014104232733434617, + "loss": 1.1352, + "step": 2086 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014098788713454768, + "loss": 1.2236, + "step": 2087 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001409334323301876, + "loss": 1.3005, + "step": 2088 + }, + { + "epoch": 0.38, + "learning_rate": 0.00014087896294066892, + "loss": 1.1701, + "step": 2089 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014082447898539993, + "loss": 1.1391, + "step": 2090 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014076998048379402, + "loss": 1.2062, + "step": 2091 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001407154674552697, + "loss": 1.2521, + "step": 2092 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014066093991925077, + "loss": 1.1474, + "step": 2093 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014060639789516619, + "loss": 1.2104, + "step": 2094 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014055184140245, + "loss": 1.1568, + "step": 2095 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014049727046054148, + "loss": 1.2198, + "step": 2096 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014044268508888504, + "loss": 1.3989, + "step": 2097 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014038808530693017, + "loss": 1.296, + "step": 2098 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014033347113413157, + "loss": 1.2515, + "step": 2099 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014027884258994904, + "loss": 1.2295, + "step": 2100 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014022419969384747, + "loss": 1.0794, + "step": 2101 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014016954246529696, + "loss": 1.2837, + "step": 2102 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014011487092377257, + "loss": 1.1345, + "step": 2103 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001400601850887546, + "loss": 1.2329, + "step": 2104 + }, + { + "epoch": 0.39, + "learning_rate": 0.00014000548497972837, + "loss": 1.1129, + "step": 2105 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013995077061618427, + "loss": 1.2025, + "step": 2106 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013989604201761782, + "loss": 1.1743, + "step": 2107 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001398412992035296, + "loss": 1.1322, + "step": 2108 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013978654219342523, + "loss": 1.1265, + "step": 2109 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013973177100681542, + "loss": 1.0796, + "step": 2110 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001396769856632159, + "loss": 1.1701, + "step": 2111 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013962218618214747, + "loss": 1.166, + "step": 2112 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013956737258313595, + "loss": 1.0792, + "step": 2113 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013951254488571218, + "loss": 1.3418, + "step": 2114 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013945770310941209, + "loss": 1.2416, + "step": 2115 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013940284727377656, + "loss": 1.1036, + "step": 2116 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013934797739835144, + "loss": 1.1451, + "step": 2117 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001392930935026877, + "loss": 1.1262, + "step": 2118 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013923819560634126, + "loss": 1.2163, + "step": 2119 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013918328372887294, + "loss": 1.0765, + "step": 2120 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001391283578898487, + "loss": 1.213, + "step": 2121 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013907341810883933, + "loss": 1.2957, + "step": 2122 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013901846440542064, + "loss": 1.2726, + "step": 2123 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001389634967991735, + "loss": 1.2049, + "step": 2124 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013890851530968355, + "loss": 1.212, + "step": 2125 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013885351995654148, + "loss": 1.0843, + "step": 2126 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013879851075934295, + "loss": 1.2288, + "step": 2127 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013874348773768846, + "loss": 1.237, + "step": 2128 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001386884509111835, + "loss": 1.0991, + "step": 2129 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013863340029943857, + "loss": 1.1858, + "step": 2130 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013857833592206882, + "loss": 1.1367, + "step": 2131 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013852325779869457, + "loss": 1.1011, + "step": 2132 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013846816594894086, + "loss": 1.2287, + "step": 2133 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013841306039243776, + "loss": 1.1327, + "step": 2134 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001383579411488201, + "loss": 1.0768, + "step": 2135 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013830280823772767, + "loss": 1.074, + "step": 2136 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001382476616788051, + "loss": 1.2397, + "step": 2137 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001381925014917019, + "loss": 1.1157, + "step": 2138 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013813732769607238, + "loss": 1.1127, + "step": 2139 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001380821403115758, + "loss": 1.2264, + "step": 2140 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013802693935787618, + "loss": 1.2599, + "step": 2141 + }, + { + "epoch": 0.39, + "learning_rate": 0.00013797172485464237, + "loss": 1.1145, + "step": 2142 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001379164968215481, + "loss": 0.9969, + "step": 2143 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013786125527827188, + "loss": 1.1128, + "step": 2144 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013780600024449702, + "loss": 1.2391, + "step": 2145 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001377507317399118, + "loss": 1.2668, + "step": 2146 + }, + { + "epoch": 0.4, + "learning_rate": 0.000137695449784209, + "loss": 1.0677, + "step": 2147 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013764015439708641, + "loss": 1.2158, + "step": 2148 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013758484559824665, + "loss": 0.9452, + "step": 2149 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001375295234073969, + "loss": 1.2596, + "step": 2150 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013747418784424933, + "loss": 1.1894, + "step": 2151 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013741883892852072, + "loss": 1.0541, + "step": 2152 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001373634766799327, + "loss": 1.23, + "step": 2153 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013730810111821156, + "loss": 1.0779, + "step": 2154 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001372527122630885, + "loss": 1.0471, + "step": 2155 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013719731013429924, + "loss": 1.1311, + "step": 2156 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013714189475158442, + "loss": 1.1803, + "step": 2157 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013708646613468925, + "loss": 1.0539, + "step": 2158 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001370310243033638, + "loss": 1.1649, + "step": 2159 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013697556927736275, + "loss": 1.1642, + "step": 2160 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013692010107644546, + "loss": 1.1245, + "step": 2161 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013686461972037609, + "loss": 1.1506, + "step": 2162 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013680912522892342, + "loss": 1.2176, + "step": 2163 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013675361762186086, + "loss": 1.096, + "step": 2164 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013669809691896668, + "loss": 0.9321, + "step": 2165 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013664256314002354, + "loss": 1.1143, + "step": 2166 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013658701630481898, + "loss": 1.1739, + "step": 2167 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013653145643314514, + "loss": 1.179, + "step": 2168 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013647588354479876, + "loss": 1.1639, + "step": 2169 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013642029765958125, + "loss": 1.1338, + "step": 2170 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013636469879729863, + "loss": 1.2208, + "step": 2171 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013630908697776154, + "loss": 1.3334, + "step": 2172 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013625346222078533, + "loss": 1.0992, + "step": 2173 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001361978245461898, + "loss": 1.1338, + "step": 2174 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013614217397379946, + "loss": 1.2534, + "step": 2175 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013608651052344342, + "loss": 1.1597, + "step": 2176 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013603083421495534, + "loss": 1.0833, + "step": 2177 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013597514506817347, + "loss": 1.2514, + "step": 2178 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013591944310294065, + "loss": 1.3631, + "step": 2179 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013586372833910423, + "loss": 1.0257, + "step": 2180 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001358080007965162, + "loss": 1.1199, + "step": 2181 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013575226049503312, + "loss": 1.1184, + "step": 2182 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013569650745451592, + "loss": 1.1917, + "step": 2183 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001356407416948303, + "loss": 1.1305, + "step": 2184 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013558496323584632, + "loss": 1.1415, + "step": 2185 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013552917209743869, + "loss": 1.1899, + "step": 2186 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013547336829948653, + "loss": 1.1525, + "step": 2187 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013541755186187356, + "loss": 1.2563, + "step": 2188 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013536172280448794, + "loss": 1.168, + "step": 2189 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001353058811472223, + "loss": 1.2115, + "step": 2190 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013525002690997393, + "loss": 1.044, + "step": 2191 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013519416011264435, + "loss": 1.0153, + "step": 2192 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001351382807751398, + "loss": 1.1562, + "step": 2193 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001350823889173708, + "loss": 1.1503, + "step": 2194 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013502648455925243, + "loss": 1.1598, + "step": 2195 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013497056772070418, + "loss": 1.2215, + "step": 2196 + }, + { + "epoch": 0.4, + "learning_rate": 0.00013491463842165008, + "loss": 1.2096, + "step": 2197 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001348586966820184, + "loss": 1.1895, + "step": 2198 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013480274252174206, + "loss": 1.2395, + "step": 2199 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013474677596075836, + "loss": 1.1184, + "step": 2200 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013469079701900887, + "loss": 1.1917, + "step": 2201 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001346348057164397, + "loss": 1.2257, + "step": 2202 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001345788020730014, + "loss": 1.1542, + "step": 2203 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013452278610864878, + "loss": 1.3057, + "step": 2204 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013446675784334116, + "loss": 1.0866, + "step": 2205 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013441071729704225, + "loss": 1.1352, + "step": 2206 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013435466448971997, + "loss": 1.1386, + "step": 2207 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001342985994413468, + "loss": 1.1704, + "step": 2208 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001342425221718995, + "loss": 1.4093, + "step": 2209 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013418643270135915, + "loss": 1.2635, + "step": 2210 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013413033104971126, + "loss": 1.1196, + "step": 2211 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013407421723694567, + "loss": 1.2798, + "step": 2212 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013401809128305643, + "loss": 1.2037, + "step": 2213 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013396195320804208, + "loss": 1.2052, + "step": 2214 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001339058030319054, + "loss": 1.2119, + "step": 2215 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013384964077465346, + "loss": 1.2989, + "step": 2216 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013379346645629768, + "loss": 1.1364, + "step": 2217 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013373728009685376, + "loss": 1.2124, + "step": 2218 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013368108171634167, + "loss": 1.3454, + "step": 2219 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013362487133478573, + "loss": 1.2772, + "step": 2220 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013356864897221446, + "loss": 1.1861, + "step": 2221 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001335124146486607, + "loss": 1.1008, + "step": 2222 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013345616838416152, + "loss": 1.1255, + "step": 2223 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001333999101987582, + "loss": 1.1178, + "step": 2224 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013334364011249642, + "loss": 1.0621, + "step": 2225 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013328735814542597, + "loss": 1.2475, + "step": 2226 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013323106431760087, + "loss": 1.3147, + "step": 2227 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013317475864907947, + "loss": 1.0659, + "step": 2228 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001331184411599242, + "loss": 1.1849, + "step": 2229 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001330621118702018, + "loss": 1.2221, + "step": 2230 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013300577079998325, + "loss": 1.1645, + "step": 2231 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001329494179693436, + "loss": 1.2079, + "step": 2232 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013289305339836215, + "loss": 1.2521, + "step": 2233 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013283667710712243, + "loss": 1.1597, + "step": 2234 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001327802891157121, + "loss": 1.1276, + "step": 2235 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013272388944422298, + "loss": 1.0833, + "step": 2236 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001326674781127511, + "loss": 1.3674, + "step": 2237 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013261105514139656, + "loss": 1.1939, + "step": 2238 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013255462055026369, + "loss": 1.179, + "step": 2239 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013249817435946092, + "loss": 1.1928, + "step": 2240 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013244171658910084, + "loss": 1.0918, + "step": 2241 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013238524725930014, + "loss": 1.1268, + "step": 2242 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013232876639017964, + "loss": 1.1262, + "step": 2243 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013227227400186426, + "loss": 1.2725, + "step": 2244 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013221577011448304, + "loss": 1.2306, + "step": 2245 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001321592547481691, + "loss": 1.1869, + "step": 2246 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013210272792305968, + "loss": 1.1312, + "step": 2247 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013204618965929608, + "loss": 1.2226, + "step": 2248 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013198963997702366, + "loss": 1.1567, + "step": 2249 + }, + { + "epoch": 0.41, + "learning_rate": 0.00013193307889639193, + "loss": 1.1627, + "step": 2250 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001318765064375543, + "loss": 1.1909, + "step": 2251 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001318199226206684, + "loss": 0.9967, + "step": 2252 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013176332746589586, + "loss": 1.048, + "step": 2253 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013170672099340226, + "loss": 1.205, + "step": 2254 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013165010322335733, + "loss": 1.2069, + "step": 2255 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001315934741759348, + "loss": 1.3226, + "step": 2256 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013153683387131233, + "loss": 1.1234, + "step": 2257 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013148018232967171, + "loss": 1.1628, + "step": 2258 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001314235195711987, + "loss": 1.2717, + "step": 2259 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013136684561608296, + "loss": 1.2405, + "step": 2260 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013131016048451832, + "loss": 1.2467, + "step": 2261 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001312534641967024, + "loss": 0.9505, + "step": 2262 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013119675677283695, + "loss": 1.2074, + "step": 2263 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001311400382331276, + "loss": 1.1934, + "step": 2264 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013108330859778393, + "loss": 1.1102, + "step": 2265 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013102656788701954, + "loss": 1.1612, + "step": 2266 + }, + { + "epoch": 0.42, + "learning_rate": 0.000130969816121052, + "loss": 1.2512, + "step": 2267 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013091305332010267, + "loss": 1.1914, + "step": 2268 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013085627950439697, + "loss": 1.1964, + "step": 2269 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013079949469416423, + "loss": 1.1138, + "step": 2270 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013074269890963767, + "loss": 1.1724, + "step": 2271 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013068589217105441, + "loss": 1.1047, + "step": 2272 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013062907449865548, + "loss": 1.0098, + "step": 2273 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013057224591268588, + "loss": 1.1356, + "step": 2274 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001305154064333944, + "loss": 1.1933, + "step": 2275 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001304585560810337, + "loss": 1.1862, + "step": 2276 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013040169487586043, + "loss": 1.2482, + "step": 2277 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013034482283813501, + "loss": 1.2311, + "step": 2278 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013028793998812174, + "loss": 1.1315, + "step": 2279 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013023104634608883, + "loss": 1.1464, + "step": 2280 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001301741419323082, + "loss": 1.1436, + "step": 2281 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013011722676705575, + "loss": 1.1881, + "step": 2282 + }, + { + "epoch": 0.42, + "learning_rate": 0.00013006030087061118, + "loss": 1.1082, + "step": 2283 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001300033642632579, + "loss": 1.2172, + "step": 2284 + }, + { + "epoch": 0.42, + "learning_rate": 0.00012994641696528332, + "loss": 1.1622, + "step": 2285 + }, + { + "epoch": 0.42, + "learning_rate": 0.00012988945899697853, + "loss": 1.1371, + "step": 2286 + }, + { + "epoch": 0.42, + "learning_rate": 0.00012983249037863844, + "loss": 1.1989, + "step": 2287 + }, + { + "epoch": 0.42, + "learning_rate": 0.00012977551113056178, + "loss": 1.0517, + "step": 2288 + }, + { + "epoch": 0.42, + "learning_rate": 0.00012971852127305108, + "loss": 1.1667, + "step": 2289 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001296615208264126, + "loss": 1.1243, + "step": 2290 + }, + { + "epoch": 0.42, + "learning_rate": 0.00012960450981095643, + "loss": 1.193, + "step": 2291 + }, + { + "epoch": 0.42, + "learning_rate": 0.00012954748824699634, + "loss": 1.1945, + "step": 2292 + }, + { + "epoch": 0.42, + "learning_rate": 0.00012949045615484996, + "loss": 1.2369, + "step": 2293 + }, + { + "epoch": 0.42, + "learning_rate": 0.00012943341355483865, + "loss": 1.2569, + "step": 2294 + }, + { + "epoch": 0.42, + "learning_rate": 0.00012937636046728748, + "loss": 1.2559, + "step": 2295 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001293192969125252, + "loss": 1.1525, + "step": 2296 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001292622229108845, + "loss": 1.2324, + "step": 2297 + }, + { + "epoch": 0.42, + "learning_rate": 0.00012920513848270148, + "loss": 1.2584, + "step": 2298 + }, + { + "epoch": 0.42, + "learning_rate": 0.00012914804364831623, + "loss": 1.1816, + "step": 2299 + }, + { + "epoch": 0.42, + "learning_rate": 0.00012909093842807247, + "loss": 1.2349, + "step": 2300 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001290338228423175, + "loss": 1.0238, + "step": 2301 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001289766969114025, + "loss": 1.1712, + "step": 2302 + }, + { + "epoch": 0.42, + "learning_rate": 0.00012891956065568216, + "loss": 1.1654, + "step": 2303 + }, + { + "epoch": 0.42, + "learning_rate": 0.00012886241409551497, + "loss": 1.2209, + "step": 2304 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001288052572512631, + "loss": 1.27, + "step": 2305 + }, + { + "epoch": 0.42, + "learning_rate": 0.00012874809014329228, + "loss": 1.1358, + "step": 2306 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012869091279197196, + "loss": 1.2087, + "step": 2307 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012863372521767526, + "loss": 1.0351, + "step": 2308 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012857652744077892, + "loss": 1.1844, + "step": 2309 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012851931948166327, + "loss": 1.2499, + "step": 2310 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012846210136071236, + "loss": 1.1632, + "step": 2311 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001284048730983138, + "loss": 1.075, + "step": 2312 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001283476347148588, + "loss": 1.1557, + "step": 2313 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012829038623074222, + "loss": 0.9654, + "step": 2314 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012823312766636254, + "loss": 1.103, + "step": 2315 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012817585904212174, + "loss": 1.1527, + "step": 2316 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012811858037842542, + "loss": 1.184, + "step": 2317 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012806129169568283, + "loss": 1.1292, + "step": 2318 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012800399301430676, + "loss": 1.1345, + "step": 2319 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012794668435471345, + "loss": 1.3219, + "step": 2320 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012788936573732281, + "loss": 1.1224, + "step": 2321 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001278320371825583, + "loss": 1.1546, + "step": 2322 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001277746987108469, + "loss": 1.2905, + "step": 2323 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001277173503426191, + "loss": 1.1957, + "step": 2324 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012765999209830888, + "loss": 1.127, + "step": 2325 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012760262399835384, + "loss": 1.2173, + "step": 2326 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012754524606319506, + "loss": 1.1525, + "step": 2327 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012748785831327708, + "loss": 1.2898, + "step": 2328 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012743046076904793, + "loss": 1.1994, + "step": 2329 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012737305345095925, + "loss": 1.1015, + "step": 2330 + }, + { + "epoch": 0.43, + "learning_rate": 0.000127315636379466, + "loss": 1.1919, + "step": 2331 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001272582095750267, + "loss": 1.1455, + "step": 2332 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012720077305810337, + "loss": 1.1077, + "step": 2333 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012714332684916145, + "loss": 1.1889, + "step": 2334 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012708587096866975, + "loss": 1.0604, + "step": 2335 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012702840543710073, + "loss": 1.1839, + "step": 2336 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012697093027493007, + "loss": 1.175, + "step": 2337 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012691344550263701, + "loss": 1.146, + "step": 2338 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012685595114070422, + "loss": 1.1522, + "step": 2339 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012679844720961774, + "loss": 1.2195, + "step": 2340 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012674093372986697, + "loss": 1.3952, + "step": 2341 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012668341072194484, + "loss": 1.1336, + "step": 2342 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001266258782063476, + "loss": 1.0566, + "step": 2343 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012656833620357485, + "loss": 1.0786, + "step": 2344 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012651078473412968, + "loss": 1.1571, + "step": 2345 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012645322381851844, + "loss": 1.1982, + "step": 2346 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001263956534772509, + "loss": 1.1808, + "step": 2347 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001263380737308402, + "loss": 1.2683, + "step": 2348 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012628048459980282, + "loss": 1.1929, + "step": 2349 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012622288610465856, + "loss": 1.2234, + "step": 2350 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012616527826593058, + "loss": 1.1556, + "step": 2351 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001261076611041454, + "loss": 1.1436, + "step": 2352 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012605003463983272, + "loss": 1.1422, + "step": 2353 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001259923988935258, + "loss": 1.3158, + "step": 2354 + }, + { + "epoch": 0.43, + "learning_rate": 0.000125934753885761, + "loss": 1.2412, + "step": 2355 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012587709963707804, + "loss": 1.0814, + "step": 2356 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012581943616801996, + "loss": 1.0459, + "step": 2357 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012576176349913303, + "loss": 1.2729, + "step": 2358 + }, + { + "epoch": 0.43, + "learning_rate": 0.00012570408165096689, + "loss": 1.21, + "step": 2359 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001256463906440744, + "loss": 1.192, + "step": 2360 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012558869049901158, + "loss": 1.1511, + "step": 2361 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001255309812363379, + "loss": 1.1182, + "step": 2362 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012547326287661598, + "loss": 1.1768, + "step": 2363 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012541553544041163, + "loss": 1.2487, + "step": 2364 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012535779894829398, + "loss": 1.2886, + "step": 2365 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012530005342083537, + "loss": 1.2132, + "step": 2366 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012524229887861133, + "loss": 1.2176, + "step": 2367 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012518453534220064, + "loss": 1.1987, + "step": 2368 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012512676283218518, + "loss": 1.1494, + "step": 2369 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012506898136915019, + "loss": 1.2431, + "step": 2370 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012501119097368398, + "loss": 1.168, + "step": 2371 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012495339166637812, + "loss": 1.1926, + "step": 2372 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012489558346782728, + "loss": 1.2016, + "step": 2373 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012483776639862938, + "loss": 1.254, + "step": 2374 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001247799404793854, + "loss": 1.0338, + "step": 2375 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012472210573069955, + "loss": 1.1688, + "step": 2376 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012466426217317921, + "loss": 1.3834, + "step": 2377 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001246064098274348, + "loss": 1.1624, + "step": 2378 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012454854871407994, + "loss": 1.3367, + "step": 2379 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012449067885373133, + "loss": 1.1309, + "step": 2380 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001244328002670089, + "loss": 1.1796, + "step": 2381 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012437491297453553, + "loss": 1.1653, + "step": 2382 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001243170169969373, + "loss": 1.1182, + "step": 2383 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012425911235484333, + "loss": 1.1529, + "step": 2384 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012420119906888597, + "loss": 1.2708, + "step": 2385 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001241432771597004, + "loss": 1.1637, + "step": 2386 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012408534664792512, + "loss": 1.238, + "step": 2387 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001240274075542015, + "loss": 1.119, + "step": 2388 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012396945989917411, + "loss": 1.1852, + "step": 2389 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012391150370349055, + "loss": 1.1691, + "step": 2390 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012385353898780135, + "loss": 1.2312, + "step": 2391 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012379556577276016, + "loss": 1.1804, + "step": 2392 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012373758407902374, + "loss": 1.0994, + "step": 2393 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001236795939272517, + "loss": 1.1909, + "step": 2394 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012362159533810672, + "loss": 1.1752, + "step": 2395 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012356358833225464, + "loss": 1.1164, + "step": 2396 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012350557293036406, + "loss": 1.1759, + "step": 2397 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012344754915310678, + "loss": 1.0387, + "step": 2398 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012338951702115737, + "loss": 1.2502, + "step": 2399 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001233314765551936, + "loss": 1.0502, + "step": 2400 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012327342777589606, + "loss": 1.0923, + "step": 2401 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012321537070394836, + "loss": 1.1018, + "step": 2402 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012315730536003702, + "loss": 1.2402, + "step": 2403 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001230992317648516, + "loss": 1.1782, + "step": 2404 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012304114993908448, + "loss": 1.2401, + "step": 2405 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012298305990343108, + "loss": 1.1519, + "step": 2406 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012292496167858966, + "loss": 1.1616, + "step": 2407 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012286685528526146, + "loss": 1.107, + "step": 2408 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012280874074415064, + "loss": 1.2418, + "step": 2409 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012275061807596416, + "loss": 1.1631, + "step": 2410 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012269248730141193, + "loss": 1.0946, + "step": 2411 + }, + { + "epoch": 0.44, + "learning_rate": 0.00012263434844120687, + "loss": 1.1494, + "step": 2412 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001225762015160646, + "loss": 1.215, + "step": 2413 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001225180465467037, + "loss": 1.2676, + "step": 2414 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012245988355384562, + "loss": 1.1574, + "step": 2415 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001224017125582146, + "loss": 1.0766, + "step": 2416 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012234353358053784, + "loss": 1.199, + "step": 2417 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012228534664154536, + "loss": 1.1849, + "step": 2418 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012222715176196991, + "loss": 1.1376, + "step": 2419 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001221689489625472, + "loss": 1.0291, + "step": 2420 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012211073826401566, + "loss": 1.041, + "step": 2421 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012205251968711657, + "loss": 1.1085, + "step": 2422 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012199429325259412, + "loss": 1.1747, + "step": 2423 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012193605898119514, + "loss": 1.0536, + "step": 2424 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012187781689366933, + "loss": 1.099, + "step": 2425 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012181956701076918, + "loss": 1.2654, + "step": 2426 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012176130935324991, + "loss": 1.1345, + "step": 2427 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001217030439418696, + "loss": 1.2354, + "step": 2428 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012164477079738901, + "loss": 1.0145, + "step": 2429 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012158648994057167, + "loss": 1.2244, + "step": 2430 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012152820139218389, + "loss": 1.0166, + "step": 2431 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012146990517299466, + "loss": 1.3502, + "step": 2432 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012141160130377578, + "loss": 1.1571, + "step": 2433 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012135328980530176, + "loss": 0.9717, + "step": 2434 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012129497069834977, + "loss": 1.0889, + "step": 2435 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012123664400369972, + "loss": 1.2496, + "step": 2436 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012117830974213428, + "loss": 1.0968, + "step": 2437 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012111996793443874, + "loss": 1.1994, + "step": 2438 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001210616186014011, + "loss": 1.0901, + "step": 2439 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012100326176381208, + "loss": 1.2122, + "step": 2440 + }, + { + "epoch": 0.45, + "learning_rate": 0.000120944897442465, + "loss": 1.2938, + "step": 2441 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012088652565815594, + "loss": 1.1401, + "step": 2442 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012082814643168357, + "loss": 1.1358, + "step": 2443 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012076975978384917, + "loss": 1.1072, + "step": 2444 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012071136573545683, + "loss": 1.0365, + "step": 2445 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001206529643073131, + "loss": 1.2713, + "step": 2446 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012059455552022725, + "loss": 1.0882, + "step": 2447 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012053613939501119, + "loss": 1.1342, + "step": 2448 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012047771595247934, + "loss": 1.1563, + "step": 2449 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012041928521344883, + "loss": 1.1356, + "step": 2450 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012036084719873936, + "loss": 1.1777, + "step": 2451 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012030240192917321, + "loss": 1.2123, + "step": 2452 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012024394942557529, + "loss": 1.1274, + "step": 2453 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012018548970877299, + "loss": 1.2835, + "step": 2454 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012012702279959633, + "loss": 1.1401, + "step": 2455 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012006854871887798, + "loss": 1.2389, + "step": 2456 + }, + { + "epoch": 0.45, + "learning_rate": 0.00012001006748745299, + "loss": 1.1614, + "step": 2457 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011995157912615905, + "loss": 1.1671, + "step": 2458 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011989308365583645, + "loss": 1.1242, + "step": 2459 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011983458109732791, + "loss": 1.1436, + "step": 2460 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011977607147147869, + "loss": 1.1157, + "step": 2461 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011971755479913665, + "loss": 1.0812, + "step": 2462 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011965903110115206, + "loss": 1.1483, + "step": 2463 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011960050039837778, + "loss": 1.1711, + "step": 2464 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011954196271166906, + "loss": 1.1187, + "step": 2465 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011948341806188377, + "loss": 1.0817, + "step": 2466 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011942486646988219, + "loss": 1.1482, + "step": 2467 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011936630795652699, + "loss": 1.1816, + "step": 2468 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011930774254268348, + "loss": 1.0892, + "step": 2469 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011924917024921935, + "loss": 1.1977, + "step": 2470 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011919059109700465, + "loss": 1.086, + "step": 2471 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011913200510691203, + "loss": 1.1638, + "step": 2472 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001190734122998165, + "loss": 1.1315, + "step": 2473 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011901481269659547, + "loss": 1.1125, + "step": 2474 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011895620631812883, + "loss": 1.0866, + "step": 2475 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011889759318529882, + "loss": 1.1153, + "step": 2476 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011883897331899016, + "loss": 1.1876, + "step": 2477 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011878034674008993, + "loss": 1.2004, + "step": 2478 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001187217134694876, + "loss": 1.1519, + "step": 2479 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011866307352807503, + "loss": 1.1258, + "step": 2480 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011860442693674647, + "loss": 1.1298, + "step": 2481 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011854577371639848, + "loss": 1.1432, + "step": 2482 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011848711388793008, + "loss": 1.1132, + "step": 2483 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001184284474722426, + "loss": 1.0624, + "step": 2484 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011836977449023967, + "loss": 0.9779, + "step": 2485 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001183110949628273, + "loss": 1.1777, + "step": 2486 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001182524089109139, + "loss": 1.0849, + "step": 2487 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011819371635541003, + "loss": 1.1167, + "step": 2488 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011813501731722875, + "loss": 1.1642, + "step": 2489 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011807631181728536, + "loss": 1.2354, + "step": 2490 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011801759987649741, + "loss": 1.2188, + "step": 2491 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001179588815157848, + "loss": 0.9772, + "step": 2492 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011790015675606976, + "loss": 1.0416, + "step": 2493 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011784142561827669, + "loss": 1.0628, + "step": 2494 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011778268812333233, + "loss": 1.1692, + "step": 2495 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011772394429216571, + "loss": 1.1143, + "step": 2496 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011766519414570805, + "loss": 1.1468, + "step": 2497 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011760643770489288, + "loss": 0.9961, + "step": 2498 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011754767499065593, + "loss": 1.0917, + "step": 2499 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011748890602393521, + "loss": 1.0489, + "step": 2500 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011743013082567087, + "loss": 1.1545, + "step": 2501 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011737134941680544, + "loss": 1.0226, + "step": 2502 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011731256181828346, + "loss": 1.2031, + "step": 2503 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011725376805105182, + "loss": 1.0091, + "step": 2504 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011719496813605962, + "loss": 1.2643, + "step": 2505 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011713616209425799, + "loss": 1.1435, + "step": 2506 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011707734994660044, + "loss": 1.2261, + "step": 2507 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011701853171404256, + "loss": 1.2295, + "step": 2508 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011695970741754206, + "loss": 1.1587, + "step": 2509 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001169008770780589, + "loss": 1.3099, + "step": 2510 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011684204071655519, + "loss": 1.0988, + "step": 2511 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011678319835399509, + "loss": 1.2338, + "step": 2512 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011672435001134501, + "loss": 1.1452, + "step": 2513 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011666549570957341, + "loss": 1.0466, + "step": 2514 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011660663546965094, + "loss": 1.0122, + "step": 2515 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011654776931255033, + "loss": 1.2486, + "step": 2516 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001164888972592464, + "loss": 1.1329, + "step": 2517 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011643001933071609, + "loss": 1.3244, + "step": 2518 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011637113554793846, + "loss": 1.29, + "step": 2519 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011631224593189461, + "loss": 1.0814, + "step": 2520 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011625335050356771, + "loss": 1.0645, + "step": 2521 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011619444928394312, + "loss": 1.0967, + "step": 2522 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011613554229400809, + "loss": 1.1293, + "step": 2523 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011607662955475204, + "loss": 1.0466, + "step": 2524 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011601771108716646, + "loss": 1.1332, + "step": 2525 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011595878691224473, + "loss": 1.1365, + "step": 2526 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011589985705098245, + "loss": 1.119, + "step": 2527 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011584092152437711, + "loss": 1.0807, + "step": 2528 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011578198035342826, + "loss": 1.0951, + "step": 2529 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011572303355913754, + "loss": 1.1518, + "step": 2530 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011566408116250842, + "loss": 1.096, + "step": 2531 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011560512318454655, + "loss": 1.0141, + "step": 2532 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011554615964625948, + "loss": 1.092, + "step": 2533 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011548719056865671, + "loss": 1.2266, + "step": 2534 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011542821597274978, + "loss": 1.1931, + "step": 2535 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011536923587955218, + "loss": 1.1408, + "step": 2536 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001153102503100793, + "loss": 1.161, + "step": 2537 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011525125928534859, + "loss": 1.2919, + "step": 2538 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011519226282637934, + "loss": 1.0823, + "step": 2539 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011513326095419281, + "loss": 1.1829, + "step": 2540 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011507425368981228, + "loss": 0.8794, + "step": 2541 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011501524105426278, + "loss": 1.2194, + "step": 2542 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011495622306857136, + "loss": 1.2199, + "step": 2543 + }, + { + "epoch": 0.47, + "learning_rate": 0.000114897199753767, + "loss": 1.0803, + "step": 2544 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011483817113088052, + "loss": 1.1077, + "step": 2545 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011477913722094462, + "loss": 1.1179, + "step": 2546 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011472009804499396, + "loss": 1.2917, + "step": 2547 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011466105362406498, + "loss": 1.1322, + "step": 2548 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001146020039791961, + "loss": 0.9531, + "step": 2549 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011454294913142747, + "loss": 1.0986, + "step": 2550 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011448388910180121, + "loss": 1.1656, + "step": 2551 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011442482391136124, + "loss": 1.2494, + "step": 2552 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011436575358115331, + "loss": 1.209, + "step": 2553 + }, + { + "epoch": 0.47, + "learning_rate": 0.000114306678132225, + "loss": 1.2455, + "step": 2554 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011424759758562575, + "loss": 1.1988, + "step": 2555 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011418851196240677, + "loss": 0.9714, + "step": 2556 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001141294212836211, + "loss": 1.1598, + "step": 2557 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011407032557032363, + "loss": 1.1304, + "step": 2558 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001140112248435709, + "loss": 1.1281, + "step": 2559 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011395211912442141, + "loss": 1.2328, + "step": 2560 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011389300843393533, + "loss": 1.076, + "step": 2561 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011383389279317463, + "loss": 1.1091, + "step": 2562 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011377477222320308, + "loss": 1.189, + "step": 2563 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001137156467450861, + "loss": 1.1025, + "step": 2564 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011365651637989099, + "loss": 1.1351, + "step": 2565 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011359738114868674, + "loss": 1.1474, + "step": 2566 + }, + { + "epoch": 0.47, + "learning_rate": 0.000113538241072544, + "loss": 1.1408, + "step": 2567 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001134790961725353, + "loss": 1.2802, + "step": 2568 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011341994646973475, + "loss": 1.2954, + "step": 2569 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011336079198521821, + "loss": 1.2128, + "step": 2570 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011330163274006328, + "loss": 1.101, + "step": 2571 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011324246875534923, + "loss": 1.2731, + "step": 2572 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011318330005215701, + "loss": 1.3144, + "step": 2573 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011312412665156929, + "loss": 1.1286, + "step": 2574 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011306494857467034, + "loss": 1.2133, + "step": 2575 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011300576584254617, + "loss": 1.1499, + "step": 2576 + }, + { + "epoch": 0.47, + "learning_rate": 0.00011294657847628445, + "loss": 1.181, + "step": 2577 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011288738649697442, + "loss": 1.1862, + "step": 2578 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011282818992570702, + "loss": 1.2001, + "step": 2579 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011276898878357489, + "loss": 1.1466, + "step": 2580 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011270978309167215, + "loss": 1.0682, + "step": 2581 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011265057287109466, + "loss": 1.3249, + "step": 2582 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011259135814293986, + "loss": 1.1241, + "step": 2583 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011253213892830676, + "loss": 1.1139, + "step": 2584 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011247291524829605, + "loss": 1.0491, + "step": 2585 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011241368712400991, + "loss": 1.2558, + "step": 2586 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011235445457655218, + "loss": 1.083, + "step": 2587 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011229521762702826, + "loss": 1.2925, + "step": 2588 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001122359762965451, + "loss": 1.2176, + "step": 2589 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001121767306062112, + "loss": 1.1209, + "step": 2590 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011211748057713666, + "loss": 1.1409, + "step": 2591 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001120582262304331, + "loss": 1.0563, + "step": 2592 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011199896758721369, + "loss": 1.1717, + "step": 2593 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011193970466859308, + "loss": 1.1957, + "step": 2594 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011188043749568751, + "loss": 1.0123, + "step": 2595 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001118211660896147, + "loss": 1.0432, + "step": 2596 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011176189047149388, + "loss": 1.09, + "step": 2597 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001117026106624458, + "loss": 1.1628, + "step": 2598 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011164332668359272, + "loss": 1.149, + "step": 2599 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011158403855605828, + "loss": 1.0062, + "step": 2600 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011152474630096775, + "loss": 1.0715, + "step": 2601 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011146544993944776, + "loss": 1.2313, + "step": 2602 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011140614949262642, + "loss": 1.1178, + "step": 2603 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001113468449816334, + "loss": 1.1836, + "step": 2604 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011128753642759961, + "loss": 1.2046, + "step": 2605 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011122822385165761, + "loss": 1.2259, + "step": 2606 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001111689072749413, + "loss": 0.9961, + "step": 2607 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011110958671858596, + "loss": 1.2199, + "step": 2608 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011105026220372836, + "loss": 1.1327, + "step": 2609 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011099093375150673, + "loss": 1.2387, + "step": 2610 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011093160138306054, + "loss": 1.1808, + "step": 2611 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011087226511953077, + "loss": 1.191, + "step": 2612 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011081292498205981, + "loss": 1.0799, + "step": 2613 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011075358099179137, + "loss": 1.217, + "step": 2614 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011069423316987053, + "loss": 1.2011, + "step": 2615 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011063488153744375, + "loss": 1.09, + "step": 2616 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011057552611565887, + "loss": 1.1407, + "step": 2617 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011051616692566506, + "loss": 1.0737, + "step": 2618 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011045680398861284, + "loss": 1.1981, + "step": 2619 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011039743732565404, + "loss": 1.1588, + "step": 2620 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011033806695794189, + "loss": 1.0824, + "step": 2621 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011027869290663083, + "loss": 1.1839, + "step": 2622 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011021931519287669, + "loss": 1.092, + "step": 2623 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011015993383783658, + "loss": 1.1764, + "step": 2624 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011010054886266894, + "loss": 1.2329, + "step": 2625 + }, + { + "epoch": 0.48, + "learning_rate": 0.00011004116028853346, + "loss": 1.13, + "step": 2626 + }, + { + "epoch": 0.48, + "learning_rate": 0.00010998176813659111, + "loss": 1.0489, + "step": 2627 + }, + { + "epoch": 0.48, + "learning_rate": 0.00010992237242800416, + "loss": 1.0591, + "step": 2628 + }, + { + "epoch": 0.48, + "learning_rate": 0.00010986297318393615, + "loss": 1.1447, + "step": 2629 + }, + { + "epoch": 0.48, + "learning_rate": 0.00010980357042555182, + "loss": 1.2702, + "step": 2630 + }, + { + "epoch": 0.48, + "learning_rate": 0.00010974416417401729, + "loss": 1.3087, + "step": 2631 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001096847544504998, + "loss": 1.1993, + "step": 2632 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010962534127616784, + "loss": 1.1942, + "step": 2633 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001095659246721912, + "loss": 1.1287, + "step": 2634 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010950650465974088, + "loss": 1.021, + "step": 2635 + }, + { + "epoch": 0.49, + "learning_rate": 0.000109447081259989, + "loss": 1.1218, + "step": 2636 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010938765449410899, + "loss": 1.2724, + "step": 2637 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010932822438327546, + "loss": 1.026, + "step": 2638 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010926879094866416, + "loss": 1.1191, + "step": 2639 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010920935421145206, + "loss": 1.0635, + "step": 2640 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010914991419281738, + "loss": 1.1212, + "step": 2641 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010909047091393931, + "loss": 1.1606, + "step": 2642 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010903102439599844, + "loss": 1.2255, + "step": 2643 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010897157466017636, + "loss": 1.1133, + "step": 2644 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010891212172765583, + "loss": 1.1296, + "step": 2645 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001088526656196208, + "loss": 1.1277, + "step": 2646 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010879320635725632, + "loss": 1.1991, + "step": 2647 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010873374396174852, + "loss": 1.0973, + "step": 2648 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001086742784542847, + "loss": 1.2157, + "step": 2649 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001086148098560533, + "loss": 1.3286, + "step": 2650 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010855533818824379, + "loss": 1.2743, + "step": 2651 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010849586347204676, + "loss": 1.1059, + "step": 2652 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010843638572865391, + "loss": 1.2247, + "step": 2653 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010837690497925796, + "loss": 1.2212, + "step": 2654 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010831742124505277, + "loss": 1.074, + "step": 2655 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010825793454723325, + "loss": 1.123, + "step": 2656 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010819844490699529, + "loss": 1.0697, + "step": 2657 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010813895234553593, + "loss": 1.178, + "step": 2658 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010807945688405321, + "loss": 1.2779, + "step": 2659 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010801995854374616, + "loss": 1.1658, + "step": 2660 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010796045734581492, + "loss": 1.1164, + "step": 2661 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001079009533114606, + "loss": 1.1152, + "step": 2662 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001078414464618853, + "loss": 1.2467, + "step": 2663 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010778193681829216, + "loss": 1.2853, + "step": 2664 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010772242440188534, + "loss": 1.1149, + "step": 2665 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001076629092338699, + "loss": 1.2443, + "step": 2666 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010760339133545196, + "loss": 1.2211, + "step": 2667 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010754387072783858, + "loss": 1.0768, + "step": 2668 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010748434743223777, + "loss": 1.1974, + "step": 2669 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010742482146985857, + "loss": 1.2262, + "step": 2670 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010736529286191086, + "loss": 1.1593, + "step": 2671 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010730576162960555, + "loss": 1.2016, + "step": 2672 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001072462277941545, + "loss": 1.1254, + "step": 2673 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010718669137677038, + "loss": 1.1648, + "step": 2674 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010712715239866687, + "loss": 1.0671, + "step": 2675 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010706761088105858, + "loss": 1.1664, + "step": 2676 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010700806684516096, + "loss": 1.1348, + "step": 2677 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010694852031219042, + "loss": 1.1004, + "step": 2678 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010688897130336419, + "loss": 1.1889, + "step": 2679 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010682941983990045, + "loss": 1.1503, + "step": 2680 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010676986594301823, + "loss": 1.2464, + "step": 2681 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001067103096339374, + "loss": 1.1898, + "step": 2682 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010665075093387871, + "loss": 1.0809, + "step": 2683 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001065911898640638, + "loss": 1.0276, + "step": 2684 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010653162644571506, + "loss": 1.1817, + "step": 2685 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010647206070005581, + "loss": 1.0228, + "step": 2686 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010641249264831019, + "loss": 1.1234, + "step": 2687 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010635292231170309, + "loss": 1.1554, + "step": 2688 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010629334971146028, + "loss": 1.0802, + "step": 2689 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001062337748688083, + "loss": 1.1457, + "step": 2690 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010617419780497452, + "loss": 1.0177, + "step": 2691 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010611461854118714, + "loss": 1.1415, + "step": 2692 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010605503709867499, + "loss": 1.2212, + "step": 2693 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010599545349866782, + "loss": 1.225, + "step": 2694 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010593586776239614, + "loss": 1.2349, + "step": 2695 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010587627991109112, + "loss": 1.1879, + "step": 2696 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010581668996598482, + "loss": 1.1246, + "step": 2697 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010575709794830994, + "loss": 1.0883, + "step": 2698 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010569750387929998, + "loss": 1.1192, + "step": 2699 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010563790778018912, + "loss": 1.0926, + "step": 2700 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010557830967221227, + "loss": 1.201, + "step": 2701 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010551870957660513, + "loss": 1.1383, + "step": 2702 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010545910751460401, + "loss": 1.2806, + "step": 2703 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010539950350744598, + "loss": 1.233, + "step": 2704 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001053398975763688, + "loss": 1.0361, + "step": 2705 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010528028974261091, + "loss": 1.074, + "step": 2706 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010522068002741136, + "loss": 1.0866, + "step": 2707 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010516106845200999, + "loss": 1.1422, + "step": 2708 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010510145503764726, + "loss": 1.1416, + "step": 2709 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010504183980556419, + "loss": 1.0377, + "step": 2710 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010498222277700261, + "loss": 1.1315, + "step": 2711 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010492260397320483, + "loss": 1.0963, + "step": 2712 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010486298341541393, + "loss": 1.1728, + "step": 2713 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010480336112487357, + "loss": 1.0906, + "step": 2714 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010474373712282793, + "loss": 1.1162, + "step": 2715 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010468411143052194, + "loss": 1.0348, + "step": 2716 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010462448406920108, + "loss": 1.1426, + "step": 2717 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010456485506011138, + "loss": 1.0521, + "step": 2718 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010450522442449952, + "loss": 1.1511, + "step": 2719 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010444559218361277, + "loss": 1.236, + "step": 2720 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010438595835869885, + "loss": 1.1441, + "step": 2721 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010432632297100621, + "loss": 1.2428, + "step": 2722 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010426668604178376, + "loss": 1.0873, + "step": 2723 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010420704759228093, + "loss": 1.1771, + "step": 2724 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010414740764374782, + "loss": 1.1065, + "step": 2725 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010408776621743493, + "loss": 1.1559, + "step": 2726 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010402812333459333, + "loss": 0.982, + "step": 2727 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010396847901647468, + "loss": 1.1638, + "step": 2728 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010390883328433103, + "loss": 1.1073, + "step": 2729 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010384918615941502, + "loss": 1.1431, + "step": 2730 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010378953766297976, + "loss": 1.1682, + "step": 2731 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010372988781627884, + "loss": 0.9678, + "step": 2732 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010367023664056638, + "loss": 1.1385, + "step": 2733 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010361058415709687, + "loss": 1.2598, + "step": 2734 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010355093038712538, + "loss": 1.2064, + "step": 2735 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010349127535190737, + "loss": 1.1375, + "step": 2736 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010343161907269876, + "loss": 1.1605, + "step": 2737 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010337196157075593, + "loss": 1.1404, + "step": 2738 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010331230286733573, + "loss": 1.2019, + "step": 2739 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010325264298369532, + "loss": 1.0539, + "step": 2740 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010319298194109241, + "loss": 1.0801, + "step": 2741 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010313331976078505, + "loss": 1.0284, + "step": 2742 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010307365646403171, + "loss": 1.1523, + "step": 2743 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001030139920720913, + "loss": 1.038, + "step": 2744 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010295432660622304, + "loss": 1.0257, + "step": 2745 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001028946600876866, + "loss": 1.0239, + "step": 2746 + }, + { + "epoch": 0.51, + "learning_rate": 0.000102834992537742, + "loss": 1.1311, + "step": 2747 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010277532397764959, + "loss": 1.1001, + "step": 2748 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001027156544286702, + "loss": 1.0652, + "step": 2749 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010265598391206486, + "loss": 1.256, + "step": 2750 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010259631244909502, + "loss": 1.0919, + "step": 2751 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001025366400610225, + "loss": 1.0199, + "step": 2752 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010247696676910939, + "loss": 1.0619, + "step": 2753 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010241729259461812, + "loss": 1.0408, + "step": 2754 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010235761755881148, + "loss": 1.068, + "step": 2755 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010229794168295245, + "loss": 1.0859, + "step": 2756 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010223826498830444, + "loss": 1.2821, + "step": 2757 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001021785874961311, + "loss": 1.1864, + "step": 2758 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010211890922769631, + "loss": 1.1406, + "step": 2759 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010205923020426434, + "loss": 1.1202, + "step": 2760 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010199955044709965, + "loss": 1.2381, + "step": 2761 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010193986997746697, + "loss": 1.1271, + "step": 2762 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010188018881663126, + "loss": 1.1877, + "step": 2763 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010182050698585781, + "loss": 1.1282, + "step": 2764 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010176082450641207, + "loss": 1.1597, + "step": 2765 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010170114139955975, + "loss": 1.0115, + "step": 2766 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010164145768656679, + "loss": 1.3792, + "step": 2767 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010158177338869932, + "loss": 1.2977, + "step": 2768 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010152208852722374, + "loss": 1.2069, + "step": 2769 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010146240312340656, + "loss": 1.1965, + "step": 2770 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010140271719851453, + "loss": 1.1649, + "step": 2771 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010134303077381464, + "loss": 1.132, + "step": 2772 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010128334387057394, + "loss": 1.1908, + "step": 2773 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010122365651005977, + "loss": 1.3693, + "step": 2774 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001011639687135395, + "loss": 1.1898, + "step": 2775 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001011042805022808, + "loss": 1.0139, + "step": 2776 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010104459189755142, + "loss": 1.3464, + "step": 2777 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001009849029206192, + "loss": 1.0817, + "step": 2778 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001009252135927522, + "loss": 1.1611, + "step": 2779 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010086552393521859, + "loss": 1.2593, + "step": 2780 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001008058339692866, + "loss": 1.1097, + "step": 2781 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010074614371622459, + "loss": 1.2347, + "step": 2782 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001006864531973011, + "loss": 1.0658, + "step": 2783 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010062676243378465, + "loss": 1.1776, + "step": 2784 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001005670714469439, + "loss": 1.1776, + "step": 2785 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010050738025804764, + "loss": 1.2015, + "step": 2786 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010044768888836462, + "loss": 1.1661, + "step": 2787 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010038799735916374, + "loss": 1.134, + "step": 2788 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010032830569171396, + "loss": 1.3523, + "step": 2789 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010026861390728422, + "loss": 1.1579, + "step": 2790 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010020892202714356, + "loss": 1.0133, + "step": 2791 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010014923007256105, + "loss": 1.1, + "step": 2792 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010008953806480575, + "loss": 1.0181, + "step": 2793 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010002984602514678, + "loss": 1.0146, + "step": 2794 + }, + { + "epoch": 0.52, + "learning_rate": 9.997015397485326e-05, + "loss": 1.1794, + "step": 2795 + }, + { + "epoch": 0.52, + "learning_rate": 9.991046193519428e-05, + "loss": 1.117, + "step": 2796 + }, + { + "epoch": 0.52, + "learning_rate": 9.985076992743897e-05, + "loss": 1.1827, + "step": 2797 + }, + { + "epoch": 0.52, + "learning_rate": 9.979107797285648e-05, + "loss": 1.0759, + "step": 2798 + }, + { + "epoch": 0.52, + "learning_rate": 9.97313860927158e-05, + "loss": 1.2887, + "step": 2799 + }, + { + "epoch": 0.52, + "learning_rate": 9.967169430828608e-05, + "loss": 1.1825, + "step": 2800 + }, + { + "epoch": 0.52, + "learning_rate": 9.961200264083627e-05, + "loss": 1.0948, + "step": 2801 + }, + { + "epoch": 0.52, + "learning_rate": 9.95523111116354e-05, + "loss": 1.0327, + "step": 2802 + }, + { + "epoch": 0.52, + "learning_rate": 9.949261974195238e-05, + "loss": 1.1337, + "step": 2803 + }, + { + "epoch": 0.52, + "learning_rate": 9.943292855305612e-05, + "loss": 1.1318, + "step": 2804 + }, + { + "epoch": 0.52, + "learning_rate": 9.937323756621537e-05, + "loss": 1.0624, + "step": 2805 + }, + { + "epoch": 0.52, + "learning_rate": 9.931354680269893e-05, + "loss": 1.183, + "step": 2806 + }, + { + "epoch": 0.52, + "learning_rate": 9.925385628377543e-05, + "loss": 1.1029, + "step": 2807 + }, + { + "epoch": 0.52, + "learning_rate": 9.919416603071341e-05, + "loss": 1.2817, + "step": 2808 + }, + { + "epoch": 0.52, + "learning_rate": 9.913447606478142e-05, + "loss": 1.1089, + "step": 2809 + }, + { + "epoch": 0.52, + "learning_rate": 9.907478640724781e-05, + "loss": 1.051, + "step": 2810 + }, + { + "epoch": 0.52, + "learning_rate": 9.901509707938084e-05, + "loss": 1.3055, + "step": 2811 + }, + { + "epoch": 0.52, + "learning_rate": 9.895540810244863e-05, + "loss": 1.1214, + "step": 2812 + }, + { + "epoch": 0.52, + "learning_rate": 9.889571949771922e-05, + "loss": 1.2766, + "step": 2813 + }, + { + "epoch": 0.52, + "learning_rate": 9.883603128646052e-05, + "loss": 1.1544, + "step": 2814 + }, + { + "epoch": 0.52, + "learning_rate": 9.877634348994027e-05, + "loss": 1.1618, + "step": 2815 + }, + { + "epoch": 0.52, + "learning_rate": 9.87166561294261e-05, + "loss": 1.1811, + "step": 2816 + }, + { + "epoch": 0.52, + "learning_rate": 9.86569692261854e-05, + "loss": 1.1813, + "step": 2817 + }, + { + "epoch": 0.52, + "learning_rate": 9.85972828014855e-05, + "loss": 1.06, + "step": 2818 + }, + { + "epoch": 0.52, + "learning_rate": 9.853759687659346e-05, + "loss": 1.0418, + "step": 2819 + }, + { + "epoch": 0.52, + "learning_rate": 9.847791147277627e-05, + "loss": 1.242, + "step": 2820 + }, + { + "epoch": 0.52, + "learning_rate": 9.841822661130067e-05, + "loss": 1.1247, + "step": 2821 + }, + { + "epoch": 0.52, + "learning_rate": 9.835854231343325e-05, + "loss": 1.2734, + "step": 2822 + }, + { + "epoch": 0.52, + "learning_rate": 9.829885860044028e-05, + "loss": 1.0058, + "step": 2823 + }, + { + "epoch": 0.52, + "learning_rate": 9.823917549358797e-05, + "loss": 1.1912, + "step": 2824 + }, + { + "epoch": 0.52, + "learning_rate": 9.817949301414222e-05, + "loss": 1.1374, + "step": 2825 + }, + { + "epoch": 0.52, + "learning_rate": 9.811981118336875e-05, + "loss": 1.1321, + "step": 2826 + }, + { + "epoch": 0.52, + "learning_rate": 9.806013002253304e-05, + "loss": 1.0034, + "step": 2827 + }, + { + "epoch": 0.52, + "learning_rate": 9.800044955290038e-05, + "loss": 1.2428, + "step": 2828 + }, + { + "epoch": 0.52, + "learning_rate": 9.794076979573567e-05, + "loss": 1.0916, + "step": 2829 + }, + { + "epoch": 0.52, + "learning_rate": 9.78810907723037e-05, + "loss": 1.2001, + "step": 2830 + }, + { + "epoch": 0.52, + "learning_rate": 9.782141250386892e-05, + "loss": 1.0784, + "step": 2831 + }, + { + "epoch": 0.52, + "learning_rate": 9.776173501169557e-05, + "loss": 1.0897, + "step": 2832 + }, + { + "epoch": 0.52, + "learning_rate": 9.77020583170476e-05, + "loss": 1.0899, + "step": 2833 + }, + { + "epoch": 0.52, + "learning_rate": 9.764238244118858e-05, + "loss": 0.9825, + "step": 2834 + }, + { + "epoch": 0.52, + "learning_rate": 9.75827074053819e-05, + "loss": 1.1774, + "step": 2835 + }, + { + "epoch": 0.52, + "learning_rate": 9.752303323089062e-05, + "loss": 1.0733, + "step": 2836 + }, + { + "epoch": 0.52, + "learning_rate": 9.746335993897751e-05, + "loss": 1.2107, + "step": 2837 + }, + { + "epoch": 0.52, + "learning_rate": 9.740368755090498e-05, + "loss": 1.2573, + "step": 2838 + }, + { + "epoch": 0.52, + "learning_rate": 9.734401608793519e-05, + "loss": 1.0779, + "step": 2839 + }, + { + "epoch": 0.52, + "learning_rate": 9.728434557132984e-05, + "loss": 1.1124, + "step": 2840 + }, + { + "epoch": 0.52, + "learning_rate": 9.722467602235042e-05, + "loss": 1.1838, + "step": 2841 + }, + { + "epoch": 0.52, + "learning_rate": 9.716500746225802e-05, + "loss": 1.0211, + "step": 2842 + }, + { + "epoch": 0.52, + "learning_rate": 9.710533991231341e-05, + "loss": 1.2617, + "step": 2843 + }, + { + "epoch": 0.52, + "learning_rate": 9.704567339377701e-05, + "loss": 1.1832, + "step": 2844 + }, + { + "epoch": 0.52, + "learning_rate": 9.698600792790875e-05, + "loss": 1.2816, + "step": 2845 + }, + { + "epoch": 0.52, + "learning_rate": 9.69263435359683e-05, + "loss": 1.1304, + "step": 2846 + }, + { + "epoch": 0.52, + "learning_rate": 9.686668023921496e-05, + "loss": 1.1654, + "step": 2847 + }, + { + "epoch": 0.52, + "learning_rate": 9.680701805890761e-05, + "loss": 1.0638, + "step": 2848 + }, + { + "epoch": 0.52, + "learning_rate": 9.674735701630469e-05, + "loss": 0.9831, + "step": 2849 + }, + { + "epoch": 0.53, + "learning_rate": 9.668769713266432e-05, + "loss": 1.1964, + "step": 2850 + }, + { + "epoch": 0.53, + "learning_rate": 9.662803842924408e-05, + "loss": 1.1528, + "step": 2851 + }, + { + "epoch": 0.53, + "learning_rate": 9.656838092730126e-05, + "loss": 1.276, + "step": 2852 + }, + { + "epoch": 0.53, + "learning_rate": 9.650872464809265e-05, + "loss": 1.137, + "step": 2853 + }, + { + "epoch": 0.53, + "learning_rate": 9.644906961287463e-05, + "loss": 1.0677, + "step": 2854 + }, + { + "epoch": 0.53, + "learning_rate": 9.638941584290318e-05, + "loss": 1.1631, + "step": 2855 + }, + { + "epoch": 0.53, + "learning_rate": 9.632976335943367e-05, + "loss": 1.1207, + "step": 2856 + }, + { + "epoch": 0.53, + "learning_rate": 9.627011218372117e-05, + "loss": 0.9687, + "step": 2857 + }, + { + "epoch": 0.53, + "learning_rate": 9.621046233702026e-05, + "loss": 0.9849, + "step": 2858 + }, + { + "epoch": 0.53, + "learning_rate": 9.615081384058499e-05, + "loss": 1.2067, + "step": 2859 + }, + { + "epoch": 0.53, + "learning_rate": 9.609116671566898e-05, + "loss": 1.0645, + "step": 2860 + }, + { + "epoch": 0.53, + "learning_rate": 9.603152098352537e-05, + "loss": 1.0706, + "step": 2861 + }, + { + "epoch": 0.53, + "learning_rate": 9.597187666540668e-05, + "loss": 1.224, + "step": 2862 + }, + { + "epoch": 0.53, + "learning_rate": 9.59122337825651e-05, + "loss": 1.2859, + "step": 2863 + }, + { + "epoch": 0.53, + "learning_rate": 9.58525923562522e-05, + "loss": 1.2456, + "step": 2864 + }, + { + "epoch": 0.53, + "learning_rate": 9.579295240771906e-05, + "loss": 1.1496, + "step": 2865 + }, + { + "epoch": 0.53, + "learning_rate": 9.57333139582163e-05, + "loss": 1.1208, + "step": 2866 + }, + { + "epoch": 0.53, + "learning_rate": 9.567367702899382e-05, + "loss": 1.2035, + "step": 2867 + }, + { + "epoch": 0.53, + "learning_rate": 9.561404164130116e-05, + "loss": 1.1492, + "step": 2868 + }, + { + "epoch": 0.53, + "learning_rate": 9.555440781638727e-05, + "loss": 1.2671, + "step": 2869 + }, + { + "epoch": 0.53, + "learning_rate": 9.549477557550047e-05, + "loss": 1.2542, + "step": 2870 + }, + { + "epoch": 0.53, + "learning_rate": 9.543514493988862e-05, + "loss": 1.1468, + "step": 2871 + }, + { + "epoch": 0.53, + "learning_rate": 9.537551593079897e-05, + "loss": 1.1828, + "step": 2872 + }, + { + "epoch": 0.53, + "learning_rate": 9.531588856947808e-05, + "loss": 1.2418, + "step": 2873 + }, + { + "epoch": 0.53, + "learning_rate": 9.525626287717208e-05, + "loss": 1.1855, + "step": 2874 + }, + { + "epoch": 0.53, + "learning_rate": 9.519663887512647e-05, + "loss": 1.1575, + "step": 2875 + }, + { + "epoch": 0.53, + "learning_rate": 9.513701658458606e-05, + "loss": 1.1831, + "step": 2876 + }, + { + "epoch": 0.53, + "learning_rate": 9.507739602679519e-05, + "loss": 1.0928, + "step": 2877 + }, + { + "epoch": 0.53, + "learning_rate": 9.501777722299744e-05, + "loss": 1.2053, + "step": 2878 + }, + { + "epoch": 0.53, + "learning_rate": 9.495816019443584e-05, + "loss": 1.2392, + "step": 2879 + }, + { + "epoch": 0.53, + "learning_rate": 9.489854496235278e-05, + "loss": 1.1332, + "step": 2880 + }, + { + "epoch": 0.53, + "learning_rate": 9.483893154799001e-05, + "loss": 1.0718, + "step": 2881 + }, + { + "epoch": 0.53, + "learning_rate": 9.477931997258864e-05, + "loss": 1.0826, + "step": 2882 + }, + { + "epoch": 0.53, + "learning_rate": 9.471971025738914e-05, + "loss": 1.2234, + "step": 2883 + }, + { + "epoch": 0.53, + "learning_rate": 9.466010242363123e-05, + "loss": 1.0699, + "step": 2884 + }, + { + "epoch": 0.53, + "learning_rate": 9.460049649255403e-05, + "loss": 1.2242, + "step": 2885 + }, + { + "epoch": 0.53, + "learning_rate": 9.4540892485396e-05, + "loss": 1.089, + "step": 2886 + }, + { + "epoch": 0.53, + "learning_rate": 9.448129042339488e-05, + "loss": 1.0964, + "step": 2887 + }, + { + "epoch": 0.53, + "learning_rate": 9.442169032778777e-05, + "loss": 1.1245, + "step": 2888 + }, + { + "epoch": 0.53, + "learning_rate": 9.436209221981093e-05, + "loss": 1.15, + "step": 2889 + }, + { + "epoch": 0.53, + "learning_rate": 9.430249612070006e-05, + "loss": 1.1172, + "step": 2890 + }, + { + "epoch": 0.53, + "learning_rate": 9.424290205169007e-05, + "loss": 1.2088, + "step": 2891 + }, + { + "epoch": 0.53, + "learning_rate": 9.418331003401519e-05, + "loss": 1.1243, + "step": 2892 + }, + { + "epoch": 0.53, + "learning_rate": 9.412372008890886e-05, + "loss": 1.1578, + "step": 2893 + }, + { + "epoch": 0.53, + "learning_rate": 9.406413223760391e-05, + "loss": 1.105, + "step": 2894 + }, + { + "epoch": 0.53, + "learning_rate": 9.40045465013322e-05, + "loss": 1.1036, + "step": 2895 + }, + { + "epoch": 0.53, + "learning_rate": 9.394496290132503e-05, + "loss": 1.2113, + "step": 2896 + }, + { + "epoch": 0.53, + "learning_rate": 9.388538145881289e-05, + "loss": 1.1959, + "step": 2897 + }, + { + "epoch": 0.53, + "learning_rate": 9.382580219502547e-05, + "loss": 1.119, + "step": 2898 + }, + { + "epoch": 0.53, + "learning_rate": 9.376622513119173e-05, + "loss": 1.1749, + "step": 2899 + }, + { + "epoch": 0.53, + "learning_rate": 9.370665028853976e-05, + "loss": 1.0931, + "step": 2900 + }, + { + "epoch": 0.53, + "learning_rate": 9.364707768829694e-05, + "loss": 1.0954, + "step": 2901 + }, + { + "epoch": 0.53, + "learning_rate": 9.358750735168983e-05, + "loss": 1.2064, + "step": 2902 + }, + { + "epoch": 0.53, + "learning_rate": 9.35279392999442e-05, + "loss": 1.2186, + "step": 2903 + }, + { + "epoch": 0.54, + "learning_rate": 9.346837355428494e-05, + "loss": 1.0256, + "step": 2904 + }, + { + "epoch": 0.54, + "learning_rate": 9.340881013593625e-05, + "loss": 1.1713, + "step": 2905 + }, + { + "epoch": 0.54, + "learning_rate": 9.334924906612131e-05, + "loss": 1.0995, + "step": 2906 + }, + { + "epoch": 0.54, + "learning_rate": 9.328969036606262e-05, + "loss": 1.1875, + "step": 2907 + }, + { + "epoch": 0.54, + "learning_rate": 9.323013405698178e-05, + "loss": 1.1381, + "step": 2908 + }, + { + "epoch": 0.54, + "learning_rate": 9.317058016009954e-05, + "loss": 1.0689, + "step": 2909 + }, + { + "epoch": 0.54, + "learning_rate": 9.311102869663584e-05, + "loss": 1.2151, + "step": 2910 + }, + { + "epoch": 0.54, + "learning_rate": 9.305147968780962e-05, + "loss": 1.166, + "step": 2911 + }, + { + "epoch": 0.54, + "learning_rate": 9.299193315483906e-05, + "loss": 1.2019, + "step": 2912 + }, + { + "epoch": 0.54, + "learning_rate": 9.293238911894144e-05, + "loss": 1.2719, + "step": 2913 + }, + { + "epoch": 0.54, + "learning_rate": 9.287284760133313e-05, + "loss": 1.0417, + "step": 2914 + }, + { + "epoch": 0.54, + "learning_rate": 9.281330862322963e-05, + "loss": 1.1438, + "step": 2915 + }, + { + "epoch": 0.54, + "learning_rate": 9.275377220584554e-05, + "loss": 1.2057, + "step": 2916 + }, + { + "epoch": 0.54, + "learning_rate": 9.269423837039446e-05, + "loss": 1.2664, + "step": 2917 + }, + { + "epoch": 0.54, + "learning_rate": 9.263470713808915e-05, + "loss": 1.1658, + "step": 2918 + }, + { + "epoch": 0.54, + "learning_rate": 9.257517853014145e-05, + "loss": 1.2042, + "step": 2919 + }, + { + "epoch": 0.54, + "learning_rate": 9.251565256776223e-05, + "loss": 1.2215, + "step": 2920 + }, + { + "epoch": 0.54, + "learning_rate": 9.245612927216145e-05, + "loss": 1.1456, + "step": 2921 + }, + { + "epoch": 0.54, + "learning_rate": 9.239660866454809e-05, + "loss": 1.0487, + "step": 2922 + }, + { + "epoch": 0.54, + "learning_rate": 9.233709076613013e-05, + "loss": 1.2356, + "step": 2923 + }, + { + "epoch": 0.54, + "learning_rate": 9.227757559811469e-05, + "loss": 1.1757, + "step": 2924 + }, + { + "epoch": 0.54, + "learning_rate": 9.221806318170783e-05, + "loss": 1.1628, + "step": 2925 + }, + { + "epoch": 0.54, + "learning_rate": 9.21585535381147e-05, + "loss": 1.1013, + "step": 2926 + }, + { + "epoch": 0.54, + "learning_rate": 9.209904668853943e-05, + "loss": 1.076, + "step": 2927 + }, + { + "epoch": 0.54, + "learning_rate": 9.20395426541851e-05, + "loss": 1.1132, + "step": 2928 + }, + { + "epoch": 0.54, + "learning_rate": 9.198004145625386e-05, + "loss": 1.1724, + "step": 2929 + }, + { + "epoch": 0.54, + "learning_rate": 9.192054311594682e-05, + "loss": 1.1824, + "step": 2930 + }, + { + "epoch": 0.54, + "learning_rate": 9.186104765446408e-05, + "loss": 1.1273, + "step": 2931 + }, + { + "epoch": 0.54, + "learning_rate": 9.180155509300473e-05, + "loss": 1.2039, + "step": 2932 + }, + { + "epoch": 0.54, + "learning_rate": 9.174206545276677e-05, + "loss": 1.3616, + "step": 2933 + }, + { + "epoch": 0.54, + "learning_rate": 9.168257875494724e-05, + "loss": 1.107, + "step": 2934 + }, + { + "epoch": 0.54, + "learning_rate": 9.162309502074206e-05, + "loss": 1.1277, + "step": 2935 + }, + { + "epoch": 0.54, + "learning_rate": 9.156361427134611e-05, + "loss": 1.1397, + "step": 2936 + }, + { + "epoch": 0.54, + "learning_rate": 9.150413652795325e-05, + "loss": 1.1044, + "step": 2937 + }, + { + "epoch": 0.54, + "learning_rate": 9.144466181175622e-05, + "loss": 0.8782, + "step": 2938 + }, + { + "epoch": 0.54, + "learning_rate": 9.138519014394671e-05, + "loss": 1.1193, + "step": 2939 + }, + { + "epoch": 0.54, + "learning_rate": 9.132572154571532e-05, + "loss": 1.0753, + "step": 2940 + }, + { + "epoch": 0.54, + "learning_rate": 9.126625603825152e-05, + "loss": 1.158, + "step": 2941 + }, + { + "epoch": 0.54, + "learning_rate": 9.120679364274372e-05, + "loss": 1.1534, + "step": 2942 + }, + { + "epoch": 0.54, + "learning_rate": 9.114733438037922e-05, + "loss": 1.0979, + "step": 2943 + }, + { + "epoch": 0.54, + "learning_rate": 9.108787827234418e-05, + "loss": 1.1033, + "step": 2944 + }, + { + "epoch": 0.54, + "learning_rate": 9.102842533982365e-05, + "loss": 1.1068, + "step": 2945 + }, + { + "epoch": 0.54, + "learning_rate": 9.096897560400158e-05, + "loss": 1.1755, + "step": 2946 + }, + { + "epoch": 0.54, + "learning_rate": 9.09095290860607e-05, + "loss": 1.1487, + "step": 2947 + }, + { + "epoch": 0.54, + "learning_rate": 9.085008580718266e-05, + "loss": 1.1299, + "step": 2948 + }, + { + "epoch": 0.54, + "learning_rate": 9.079064578854795e-05, + "loss": 1.0345, + "step": 2949 + }, + { + "epoch": 0.54, + "learning_rate": 9.073120905133588e-05, + "loss": 1.1673, + "step": 2950 + }, + { + "epoch": 0.54, + "learning_rate": 9.067177561672456e-05, + "loss": 1.1317, + "step": 2951 + }, + { + "epoch": 0.54, + "learning_rate": 9.061234550589103e-05, + "loss": 1.3414, + "step": 2952 + }, + { + "epoch": 0.54, + "learning_rate": 9.055291874001103e-05, + "loss": 1.126, + "step": 2953 + }, + { + "epoch": 0.54, + "learning_rate": 9.049349534025917e-05, + "loss": 1.0681, + "step": 2954 + }, + { + "epoch": 0.54, + "learning_rate": 9.043407532780882e-05, + "loss": 1.065, + "step": 2955 + }, + { + "epoch": 0.54, + "learning_rate": 9.037465872383218e-05, + "loss": 1.1425, + "step": 2956 + }, + { + "epoch": 0.54, + "learning_rate": 9.031524554950023e-05, + "loss": 1.2085, + "step": 2957 + }, + { + "epoch": 0.55, + "learning_rate": 9.025583582598273e-05, + "loss": 0.9626, + "step": 2958 + }, + { + "epoch": 0.55, + "learning_rate": 9.019642957444819e-05, + "loss": 0.9804, + "step": 2959 + }, + { + "epoch": 0.55, + "learning_rate": 9.013702681606389e-05, + "loss": 1.0576, + "step": 2960 + }, + { + "epoch": 0.55, + "learning_rate": 9.007762757199587e-05, + "loss": 1.1011, + "step": 2961 + }, + { + "epoch": 0.55, + "learning_rate": 9.001823186340892e-05, + "loss": 1.0638, + "step": 2962 + }, + { + "epoch": 0.55, + "learning_rate": 8.995883971146656e-05, + "loss": 1.1404, + "step": 2963 + }, + { + "epoch": 0.55, + "learning_rate": 8.989945113733108e-05, + "loss": 1.1637, + "step": 2964 + }, + { + "epoch": 0.55, + "learning_rate": 8.984006616216346e-05, + "loss": 1.0596, + "step": 2965 + }, + { + "epoch": 0.55, + "learning_rate": 8.978068480712335e-05, + "loss": 1.2259, + "step": 2966 + }, + { + "epoch": 0.55, + "learning_rate": 8.97213070933692e-05, + "loss": 1.1142, + "step": 2967 + }, + { + "epoch": 0.55, + "learning_rate": 8.966193304205813e-05, + "loss": 1.1162, + "step": 2968 + }, + { + "epoch": 0.55, + "learning_rate": 8.960256267434596e-05, + "loss": 0.9594, + "step": 2969 + }, + { + "epoch": 0.55, + "learning_rate": 8.954319601138718e-05, + "loss": 1.0133, + "step": 2970 + }, + { + "epoch": 0.55, + "learning_rate": 8.948383307433498e-05, + "loss": 1.3156, + "step": 2971 + }, + { + "epoch": 0.55, + "learning_rate": 8.942447388434115e-05, + "loss": 1.0209, + "step": 2972 + }, + { + "epoch": 0.55, + "learning_rate": 8.936511846255627e-05, + "loss": 1.0959, + "step": 2973 + }, + { + "epoch": 0.55, + "learning_rate": 8.930576683012949e-05, + "loss": 1.1188, + "step": 2974 + }, + { + "epoch": 0.55, + "learning_rate": 8.924641900820864e-05, + "loss": 1.1334, + "step": 2975 + }, + { + "epoch": 0.55, + "learning_rate": 8.918707501794021e-05, + "loss": 0.9522, + "step": 2976 + }, + { + "epoch": 0.55, + "learning_rate": 8.912773488046925e-05, + "loss": 1.1984, + "step": 2977 + }, + { + "epoch": 0.55, + "learning_rate": 8.906839861693949e-05, + "loss": 1.18, + "step": 2978 + }, + { + "epoch": 0.55, + "learning_rate": 8.90090662484933e-05, + "loss": 1.2392, + "step": 2979 + }, + { + "epoch": 0.55, + "learning_rate": 8.894973779627163e-05, + "loss": 1.0964, + "step": 2980 + }, + { + "epoch": 0.55, + "learning_rate": 8.889041328141405e-05, + "loss": 1.1851, + "step": 2981 + }, + { + "epoch": 0.55, + "learning_rate": 8.883109272505877e-05, + "loss": 1.2173, + "step": 2982 + }, + { + "epoch": 0.55, + "learning_rate": 8.877177614834242e-05, + "loss": 1.1561, + "step": 2983 + }, + { + "epoch": 0.55, + "learning_rate": 8.871246357240041e-05, + "loss": 1.1446, + "step": 2984 + }, + { + "epoch": 0.55, + "learning_rate": 8.865315501836664e-05, + "loss": 1.1767, + "step": 2985 + }, + { + "epoch": 0.55, + "learning_rate": 8.859385050737356e-05, + "loss": 1.1216, + "step": 2986 + }, + { + "epoch": 0.55, + "learning_rate": 8.853455006055229e-05, + "loss": 1.1508, + "step": 2987 + }, + { + "epoch": 0.55, + "learning_rate": 8.847525369903229e-05, + "loss": 1.206, + "step": 2988 + }, + { + "epoch": 0.55, + "learning_rate": 8.841596144394174e-05, + "loss": 1.1434, + "step": 2989 + }, + { + "epoch": 0.55, + "learning_rate": 8.835667331640731e-05, + "loss": 1.0858, + "step": 2990 + }, + { + "epoch": 0.55, + "learning_rate": 8.82973893375542e-05, + "loss": 0.9895, + "step": 2991 + }, + { + "epoch": 0.55, + "learning_rate": 8.823810952850612e-05, + "loss": 1.1926, + "step": 2992 + }, + { + "epoch": 0.55, + "learning_rate": 8.817883391038533e-05, + "loss": 1.0319, + "step": 2993 + }, + { + "epoch": 0.55, + "learning_rate": 8.811956250431253e-05, + "loss": 1.1582, + "step": 2994 + }, + { + "epoch": 0.55, + "learning_rate": 8.806029533140693e-05, + "loss": 1.1574, + "step": 2995 + }, + { + "epoch": 0.55, + "learning_rate": 8.800103241278634e-05, + "loss": 1.1944, + "step": 2996 + }, + { + "epoch": 0.55, + "learning_rate": 8.79417737695669e-05, + "loss": 1.0363, + "step": 2997 + }, + { + "epoch": 0.55, + "learning_rate": 8.788251942286337e-05, + "loss": 1.1398, + "step": 2998 + }, + { + "epoch": 0.55, + "learning_rate": 8.782326939378883e-05, + "loss": 1.053, + "step": 2999 + }, + { + "epoch": 0.55, + "learning_rate": 8.776402370345494e-05, + "loss": 1.0548, + "step": 3000 + }, + { + "epoch": 0.55, + "learning_rate": 8.770478237297176e-05, + "loss": 1.3369, + "step": 3001 + }, + { + "epoch": 0.55, + "learning_rate": 8.764554542344783e-05, + "loss": 1.1963, + "step": 3002 + }, + { + "epoch": 0.55, + "learning_rate": 8.75863128759901e-05, + "loss": 1.2434, + "step": 3003 + }, + { + "epoch": 0.55, + "learning_rate": 8.752708475170399e-05, + "loss": 1.0964, + "step": 3004 + }, + { + "epoch": 0.55, + "learning_rate": 8.746786107169327e-05, + "loss": 1.255, + "step": 3005 + }, + { + "epoch": 0.55, + "learning_rate": 8.740864185706016e-05, + "loss": 1.106, + "step": 3006 + }, + { + "epoch": 0.55, + "learning_rate": 8.734942712890534e-05, + "loss": 1.087, + "step": 3007 + }, + { + "epoch": 0.55, + "learning_rate": 8.729021690832785e-05, + "loss": 1.128, + "step": 3008 + }, + { + "epoch": 0.55, + "learning_rate": 8.723101121642516e-05, + "loss": 0.9239, + "step": 3009 + }, + { + "epoch": 0.55, + "learning_rate": 8.717181007429299e-05, + "loss": 1.091, + "step": 3010 + }, + { + "epoch": 0.55, + "learning_rate": 8.71126135030256e-05, + "loss": 1.1511, + "step": 3011 + }, + { + "epoch": 0.56, + "learning_rate": 8.705342152371557e-05, + "loss": 1.1565, + "step": 3012 + }, + { + "epoch": 0.56, + "learning_rate": 8.699423415745383e-05, + "loss": 1.1547, + "step": 3013 + }, + { + "epoch": 0.56, + "learning_rate": 8.693505142532966e-05, + "loss": 1.1623, + "step": 3014 + }, + { + "epoch": 0.56, + "learning_rate": 8.687587334843078e-05, + "loss": 1.2677, + "step": 3015 + }, + { + "epoch": 0.56, + "learning_rate": 8.681669994784303e-05, + "loss": 1.141, + "step": 3016 + }, + { + "epoch": 0.56, + "learning_rate": 8.67575312446508e-05, + "loss": 1.0403, + "step": 3017 + }, + { + "epoch": 0.56, + "learning_rate": 8.669836725993674e-05, + "loss": 1.1266, + "step": 3018 + }, + { + "epoch": 0.56, + "learning_rate": 8.66392080147818e-05, + "loss": 1.1954, + "step": 3019 + }, + { + "epoch": 0.56, + "learning_rate": 8.65800535302653e-05, + "loss": 1.1671, + "step": 3020 + }, + { + "epoch": 0.56, + "learning_rate": 8.652090382746474e-05, + "loss": 1.1481, + "step": 3021 + }, + { + "epoch": 0.56, + "learning_rate": 8.6461758927456e-05, + "loss": 1.0806, + "step": 3022 + }, + { + "epoch": 0.56, + "learning_rate": 8.640261885131328e-05, + "loss": 1.3917, + "step": 3023 + }, + { + "epoch": 0.56, + "learning_rate": 8.634348362010902e-05, + "loss": 1.1322, + "step": 3024 + }, + { + "epoch": 0.56, + "learning_rate": 8.628435325491394e-05, + "loss": 1.3397, + "step": 3025 + }, + { + "epoch": 0.56, + "learning_rate": 8.622522777679699e-05, + "loss": 1.2579, + "step": 3026 + }, + { + "epoch": 0.56, + "learning_rate": 8.61661072068254e-05, + "loss": 1.1412, + "step": 3027 + }, + { + "epoch": 0.56, + "learning_rate": 8.61069915660647e-05, + "loss": 1.2435, + "step": 3028 + }, + { + "epoch": 0.56, + "learning_rate": 8.60478808755786e-05, + "loss": 1.1183, + "step": 3029 + }, + { + "epoch": 0.56, + "learning_rate": 8.598877515642911e-05, + "loss": 1.268, + "step": 3030 + }, + { + "epoch": 0.56, + "learning_rate": 8.592967442967644e-05, + "loss": 1.1197, + "step": 3031 + }, + { + "epoch": 0.56, + "learning_rate": 8.587057871637891e-05, + "loss": 1.1447, + "step": 3032 + }, + { + "epoch": 0.56, + "learning_rate": 8.581148803759324e-05, + "loss": 1.1857, + "step": 3033 + }, + { + "epoch": 0.56, + "learning_rate": 8.575240241437427e-05, + "loss": 1.1563, + "step": 3034 + }, + { + "epoch": 0.56, + "learning_rate": 8.5693321867775e-05, + "loss": 1.1515, + "step": 3035 + }, + { + "epoch": 0.56, + "learning_rate": 8.563424641884674e-05, + "loss": 1.0991, + "step": 3036 + }, + { + "epoch": 0.56, + "learning_rate": 8.55751760886388e-05, + "loss": 1.094, + "step": 3037 + }, + { + "epoch": 0.56, + "learning_rate": 8.551611089819883e-05, + "loss": 1.0166, + "step": 3038 + }, + { + "epoch": 0.56, + "learning_rate": 8.545705086857256e-05, + "loss": 1.0674, + "step": 3039 + }, + { + "epoch": 0.56, + "learning_rate": 8.539799602080394e-05, + "loss": 1.1136, + "step": 3040 + }, + { + "epoch": 0.56, + "learning_rate": 8.533894637593503e-05, + "loss": 1.0936, + "step": 3041 + }, + { + "epoch": 0.56, + "learning_rate": 8.527990195500609e-05, + "loss": 1.1186, + "step": 3042 + }, + { + "epoch": 0.56, + "learning_rate": 8.522086277905541e-05, + "loss": 1.09, + "step": 3043 + }, + { + "epoch": 0.56, + "learning_rate": 8.516182886911952e-05, + "loss": 1.1297, + "step": 3044 + }, + { + "epoch": 0.56, + "learning_rate": 8.510280024623301e-05, + "loss": 0.9226, + "step": 3045 + }, + { + "epoch": 0.56, + "learning_rate": 8.504377693142865e-05, + "loss": 1.2127, + "step": 3046 + }, + { + "epoch": 0.56, + "learning_rate": 8.498475894573728e-05, + "loss": 1.2971, + "step": 3047 + }, + { + "epoch": 0.56, + "learning_rate": 8.492574631018777e-05, + "loss": 1.1164, + "step": 3048 + }, + { + "epoch": 0.56, + "learning_rate": 8.48667390458072e-05, + "loss": 1.2226, + "step": 3049 + }, + { + "epoch": 0.56, + "learning_rate": 8.480773717362069e-05, + "loss": 1.0534, + "step": 3050 + }, + { + "epoch": 0.56, + "learning_rate": 8.474874071465144e-05, + "loss": 1.0608, + "step": 3051 + }, + { + "epoch": 0.56, + "learning_rate": 8.46897496899207e-05, + "loss": 1.2981, + "step": 3052 + }, + { + "epoch": 0.56, + "learning_rate": 8.463076412044789e-05, + "loss": 1.0084, + "step": 3053 + }, + { + "epoch": 0.56, + "learning_rate": 8.457178402725025e-05, + "loss": 1.2863, + "step": 3054 + }, + { + "epoch": 0.56, + "learning_rate": 8.451280943134332e-05, + "loss": 1.125, + "step": 3055 + }, + { + "epoch": 0.56, + "learning_rate": 8.445384035374055e-05, + "loss": 1.0367, + "step": 3056 + }, + { + "epoch": 0.56, + "learning_rate": 8.439487681545345e-05, + "loss": 1.2817, + "step": 3057 + }, + { + "epoch": 0.56, + "learning_rate": 8.433591883749162e-05, + "loss": 1.0949, + "step": 3058 + }, + { + "epoch": 0.56, + "learning_rate": 8.427696644086251e-05, + "loss": 1.1196, + "step": 3059 + }, + { + "epoch": 0.56, + "learning_rate": 8.421801964657175e-05, + "loss": 1.114, + "step": 3060 + }, + { + "epoch": 0.56, + "learning_rate": 8.415907847562292e-05, + "loss": 1.1472, + "step": 3061 + }, + { + "epoch": 0.56, + "learning_rate": 8.410014294901757e-05, + "loss": 1.1202, + "step": 3062 + }, + { + "epoch": 0.56, + "learning_rate": 8.404121308775525e-05, + "loss": 1.1515, + "step": 3063 + }, + { + "epoch": 0.56, + "learning_rate": 8.398228891283358e-05, + "loss": 1.1511, + "step": 3064 + }, + { + "epoch": 0.56, + "learning_rate": 8.392337044524797e-05, + "loss": 1.0768, + "step": 3065 + }, + { + "epoch": 0.56, + "learning_rate": 8.386445770599192e-05, + "loss": 1.0511, + "step": 3066 + }, + { + "epoch": 0.57, + "learning_rate": 8.380555071605689e-05, + "loss": 1.2184, + "step": 3067 + }, + { + "epoch": 0.57, + "learning_rate": 8.374664949643228e-05, + "loss": 1.1507, + "step": 3068 + }, + { + "epoch": 0.57, + "learning_rate": 8.368775406810543e-05, + "loss": 1.0192, + "step": 3069 + }, + { + "epoch": 0.57, + "learning_rate": 8.362886445206159e-05, + "loss": 1.0429, + "step": 3070 + }, + { + "epoch": 0.57, + "learning_rate": 8.356998066928394e-05, + "loss": 1.2672, + "step": 3071 + }, + { + "epoch": 0.57, + "learning_rate": 8.351110274075363e-05, + "loss": 1.0522, + "step": 3072 + }, + { + "epoch": 0.57, + "learning_rate": 8.345223068744968e-05, + "loss": 1.0327, + "step": 3073 + }, + { + "epoch": 0.57, + "learning_rate": 8.339336453034905e-05, + "loss": 1.1057, + "step": 3074 + }, + { + "epoch": 0.57, + "learning_rate": 8.33345042904266e-05, + "loss": 1.0988, + "step": 3075 + }, + { + "epoch": 0.57, + "learning_rate": 8.327564998865503e-05, + "loss": 1.1436, + "step": 3076 + }, + { + "epoch": 0.57, + "learning_rate": 8.321680164600493e-05, + "loss": 1.0987, + "step": 3077 + }, + { + "epoch": 0.57, + "learning_rate": 8.315795928344482e-05, + "loss": 1.1273, + "step": 3078 + }, + { + "epoch": 0.57, + "learning_rate": 8.30991229219411e-05, + "loss": 1.1621, + "step": 3079 + }, + { + "epoch": 0.57, + "learning_rate": 8.304029258245795e-05, + "loss": 1.2133, + "step": 3080 + }, + { + "epoch": 0.57, + "learning_rate": 8.298146828595748e-05, + "loss": 1.0005, + "step": 3081 + }, + { + "epoch": 0.57, + "learning_rate": 8.292265005339958e-05, + "loss": 1.1018, + "step": 3082 + }, + { + "epoch": 0.57, + "learning_rate": 8.286383790574202e-05, + "loss": 1.158, + "step": 3083 + }, + { + "epoch": 0.57, + "learning_rate": 8.280503186394042e-05, + "loss": 1.2132, + "step": 3084 + }, + { + "epoch": 0.57, + "learning_rate": 8.274623194894818e-05, + "loss": 1.1722, + "step": 3085 + }, + { + "epoch": 0.57, + "learning_rate": 8.268743818171657e-05, + "loss": 1.1002, + "step": 3086 + }, + { + "epoch": 0.57, + "learning_rate": 8.26286505831946e-05, + "loss": 1.1078, + "step": 3087 + }, + { + "epoch": 0.57, + "learning_rate": 8.256986917432914e-05, + "loss": 1.0197, + "step": 3088 + }, + { + "epoch": 0.57, + "learning_rate": 8.251109397606483e-05, + "loss": 1.2089, + "step": 3089 + }, + { + "epoch": 0.57, + "learning_rate": 8.245232500934408e-05, + "loss": 1.2076, + "step": 3090 + }, + { + "epoch": 0.57, + "learning_rate": 8.239356229510715e-05, + "loss": 0.9508, + "step": 3091 + }, + { + "epoch": 0.57, + "learning_rate": 8.233480585429196e-05, + "loss": 1.1723, + "step": 3092 + }, + { + "epoch": 0.57, + "learning_rate": 8.227605570783431e-05, + "loss": 0.9848, + "step": 3093 + }, + { + "epoch": 0.57, + "learning_rate": 8.22173118766677e-05, + "loss": 1.0922, + "step": 3094 + }, + { + "epoch": 0.57, + "learning_rate": 8.215857438172334e-05, + "loss": 1.2258, + "step": 3095 + }, + { + "epoch": 0.57, + "learning_rate": 8.209984324393026e-05, + "loss": 1.1184, + "step": 3096 + }, + { + "epoch": 0.57, + "learning_rate": 8.20411184842152e-05, + "loss": 1.1338, + "step": 3097 + }, + { + "epoch": 0.57, + "learning_rate": 8.198240012350261e-05, + "loss": 1.187, + "step": 3098 + }, + { + "epoch": 0.57, + "learning_rate": 8.192368818271465e-05, + "loss": 1.1214, + "step": 3099 + }, + { + "epoch": 0.57, + "learning_rate": 8.186498268277126e-05, + "loss": 1.1113, + "step": 3100 + }, + { + "epoch": 0.57, + "learning_rate": 8.180628364458998e-05, + "loss": 1.1226, + "step": 3101 + }, + { + "epoch": 0.57, + "learning_rate": 8.174759108908615e-05, + "loss": 1.0049, + "step": 3102 + }, + { + "epoch": 0.57, + "learning_rate": 8.168890503717271e-05, + "loss": 1.1606, + "step": 3103 + }, + { + "epoch": 0.57, + "learning_rate": 8.163022550976035e-05, + "loss": 1.0784, + "step": 3104 + }, + { + "epoch": 0.57, + "learning_rate": 8.157155252775742e-05, + "loss": 1.1108, + "step": 3105 + }, + { + "epoch": 0.57, + "learning_rate": 8.151288611206992e-05, + "loss": 1.0967, + "step": 3106 + }, + { + "epoch": 0.57, + "learning_rate": 8.145422628360153e-05, + "loss": 1.0757, + "step": 3107 + }, + { + "epoch": 0.57, + "learning_rate": 8.139557306325358e-05, + "loss": 1.107, + "step": 3108 + }, + { + "epoch": 0.57, + "learning_rate": 8.1336926471925e-05, + "loss": 1.2708, + "step": 3109 + }, + { + "epoch": 0.57, + "learning_rate": 8.127828653051243e-05, + "loss": 1.1873, + "step": 3110 + }, + { + "epoch": 0.57, + "learning_rate": 8.121965325991008e-05, + "loss": 1.1988, + "step": 3111 + }, + { + "epoch": 0.57, + "learning_rate": 8.116102668100986e-05, + "loss": 1.2832, + "step": 3112 + }, + { + "epoch": 0.57, + "learning_rate": 8.110240681470123e-05, + "loss": 1.1698, + "step": 3113 + }, + { + "epoch": 0.57, + "learning_rate": 8.104379368187122e-05, + "loss": 1.0821, + "step": 3114 + }, + { + "epoch": 0.57, + "learning_rate": 8.098518730340456e-05, + "loss": 1.1157, + "step": 3115 + }, + { + "epoch": 0.57, + "learning_rate": 8.092658770018351e-05, + "loss": 1.1829, + "step": 3116 + }, + { + "epoch": 0.57, + "learning_rate": 8.086799489308797e-05, + "loss": 1.1656, + "step": 3117 + }, + { + "epoch": 0.57, + "learning_rate": 8.080940890299536e-05, + "loss": 1.0372, + "step": 3118 + }, + { + "epoch": 0.57, + "learning_rate": 8.07508297507807e-05, + "loss": 1.2365, + "step": 3119 + }, + { + "epoch": 0.57, + "learning_rate": 8.069225745731654e-05, + "loss": 1.2585, + "step": 3120 + }, + { + "epoch": 0.58, + "learning_rate": 8.063369204347302e-05, + "loss": 1.1302, + "step": 3121 + }, + { + "epoch": 0.58, + "learning_rate": 8.057513353011785e-05, + "loss": 1.1863, + "step": 3122 + }, + { + "epoch": 0.58, + "learning_rate": 8.051658193811623e-05, + "loss": 1.1274, + "step": 3123 + }, + { + "epoch": 0.58, + "learning_rate": 8.045803728833097e-05, + "loss": 1.1456, + "step": 3124 + }, + { + "epoch": 0.58, + "learning_rate": 8.039949960162227e-05, + "loss": 1.0766, + "step": 3125 + }, + { + "epoch": 0.58, + "learning_rate": 8.034096889884797e-05, + "loss": 1.1282, + "step": 3126 + }, + { + "epoch": 0.58, + "learning_rate": 8.028244520086337e-05, + "loss": 1.1553, + "step": 3127 + }, + { + "epoch": 0.58, + "learning_rate": 8.022392852852132e-05, + "loss": 1.1938, + "step": 3128 + }, + { + "epoch": 0.58, + "learning_rate": 8.01654189026721e-05, + "loss": 1.1029, + "step": 3129 + }, + { + "epoch": 0.58, + "learning_rate": 8.01069163441636e-05, + "loss": 1.0612, + "step": 3130 + }, + { + "epoch": 0.58, + "learning_rate": 8.004842087384096e-05, + "loss": 1.1839, + "step": 3131 + }, + { + "epoch": 0.58, + "learning_rate": 7.998993251254705e-05, + "loss": 1.184, + "step": 3132 + }, + { + "epoch": 0.58, + "learning_rate": 7.993145128112205e-05, + "loss": 1.148, + "step": 3133 + }, + { + "epoch": 0.58, + "learning_rate": 7.987297720040365e-05, + "loss": 1.1483, + "step": 3134 + }, + { + "epoch": 0.58, + "learning_rate": 7.981451029122706e-05, + "loss": 1.1227, + "step": 3135 + }, + { + "epoch": 0.58, + "learning_rate": 7.975605057442476e-05, + "loss": 1.201, + "step": 3136 + }, + { + "epoch": 0.58, + "learning_rate": 7.96975980708268e-05, + "loss": 1.1106, + "step": 3137 + }, + { + "epoch": 0.58, + "learning_rate": 7.963915280126066e-05, + "loss": 1.1145, + "step": 3138 + }, + { + "epoch": 0.58, + "learning_rate": 7.958071478655118e-05, + "loss": 1.1158, + "step": 3139 + }, + { + "epoch": 0.58, + "learning_rate": 7.952228404752067e-05, + "loss": 1.1921, + "step": 3140 + }, + { + "epoch": 0.58, + "learning_rate": 7.946386060498886e-05, + "loss": 1.0459, + "step": 3141 + }, + { + "epoch": 0.58, + "learning_rate": 7.940544447977276e-05, + "loss": 0.9492, + "step": 3142 + }, + { + "epoch": 0.58, + "learning_rate": 7.934703569268691e-05, + "loss": 1.2154, + "step": 3143 + }, + { + "epoch": 0.58, + "learning_rate": 7.928863426454318e-05, + "loss": 0.9868, + "step": 3144 + }, + { + "epoch": 0.58, + "learning_rate": 7.923024021615082e-05, + "loss": 1.0932, + "step": 3145 + }, + { + "epoch": 0.58, + "learning_rate": 7.917185356831648e-05, + "loss": 1.1503, + "step": 3146 + }, + { + "epoch": 0.58, + "learning_rate": 7.91134743418441e-05, + "loss": 1.0997, + "step": 3147 + }, + { + "epoch": 0.58, + "learning_rate": 7.905510255753501e-05, + "loss": 1.0977, + "step": 3148 + }, + { + "epoch": 0.58, + "learning_rate": 7.899673823618793e-05, + "loss": 1.2557, + "step": 3149 + }, + { + "epoch": 0.58, + "learning_rate": 7.893838139859891e-05, + "loss": 1.2678, + "step": 3150 + }, + { + "epoch": 0.58, + "learning_rate": 7.888003206556126e-05, + "loss": 1.0974, + "step": 3151 + }, + { + "epoch": 0.58, + "learning_rate": 7.882169025786575e-05, + "loss": 1.2378, + "step": 3152 + }, + { + "epoch": 0.58, + "learning_rate": 7.87633559963003e-05, + "loss": 1.0408, + "step": 3153 + }, + { + "epoch": 0.58, + "learning_rate": 7.870502930165026e-05, + "loss": 0.9884, + "step": 3154 + }, + { + "epoch": 0.58, + "learning_rate": 7.864671019469826e-05, + "loss": 1.0159, + "step": 3155 + }, + { + "epoch": 0.58, + "learning_rate": 7.858839869622422e-05, + "loss": 1.1755, + "step": 3156 + }, + { + "epoch": 0.58, + "learning_rate": 7.853009482700539e-05, + "loss": 1.0817, + "step": 3157 + }, + { + "epoch": 0.58, + "learning_rate": 7.847179860781616e-05, + "loss": 1.1309, + "step": 3158 + }, + { + "epoch": 0.58, + "learning_rate": 7.841351005942836e-05, + "loss": 1.0647, + "step": 3159 + }, + { + "epoch": 0.58, + "learning_rate": 7.8355229202611e-05, + "loss": 1.2224, + "step": 3160 + }, + { + "epoch": 0.58, + "learning_rate": 7.829695605813039e-05, + "loss": 1.0081, + "step": 3161 + }, + { + "epoch": 0.58, + "learning_rate": 7.823869064675007e-05, + "loss": 1.1445, + "step": 3162 + }, + { + "epoch": 0.58, + "learning_rate": 7.818043298923086e-05, + "loss": 1.1952, + "step": 3163 + }, + { + "epoch": 0.58, + "learning_rate": 7.81221831063307e-05, + "loss": 1.0846, + "step": 3164 + }, + { + "epoch": 0.58, + "learning_rate": 7.806394101880489e-05, + "loss": 1.1801, + "step": 3165 + }, + { + "epoch": 0.58, + "learning_rate": 7.80057067474059e-05, + "loss": 1.0914, + "step": 3166 + }, + { + "epoch": 0.58, + "learning_rate": 7.794748031288342e-05, + "loss": 1.0859, + "step": 3167 + }, + { + "epoch": 0.58, + "learning_rate": 7.788926173598441e-05, + "loss": 1.0192, + "step": 3168 + }, + { + "epoch": 0.58, + "learning_rate": 7.783105103745286e-05, + "loss": 1.1546, + "step": 3169 + }, + { + "epoch": 0.58, + "learning_rate": 7.777284823803012e-05, + "loss": 1.1766, + "step": 3170 + }, + { + "epoch": 0.58, + "learning_rate": 7.771465335845467e-05, + "loss": 1.0625, + "step": 3171 + }, + { + "epoch": 0.58, + "learning_rate": 7.765646641946215e-05, + "loss": 1.0765, + "step": 3172 + }, + { + "epoch": 0.58, + "learning_rate": 7.759828744178539e-05, + "loss": 1.1468, + "step": 3173 + }, + { + "epoch": 0.58, + "learning_rate": 7.754011644615444e-05, + "loss": 1.0343, + "step": 3174 + }, + { + "epoch": 0.59, + "learning_rate": 7.748195345329634e-05, + "loss": 1.1511, + "step": 3175 + }, + { + "epoch": 0.59, + "learning_rate": 7.742379848393543e-05, + "loss": 1.0864, + "step": 3176 + }, + { + "epoch": 0.59, + "learning_rate": 7.736565155879314e-05, + "loss": 1.0561, + "step": 3177 + }, + { + "epoch": 0.59, + "learning_rate": 7.730751269858806e-05, + "loss": 0.9745, + "step": 3178 + }, + { + "epoch": 0.59, + "learning_rate": 7.72493819240359e-05, + "loss": 1.1102, + "step": 3179 + }, + { + "epoch": 0.59, + "learning_rate": 7.719125925584941e-05, + "loss": 1.0523, + "step": 3180 + }, + { + "epoch": 0.59, + "learning_rate": 7.713314471473855e-05, + "loss": 1.1171, + "step": 3181 + }, + { + "epoch": 0.59, + "learning_rate": 7.707503832141034e-05, + "loss": 1.1898, + "step": 3182 + }, + { + "epoch": 0.59, + "learning_rate": 7.701694009656892e-05, + "loss": 1.2348, + "step": 3183 + }, + { + "epoch": 0.59, + "learning_rate": 7.695885006091552e-05, + "loss": 1.1847, + "step": 3184 + }, + { + "epoch": 0.59, + "learning_rate": 7.690076823514844e-05, + "loss": 1.0157, + "step": 3185 + }, + { + "epoch": 0.59, + "learning_rate": 7.6842694639963e-05, + "loss": 1.2321, + "step": 3186 + }, + { + "epoch": 0.59, + "learning_rate": 7.678462929605167e-05, + "loss": 1.1772, + "step": 3187 + }, + { + "epoch": 0.59, + "learning_rate": 7.672657222410395e-05, + "loss": 1.0534, + "step": 3188 + }, + { + "epoch": 0.59, + "learning_rate": 7.666852344480641e-05, + "loss": 1.1113, + "step": 3189 + }, + { + "epoch": 0.59, + "learning_rate": 7.661048297884266e-05, + "loss": 1.1037, + "step": 3190 + }, + { + "epoch": 0.59, + "learning_rate": 7.655245084689326e-05, + "loss": 1.1486, + "step": 3191 + }, + { + "epoch": 0.59, + "learning_rate": 7.649442706963594e-05, + "loss": 1.0509, + "step": 3192 + }, + { + "epoch": 0.59, + "learning_rate": 7.643641166774538e-05, + "loss": 1.1276, + "step": 3193 + }, + { + "epoch": 0.59, + "learning_rate": 7.637840466189326e-05, + "loss": 1.2689, + "step": 3194 + }, + { + "epoch": 0.59, + "learning_rate": 7.632040607274832e-05, + "loss": 1.157, + "step": 3195 + }, + { + "epoch": 0.59, + "learning_rate": 7.626241592097631e-05, + "loss": 1.1377, + "step": 3196 + }, + { + "epoch": 0.59, + "learning_rate": 7.620443422723985e-05, + "loss": 1.1501, + "step": 3197 + }, + { + "epoch": 0.59, + "learning_rate": 7.614646101219868e-05, + "loss": 1.199, + "step": 3198 + }, + { + "epoch": 0.59, + "learning_rate": 7.608849629650947e-05, + "loss": 1.1213, + "step": 3199 + }, + { + "epoch": 0.59, + "learning_rate": 7.603054010082588e-05, + "loss": 1.2034, + "step": 3200 + }, + { + "epoch": 0.59, + "learning_rate": 7.597259244579853e-05, + "loss": 1.1787, + "step": 3201 + }, + { + "epoch": 0.59, + "learning_rate": 7.591465335207492e-05, + "loss": 1.1466, + "step": 3202 + }, + { + "epoch": 0.59, + "learning_rate": 7.585672284029962e-05, + "loss": 1.1366, + "step": 3203 + }, + { + "epoch": 0.59, + "learning_rate": 7.579880093111407e-05, + "loss": 1.057, + "step": 3204 + }, + { + "epoch": 0.59, + "learning_rate": 7.574088764515665e-05, + "loss": 1.1525, + "step": 3205 + }, + { + "epoch": 0.59, + "learning_rate": 7.568298300306271e-05, + "loss": 1.1291, + "step": 3206 + }, + { + "epoch": 0.59, + "learning_rate": 7.562508702546452e-05, + "loss": 1.0518, + "step": 3207 + }, + { + "epoch": 0.59, + "learning_rate": 7.556719973299115e-05, + "loss": 1.113, + "step": 3208 + }, + { + "epoch": 0.59, + "learning_rate": 7.550932114626869e-05, + "loss": 1.2159, + "step": 3209 + }, + { + "epoch": 0.59, + "learning_rate": 7.54514512859201e-05, + "loss": 1.1312, + "step": 3210 + }, + { + "epoch": 0.59, + "learning_rate": 7.539359017256522e-05, + "loss": 1.0772, + "step": 3211 + }, + { + "epoch": 0.59, + "learning_rate": 7.533573782682084e-05, + "loss": 1.2186, + "step": 3212 + }, + { + "epoch": 0.59, + "learning_rate": 7.527789426930046e-05, + "loss": 1.104, + "step": 3213 + }, + { + "epoch": 0.59, + "learning_rate": 7.522005952061462e-05, + "loss": 1.0655, + "step": 3214 + }, + { + "epoch": 0.59, + "learning_rate": 7.516223360137065e-05, + "loss": 1.0864, + "step": 3215 + }, + { + "epoch": 0.59, + "learning_rate": 7.510441653217272e-05, + "loss": 1.215, + "step": 3216 + }, + { + "epoch": 0.59, + "learning_rate": 7.504660833362187e-05, + "loss": 1.0473, + "step": 3217 + }, + { + "epoch": 0.59, + "learning_rate": 7.498880902631604e-05, + "loss": 1.0355, + "step": 3218 + }, + { + "epoch": 0.59, + "learning_rate": 7.493101863084985e-05, + "loss": 1.1933, + "step": 3219 + }, + { + "epoch": 0.59, + "learning_rate": 7.487323716781485e-05, + "loss": 1.1756, + "step": 3220 + }, + { + "epoch": 0.59, + "learning_rate": 7.48154646577994e-05, + "loss": 1.1431, + "step": 3221 + }, + { + "epoch": 0.59, + "learning_rate": 7.475770112138866e-05, + "loss": 1.0798, + "step": 3222 + }, + { + "epoch": 0.59, + "learning_rate": 7.469994657916463e-05, + "loss": 1.0109, + "step": 3223 + }, + { + "epoch": 0.59, + "learning_rate": 7.464220105170603e-05, + "loss": 1.0241, + "step": 3224 + }, + { + "epoch": 0.59, + "learning_rate": 7.458446455958839e-05, + "loss": 1.154, + "step": 3225 + }, + { + "epoch": 0.59, + "learning_rate": 7.452673712338404e-05, + "loss": 1.0658, + "step": 3226 + }, + { + "epoch": 0.59, + "learning_rate": 7.44690187636621e-05, + "loss": 1.1563, + "step": 3227 + }, + { + "epoch": 0.59, + "learning_rate": 7.441130950098841e-05, + "loss": 1.1024, + "step": 3228 + }, + { + "epoch": 0.6, + "learning_rate": 7.435360935592563e-05, + "loss": 0.9866, + "step": 3229 + }, + { + "epoch": 0.6, + "learning_rate": 7.429591834903314e-05, + "loss": 1.0766, + "step": 3230 + }, + { + "epoch": 0.6, + "learning_rate": 7.423823650086699e-05, + "loss": 1.0853, + "step": 3231 + }, + { + "epoch": 0.6, + "learning_rate": 7.418056383198007e-05, + "loss": 1.1761, + "step": 3232 + }, + { + "epoch": 0.6, + "learning_rate": 7.412290036292197e-05, + "loss": 1.0165, + "step": 3233 + }, + { + "epoch": 0.6, + "learning_rate": 7.406524611423903e-05, + "loss": 1.2967, + "step": 3234 + }, + { + "epoch": 0.6, + "learning_rate": 7.40076011064742e-05, + "loss": 1.1091, + "step": 3235 + }, + { + "epoch": 0.6, + "learning_rate": 7.394996536016729e-05, + "loss": 1.1849, + "step": 3236 + }, + { + "epoch": 0.6, + "learning_rate": 7.389233889585465e-05, + "loss": 1.1426, + "step": 3237 + }, + { + "epoch": 0.6, + "learning_rate": 7.383472173406944e-05, + "loss": 1.1224, + "step": 3238 + }, + { + "epoch": 0.6, + "learning_rate": 7.377711389534145e-05, + "loss": 1.0081, + "step": 3239 + }, + { + "epoch": 0.6, + "learning_rate": 7.371951540019721e-05, + "loss": 1.046, + "step": 3240 + }, + { + "epoch": 0.6, + "learning_rate": 7.366192626915981e-05, + "loss": 1.1591, + "step": 3241 + }, + { + "epoch": 0.6, + "learning_rate": 7.360434652274913e-05, + "loss": 1.0869, + "step": 3242 + }, + { + "epoch": 0.6, + "learning_rate": 7.35467761814816e-05, + "loss": 1.2311, + "step": 3243 + }, + { + "epoch": 0.6, + "learning_rate": 7.348921526587034e-05, + "loss": 1.1525, + "step": 3244 + }, + { + "epoch": 0.6, + "learning_rate": 7.343166379642517e-05, + "loss": 1.1103, + "step": 3245 + }, + { + "epoch": 0.6, + "learning_rate": 7.337412179365243e-05, + "loss": 1.0426, + "step": 3246 + }, + { + "epoch": 0.6, + "learning_rate": 7.331658927805516e-05, + "loss": 1.2594, + "step": 3247 + }, + { + "epoch": 0.6, + "learning_rate": 7.325906627013304e-05, + "loss": 1.0406, + "step": 3248 + }, + { + "epoch": 0.6, + "learning_rate": 7.32015527903823e-05, + "loss": 0.9441, + "step": 3249 + }, + { + "epoch": 0.6, + "learning_rate": 7.314404885929578e-05, + "loss": 1.1859, + "step": 3250 + }, + { + "epoch": 0.6, + "learning_rate": 7.3086554497363e-05, + "loss": 0.9717, + "step": 3251 + }, + { + "epoch": 0.6, + "learning_rate": 7.302906972506995e-05, + "loss": 1.1589, + "step": 3252 + }, + { + "epoch": 0.6, + "learning_rate": 7.29715945628993e-05, + "loss": 1.1238, + "step": 3253 + }, + { + "epoch": 0.6, + "learning_rate": 7.291412903133026e-05, + "loss": 1.1324, + "step": 3254 + }, + { + "epoch": 0.6, + "learning_rate": 7.28566731508386e-05, + "loss": 1.1963, + "step": 3255 + }, + { + "epoch": 0.6, + "learning_rate": 7.279922694189666e-05, + "loss": 1.1863, + "step": 3256 + }, + { + "epoch": 0.6, + "learning_rate": 7.27417904249733e-05, + "loss": 1.1373, + "step": 3257 + }, + { + "epoch": 0.6, + "learning_rate": 7.268436362053403e-05, + "loss": 1.1364, + "step": 3258 + }, + { + "epoch": 0.6, + "learning_rate": 7.262694654904077e-05, + "loss": 1.0194, + "step": 3259 + }, + { + "epoch": 0.6, + "learning_rate": 7.256953923095209e-05, + "loss": 1.1818, + "step": 3260 + }, + { + "epoch": 0.6, + "learning_rate": 7.251214168672298e-05, + "loss": 1.0542, + "step": 3261 + }, + { + "epoch": 0.6, + "learning_rate": 7.245475393680499e-05, + "loss": 1.1721, + "step": 3262 + }, + { + "epoch": 0.6, + "learning_rate": 7.239737600164618e-05, + "loss": 1.0847, + "step": 3263 + }, + { + "epoch": 0.6, + "learning_rate": 7.234000790169114e-05, + "loss": 1.0345, + "step": 3264 + }, + { + "epoch": 0.6, + "learning_rate": 7.228264965738093e-05, + "loss": 1.0901, + "step": 3265 + }, + { + "epoch": 0.6, + "learning_rate": 7.222530128915313e-05, + "loss": 1.1637, + "step": 3266 + }, + { + "epoch": 0.6, + "learning_rate": 7.216796281744172e-05, + "loss": 1.1334, + "step": 3267 + }, + { + "epoch": 0.6, + "learning_rate": 7.211063426267721e-05, + "loss": 1.1499, + "step": 3268 + }, + { + "epoch": 0.6, + "learning_rate": 7.205331564528658e-05, + "loss": 1.0228, + "step": 3269 + }, + { + "epoch": 0.6, + "learning_rate": 7.199600698569327e-05, + "loss": 1.1231, + "step": 3270 + }, + { + "epoch": 0.6, + "learning_rate": 7.193870830431715e-05, + "loss": 1.1008, + "step": 3271 + }, + { + "epoch": 0.6, + "learning_rate": 7.188141962157461e-05, + "loss": 1.1347, + "step": 3272 + }, + { + "epoch": 0.6, + "learning_rate": 7.18241409578783e-05, + "loss": 1.1925, + "step": 3273 + }, + { + "epoch": 0.6, + "learning_rate": 7.17668723336375e-05, + "loss": 1.026, + "step": 3274 + }, + { + "epoch": 0.6, + "learning_rate": 7.170961376925779e-05, + "loss": 1.0538, + "step": 3275 + }, + { + "epoch": 0.6, + "learning_rate": 7.16523652851412e-05, + "loss": 1.1736, + "step": 3276 + }, + { + "epoch": 0.6, + "learning_rate": 7.159512690168622e-05, + "loss": 1.2162, + "step": 3277 + }, + { + "epoch": 0.6, + "learning_rate": 7.153789863928769e-05, + "loss": 1.1371, + "step": 3278 + }, + { + "epoch": 0.6, + "learning_rate": 7.148068051833676e-05, + "loss": 1.2695, + "step": 3279 + }, + { + "epoch": 0.6, + "learning_rate": 7.142347255922112e-05, + "loss": 1.2569, + "step": 3280 + }, + { + "epoch": 0.6, + "learning_rate": 7.136627478232476e-05, + "loss": 1.2343, + "step": 3281 + }, + { + "epoch": 0.6, + "learning_rate": 7.130908720802805e-05, + "loss": 1.1145, + "step": 3282 + }, + { + "epoch": 0.6, + "learning_rate": 7.125190985670777e-05, + "loss": 1.086, + "step": 3283 + }, + { + "epoch": 0.61, + "learning_rate": 7.119474274873693e-05, + "loss": 1.1666, + "step": 3284 + }, + { + "epoch": 0.61, + "learning_rate": 7.113758590448502e-05, + "loss": 1.1027, + "step": 3285 + }, + { + "epoch": 0.61, + "learning_rate": 7.108043934431785e-05, + "loss": 1.215, + "step": 3286 + }, + { + "epoch": 0.61, + "learning_rate": 7.102330308859753e-05, + "loss": 1.0828, + "step": 3287 + }, + { + "epoch": 0.61, + "learning_rate": 7.09661771576825e-05, + "loss": 0.9659, + "step": 3288 + }, + { + "epoch": 0.61, + "learning_rate": 7.090906157192758e-05, + "loss": 1.1638, + "step": 3289 + }, + { + "epoch": 0.61, + "learning_rate": 7.085195635168377e-05, + "loss": 1.2037, + "step": 3290 + }, + { + "epoch": 0.61, + "learning_rate": 7.079486151729854e-05, + "loss": 1.0959, + "step": 3291 + }, + { + "epoch": 0.61, + "learning_rate": 7.073777708911556e-05, + "loss": 1.2897, + "step": 3292 + }, + { + "epoch": 0.61, + "learning_rate": 7.068070308747479e-05, + "loss": 1.292, + "step": 3293 + }, + { + "epoch": 0.61, + "learning_rate": 7.062363953271259e-05, + "loss": 1.1087, + "step": 3294 + }, + { + "epoch": 0.61, + "learning_rate": 7.056658644516138e-05, + "loss": 1.1715, + "step": 3295 + }, + { + "epoch": 0.61, + "learning_rate": 7.050954384515005e-05, + "loss": 0.9097, + "step": 3296 + }, + { + "epoch": 0.61, + "learning_rate": 7.04525117530037e-05, + "loss": 1.2046, + "step": 3297 + }, + { + "epoch": 0.61, + "learning_rate": 7.03954901890436e-05, + "loss": 1.1817, + "step": 3298 + }, + { + "epoch": 0.61, + "learning_rate": 7.033847917358741e-05, + "loss": 1.1052, + "step": 3299 + }, + { + "epoch": 0.61, + "learning_rate": 7.028147872694897e-05, + "loss": 1.1743, + "step": 3300 + }, + { + "epoch": 0.61, + "learning_rate": 7.022448886943824e-05, + "loss": 1.1383, + "step": 3301 + }, + { + "epoch": 0.61, + "learning_rate": 7.016750962136158e-05, + "loss": 1.1679, + "step": 3302 + }, + { + "epoch": 0.61, + "learning_rate": 7.011054100302148e-05, + "loss": 1.2427, + "step": 3303 + }, + { + "epoch": 0.61, + "learning_rate": 7.005358303471667e-05, + "loss": 1.127, + "step": 3304 + }, + { + "epoch": 0.61, + "learning_rate": 6.999663573674211e-05, + "loss": 1.1007, + "step": 3305 + }, + { + "epoch": 0.61, + "learning_rate": 6.993969912938887e-05, + "loss": 1.1633, + "step": 3306 + }, + { + "epoch": 0.61, + "learning_rate": 6.988277323294426e-05, + "loss": 1.1544, + "step": 3307 + }, + { + "epoch": 0.61, + "learning_rate": 6.982585806769181e-05, + "loss": 1.132, + "step": 3308 + }, + { + "epoch": 0.61, + "learning_rate": 6.97689536539112e-05, + "loss": 0.9402, + "step": 3309 + }, + { + "epoch": 0.61, + "learning_rate": 6.971206001187824e-05, + "loss": 1.0496, + "step": 3310 + }, + { + "epoch": 0.61, + "learning_rate": 6.965517716186502e-05, + "loss": 1.0992, + "step": 3311 + }, + { + "epoch": 0.61, + "learning_rate": 6.95983051241396e-05, + "loss": 1.125, + "step": 3312 + }, + { + "epoch": 0.61, + "learning_rate": 6.954144391896631e-05, + "loss": 1.1088, + "step": 3313 + }, + { + "epoch": 0.61, + "learning_rate": 6.948459356660563e-05, + "loss": 1.1083, + "step": 3314 + }, + { + "epoch": 0.61, + "learning_rate": 6.942775408731413e-05, + "loss": 1.0246, + "step": 3315 + }, + { + "epoch": 0.61, + "learning_rate": 6.937092550134454e-05, + "loss": 1.1468, + "step": 3316 + }, + { + "epoch": 0.61, + "learning_rate": 6.931410782894562e-05, + "loss": 1.1172, + "step": 3317 + }, + { + "epoch": 0.61, + "learning_rate": 6.925730109036236e-05, + "loss": 1.1054, + "step": 3318 + }, + { + "epoch": 0.61, + "learning_rate": 6.920050530583578e-05, + "loss": 1.273, + "step": 3319 + }, + { + "epoch": 0.61, + "learning_rate": 6.914372049560304e-05, + "loss": 1.2008, + "step": 3320 + }, + { + "epoch": 0.61, + "learning_rate": 6.908694667989735e-05, + "loss": 1.0215, + "step": 3321 + }, + { + "epoch": 0.61, + "learning_rate": 6.903018387894805e-05, + "loss": 1.1444, + "step": 3322 + }, + { + "epoch": 0.61, + "learning_rate": 6.897343211298047e-05, + "loss": 1.1119, + "step": 3323 + }, + { + "epoch": 0.61, + "learning_rate": 6.89166914022161e-05, + "loss": 1.1033, + "step": 3324 + }, + { + "epoch": 0.61, + "learning_rate": 6.885996176687244e-05, + "loss": 1.1069, + "step": 3325 + }, + { + "epoch": 0.61, + "learning_rate": 6.880324322716306e-05, + "loss": 1.1489, + "step": 3326 + }, + { + "epoch": 0.61, + "learning_rate": 6.874653580329764e-05, + "loss": 1.065, + "step": 3327 + }, + { + "epoch": 0.61, + "learning_rate": 6.868983951548171e-05, + "loss": 1.1035, + "step": 3328 + }, + { + "epoch": 0.61, + "learning_rate": 6.863315438391705e-05, + "loss": 1.0294, + "step": 3329 + }, + { + "epoch": 0.61, + "learning_rate": 6.857648042880133e-05, + "loss": 1.154, + "step": 3330 + }, + { + "epoch": 0.61, + "learning_rate": 6.85198176703283e-05, + "loss": 1.1528, + "step": 3331 + }, + { + "epoch": 0.61, + "learning_rate": 6.846316612868765e-05, + "loss": 1.1558, + "step": 3332 + }, + { + "epoch": 0.61, + "learning_rate": 6.840652582406525e-05, + "loss": 1.0721, + "step": 3333 + }, + { + "epoch": 0.61, + "learning_rate": 6.83498967766427e-05, + "loss": 1.0806, + "step": 3334 + }, + { + "epoch": 0.61, + "learning_rate": 6.829327900659776e-05, + "loss": 0.9791, + "step": 3335 + }, + { + "epoch": 0.61, + "learning_rate": 6.823667253410417e-05, + "loss": 1.0732, + "step": 3336 + }, + { + "epoch": 0.61, + "learning_rate": 6.81800773793316e-05, + "loss": 1.1795, + "step": 3337 + }, + { + "epoch": 0.62, + "learning_rate": 6.812349356244574e-05, + "loss": 0.9927, + "step": 3338 + }, + { + "epoch": 0.62, + "learning_rate": 6.806692110360812e-05, + "loss": 1.1296, + "step": 3339 + }, + { + "epoch": 0.62, + "learning_rate": 6.801036002297634e-05, + "loss": 1.0663, + "step": 3340 + }, + { + "epoch": 0.62, + "learning_rate": 6.795381034070394e-05, + "loss": 1.1162, + "step": 3341 + }, + { + "epoch": 0.62, + "learning_rate": 6.789727207694033e-05, + "loss": 1.0895, + "step": 3342 + }, + { + "epoch": 0.62, + "learning_rate": 6.78407452518309e-05, + "loss": 1.0273, + "step": 3343 + }, + { + "epoch": 0.62, + "learning_rate": 6.778422988551701e-05, + "loss": 1.0596, + "step": 3344 + }, + { + "epoch": 0.62, + "learning_rate": 6.772772599813577e-05, + "loss": 1.0937, + "step": 3345 + }, + { + "epoch": 0.62, + "learning_rate": 6.767123360982038e-05, + "loss": 1.0752, + "step": 3346 + }, + { + "epoch": 0.62, + "learning_rate": 6.761475274069986e-05, + "loss": 1.0884, + "step": 3347 + }, + { + "epoch": 0.62, + "learning_rate": 6.755828341089917e-05, + "loss": 1.1279, + "step": 3348 + }, + { + "epoch": 0.62, + "learning_rate": 6.750182564053911e-05, + "loss": 1.0794, + "step": 3349 + }, + { + "epoch": 0.62, + "learning_rate": 6.744537944973635e-05, + "loss": 1.2396, + "step": 3350 + }, + { + "epoch": 0.62, + "learning_rate": 6.738894485860348e-05, + "loss": 1.0755, + "step": 3351 + }, + { + "epoch": 0.62, + "learning_rate": 6.733252188724892e-05, + "loss": 1.0125, + "step": 3352 + }, + { + "epoch": 0.62, + "learning_rate": 6.727611055577703e-05, + "loss": 1.123, + "step": 3353 + }, + { + "epoch": 0.62, + "learning_rate": 6.721971088428789e-05, + "loss": 1.0441, + "step": 3354 + }, + { + "epoch": 0.62, + "learning_rate": 6.716332289287759e-05, + "loss": 1.1861, + "step": 3355 + }, + { + "epoch": 0.62, + "learning_rate": 6.710694660163787e-05, + "loss": 1.1581, + "step": 3356 + }, + { + "epoch": 0.62, + "learning_rate": 6.705058203065644e-05, + "loss": 1.1241, + "step": 3357 + }, + { + "epoch": 0.62, + "learning_rate": 6.699422920001677e-05, + "loss": 1.1545, + "step": 3358 + }, + { + "epoch": 0.62, + "learning_rate": 6.693788812979819e-05, + "loss": 1.1591, + "step": 3359 + }, + { + "epoch": 0.62, + "learning_rate": 6.688155884007586e-05, + "loss": 1.055, + "step": 3360 + }, + { + "epoch": 0.62, + "learning_rate": 6.682524135092058e-05, + "loss": 1.221, + "step": 3361 + }, + { + "epoch": 0.62, + "learning_rate": 6.676893568239915e-05, + "loss": 1.0342, + "step": 3362 + }, + { + "epoch": 0.62, + "learning_rate": 6.671264185457407e-05, + "loss": 1.1931, + "step": 3363 + }, + { + "epoch": 0.62, + "learning_rate": 6.665635988750358e-05, + "loss": 0.9978, + "step": 3364 + }, + { + "epoch": 0.62, + "learning_rate": 6.66000898012418e-05, + "loss": 1.0563, + "step": 3365 + }, + { + "epoch": 0.62, + "learning_rate": 6.654383161583855e-05, + "loss": 1.0092, + "step": 3366 + }, + { + "epoch": 0.62, + "learning_rate": 6.648758535133934e-05, + "loss": 1.1265, + "step": 3367 + }, + { + "epoch": 0.62, + "learning_rate": 6.643135102778555e-05, + "loss": 1.1786, + "step": 3368 + }, + { + "epoch": 0.62, + "learning_rate": 6.637512866521427e-05, + "loss": 1.1273, + "step": 3369 + }, + { + "epoch": 0.62, + "learning_rate": 6.631891828365833e-05, + "loss": 1.1136, + "step": 3370 + }, + { + "epoch": 0.62, + "learning_rate": 6.626271990314626e-05, + "loss": 1.0124, + "step": 3371 + }, + { + "epoch": 0.62, + "learning_rate": 6.620653354370235e-05, + "loss": 1.2785, + "step": 3372 + }, + { + "epoch": 0.62, + "learning_rate": 6.615035922534657e-05, + "loss": 1.1804, + "step": 3373 + }, + { + "epoch": 0.62, + "learning_rate": 6.609419696809462e-05, + "loss": 1.1054, + "step": 3374 + }, + { + "epoch": 0.62, + "learning_rate": 6.603804679195792e-05, + "loss": 1.1064, + "step": 3375 + }, + { + "epoch": 0.62, + "learning_rate": 6.598190871694356e-05, + "loss": 1.2409, + "step": 3376 + }, + { + "epoch": 0.62, + "learning_rate": 6.592578276305436e-05, + "loss": 0.9732, + "step": 3377 + }, + { + "epoch": 0.62, + "learning_rate": 6.586966895028876e-05, + "loss": 1.0801, + "step": 3378 + }, + { + "epoch": 0.62, + "learning_rate": 6.581356729864087e-05, + "loss": 1.1631, + "step": 3379 + }, + { + "epoch": 0.62, + "learning_rate": 6.575747782810053e-05, + "loss": 1.2809, + "step": 3380 + }, + { + "epoch": 0.62, + "learning_rate": 6.570140055865321e-05, + "loss": 1.1101, + "step": 3381 + }, + { + "epoch": 0.62, + "learning_rate": 6.564533551028005e-05, + "loss": 1.2128, + "step": 3382 + }, + { + "epoch": 0.62, + "learning_rate": 6.558928270295779e-05, + "loss": 1.1091, + "step": 3383 + }, + { + "epoch": 0.62, + "learning_rate": 6.553324215665883e-05, + "loss": 1.2042, + "step": 3384 + }, + { + "epoch": 0.62, + "learning_rate": 6.547721389135121e-05, + "loss": 1.1536, + "step": 3385 + }, + { + "epoch": 0.62, + "learning_rate": 6.542119792699861e-05, + "loss": 1.185, + "step": 3386 + }, + { + "epoch": 0.62, + "learning_rate": 6.536519428356029e-05, + "loss": 1.2076, + "step": 3387 + }, + { + "epoch": 0.62, + "learning_rate": 6.530920298099115e-05, + "loss": 1.1469, + "step": 3388 + }, + { + "epoch": 0.62, + "learning_rate": 6.525322403924166e-05, + "loss": 1.0598, + "step": 3389 + }, + { + "epoch": 0.62, + "learning_rate": 6.519725747825795e-05, + "loss": 1.0364, + "step": 3390 + }, + { + "epoch": 0.62, + "learning_rate": 6.514130331798162e-05, + "loss": 1.0272, + "step": 3391 + }, + { + "epoch": 0.63, + "learning_rate": 6.508536157834996e-05, + "loss": 1.0343, + "step": 3392 + }, + { + "epoch": 0.63, + "learning_rate": 6.502943227929586e-05, + "loss": 1.0356, + "step": 3393 + }, + { + "epoch": 0.63, + "learning_rate": 6.497351544074761e-05, + "loss": 1.1444, + "step": 3394 + }, + { + "epoch": 0.63, + "learning_rate": 6.491761108262923e-05, + "loss": 1.1718, + "step": 3395 + }, + { + "epoch": 0.63, + "learning_rate": 6.486171922486024e-05, + "loss": 1.0802, + "step": 3396 + }, + { + "epoch": 0.63, + "learning_rate": 6.480583988735565e-05, + "loss": 1.0561, + "step": 3397 + }, + { + "epoch": 0.63, + "learning_rate": 6.47499730900261e-05, + "loss": 0.9797, + "step": 3398 + }, + { + "epoch": 0.63, + "learning_rate": 6.46941188527777e-05, + "loss": 1.1883, + "step": 3399 + }, + { + "epoch": 0.63, + "learning_rate": 6.46382771955121e-05, + "loss": 1.0437, + "step": 3400 + }, + { + "epoch": 0.63, + "learning_rate": 6.458244813812646e-05, + "loss": 1.0546, + "step": 3401 + }, + { + "epoch": 0.63, + "learning_rate": 6.45266317005135e-05, + "loss": 1.1714, + "step": 3402 + }, + { + "epoch": 0.63, + "learning_rate": 6.447082790256134e-05, + "loss": 1.0804, + "step": 3403 + }, + { + "epoch": 0.63, + "learning_rate": 6.44150367641537e-05, + "loss": 1.0601, + "step": 3404 + }, + { + "epoch": 0.63, + "learning_rate": 6.435925830516973e-05, + "loss": 1.184, + "step": 3405 + }, + { + "epoch": 0.63, + "learning_rate": 6.43034925454841e-05, + "loss": 1.0652, + "step": 3406 + }, + { + "epoch": 0.63, + "learning_rate": 6.424773950496692e-05, + "loss": 1.2654, + "step": 3407 + }, + { + "epoch": 0.63, + "learning_rate": 6.419199920348381e-05, + "loss": 1.165, + "step": 3408 + }, + { + "epoch": 0.63, + "learning_rate": 6.413627166089579e-05, + "loss": 1.0988, + "step": 3409 + }, + { + "epoch": 0.63, + "learning_rate": 6.40805568970594e-05, + "loss": 1.1947, + "step": 3410 + }, + { + "epoch": 0.63, + "learning_rate": 6.402485493182655e-05, + "loss": 1.0915, + "step": 3411 + }, + { + "epoch": 0.63, + "learning_rate": 6.396916578504467e-05, + "loss": 0.9958, + "step": 3412 + }, + { + "epoch": 0.63, + "learning_rate": 6.391348947655657e-05, + "loss": 1.1958, + "step": 3413 + }, + { + "epoch": 0.63, + "learning_rate": 6.385782602620056e-05, + "loss": 0.9541, + "step": 3414 + }, + { + "epoch": 0.63, + "learning_rate": 6.380217545381024e-05, + "loss": 1.1626, + "step": 3415 + }, + { + "epoch": 0.63, + "learning_rate": 6.374653777921471e-05, + "loss": 1.1146, + "step": 3416 + }, + { + "epoch": 0.63, + "learning_rate": 6.369091302223847e-05, + "loss": 1.0974, + "step": 3417 + }, + { + "epoch": 0.63, + "learning_rate": 6.363530120270141e-05, + "loss": 1.0686, + "step": 3418 + }, + { + "epoch": 0.63, + "learning_rate": 6.357970234041877e-05, + "loss": 1.0973, + "step": 3419 + }, + { + "epoch": 0.63, + "learning_rate": 6.352411645520126e-05, + "loss": 1.1156, + "step": 3420 + }, + { + "epoch": 0.63, + "learning_rate": 6.346854356685488e-05, + "loss": 1.106, + "step": 3421 + }, + { + "epoch": 0.63, + "learning_rate": 6.341298369518103e-05, + "loss": 1.0691, + "step": 3422 + }, + { + "epoch": 0.63, + "learning_rate": 6.335743685997648e-05, + "loss": 1.0921, + "step": 3423 + }, + { + "epoch": 0.63, + "learning_rate": 6.330190308103336e-05, + "loss": 1.136, + "step": 3424 + }, + { + "epoch": 0.63, + "learning_rate": 6.324638237813912e-05, + "loss": 1.112, + "step": 3425 + }, + { + "epoch": 0.63, + "learning_rate": 6.319087477107663e-05, + "loss": 1.0709, + "step": 3426 + }, + { + "epoch": 0.63, + "learning_rate": 6.313538027962394e-05, + "loss": 1.1227, + "step": 3427 + }, + { + "epoch": 0.63, + "learning_rate": 6.307989892355455e-05, + "loss": 1.0836, + "step": 3428 + }, + { + "epoch": 0.63, + "learning_rate": 6.302443072263728e-05, + "loss": 1.1906, + "step": 3429 + }, + { + "epoch": 0.63, + "learning_rate": 6.296897569663621e-05, + "loss": 1.1079, + "step": 3430 + }, + { + "epoch": 0.63, + "learning_rate": 6.291353386531074e-05, + "loss": 1.0141, + "step": 3431 + }, + { + "epoch": 0.63, + "learning_rate": 6.285810524841563e-05, + "loss": 1.1028, + "step": 3432 + }, + { + "epoch": 0.63, + "learning_rate": 6.280268986570079e-05, + "loss": 1.0406, + "step": 3433 + }, + { + "epoch": 0.63, + "learning_rate": 6.274728773691154e-05, + "loss": 1.079, + "step": 3434 + }, + { + "epoch": 0.63, + "learning_rate": 6.269189888178843e-05, + "loss": 1.2429, + "step": 3435 + }, + { + "epoch": 0.63, + "learning_rate": 6.263652332006734e-05, + "loss": 1.0168, + "step": 3436 + }, + { + "epoch": 0.63, + "learning_rate": 6.258116107147933e-05, + "loss": 0.958, + "step": 3437 + }, + { + "epoch": 0.63, + "learning_rate": 6.252581215575071e-05, + "loss": 1.1684, + "step": 3438 + }, + { + "epoch": 0.63, + "learning_rate": 6.247047659260311e-05, + "loss": 1.059, + "step": 3439 + }, + { + "epoch": 0.63, + "learning_rate": 6.241515440175338e-05, + "loss": 1.1514, + "step": 3440 + }, + { + "epoch": 0.63, + "learning_rate": 6.235984560291357e-05, + "loss": 1.0361, + "step": 3441 + }, + { + "epoch": 0.63, + "learning_rate": 6.230455021579102e-05, + "loss": 1.0805, + "step": 3442 + }, + { + "epoch": 0.63, + "learning_rate": 6.224926826008828e-05, + "loss": 1.0911, + "step": 3443 + }, + { + "epoch": 0.63, + "learning_rate": 6.2193999755503e-05, + "loss": 1.1332, + "step": 3444 + }, + { + "epoch": 0.63, + "learning_rate": 6.213874472172815e-05, + "loss": 1.0844, + "step": 3445 + }, + { + "epoch": 0.64, + "learning_rate": 6.208350317845193e-05, + "loss": 1.0353, + "step": 3446 + }, + { + "epoch": 0.64, + "learning_rate": 6.202827514535765e-05, + "loss": 1.0725, + "step": 3447 + }, + { + "epoch": 0.64, + "learning_rate": 6.197306064212387e-05, + "loss": 1.1013, + "step": 3448 + }, + { + "epoch": 0.64, + "learning_rate": 6.191785968842422e-05, + "loss": 1.1153, + "step": 3449 + }, + { + "epoch": 0.64, + "learning_rate": 6.186267230392762e-05, + "loss": 1.118, + "step": 3450 + }, + { + "epoch": 0.64, + "learning_rate": 6.180749850829812e-05, + "loss": 1.1262, + "step": 3451 + }, + { + "epoch": 0.64, + "learning_rate": 6.175233832119489e-05, + "loss": 1.1, + "step": 3452 + }, + { + "epoch": 0.64, + "learning_rate": 6.169719176227234e-05, + "loss": 1.1734, + "step": 3453 + }, + { + "epoch": 0.64, + "learning_rate": 6.164205885117993e-05, + "loss": 1.2041, + "step": 3454 + }, + { + "epoch": 0.64, + "learning_rate": 6.158693960756228e-05, + "loss": 1.109, + "step": 3455 + }, + { + "epoch": 0.64, + "learning_rate": 6.153183405105915e-05, + "loss": 0.9999, + "step": 3456 + }, + { + "epoch": 0.64, + "learning_rate": 6.147674220130547e-05, + "loss": 1.1923, + "step": 3457 + }, + { + "epoch": 0.64, + "learning_rate": 6.142166407793119e-05, + "loss": 1.1535, + "step": 3458 + }, + { + "epoch": 0.64, + "learning_rate": 6.136659970056149e-05, + "loss": 1.0617, + "step": 3459 + }, + { + "epoch": 0.64, + "learning_rate": 6.131154908881648e-05, + "loss": 1.1146, + "step": 3460 + }, + { + "epoch": 0.64, + "learning_rate": 6.125651226231155e-05, + "loss": 1.0492, + "step": 3461 + }, + { + "epoch": 0.64, + "learning_rate": 6.120148924065707e-05, + "loss": 1.0338, + "step": 3462 + }, + { + "epoch": 0.64, + "learning_rate": 6.114648004345853e-05, + "loss": 1.2382, + "step": 3463 + }, + { + "epoch": 0.64, + "learning_rate": 6.109148469031646e-05, + "loss": 1.0657, + "step": 3464 + }, + { + "epoch": 0.64, + "learning_rate": 6.103650320082655e-05, + "loss": 1.0971, + "step": 3465 + }, + { + "epoch": 0.64, + "learning_rate": 6.098153559457935e-05, + "loss": 1.045, + "step": 3466 + }, + { + "epoch": 0.64, + "learning_rate": 6.092658189116068e-05, + "loss": 0.9964, + "step": 3467 + }, + { + "epoch": 0.64, + "learning_rate": 6.0871642110151305e-05, + "loss": 1.0464, + "step": 3468 + }, + { + "epoch": 0.64, + "learning_rate": 6.081671627112704e-05, + "loss": 1.1524, + "step": 3469 + }, + { + "epoch": 0.64, + "learning_rate": 6.0761804393658775e-05, + "loss": 1.1306, + "step": 3470 + }, + { + "epoch": 0.64, + "learning_rate": 6.070690649731231e-05, + "loss": 1.197, + "step": 3471 + }, + { + "epoch": 0.64, + "learning_rate": 6.0652022601648575e-05, + "loss": 1.1043, + "step": 3472 + }, + { + "epoch": 0.64, + "learning_rate": 6.059715272622346e-05, + "loss": 1.0815, + "step": 3473 + }, + { + "epoch": 0.64, + "learning_rate": 6.0542296890587904e-05, + "loss": 0.9544, + "step": 3474 + }, + { + "epoch": 0.64, + "learning_rate": 6.0487455114287794e-05, + "loss": 1.1646, + "step": 3475 + }, + { + "epoch": 0.64, + "learning_rate": 6.043262741686408e-05, + "loss": 1.1465, + "step": 3476 + }, + { + "epoch": 0.64, + "learning_rate": 6.037781381785256e-05, + "loss": 1.1574, + "step": 3477 + }, + { + "epoch": 0.64, + "learning_rate": 6.032301433678411e-05, + "loss": 1.1747, + "step": 3478 + }, + { + "epoch": 0.64, + "learning_rate": 6.026822899318458e-05, + "loss": 1.2477, + "step": 3479 + }, + { + "epoch": 0.64, + "learning_rate": 6.021345780657477e-05, + "loss": 1.1319, + "step": 3480 + }, + { + "epoch": 0.64, + "learning_rate": 6.015870079647044e-05, + "loss": 1.13, + "step": 3481 + }, + { + "epoch": 0.64, + "learning_rate": 6.01039579823822e-05, + "loss": 1.0228, + "step": 3482 + }, + { + "epoch": 0.64, + "learning_rate": 6.0049229383815755e-05, + "loss": 1.0275, + "step": 3483 + }, + { + "epoch": 0.64, + "learning_rate": 5.9994515020271644e-05, + "loss": 1.0349, + "step": 3484 + }, + { + "epoch": 0.64, + "learning_rate": 5.993981491124541e-05, + "loss": 1.2311, + "step": 3485 + }, + { + "epoch": 0.64, + "learning_rate": 5.9885129076227456e-05, + "loss": 1.0984, + "step": 3486 + }, + { + "epoch": 0.64, + "learning_rate": 5.983045753470308e-05, + "loss": 1.109, + "step": 3487 + }, + { + "epoch": 0.64, + "learning_rate": 5.977580030615254e-05, + "loss": 1.1557, + "step": 3488 + }, + { + "epoch": 0.64, + "learning_rate": 5.9721157410050976e-05, + "loss": 1.0801, + "step": 3489 + }, + { + "epoch": 0.64, + "learning_rate": 5.966652886586843e-05, + "loss": 1.0554, + "step": 3490 + }, + { + "epoch": 0.64, + "learning_rate": 5.961191469306984e-05, + "loss": 1.1005, + "step": 3491 + }, + { + "epoch": 0.64, + "learning_rate": 5.9557314911115006e-05, + "loss": 1.0159, + "step": 3492 + }, + { + "epoch": 0.64, + "learning_rate": 5.9502729539458535e-05, + "loss": 1.118, + "step": 3493 + }, + { + "epoch": 0.64, + "learning_rate": 5.944815859755002e-05, + "loss": 1.1158, + "step": 3494 + }, + { + "epoch": 0.64, + "learning_rate": 5.9393602104833824e-05, + "loss": 1.1619, + "step": 3495 + }, + { + "epoch": 0.64, + "learning_rate": 5.933906008074923e-05, + "loss": 1.0688, + "step": 3496 + }, + { + "epoch": 0.64, + "learning_rate": 5.928453254473034e-05, + "loss": 0.9525, + "step": 3497 + }, + { + "epoch": 0.64, + "learning_rate": 5.9230019516206034e-05, + "loss": 1.1962, + "step": 3498 + }, + { + "epoch": 0.64, + "learning_rate": 5.917552101460008e-05, + "loss": 1.135, + "step": 3499 + }, + { + "epoch": 0.64, + "learning_rate": 5.912103705933107e-05, + "loss": 1.0233, + "step": 3500 + }, + { + "epoch": 0.65, + "learning_rate": 5.906656766981242e-05, + "loss": 1.1583, + "step": 3501 + }, + { + "epoch": 0.65, + "learning_rate": 5.901211286545234e-05, + "loss": 1.1482, + "step": 3502 + }, + { + "epoch": 0.65, + "learning_rate": 5.895767266565386e-05, + "loss": 0.8926, + "step": 3503 + }, + { + "epoch": 0.65, + "learning_rate": 5.8903247089814736e-05, + "loss": 1.2208, + "step": 3504 + }, + { + "epoch": 0.65, + "learning_rate": 5.884883615732759e-05, + "loss": 1.1015, + "step": 3505 + }, + { + "epoch": 0.65, + "learning_rate": 5.879443988757982e-05, + "loss": 1.105, + "step": 3506 + }, + { + "epoch": 0.65, + "learning_rate": 5.874005829995357e-05, + "loss": 1.1509, + "step": 3507 + }, + { + "epoch": 0.65, + "learning_rate": 5.868569141382581e-05, + "loss": 1.0446, + "step": 3508 + }, + { + "epoch": 0.65, + "learning_rate": 5.863133924856814e-05, + "loss": 1.0195, + "step": 3509 + }, + { + "epoch": 0.65, + "learning_rate": 5.857700182354704e-05, + "loss": 1.1436, + "step": 3510 + }, + { + "epoch": 0.65, + "learning_rate": 5.852267915812373e-05, + "loss": 1.0786, + "step": 3511 + }, + { + "epoch": 0.65, + "learning_rate": 5.8468371271654096e-05, + "loss": 1.1681, + "step": 3512 + }, + { + "epoch": 0.65, + "learning_rate": 5.841407818348885e-05, + "loss": 1.1124, + "step": 3513 + }, + { + "epoch": 0.65, + "learning_rate": 5.8359799912973365e-05, + "loss": 1.0574, + "step": 3514 + }, + { + "epoch": 0.65, + "learning_rate": 5.8305536479447765e-05, + "loss": 1.195, + "step": 3515 + }, + { + "epoch": 0.65, + "learning_rate": 5.825128790224681e-05, + "loss": 1.1133, + "step": 3516 + }, + { + "epoch": 0.65, + "learning_rate": 5.819705420070012e-05, + "loss": 1.011, + "step": 3517 + }, + { + "epoch": 0.65, + "learning_rate": 5.814283539413185e-05, + "loss": 1.0027, + "step": 3518 + }, + { + "epoch": 0.65, + "learning_rate": 5.8088631501861034e-05, + "loss": 1.1713, + "step": 3519 + }, + { + "epoch": 0.65, + "learning_rate": 5.803444254320115e-05, + "loss": 1.1182, + "step": 3520 + }, + { + "epoch": 0.65, + "learning_rate": 5.798026853746059e-05, + "loss": 1.1442, + "step": 3521 + }, + { + "epoch": 0.65, + "learning_rate": 5.792610950394222e-05, + "loss": 1.1895, + "step": 3522 + }, + { + "epoch": 0.65, + "learning_rate": 5.7871965461943765e-05, + "loss": 1.1761, + "step": 3523 + }, + { + "epoch": 0.65, + "learning_rate": 5.781783643075743e-05, + "loss": 1.174, + "step": 3524 + }, + { + "epoch": 0.65, + "learning_rate": 5.7763722429670274e-05, + "loss": 1.0728, + "step": 3525 + }, + { + "epoch": 0.65, + "learning_rate": 5.7709623477963694e-05, + "loss": 0.9746, + "step": 3526 + }, + { + "epoch": 0.65, + "learning_rate": 5.765553959491406e-05, + "loss": 1.1236, + "step": 3527 + }, + { + "epoch": 0.65, + "learning_rate": 5.760147079979212e-05, + "loss": 1.1028, + "step": 3528 + }, + { + "epoch": 0.65, + "learning_rate": 5.7547417111863444e-05, + "loss": 1.1351, + "step": 3529 + }, + { + "epoch": 0.65, + "learning_rate": 5.7493378550388064e-05, + "loss": 1.0124, + "step": 3530 + }, + { + "epoch": 0.65, + "learning_rate": 5.7439355134620696e-05, + "loss": 1.128, + "step": 3531 + }, + { + "epoch": 0.65, + "learning_rate": 5.7385346883810596e-05, + "loss": 1.1857, + "step": 3532 + }, + { + "epoch": 0.65, + "learning_rate": 5.733135381720174e-05, + "loss": 1.1611, + "step": 3533 + }, + { + "epoch": 0.65, + "learning_rate": 5.7277375954032555e-05, + "loss": 1.1514, + "step": 3534 + }, + { + "epoch": 0.65, + "learning_rate": 5.722341331353618e-05, + "loss": 1.2045, + "step": 3535 + }, + { + "epoch": 0.65, + "learning_rate": 5.716946591494022e-05, + "loss": 1.2385, + "step": 3536 + }, + { + "epoch": 0.65, + "learning_rate": 5.711553377746691e-05, + "loss": 0.9946, + "step": 3537 + }, + { + "epoch": 0.65, + "learning_rate": 5.706161692033298e-05, + "loss": 1.0875, + "step": 3538 + }, + { + "epoch": 0.65, + "learning_rate": 5.700771536274985e-05, + "loss": 1.1313, + "step": 3539 + }, + { + "epoch": 0.65, + "learning_rate": 5.695382912392331e-05, + "loss": 1.0484, + "step": 3540 + }, + { + "epoch": 0.65, + "learning_rate": 5.6899958223053915e-05, + "loss": 1.0825, + "step": 3541 + }, + { + "epoch": 0.65, + "learning_rate": 5.684610267933648e-05, + "loss": 1.1641, + "step": 3542 + }, + { + "epoch": 0.65, + "learning_rate": 5.67922625119606e-05, + "loss": 1.1711, + "step": 3543 + }, + { + "epoch": 0.65, + "learning_rate": 5.673843774011021e-05, + "loss": 1.2011, + "step": 3544 + }, + { + "epoch": 0.65, + "learning_rate": 5.66846283829639e-05, + "loss": 1.2088, + "step": 3545 + }, + { + "epoch": 0.65, + "learning_rate": 5.663083445969464e-05, + "loss": 1.1835, + "step": 3546 + }, + { + "epoch": 0.65, + "learning_rate": 5.6577055989470076e-05, + "loss": 0.9808, + "step": 3547 + }, + { + "epoch": 0.65, + "learning_rate": 5.652329299145207e-05, + "loss": 0.9946, + "step": 3548 + }, + { + "epoch": 0.65, + "learning_rate": 5.646954548479726e-05, + "loss": 1.1848, + "step": 3549 + }, + { + "epoch": 0.65, + "learning_rate": 5.6415813488656565e-05, + "loss": 1.0989, + "step": 3550 + }, + { + "epoch": 0.65, + "learning_rate": 5.636209702217552e-05, + "loss": 1.1398, + "step": 3551 + }, + { + "epoch": 0.65, + "learning_rate": 5.630839610449403e-05, + "loss": 1.0816, + "step": 3552 + }, + { + "epoch": 0.65, + "learning_rate": 5.625471075474649e-05, + "loss": 1.1275, + "step": 3553 + }, + { + "epoch": 0.65, + "learning_rate": 5.6201040992061694e-05, + "loss": 1.1384, + "step": 3554 + }, + { + "epoch": 0.66, + "learning_rate": 5.614738683556302e-05, + "loss": 1.1594, + "step": 3555 + }, + { + "epoch": 0.66, + "learning_rate": 5.609374830436812e-05, + "loss": 1.1849, + "step": 3556 + }, + { + "epoch": 0.66, + "learning_rate": 5.604012541758924e-05, + "loss": 1.1124, + "step": 3557 + }, + { + "epoch": 0.66, + "learning_rate": 5.598651819433294e-05, + "loss": 1.1594, + "step": 3558 + }, + { + "epoch": 0.66, + "learning_rate": 5.5932926653700203e-05, + "loss": 1.212, + "step": 3559 + }, + { + "epoch": 0.66, + "learning_rate": 5.5879350814786426e-05, + "loss": 1.0658, + "step": 3560 + }, + { + "epoch": 0.66, + "learning_rate": 5.582579069668151e-05, + "loss": 1.0939, + "step": 3561 + }, + { + "epoch": 0.66, + "learning_rate": 5.577224631846961e-05, + "loss": 1.1059, + "step": 3562 + }, + { + "epoch": 0.66, + "learning_rate": 5.571871769922945e-05, + "loss": 1.2605, + "step": 3563 + }, + { + "epoch": 0.66, + "learning_rate": 5.566520485803388e-05, + "loss": 1.0479, + "step": 3564 + }, + { + "epoch": 0.66, + "learning_rate": 5.5611707813950396e-05, + "loss": 1.1187, + "step": 3565 + }, + { + "epoch": 0.66, + "learning_rate": 5.5558226586040674e-05, + "loss": 1.2146, + "step": 3566 + }, + { + "epoch": 0.66, + "learning_rate": 5.55047611933609e-05, + "loss": 1.0977, + "step": 3567 + }, + { + "epoch": 0.66, + "learning_rate": 5.5451311654961456e-05, + "loss": 1.0772, + "step": 3568 + }, + { + "epoch": 0.66, + "learning_rate": 5.5397877989887314e-05, + "loss": 0.9909, + "step": 3569 + }, + { + "epoch": 0.66, + "learning_rate": 5.5344460217177454e-05, + "loss": 1.0689, + "step": 3570 + }, + { + "epoch": 0.66, + "learning_rate": 5.529105835586552e-05, + "loss": 0.9891, + "step": 3571 + }, + { + "epoch": 0.66, + "learning_rate": 5.523767242497927e-05, + "loss": 1.1737, + "step": 3572 + }, + { + "epoch": 0.66, + "learning_rate": 5.518430244354093e-05, + "loss": 1.1421, + "step": 3573 + }, + { + "epoch": 0.66, + "learning_rate": 5.5130948430566934e-05, + "loss": 1.0646, + "step": 3574 + }, + { + "epoch": 0.66, + "learning_rate": 5.507761040506807e-05, + "loss": 1.2255, + "step": 3575 + }, + { + "epoch": 0.66, + "learning_rate": 5.50242883860494e-05, + "loss": 1.118, + "step": 3576 + }, + { + "epoch": 0.66, + "learning_rate": 5.497098239251038e-05, + "loss": 1.2044, + "step": 3577 + }, + { + "epoch": 0.66, + "learning_rate": 5.4917692443444614e-05, + "loss": 1.1062, + "step": 3578 + }, + { + "epoch": 0.66, + "learning_rate": 5.4864418557840136e-05, + "loss": 1.2171, + "step": 3579 + }, + { + "epoch": 0.66, + "learning_rate": 5.4811160754679134e-05, + "loss": 1.0664, + "step": 3580 + }, + { + "epoch": 0.66, + "learning_rate": 5.4757919052938134e-05, + "loss": 1.2069, + "step": 3581 + }, + { + "epoch": 0.66, + "learning_rate": 5.470469347158784e-05, + "loss": 0.9919, + "step": 3582 + }, + { + "epoch": 0.66, + "learning_rate": 5.465148402959339e-05, + "loss": 1.0257, + "step": 3583 + }, + { + "epoch": 0.66, + "learning_rate": 5.459829074591394e-05, + "loss": 1.0929, + "step": 3584 + }, + { + "epoch": 0.66, + "learning_rate": 5.454511363950314e-05, + "loss": 1.0922, + "step": 3585 + }, + { + "epoch": 0.66, + "learning_rate": 5.44919527293086e-05, + "loss": 1.1473, + "step": 3586 + }, + { + "epoch": 0.66, + "learning_rate": 5.443880803427239e-05, + "loss": 1.0737, + "step": 3587 + }, + { + "epoch": 0.66, + "learning_rate": 5.4385679573330675e-05, + "loss": 0.9704, + "step": 3588 + }, + { + "epoch": 0.66, + "learning_rate": 5.43325673654139e-05, + "loss": 1.1391, + "step": 3589 + }, + { + "epoch": 0.66, + "learning_rate": 5.427947142944666e-05, + "loss": 1.1599, + "step": 3590 + }, + { + "epoch": 0.66, + "learning_rate": 5.422639178434788e-05, + "loss": 1.0874, + "step": 3591 + }, + { + "epoch": 0.66, + "learning_rate": 5.4173328449030436e-05, + "loss": 1.0805, + "step": 3592 + }, + { + "epoch": 0.66, + "learning_rate": 5.4120281442401655e-05, + "loss": 1.0812, + "step": 3593 + }, + { + "epoch": 0.66, + "learning_rate": 5.4067250783362856e-05, + "loss": 1.1754, + "step": 3594 + }, + { + "epoch": 0.66, + "learning_rate": 5.401423649080969e-05, + "loss": 1.1085, + "step": 3595 + }, + { + "epoch": 0.66, + "learning_rate": 5.3961238583631855e-05, + "loss": 1.045, + "step": 3596 + }, + { + "epoch": 0.66, + "learning_rate": 5.390825708071325e-05, + "loss": 1.1862, + "step": 3597 + }, + { + "epoch": 0.66, + "learning_rate": 5.38552920009319e-05, + "loss": 1.1159, + "step": 3598 + }, + { + "epoch": 0.66, + "learning_rate": 5.380234336316008e-05, + "loss": 1.1864, + "step": 3599 + }, + { + "epoch": 0.66, + "learning_rate": 5.374941118626406e-05, + "loss": 1.1238, + "step": 3600 + }, + { + "epoch": 0.66, + "learning_rate": 5.369649548910438e-05, + "loss": 1.1769, + "step": 3601 + }, + { + "epoch": 0.66, + "learning_rate": 5.364359629053566e-05, + "loss": 1.209, + "step": 3602 + }, + { + "epoch": 0.66, + "learning_rate": 5.359071360940657e-05, + "loss": 1.2212, + "step": 3603 + }, + { + "epoch": 0.66, + "learning_rate": 5.3537847464559946e-05, + "loss": 1.172, + "step": 3604 + }, + { + "epoch": 0.66, + "learning_rate": 5.348499787483281e-05, + "loss": 0.9784, + "step": 3605 + }, + { + "epoch": 0.66, + "learning_rate": 5.3432164859056154e-05, + "loss": 1.0827, + "step": 3606 + }, + { + "epoch": 0.66, + "learning_rate": 5.337934843605519e-05, + "loss": 1.1459, + "step": 3607 + }, + { + "epoch": 0.66, + "learning_rate": 5.33265486246491e-05, + "loss": 1.0646, + "step": 3608 + }, + { + "epoch": 0.67, + "learning_rate": 5.3273765443651235e-05, + "loss": 1.0034, + "step": 3609 + }, + { + "epoch": 0.67, + "learning_rate": 5.3220998911868913e-05, + "loss": 1.274, + "step": 3610 + }, + { + "epoch": 0.67, + "learning_rate": 5.316824904810369e-05, + "loss": 1.2058, + "step": 3611 + }, + { + "epoch": 0.67, + "learning_rate": 5.3115515871151e-05, + "loss": 1.1416, + "step": 3612 + }, + { + "epoch": 0.67, + "learning_rate": 5.30627993998005e-05, + "loss": 1.1827, + "step": 3613 + }, + { + "epoch": 0.67, + "learning_rate": 5.301009965283576e-05, + "loss": 1.0412, + "step": 3614 + }, + { + "epoch": 0.67, + "learning_rate": 5.2957416649034475e-05, + "loss": 0.9322, + "step": 3615 + }, + { + "epoch": 0.67, + "learning_rate": 5.290475040716827e-05, + "loss": 0.9815, + "step": 3616 + }, + { + "epoch": 0.67, + "learning_rate": 5.285210094600295e-05, + "loss": 1.1357, + "step": 3617 + }, + { + "epoch": 0.67, + "learning_rate": 5.279946828429823e-05, + "loss": 1.0297, + "step": 3618 + }, + { + "epoch": 0.67, + "learning_rate": 5.274685244080783e-05, + "loss": 1.2611, + "step": 3619 + }, + { + "epoch": 0.67, + "learning_rate": 5.2694253434279585e-05, + "loss": 1.3117, + "step": 3620 + }, + { + "epoch": 0.67, + "learning_rate": 5.264167128345523e-05, + "loss": 1.1544, + "step": 3621 + }, + { + "epoch": 0.67, + "learning_rate": 5.2589106007070486e-05, + "loss": 1.064, + "step": 3622 + }, + { + "epoch": 0.67, + "learning_rate": 5.2536557623855185e-05, + "loss": 1.2438, + "step": 3623 + }, + { + "epoch": 0.67, + "learning_rate": 5.248402615253301e-05, + "loss": 1.166, + "step": 3624 + }, + { + "epoch": 0.67, + "learning_rate": 5.2431511611821626e-05, + "loss": 1.1175, + "step": 3625 + }, + { + "epoch": 0.67, + "learning_rate": 5.237901402043278e-05, + "loss": 1.182, + "step": 3626 + }, + { + "epoch": 0.67, + "learning_rate": 5.2326533397072076e-05, + "loss": 1.133, + "step": 3627 + }, + { + "epoch": 0.67, + "learning_rate": 5.227406976043905e-05, + "loss": 1.1564, + "step": 3628 + }, + { + "epoch": 0.67, + "learning_rate": 5.22216231292273e-05, + "loss": 0.9762, + "step": 3629 + }, + { + "epoch": 0.67, + "learning_rate": 5.2169193522124284e-05, + "loss": 1.0497, + "step": 3630 + }, + { + "epoch": 0.67, + "learning_rate": 5.211678095781136e-05, + "loss": 1.139, + "step": 3631 + }, + { + "epoch": 0.67, + "learning_rate": 5.206438545496395e-05, + "loss": 1.1853, + "step": 3632 + }, + { + "epoch": 0.67, + "learning_rate": 5.2012007032251256e-05, + "loss": 1.1171, + "step": 3633 + }, + { + "epoch": 0.67, + "learning_rate": 5.195964570833641e-05, + "loss": 1.1865, + "step": 3634 + }, + { + "epoch": 0.67, + "learning_rate": 5.190730150187658e-05, + "loss": 1.0779, + "step": 3635 + }, + { + "epoch": 0.67, + "learning_rate": 5.1854974431522696e-05, + "loss": 1.0759, + "step": 3636 + }, + { + "epoch": 0.67, + "learning_rate": 5.1802664515919595e-05, + "loss": 1.1321, + "step": 3637 + }, + { + "epoch": 0.67, + "learning_rate": 5.175037177370612e-05, + "loss": 1.1127, + "step": 3638 + }, + { + "epoch": 0.67, + "learning_rate": 5.169809622351486e-05, + "loss": 1.0204, + "step": 3639 + }, + { + "epoch": 0.67, + "learning_rate": 5.164583788397234e-05, + "loss": 1.1019, + "step": 3640 + }, + { + "epoch": 0.67, + "learning_rate": 5.159359677369889e-05, + "loss": 1.0556, + "step": 3641 + }, + { + "epoch": 0.67, + "learning_rate": 5.154137291130885e-05, + "loss": 1.1694, + "step": 3642 + }, + { + "epoch": 0.67, + "learning_rate": 5.148916631541023e-05, + "loss": 1.1383, + "step": 3643 + }, + { + "epoch": 0.67, + "learning_rate": 5.1436977004605046e-05, + "loss": 1.0735, + "step": 3644 + }, + { + "epoch": 0.67, + "learning_rate": 5.138480499748905e-05, + "loss": 1.2048, + "step": 3645 + }, + { + "epoch": 0.67, + "learning_rate": 5.133265031265189e-05, + "loss": 1.062, + "step": 3646 + }, + { + "epoch": 0.67, + "learning_rate": 5.128051296867693e-05, + "loss": 1.0966, + "step": 3647 + }, + { + "epoch": 0.67, + "learning_rate": 5.122839298414156e-05, + "loss": 1.1293, + "step": 3648 + }, + { + "epoch": 0.67, + "learning_rate": 5.1176290377616754e-05, + "loss": 1.24, + "step": 3649 + }, + { + "epoch": 0.67, + "learning_rate": 5.11242051676675e-05, + "loss": 1.0429, + "step": 3650 + }, + { + "epoch": 0.67, + "learning_rate": 5.1072137372852455e-05, + "loss": 1.1123, + "step": 3651 + }, + { + "epoch": 0.67, + "learning_rate": 5.1020087011724095e-05, + "loss": 1.0326, + "step": 3652 + }, + { + "epoch": 0.67, + "learning_rate": 5.096805410282868e-05, + "loss": 1.1792, + "step": 3653 + }, + { + "epoch": 0.67, + "learning_rate": 5.091603866470631e-05, + "loss": 1.182, + "step": 3654 + }, + { + "epoch": 0.67, + "learning_rate": 5.086404071589077e-05, + "loss": 1.2008, + "step": 3655 + }, + { + "epoch": 0.67, + "learning_rate": 5.0812060274909745e-05, + "loss": 1.0656, + "step": 3656 + }, + { + "epoch": 0.67, + "learning_rate": 5.076009736028453e-05, + "loss": 1.082, + "step": 3657 + }, + { + "epoch": 0.67, + "learning_rate": 5.070815199053026e-05, + "loss": 1.1013, + "step": 3658 + }, + { + "epoch": 0.67, + "learning_rate": 5.065622418415577e-05, + "loss": 1.0777, + "step": 3659 + }, + { + "epoch": 0.67, + "learning_rate": 5.0604313959663726e-05, + "loss": 1.1422, + "step": 3660 + }, + { + "epoch": 0.67, + "learning_rate": 5.0552421335550426e-05, + "loss": 1.075, + "step": 3661 + }, + { + "epoch": 0.67, + "learning_rate": 5.050054633030603e-05, + "loss": 1.1669, + "step": 3662 + }, + { + "epoch": 0.67, + "learning_rate": 5.0448688962414194e-05, + "loss": 1.0267, + "step": 3663 + }, + { + "epoch": 0.68, + "learning_rate": 5.0396849250352526e-05, + "loss": 1.0334, + "step": 3664 + }, + { + "epoch": 0.68, + "learning_rate": 5.03450272125922e-05, + "loss": 0.9526, + "step": 3665 + }, + { + "epoch": 0.68, + "learning_rate": 5.029322286759819e-05, + "loss": 1.1315, + "step": 3666 + }, + { + "epoch": 0.68, + "learning_rate": 5.0241436233829044e-05, + "loss": 1.0728, + "step": 3667 + }, + { + "epoch": 0.68, + "learning_rate": 5.018966732973719e-05, + "loss": 1.0266, + "step": 3668 + }, + { + "epoch": 0.68, + "learning_rate": 5.013791617376846e-05, + "loss": 1.1222, + "step": 3669 + }, + { + "epoch": 0.68, + "learning_rate": 5.0086182784362625e-05, + "loss": 1.1009, + "step": 3670 + }, + { + "epoch": 0.68, + "learning_rate": 5.003446717995296e-05, + "loss": 1.1066, + "step": 3671 + }, + { + "epoch": 0.68, + "learning_rate": 4.998276937896653e-05, + "loss": 1.0769, + "step": 3672 + }, + { + "epoch": 0.68, + "learning_rate": 4.993108939982395e-05, + "loss": 1.2136, + "step": 3673 + }, + { + "epoch": 0.68, + "learning_rate": 4.987942726093954e-05, + "loss": 1.0422, + "step": 3674 + }, + { + "epoch": 0.68, + "learning_rate": 4.982778298072118e-05, + "loss": 1.2031, + "step": 3675 + }, + { + "epoch": 0.68, + "learning_rate": 4.977615657757056e-05, + "loss": 1.0289, + "step": 3676 + }, + { + "epoch": 0.68, + "learning_rate": 4.972454806988281e-05, + "loss": 1.0338, + "step": 3677 + }, + { + "epoch": 0.68, + "learning_rate": 4.967295747604685e-05, + "loss": 1.2799, + "step": 3678 + }, + { + "epoch": 0.68, + "learning_rate": 4.9621384814445084e-05, + "loss": 1.2522, + "step": 3679 + }, + { + "epoch": 0.68, + "learning_rate": 4.956983010345357e-05, + "loss": 1.1116, + "step": 3680 + }, + { + "epoch": 0.68, + "learning_rate": 4.9518293361441966e-05, + "loss": 1.004, + "step": 3681 + }, + { + "epoch": 0.68, + "learning_rate": 4.9466774606773605e-05, + "loss": 1.1639, + "step": 3682 + }, + { + "epoch": 0.68, + "learning_rate": 4.9415273857805255e-05, + "loss": 1.1331, + "step": 3683 + }, + { + "epoch": 0.68, + "learning_rate": 4.9363791132887494e-05, + "loss": 1.1534, + "step": 3684 + }, + { + "epoch": 0.68, + "learning_rate": 4.9312326450364176e-05, + "loss": 1.0097, + "step": 3685 + }, + { + "epoch": 0.68, + "learning_rate": 4.9260879828573006e-05, + "loss": 1.0105, + "step": 3686 + }, + { + "epoch": 0.68, + "learning_rate": 4.920945128584507e-05, + "loss": 1.2717, + "step": 3687 + }, + { + "epoch": 0.68, + "learning_rate": 4.915804084050516e-05, + "loss": 1.1253, + "step": 3688 + }, + { + "epoch": 0.68, + "learning_rate": 4.9106648510871456e-05, + "loss": 1.2089, + "step": 3689 + }, + { + "epoch": 0.68, + "learning_rate": 4.905527431525589e-05, + "loss": 1.1116, + "step": 3690 + }, + { + "epoch": 0.68, + "learning_rate": 4.900391827196367e-05, + "loss": 1.0693, + "step": 3691 + }, + { + "epoch": 0.68, + "learning_rate": 4.8952580399293766e-05, + "loss": 1.0486, + "step": 3692 + }, + { + "epoch": 0.68, + "learning_rate": 4.890126071553853e-05, + "loss": 1.1518, + "step": 3693 + }, + { + "epoch": 0.68, + "learning_rate": 4.884995923898396e-05, + "loss": 1.1351, + "step": 3694 + }, + { + "epoch": 0.68, + "learning_rate": 4.879867598790946e-05, + "loss": 0.9381, + "step": 3695 + }, + { + "epoch": 0.68, + "learning_rate": 4.874741098058796e-05, + "loss": 1.0508, + "step": 3696 + }, + { + "epoch": 0.68, + "learning_rate": 4.869616423528588e-05, + "loss": 1.0777, + "step": 3697 + }, + { + "epoch": 0.68, + "learning_rate": 4.864493577026322e-05, + "loss": 1.0017, + "step": 3698 + }, + { + "epoch": 0.68, + "learning_rate": 4.859372560377334e-05, + "loss": 1.0737, + "step": 3699 + }, + { + "epoch": 0.68, + "learning_rate": 4.854253375406322e-05, + "loss": 1.2536, + "step": 3700 + }, + { + "epoch": 0.68, + "learning_rate": 4.849136023937321e-05, + "loss": 1.2136, + "step": 3701 + }, + { + "epoch": 0.68, + "learning_rate": 4.844020507793712e-05, + "loss": 1.0095, + "step": 3702 + }, + { + "epoch": 0.68, + "learning_rate": 4.8389068287982244e-05, + "loss": 1.1265, + "step": 3703 + }, + { + "epoch": 0.68, + "learning_rate": 4.833794988772941e-05, + "loss": 1.1204, + "step": 3704 + }, + { + "epoch": 0.68, + "learning_rate": 4.828684989539275e-05, + "loss": 1.0967, + "step": 3705 + }, + { + "epoch": 0.68, + "learning_rate": 4.823576832918001e-05, + "loss": 0.9641, + "step": 3706 + }, + { + "epoch": 0.68, + "learning_rate": 4.8184705207292144e-05, + "loss": 1.118, + "step": 3707 + }, + { + "epoch": 0.68, + "learning_rate": 4.8133660547923754e-05, + "loss": 1.1874, + "step": 3708 + }, + { + "epoch": 0.68, + "learning_rate": 4.808263436926271e-05, + "loss": 1.0879, + "step": 3709 + }, + { + "epoch": 0.68, + "learning_rate": 4.803162668949044e-05, + "loss": 1.0692, + "step": 3710 + }, + { + "epoch": 0.68, + "learning_rate": 4.7980637526781635e-05, + "loss": 1.0759, + "step": 3711 + }, + { + "epoch": 0.68, + "learning_rate": 4.792966689930447e-05, + "loss": 1.2337, + "step": 3712 + }, + { + "epoch": 0.68, + "learning_rate": 4.7878714825220464e-05, + "loss": 1.101, + "step": 3713 + }, + { + "epoch": 0.68, + "learning_rate": 4.782778132268462e-05, + "loss": 1.2124, + "step": 3714 + }, + { + "epoch": 0.68, + "learning_rate": 4.7776866409845186e-05, + "loss": 1.1078, + "step": 3715 + }, + { + "epoch": 0.68, + "learning_rate": 4.772597010484395e-05, + "loss": 1.1205, + "step": 3716 + }, + { + "epoch": 0.68, + "learning_rate": 4.767509242581595e-05, + "loss": 1.1333, + "step": 3717 + }, + { + "epoch": 0.69, + "learning_rate": 4.7624233390889606e-05, + "loss": 1.0879, + "step": 3718 + }, + { + "epoch": 0.69, + "learning_rate": 4.757339301818667e-05, + "loss": 1.036, + "step": 3719 + }, + { + "epoch": 0.69, + "learning_rate": 4.7522571325822376e-05, + "loss": 1.2444, + "step": 3720 + }, + { + "epoch": 0.69, + "learning_rate": 4.747176833190511e-05, + "loss": 1.107, + "step": 3721 + }, + { + "epoch": 0.69, + "learning_rate": 4.742098405453682e-05, + "loss": 1.1024, + "step": 3722 + }, + { + "epoch": 0.69, + "learning_rate": 4.7370218511812515e-05, + "loss": 1.2115, + "step": 3723 + }, + { + "epoch": 0.69, + "learning_rate": 4.731947172182078e-05, + "loss": 1.2413, + "step": 3724 + }, + { + "epoch": 0.69, + "learning_rate": 4.726874370264333e-05, + "loss": 1.0447, + "step": 3725 + }, + { + "epoch": 0.69, + "learning_rate": 4.7218034472355344e-05, + "loss": 1.0397, + "step": 3726 + }, + { + "epoch": 0.69, + "learning_rate": 4.7167344049025175e-05, + "loss": 1.0428, + "step": 3727 + }, + { + "epoch": 0.69, + "learning_rate": 4.711667245071464e-05, + "loss": 1.2467, + "step": 3728 + }, + { + "epoch": 0.69, + "learning_rate": 4.706601969547859e-05, + "loss": 1.0534, + "step": 3729 + }, + { + "epoch": 0.69, + "learning_rate": 4.701538580136543e-05, + "loss": 1.2835, + "step": 3730 + }, + { + "epoch": 0.69, + "learning_rate": 4.696477078641667e-05, + "loss": 1.1105, + "step": 3731 + }, + { + "epoch": 0.69, + "learning_rate": 4.6914174668667197e-05, + "loss": 1.1457, + "step": 3732 + }, + { + "epoch": 0.69, + "learning_rate": 4.6863597466145116e-05, + "loss": 1.1288, + "step": 3733 + }, + { + "epoch": 0.69, + "learning_rate": 4.6813039196871776e-05, + "loss": 1.1897, + "step": 3734 + }, + { + "epoch": 0.69, + "learning_rate": 4.6762499878861764e-05, + "loss": 1.0902, + "step": 3735 + }, + { + "epoch": 0.69, + "learning_rate": 4.671197953012303e-05, + "loss": 1.1987, + "step": 3736 + }, + { + "epoch": 0.69, + "learning_rate": 4.666147816865661e-05, + "loss": 1.1423, + "step": 3737 + }, + { + "epoch": 0.69, + "learning_rate": 4.661099581245694e-05, + "loss": 1.1178, + "step": 3738 + }, + { + "epoch": 0.69, + "learning_rate": 4.6560532479511535e-05, + "loss": 1.1645, + "step": 3739 + }, + { + "epoch": 0.69, + "learning_rate": 4.651008818780119e-05, + "loss": 1.086, + "step": 3740 + }, + { + "epoch": 0.69, + "learning_rate": 4.64596629552999e-05, + "loss": 1.1762, + "step": 3741 + }, + { + "epoch": 0.69, + "learning_rate": 4.6409256799974944e-05, + "loss": 1.2224, + "step": 3742 + }, + { + "epoch": 0.69, + "learning_rate": 4.635886973978668e-05, + "loss": 1.0288, + "step": 3743 + }, + { + "epoch": 0.69, + "learning_rate": 4.63085017926888e-05, + "loss": 1.2216, + "step": 3744 + }, + { + "epoch": 0.69, + "learning_rate": 4.625815297662808e-05, + "loss": 1.1146, + "step": 3745 + }, + { + "epoch": 0.69, + "learning_rate": 4.6207823309544495e-05, + "loss": 1.0916, + "step": 3746 + }, + { + "epoch": 0.69, + "learning_rate": 4.6157512809371185e-05, + "loss": 1.025, + "step": 3747 + }, + { + "epoch": 0.69, + "learning_rate": 4.610722149403458e-05, + "loss": 1.1459, + "step": 3748 + }, + { + "epoch": 0.69, + "learning_rate": 4.605694938145408e-05, + "loss": 1.2603, + "step": 3749 + }, + { + "epoch": 0.69, + "learning_rate": 4.600669648954246e-05, + "loss": 1.1042, + "step": 3750 + }, + { + "epoch": 0.69, + "learning_rate": 4.595646283620547e-05, + "loss": 1.2045, + "step": 3751 + }, + { + "epoch": 0.69, + "learning_rate": 4.590624843934207e-05, + "loss": 1.0919, + "step": 3752 + }, + { + "epoch": 0.69, + "learning_rate": 4.5856053316844317e-05, + "loss": 1.099, + "step": 3753 + }, + { + "epoch": 0.69, + "learning_rate": 4.580587748659752e-05, + "loss": 1.0782, + "step": 3754 + }, + { + "epoch": 0.69, + "learning_rate": 4.575572096648001e-05, + "loss": 1.0914, + "step": 3755 + }, + { + "epoch": 0.69, + "learning_rate": 4.5705583774363204e-05, + "loss": 1.1245, + "step": 3756 + }, + { + "epoch": 0.69, + "learning_rate": 4.5655465928111776e-05, + "loss": 0.9898, + "step": 3757 + }, + { + "epoch": 0.69, + "learning_rate": 4.560536744558338e-05, + "loss": 1.1313, + "step": 3758 + }, + { + "epoch": 0.69, + "learning_rate": 4.5555288344628766e-05, + "loss": 1.0153, + "step": 3759 + }, + { + "epoch": 0.69, + "learning_rate": 4.550522864309191e-05, + "loss": 1.2169, + "step": 3760 + }, + { + "epoch": 0.69, + "learning_rate": 4.5455188358809755e-05, + "loss": 1.1477, + "step": 3761 + }, + { + "epoch": 0.69, + "learning_rate": 4.5405167509612304e-05, + "loss": 1.0719, + "step": 3762 + }, + { + "epoch": 0.69, + "learning_rate": 4.535516611332278e-05, + "loss": 1.162, + "step": 3763 + }, + { + "epoch": 0.69, + "learning_rate": 4.530518418775733e-05, + "loss": 0.9118, + "step": 3764 + }, + { + "epoch": 0.69, + "learning_rate": 4.525522175072521e-05, + "loss": 1.1812, + "step": 3765 + }, + { + "epoch": 0.69, + "learning_rate": 4.5205278820028784e-05, + "loss": 1.0954, + "step": 3766 + }, + { + "epoch": 0.69, + "learning_rate": 4.515535541346341e-05, + "loss": 1.0538, + "step": 3767 + }, + { + "epoch": 0.69, + "learning_rate": 4.510545154881744e-05, + "loss": 1.1568, + "step": 3768 + }, + { + "epoch": 0.69, + "learning_rate": 4.505556724387241e-05, + "loss": 1.1273, + "step": 3769 + }, + { + "epoch": 0.69, + "learning_rate": 4.500570251640277e-05, + "loss": 1.1155, + "step": 3770 + }, + { + "epoch": 0.69, + "learning_rate": 4.4955857384175984e-05, + "loss": 1.0461, + "step": 3771 + }, + { + "epoch": 0.7, + "learning_rate": 4.490603186495264e-05, + "loss": 1.0759, + "step": 3772 + }, + { + "epoch": 0.7, + "learning_rate": 4.485622597648624e-05, + "loss": 1.0465, + "step": 3773 + }, + { + "epoch": 0.7, + "learning_rate": 4.4806439736523295e-05, + "loss": 1.1971, + "step": 3774 + }, + { + "epoch": 0.7, + "learning_rate": 4.475667316280341e-05, + "loss": 1.1035, + "step": 3775 + }, + { + "epoch": 0.7, + "learning_rate": 4.470692627305908e-05, + "loss": 1.1341, + "step": 3776 + }, + { + "epoch": 0.7, + "learning_rate": 4.465719908501581e-05, + "loss": 1.2121, + "step": 3777 + }, + { + "epoch": 0.7, + "learning_rate": 4.4607491616392094e-05, + "loss": 1.1331, + "step": 3778 + }, + { + "epoch": 0.7, + "learning_rate": 4.4557803884899444e-05, + "loss": 1.09, + "step": 3779 + }, + { + "epoch": 0.7, + "learning_rate": 4.450813590824224e-05, + "loss": 1.1486, + "step": 3780 + }, + { + "epoch": 0.7, + "learning_rate": 4.445848770411795e-05, + "loss": 1.031, + "step": 3781 + }, + { + "epoch": 0.7, + "learning_rate": 4.4408859290216895e-05, + "loss": 1.2219, + "step": 3782 + }, + { + "epoch": 0.7, + "learning_rate": 4.4359250684222383e-05, + "loss": 1.1754, + "step": 3783 + }, + { + "epoch": 0.7, + "learning_rate": 4.4309661903810616e-05, + "loss": 1.0919, + "step": 3784 + }, + { + "epoch": 0.7, + "learning_rate": 4.426009296665084e-05, + "loss": 1.0651, + "step": 3785 + }, + { + "epoch": 0.7, + "learning_rate": 4.421054389040511e-05, + "loss": 1.0824, + "step": 3786 + }, + { + "epoch": 0.7, + "learning_rate": 4.416101469272853e-05, + "loss": 1.14, + "step": 3787 + }, + { + "epoch": 0.7, + "learning_rate": 4.4111505391269e-05, + "loss": 1.1776, + "step": 3788 + }, + { + "epoch": 0.7, + "learning_rate": 4.406201600366739e-05, + "loss": 1.1027, + "step": 3789 + }, + { + "epoch": 0.7, + "learning_rate": 4.4012546547557434e-05, + "loss": 1.1752, + "step": 3790 + }, + { + "epoch": 0.7, + "learning_rate": 4.396309704056587e-05, + "loss": 1.018, + "step": 3791 + }, + { + "epoch": 0.7, + "learning_rate": 4.391366750031217e-05, + "loss": 1.0545, + "step": 3792 + }, + { + "epoch": 0.7, + "learning_rate": 4.3864257944408846e-05, + "loss": 0.9805, + "step": 3793 + }, + { + "epoch": 0.7, + "learning_rate": 4.381486839046121e-05, + "loss": 1.1791, + "step": 3794 + }, + { + "epoch": 0.7, + "learning_rate": 4.376549885606742e-05, + "loss": 0.9248, + "step": 3795 + }, + { + "epoch": 0.7, + "learning_rate": 4.3716149358818526e-05, + "loss": 1.1379, + "step": 3796 + }, + { + "epoch": 0.7, + "learning_rate": 4.366681991629851e-05, + "loss": 1.0514, + "step": 3797 + }, + { + "epoch": 0.7, + "learning_rate": 4.3617510546084073e-05, + "loss": 1.1314, + "step": 3798 + }, + { + "epoch": 0.7, + "learning_rate": 4.356822126574496e-05, + "loss": 1.234, + "step": 3799 + }, + { + "epoch": 0.7, + "learning_rate": 4.351895209284347e-05, + "loss": 1.0009, + "step": 3800 + }, + { + "epoch": 0.7, + "learning_rate": 4.3469703044935026e-05, + "loss": 1.204, + "step": 3801 + }, + { + "epoch": 0.7, + "learning_rate": 4.342047413956769e-05, + "loss": 1.0751, + "step": 3802 + }, + { + "epoch": 0.7, + "learning_rate": 4.337126539428248e-05, + "loss": 1.215, + "step": 3803 + }, + { + "epoch": 0.7, + "learning_rate": 4.332207682661309e-05, + "loss": 1.2157, + "step": 3804 + }, + { + "epoch": 0.7, + "learning_rate": 4.327290845408622e-05, + "loss": 1.079, + "step": 3805 + }, + { + "epoch": 0.7, + "learning_rate": 4.3223760294221084e-05, + "loss": 1.1196, + "step": 3806 + }, + { + "epoch": 0.7, + "learning_rate": 4.317463236453e-05, + "loss": 1.1248, + "step": 3807 + }, + { + "epoch": 0.7, + "learning_rate": 4.312552468251785e-05, + "loss": 0.9879, + "step": 3808 + }, + { + "epoch": 0.7, + "learning_rate": 4.3076437265682476e-05, + "loss": 1.1624, + "step": 3809 + }, + { + "epoch": 0.7, + "learning_rate": 4.302737013151438e-05, + "loss": 1.0846, + "step": 3810 + }, + { + "epoch": 0.7, + "learning_rate": 4.297832329749687e-05, + "loss": 1.1436, + "step": 3811 + }, + { + "epoch": 0.7, + "learning_rate": 4.292929678110598e-05, + "loss": 1.2243, + "step": 3812 + }, + { + "epoch": 0.7, + "learning_rate": 4.2880290599810636e-05, + "loss": 1.1428, + "step": 3813 + }, + { + "epoch": 0.7, + "learning_rate": 4.283130477107234e-05, + "loss": 1.1023, + "step": 3814 + }, + { + "epoch": 0.7, + "learning_rate": 4.278233931234552e-05, + "loss": 1.1027, + "step": 3815 + }, + { + "epoch": 0.7, + "learning_rate": 4.2733394241077215e-05, + "loss": 1.2648, + "step": 3816 + }, + { + "epoch": 0.7, + "learning_rate": 4.268446957470725e-05, + "loss": 1.0445, + "step": 3817 + }, + { + "epoch": 0.7, + "learning_rate": 4.263556533066815e-05, + "loss": 1.1149, + "step": 3818 + }, + { + "epoch": 0.7, + "learning_rate": 4.258668152638522e-05, + "loss": 1.0795, + "step": 3819 + }, + { + "epoch": 0.7, + "learning_rate": 4.25378181792764e-05, + "loss": 1.1137, + "step": 3820 + }, + { + "epoch": 0.7, + "learning_rate": 4.24889753067525e-05, + "loss": 1.1363, + "step": 3821 + }, + { + "epoch": 0.7, + "learning_rate": 4.244015292621677e-05, + "loss": 1.1948, + "step": 3822 + }, + { + "epoch": 0.7, + "learning_rate": 4.239135105506543e-05, + "loss": 1.0981, + "step": 3823 + }, + { + "epoch": 0.7, + "learning_rate": 4.2342569710687184e-05, + "loss": 1.0887, + "step": 3824 + }, + { + "epoch": 0.7, + "learning_rate": 4.2293808910463606e-05, + "loss": 1.2024, + "step": 3825 + }, + { + "epoch": 0.71, + "learning_rate": 4.224506867176877e-05, + "loss": 1.0967, + "step": 3826 + }, + { + "epoch": 0.71, + "learning_rate": 4.219634901196963e-05, + "loss": 1.0635, + "step": 3827 + }, + { + "epoch": 0.71, + "learning_rate": 4.2147649948425525e-05, + "loss": 1.0936, + "step": 3828 + }, + { + "epoch": 0.71, + "learning_rate": 4.2098971498488734e-05, + "loss": 1.0513, + "step": 3829 + }, + { + "epoch": 0.71, + "learning_rate": 4.2050313679504015e-05, + "loss": 1.1384, + "step": 3830 + }, + { + "epoch": 0.71, + "learning_rate": 4.2001676508808886e-05, + "loss": 1.0778, + "step": 3831 + }, + { + "epoch": 0.71, + "learning_rate": 4.195306000373344e-05, + "loss": 1.1438, + "step": 3832 + }, + { + "epoch": 0.71, + "learning_rate": 4.19044641816004e-05, + "loss": 1.1358, + "step": 3833 + }, + { + "epoch": 0.71, + "learning_rate": 4.1855889059725117e-05, + "loss": 1.2715, + "step": 3834 + }, + { + "epoch": 0.71, + "learning_rate": 4.180733465541568e-05, + "loss": 1.1102, + "step": 3835 + }, + { + "epoch": 0.71, + "learning_rate": 4.17588009859726e-05, + "loss": 1.0803, + "step": 3836 + }, + { + "epoch": 0.71, + "learning_rate": 4.171028806868921e-05, + "loss": 1.149, + "step": 3837 + }, + { + "epoch": 0.71, + "learning_rate": 4.166179592085129e-05, + "loss": 1.0204, + "step": 3838 + }, + { + "epoch": 0.71, + "learning_rate": 4.161332455973729e-05, + "loss": 1.1283, + "step": 3839 + }, + { + "epoch": 0.71, + "learning_rate": 4.1564874002618194e-05, + "loss": 1.1551, + "step": 3840 + }, + { + "epoch": 0.71, + "learning_rate": 4.1516444266757694e-05, + "loss": 1.1275, + "step": 3841 + }, + { + "epoch": 0.71, + "learning_rate": 4.1468035369411904e-05, + "loss": 1.0128, + "step": 3842 + }, + { + "epoch": 0.71, + "learning_rate": 4.1419647327829724e-05, + "loss": 1.0717, + "step": 3843 + }, + { + "epoch": 0.71, + "learning_rate": 4.137128015925233e-05, + "loss": 0.954, + "step": 3844 + }, + { + "epoch": 0.71, + "learning_rate": 4.132293388091375e-05, + "loss": 1.1154, + "step": 3845 + }, + { + "epoch": 0.71, + "learning_rate": 4.127460851004037e-05, + "loss": 1.0278, + "step": 3846 + }, + { + "epoch": 0.71, + "learning_rate": 4.122630406385126e-05, + "loss": 1.0079, + "step": 3847 + }, + { + "epoch": 0.71, + "learning_rate": 4.117802055955792e-05, + "loss": 0.988, + "step": 3848 + }, + { + "epoch": 0.71, + "learning_rate": 4.112975801436454e-05, + "loss": 1.1164, + "step": 3849 + }, + { + "epoch": 0.71, + "learning_rate": 4.10815164454676e-05, + "loss": 1.0555, + "step": 3850 + }, + { + "epoch": 0.71, + "learning_rate": 4.103329587005635e-05, + "loss": 1.1075, + "step": 3851 + }, + { + "epoch": 0.71, + "learning_rate": 4.0985096305312407e-05, + "loss": 1.0592, + "step": 3852 + }, + { + "epoch": 0.71, + "learning_rate": 4.093691776841002e-05, + "loss": 1.2827, + "step": 3853 + }, + { + "epoch": 0.71, + "learning_rate": 4.088876027651585e-05, + "loss": 1.1398, + "step": 3854 + }, + { + "epoch": 0.71, + "learning_rate": 4.084062384678907e-05, + "loss": 1.1062, + "step": 3855 + }, + { + "epoch": 0.71, + "learning_rate": 4.0792508496381366e-05, + "loss": 0.9963, + "step": 3856 + }, + { + "epoch": 0.71, + "learning_rate": 4.0744414242436944e-05, + "loss": 1.2199, + "step": 3857 + }, + { + "epoch": 0.71, + "learning_rate": 4.0696341102092425e-05, + "loss": 0.9629, + "step": 3858 + }, + { + "epoch": 0.71, + "learning_rate": 4.064828909247701e-05, + "loss": 1.022, + "step": 3859 + }, + { + "epoch": 0.71, + "learning_rate": 4.060025823071226e-05, + "loss": 1.0639, + "step": 3860 + }, + { + "epoch": 0.71, + "learning_rate": 4.055224853391226e-05, + "loss": 1.1327, + "step": 3861 + }, + { + "epoch": 0.71, + "learning_rate": 4.05042600191835e-05, + "loss": 1.1247, + "step": 3862 + }, + { + "epoch": 0.71, + "learning_rate": 4.0456292703625044e-05, + "loss": 1.2655, + "step": 3863 + }, + { + "epoch": 0.71, + "learning_rate": 4.040834660432824e-05, + "loss": 1.2365, + "step": 3864 + }, + { + "epoch": 0.71, + "learning_rate": 4.036042173837706e-05, + "loss": 1.0528, + "step": 3865 + }, + { + "epoch": 0.71, + "learning_rate": 4.031251812284769e-05, + "loss": 1.0379, + "step": 3866 + }, + { + "epoch": 0.71, + "learning_rate": 4.0264635774808945e-05, + "loss": 1.097, + "step": 3867 + }, + { + "epoch": 0.71, + "learning_rate": 4.0216774711321925e-05, + "loss": 1.0776, + "step": 3868 + }, + { + "epoch": 0.71, + "learning_rate": 4.016893494944026e-05, + "loss": 1.1543, + "step": 3869 + }, + { + "epoch": 0.71, + "learning_rate": 4.0121116506209876e-05, + "loss": 1.1606, + "step": 3870 + }, + { + "epoch": 0.71, + "learning_rate": 4.007331939866925e-05, + "loss": 1.0545, + "step": 3871 + }, + { + "epoch": 0.71, + "learning_rate": 4.002554364384903e-05, + "loss": 1.1188, + "step": 3872 + }, + { + "epoch": 0.71, + "learning_rate": 3.99777892587725e-05, + "loss": 1.0402, + "step": 3873 + }, + { + "epoch": 0.71, + "learning_rate": 3.993005626045514e-05, + "loss": 0.9382, + "step": 3874 + }, + { + "epoch": 0.71, + "learning_rate": 3.9882344665904956e-05, + "loss": 1.1758, + "step": 3875 + }, + { + "epoch": 0.71, + "learning_rate": 3.983465449212225e-05, + "loss": 1.0588, + "step": 3876 + }, + { + "epoch": 0.71, + "learning_rate": 3.9786985756099674e-05, + "loss": 1.0028, + "step": 3877 + }, + { + "epoch": 0.71, + "learning_rate": 3.9739338474822253e-05, + "loss": 1.2163, + "step": 3878 + }, + { + "epoch": 0.71, + "learning_rate": 3.969171266526745e-05, + "loss": 0.9874, + "step": 3879 + }, + { + "epoch": 0.71, + "learning_rate": 3.964410834440494e-05, + "loss": 1.0988, + "step": 3880 + }, + { + "epoch": 0.72, + "learning_rate": 3.959652552919688e-05, + "loss": 1.0397, + "step": 3881 + }, + { + "epoch": 0.72, + "learning_rate": 3.954896423659767e-05, + "loss": 1.0689, + "step": 3882 + }, + { + "epoch": 0.72, + "learning_rate": 3.9501424483554074e-05, + "loss": 1.0401, + "step": 3883 + }, + { + "epoch": 0.72, + "learning_rate": 3.945390628700513e-05, + "loss": 1.248, + "step": 3884 + }, + { + "epoch": 0.72, + "learning_rate": 3.940640966388231e-05, + "loss": 1.0897, + "step": 3885 + }, + { + "epoch": 0.72, + "learning_rate": 3.935893463110926e-05, + "loss": 1.0001, + "step": 3886 + }, + { + "epoch": 0.72, + "learning_rate": 3.931148120560211e-05, + "loss": 1.0901, + "step": 3887 + }, + { + "epoch": 0.72, + "learning_rate": 3.926404940426904e-05, + "loss": 1.0199, + "step": 3888 + }, + { + "epoch": 0.72, + "learning_rate": 3.921663924401079e-05, + "loss": 1.0199, + "step": 3889 + }, + { + "epoch": 0.72, + "learning_rate": 3.916925074172019e-05, + "loss": 1.1137, + "step": 3890 + }, + { + "epoch": 0.72, + "learning_rate": 3.912188391428249e-05, + "loss": 1.1406, + "step": 3891 + }, + { + "epoch": 0.72, + "learning_rate": 3.90745387785751e-05, + "loss": 1.0378, + "step": 3892 + }, + { + "epoch": 0.72, + "learning_rate": 3.9027215351467864e-05, + "loss": 1.1226, + "step": 3893 + }, + { + "epoch": 0.72, + "learning_rate": 3.897991364982263e-05, + "loss": 1.1269, + "step": 3894 + }, + { + "epoch": 0.72, + "learning_rate": 3.893263369049378e-05, + "loss": 1.0733, + "step": 3895 + }, + { + "epoch": 0.72, + "learning_rate": 3.888537549032776e-05, + "loss": 1.1071, + "step": 3896 + }, + { + "epoch": 0.72, + "learning_rate": 3.8838139066163394e-05, + "loss": 1.12, + "step": 3897 + }, + { + "epoch": 0.72, + "learning_rate": 3.8790924434831655e-05, + "loss": 0.9865, + "step": 3898 + }, + { + "epoch": 0.72, + "learning_rate": 3.8743731613155785e-05, + "loss": 1.208, + "step": 3899 + }, + { + "epoch": 0.72, + "learning_rate": 3.869656061795121e-05, + "loss": 1.0725, + "step": 3900 + }, + { + "epoch": 0.72, + "learning_rate": 3.8649411466025696e-05, + "loss": 1.0006, + "step": 3901 + }, + { + "epoch": 0.72, + "learning_rate": 3.860228417417907e-05, + "loss": 0.9458, + "step": 3902 + }, + { + "epoch": 0.72, + "learning_rate": 3.855517875920354e-05, + "loss": 1.0974, + "step": 3903 + }, + { + "epoch": 0.72, + "learning_rate": 3.850809523788337e-05, + "loss": 1.0718, + "step": 3904 + }, + { + "epoch": 0.72, + "learning_rate": 3.846103362699509e-05, + "loss": 1.0345, + "step": 3905 + }, + { + "epoch": 0.72, + "learning_rate": 3.841399394330739e-05, + "loss": 1.0296, + "step": 3906 + }, + { + "epoch": 0.72, + "learning_rate": 3.8366976203581226e-05, + "loss": 1.1689, + "step": 3907 + }, + { + "epoch": 0.72, + "learning_rate": 3.8319980424569625e-05, + "loss": 1.0831, + "step": 3908 + }, + { + "epoch": 0.72, + "learning_rate": 3.827300662301791e-05, + "loss": 1.1152, + "step": 3909 + }, + { + "epoch": 0.72, + "learning_rate": 3.822605481566347e-05, + "loss": 1.2104, + "step": 3910 + }, + { + "epoch": 0.72, + "learning_rate": 3.81791250192359e-05, + "loss": 1.1702, + "step": 3911 + }, + { + "epoch": 0.72, + "learning_rate": 3.81322172504569e-05, + "loss": 1.0461, + "step": 3912 + }, + { + "epoch": 0.72, + "learning_rate": 3.808533152604047e-05, + "loss": 1.0794, + "step": 3913 + }, + { + "epoch": 0.72, + "learning_rate": 3.803846786269255e-05, + "loss": 1.0761, + "step": 3914 + }, + { + "epoch": 0.72, + "learning_rate": 3.7991626277111404e-05, + "loss": 1.2169, + "step": 3915 + }, + { + "epoch": 0.72, + "learning_rate": 3.7944806785987316e-05, + "loss": 1.0682, + "step": 3916 + }, + { + "epoch": 0.72, + "learning_rate": 3.7898009406002746e-05, + "loss": 1.1774, + "step": 3917 + }, + { + "epoch": 0.72, + "learning_rate": 3.7851234153832195e-05, + "loss": 1.03, + "step": 3918 + }, + { + "epoch": 0.72, + "learning_rate": 3.780448104614244e-05, + "loss": 1.1805, + "step": 3919 + }, + { + "epoch": 0.72, + "learning_rate": 3.775775009959221e-05, + "loss": 1.041, + "step": 3920 + }, + { + "epoch": 0.72, + "learning_rate": 3.77110413308324e-05, + "loss": 1.0076, + "step": 3921 + }, + { + "epoch": 0.72, + "learning_rate": 3.7664354756506036e-05, + "loss": 1.09, + "step": 3922 + }, + { + "epoch": 0.72, + "learning_rate": 3.761769039324818e-05, + "loss": 1.1011, + "step": 3923 + }, + { + "epoch": 0.72, + "learning_rate": 3.757104825768599e-05, + "loss": 1.0387, + "step": 3924 + }, + { + "epoch": 0.72, + "learning_rate": 3.752442836643876e-05, + "loss": 1.098, + "step": 3925 + }, + { + "epoch": 0.72, + "learning_rate": 3.747783073611777e-05, + "loss": 1.1473, + "step": 3926 + }, + { + "epoch": 0.72, + "learning_rate": 3.743125538332641e-05, + "loss": 1.0322, + "step": 3927 + }, + { + "epoch": 0.72, + "learning_rate": 3.738470232466018e-05, + "loss": 1.103, + "step": 3928 + }, + { + "epoch": 0.72, + "learning_rate": 3.733817157670656e-05, + "loss": 1.1478, + "step": 3929 + }, + { + "epoch": 0.72, + "learning_rate": 3.7291663156045085e-05, + "loss": 1.1204, + "step": 3930 + }, + { + "epoch": 0.72, + "learning_rate": 3.724517707924742e-05, + "loss": 1.0659, + "step": 3931 + }, + { + "epoch": 0.72, + "learning_rate": 3.719871336287718e-05, + "loss": 1.1612, + "step": 3932 + }, + { + "epoch": 0.72, + "learning_rate": 3.7152272023489996e-05, + "loss": 1.0439, + "step": 3933 + }, + { + "epoch": 0.72, + "learning_rate": 3.710585307763366e-05, + "loss": 1.1846, + "step": 3934 + }, + { + "epoch": 0.73, + "learning_rate": 3.705945654184785e-05, + "loss": 1.031, + "step": 3935 + }, + { + "epoch": 0.73, + "learning_rate": 3.701308243266427e-05, + "loss": 1.0967, + "step": 3936 + }, + { + "epoch": 0.73, + "learning_rate": 3.696673076660674e-05, + "loss": 1.1056, + "step": 3937 + }, + { + "epoch": 0.73, + "learning_rate": 3.6920401560190975e-05, + "loss": 1.1599, + "step": 3938 + }, + { + "epoch": 0.73, + "learning_rate": 3.687409482992469e-05, + "loss": 1.0687, + "step": 3939 + }, + { + "epoch": 0.73, + "learning_rate": 3.68278105923077e-05, + "loss": 1.1833, + "step": 3940 + }, + { + "epoch": 0.73, + "learning_rate": 3.678154886383168e-05, + "loss": 1.2226, + "step": 3941 + }, + { + "epoch": 0.73, + "learning_rate": 3.673530966098036e-05, + "loss": 1.0901, + "step": 3942 + }, + { + "epoch": 0.73, + "learning_rate": 3.668909300022937e-05, + "loss": 1.0379, + "step": 3943 + }, + { + "epoch": 0.73, + "learning_rate": 3.664289889804643e-05, + "loss": 1.0862, + "step": 3944 + }, + { + "epoch": 0.73, + "learning_rate": 3.659672737089107e-05, + "loss": 1.0437, + "step": 3945 + }, + { + "epoch": 0.73, + "learning_rate": 3.6550578435214936e-05, + "loss": 1.0089, + "step": 3946 + }, + { + "epoch": 0.73, + "learning_rate": 3.650445210746151e-05, + "loss": 1.2325, + "step": 3947 + }, + { + "epoch": 0.73, + "learning_rate": 3.645834840406625e-05, + "loss": 1.1324, + "step": 3948 + }, + { + "epoch": 0.73, + "learning_rate": 3.6412267341456505e-05, + "loss": 1.0579, + "step": 3949 + }, + { + "epoch": 0.73, + "learning_rate": 3.6366208936051705e-05, + "loss": 1.0277, + "step": 3950 + }, + { + "epoch": 0.73, + "learning_rate": 3.6320173204263016e-05, + "loss": 1.1842, + "step": 3951 + }, + { + "epoch": 0.73, + "learning_rate": 3.62741601624937e-05, + "loss": 1.1998, + "step": 3952 + }, + { + "epoch": 0.73, + "learning_rate": 3.6228169827138815e-05, + "loss": 1.0118, + "step": 3953 + }, + { + "epoch": 0.73, + "learning_rate": 3.618220221458536e-05, + "loss": 1.2026, + "step": 3954 + }, + { + "epoch": 0.73, + "learning_rate": 3.613625734121221e-05, + "loss": 1.11, + "step": 3955 + }, + { + "epoch": 0.73, + "learning_rate": 3.609033522339025e-05, + "loss": 1.028, + "step": 3956 + }, + { + "epoch": 0.73, + "learning_rate": 3.604443587748209e-05, + "loss": 1.1469, + "step": 3957 + }, + { + "epoch": 0.73, + "learning_rate": 3.599855931984243e-05, + "loss": 1.3163, + "step": 3958 + }, + { + "epoch": 0.73, + "learning_rate": 3.595270556681759e-05, + "loss": 1.0422, + "step": 3959 + }, + { + "epoch": 0.73, + "learning_rate": 3.5906874634746015e-05, + "loss": 1.0087, + "step": 3960 + }, + { + "epoch": 0.73, + "learning_rate": 3.5861066539957844e-05, + "loss": 1.0731, + "step": 3961 + }, + { + "epoch": 0.73, + "learning_rate": 3.58152812987752e-05, + "loss": 0.9968, + "step": 3962 + }, + { + "epoch": 0.73, + "learning_rate": 3.5769518927511966e-05, + "loss": 1.0977, + "step": 3963 + }, + { + "epoch": 0.73, + "learning_rate": 3.5723779442474014e-05, + "loss": 1.1538, + "step": 3964 + }, + { + "epoch": 0.73, + "learning_rate": 3.567806285995883e-05, + "loss": 1.1945, + "step": 3965 + }, + { + "epoch": 0.73, + "learning_rate": 3.563236919625595e-05, + "loss": 1.1948, + "step": 3966 + }, + { + "epoch": 0.73, + "learning_rate": 3.558669846764665e-05, + "loss": 1.2501, + "step": 3967 + }, + { + "epoch": 0.73, + "learning_rate": 3.55410506904041e-05, + "loss": 1.0492, + "step": 3968 + }, + { + "epoch": 0.73, + "learning_rate": 3.54954258807932e-05, + "loss": 1.1874, + "step": 3969 + }, + { + "epoch": 0.73, + "learning_rate": 3.544982405507072e-05, + "loss": 1.0948, + "step": 3970 + }, + { + "epoch": 0.73, + "learning_rate": 3.540424522948521e-05, + "loss": 1.0558, + "step": 3971 + }, + { + "epoch": 0.73, + "learning_rate": 3.5358689420277115e-05, + "loss": 1.1318, + "step": 3972 + }, + { + "epoch": 0.73, + "learning_rate": 3.531315664367852e-05, + "loss": 1.0285, + "step": 3973 + }, + { + "epoch": 0.73, + "learning_rate": 3.526764691591349e-05, + "loss": 1.1824, + "step": 3974 + }, + { + "epoch": 0.73, + "learning_rate": 3.522216025319773e-05, + "loss": 1.2099, + "step": 3975 + }, + { + "epoch": 0.73, + "learning_rate": 3.5176696671738795e-05, + "loss": 1.197, + "step": 3976 + }, + { + "epoch": 0.73, + "learning_rate": 3.513125618773595e-05, + "loss": 1.2338, + "step": 3977 + }, + { + "epoch": 0.73, + "learning_rate": 3.5085838817380356e-05, + "loss": 0.9394, + "step": 3978 + }, + { + "epoch": 0.73, + "learning_rate": 3.504044457685479e-05, + "loss": 1.162, + "step": 3979 + }, + { + "epoch": 0.73, + "learning_rate": 3.499507348233396e-05, + "loss": 1.1827, + "step": 3980 + }, + { + "epoch": 0.73, + "learning_rate": 3.49497255499841e-05, + "loss": 1.1167, + "step": 3981 + }, + { + "epoch": 0.73, + "learning_rate": 3.490440079596341e-05, + "loss": 1.0978, + "step": 3982 + }, + { + "epoch": 0.73, + "learning_rate": 3.485909923642168e-05, + "loss": 1.1518, + "step": 3983 + }, + { + "epoch": 0.73, + "learning_rate": 3.481382088750054e-05, + "loss": 1.1388, + "step": 3984 + }, + { + "epoch": 0.73, + "learning_rate": 3.476856576533326e-05, + "loss": 1.0785, + "step": 3985 + }, + { + "epoch": 0.73, + "learning_rate": 3.4723333886044964e-05, + "loss": 1.0734, + "step": 3986 + }, + { + "epoch": 0.73, + "learning_rate": 3.467812526575228e-05, + "loss": 1.1485, + "step": 3987 + }, + { + "epoch": 0.73, + "learning_rate": 3.463293992056378e-05, + "loss": 1.083, + "step": 3988 + }, + { + "epoch": 0.74, + "learning_rate": 3.4587777866579566e-05, + "loss": 1.0833, + "step": 3989 + }, + { + "epoch": 0.74, + "learning_rate": 3.4542639119891575e-05, + "loss": 1.0623, + "step": 3990 + }, + { + "epoch": 0.74, + "learning_rate": 3.4497523696583344e-05, + "loss": 1.2452, + "step": 3991 + }, + { + "epoch": 0.74, + "learning_rate": 3.445243161273016e-05, + "loss": 1.1422, + "step": 3992 + }, + { + "epoch": 0.74, + "learning_rate": 3.4407362884398906e-05, + "loss": 1.104, + "step": 3993 + }, + { + "epoch": 0.74, + "learning_rate": 3.436231752764829e-05, + "loss": 1.1466, + "step": 3994 + }, + { + "epoch": 0.74, + "learning_rate": 3.431729555852853e-05, + "loss": 1.1481, + "step": 3995 + }, + { + "epoch": 0.74, + "learning_rate": 3.4272296993081644e-05, + "loss": 1.2325, + "step": 3996 + }, + { + "epoch": 0.74, + "learning_rate": 3.422732184734124e-05, + "loss": 1.1282, + "step": 3997 + }, + { + "epoch": 0.74, + "learning_rate": 3.41823701373326e-05, + "loss": 1.3084, + "step": 3998 + }, + { + "epoch": 0.74, + "learning_rate": 3.413744187907261e-05, + "loss": 1.2073, + "step": 3999 + }, + { + "epoch": 0.74, + "learning_rate": 3.4092537088569885e-05, + "loss": 1.1476, + "step": 4000 + }, + { + "epoch": 0.74, + "learning_rate": 3.40476557818246e-05, + "loss": 1.1374, + "step": 4001 + }, + { + "epoch": 0.74, + "learning_rate": 3.4002797974828695e-05, + "loss": 1.0588, + "step": 4002 + }, + { + "epoch": 0.74, + "learning_rate": 3.395796368356548e-05, + "loss": 1.2085, + "step": 4003 + }, + { + "epoch": 0.74, + "learning_rate": 3.391315292401017e-05, + "loss": 1.021, + "step": 4004 + }, + { + "epoch": 0.74, + "learning_rate": 3.3868365712129405e-05, + "loss": 1.1059, + "step": 4005 + }, + { + "epoch": 0.74, + "learning_rate": 3.382360206388153e-05, + "loss": 1.0669, + "step": 4006 + }, + { + "epoch": 0.74, + "learning_rate": 3.377886199521644e-05, + "loss": 1.1203, + "step": 4007 + }, + { + "epoch": 0.74, + "learning_rate": 3.373414552207572e-05, + "loss": 1.0487, + "step": 4008 + }, + { + "epoch": 0.74, + "learning_rate": 3.368945266039237e-05, + "loss": 1.1325, + "step": 4009 + }, + { + "epoch": 0.74, + "learning_rate": 3.3644783426091176e-05, + "loss": 1.1753, + "step": 4010 + }, + { + "epoch": 0.74, + "learning_rate": 3.3600137835088344e-05, + "loss": 1.0965, + "step": 4011 + }, + { + "epoch": 0.74, + "learning_rate": 3.35555159032918e-05, + "loss": 1.2198, + "step": 4012 + }, + { + "epoch": 0.74, + "learning_rate": 3.351091764660093e-05, + "loss": 1.1164, + "step": 4013 + }, + { + "epoch": 0.74, + "learning_rate": 3.3466343080906716e-05, + "loss": 1.108, + "step": 4014 + }, + { + "epoch": 0.74, + "learning_rate": 3.3421792222091684e-05, + "loss": 1.0333, + "step": 4015 + }, + { + "epoch": 0.74, + "learning_rate": 3.337726508602999e-05, + "loss": 1.1409, + "step": 4016 + }, + { + "epoch": 0.74, + "learning_rate": 3.333276168858722e-05, + "loss": 1.1209, + "step": 4017 + }, + { + "epoch": 0.74, + "learning_rate": 3.328828204562062e-05, + "loss": 1.1063, + "step": 4018 + }, + { + "epoch": 0.74, + "learning_rate": 3.324382617297887e-05, + "loss": 1.1368, + "step": 4019 + }, + { + "epoch": 0.74, + "learning_rate": 3.319939408650225e-05, + "loss": 1.1074, + "step": 4020 + }, + { + "epoch": 0.74, + "learning_rate": 3.31549858020225e-05, + "loss": 1.1695, + "step": 4021 + }, + { + "epoch": 0.74, + "learning_rate": 3.311060133536297e-05, + "loss": 1.0984, + "step": 4022 + }, + { + "epoch": 0.74, + "learning_rate": 3.306624070233842e-05, + "loss": 1.1461, + "step": 4023 + }, + { + "epoch": 0.74, + "learning_rate": 3.3021903918755236e-05, + "loss": 1.031, + "step": 4024 + }, + { + "epoch": 0.74, + "learning_rate": 3.297759100041113e-05, + "loss": 1.0834, + "step": 4025 + }, + { + "epoch": 0.74, + "learning_rate": 3.293330196309553e-05, + "loss": 1.1385, + "step": 4026 + }, + { + "epoch": 0.74, + "learning_rate": 3.288903682258917e-05, + "loss": 1.2092, + "step": 4027 + }, + { + "epoch": 0.74, + "learning_rate": 3.284479559466439e-05, + "loss": 1.1014, + "step": 4028 + }, + { + "epoch": 0.74, + "learning_rate": 3.280057829508492e-05, + "loss": 1.1597, + "step": 4029 + }, + { + "epoch": 0.74, + "learning_rate": 3.275638493960608e-05, + "loss": 1.0684, + "step": 4030 + }, + { + "epoch": 0.74, + "learning_rate": 3.2712215543974475e-05, + "loss": 1.1991, + "step": 4031 + }, + { + "epoch": 0.74, + "learning_rate": 3.2668070123928385e-05, + "loss": 1.1358, + "step": 4032 + }, + { + "epoch": 0.74, + "learning_rate": 3.262394869519736e-05, + "loss": 1.1505, + "step": 4033 + }, + { + "epoch": 0.74, + "learning_rate": 3.257985127350257e-05, + "loss": 1.1023, + "step": 4034 + }, + { + "epoch": 0.74, + "learning_rate": 3.253577787455651e-05, + "loss": 1.0287, + "step": 4035 + }, + { + "epoch": 0.74, + "learning_rate": 3.249172851406313e-05, + "loss": 1.0859, + "step": 4036 + }, + { + "epoch": 0.74, + "learning_rate": 3.244770320771784e-05, + "loss": 1.0562, + "step": 4037 + }, + { + "epoch": 0.74, + "learning_rate": 3.2403701971207536e-05, + "loss": 1.0959, + "step": 4038 + }, + { + "epoch": 0.74, + "learning_rate": 3.235972482021039e-05, + "loss": 1.124, + "step": 4039 + }, + { + "epoch": 0.74, + "learning_rate": 3.2315771770396165e-05, + "loss": 1.0795, + "step": 4040 + }, + { + "epoch": 0.74, + "learning_rate": 3.227184283742591e-05, + "loss": 1.1218, + "step": 4041 + }, + { + "epoch": 0.74, + "learning_rate": 3.222793803695213e-05, + "loss": 1.2218, + "step": 4042 + }, + { + "epoch": 0.75, + "learning_rate": 3.218405738461868e-05, + "loss": 1.161, + "step": 4043 + }, + { + "epoch": 0.75, + "learning_rate": 3.214020089606094e-05, + "loss": 1.0874, + "step": 4044 + }, + { + "epoch": 0.75, + "learning_rate": 3.209636858690551e-05, + "loss": 1.0379, + "step": 4045 + }, + { + "epoch": 0.75, + "learning_rate": 3.205256047277054e-05, + "loss": 0.9527, + "step": 4046 + }, + { + "epoch": 0.75, + "learning_rate": 3.200877656926543e-05, + "loss": 1.0966, + "step": 4047 + }, + { + "epoch": 0.75, + "learning_rate": 3.1965016891991e-05, + "loss": 1.1112, + "step": 4048 + }, + { + "epoch": 0.75, + "learning_rate": 3.192128145653943e-05, + "loss": 1.1744, + "step": 4049 + }, + { + "epoch": 0.75, + "learning_rate": 3.1877570278494315e-05, + "loss": 1.1894, + "step": 4050 + }, + { + "epoch": 0.75, + "learning_rate": 3.1833883373430515e-05, + "loss": 1.1662, + "step": 4051 + }, + { + "epoch": 0.75, + "learning_rate": 3.179022075691435e-05, + "loss": 1.1394, + "step": 4052 + }, + { + "epoch": 0.75, + "learning_rate": 3.174658244450337e-05, + "loss": 1.1085, + "step": 4053 + }, + { + "epoch": 0.75, + "learning_rate": 3.1702968451746554e-05, + "loss": 1.214, + "step": 4054 + }, + { + "epoch": 0.75, + "learning_rate": 3.1659378794184126e-05, + "loss": 1.0739, + "step": 4055 + }, + { + "epoch": 0.75, + "learning_rate": 3.161581348734777e-05, + "loss": 1.0185, + "step": 4056 + }, + { + "epoch": 0.75, + "learning_rate": 3.1572272546760383e-05, + "loss": 1.0019, + "step": 4057 + }, + { + "epoch": 0.75, + "learning_rate": 3.1528755987936186e-05, + "loss": 1.1427, + "step": 4058 + }, + { + "epoch": 0.75, + "learning_rate": 3.148526382638081e-05, + "loss": 0.9478, + "step": 4059 + }, + { + "epoch": 0.75, + "learning_rate": 3.144179607759108e-05, + "loss": 1.1791, + "step": 4060 + }, + { + "epoch": 0.75, + "learning_rate": 3.139835275705514e-05, + "loss": 1.1714, + "step": 4061 + }, + { + "epoch": 0.75, + "learning_rate": 3.135493388025253e-05, + "loss": 1.0059, + "step": 4062 + }, + { + "epoch": 0.75, + "learning_rate": 3.131153946265397e-05, + "loss": 1.1601, + "step": 4063 + }, + { + "epoch": 0.75, + "learning_rate": 3.126816951972147e-05, + "loss": 1.0811, + "step": 4064 + }, + { + "epoch": 0.75, + "learning_rate": 3.122482406690842e-05, + "loss": 1.0183, + "step": 4065 + }, + { + "epoch": 0.75, + "learning_rate": 3.118150311965937e-05, + "loss": 1.1174, + "step": 4066 + }, + { + "epoch": 0.75, + "learning_rate": 3.113820669341017e-05, + "loss": 1.2645, + "step": 4067 + }, + { + "epoch": 0.75, + "learning_rate": 3.1094934803587996e-05, + "loss": 1.0673, + "step": 4068 + }, + { + "epoch": 0.75, + "learning_rate": 3.10516874656112e-05, + "loss": 1.1835, + "step": 4069 + }, + { + "epoch": 0.75, + "learning_rate": 3.100846469488939e-05, + "loss": 0.9629, + "step": 4070 + }, + { + "epoch": 0.75, + "learning_rate": 3.096526650682352e-05, + "loss": 1.1195, + "step": 4071 + }, + { + "epoch": 0.75, + "learning_rate": 3.0922092916805665e-05, + "loss": 1.1401, + "step": 4072 + }, + { + "epoch": 0.75, + "learning_rate": 3.087894394021917e-05, + "loss": 1.017, + "step": 4073 + }, + { + "epoch": 0.75, + "learning_rate": 3.0835819592438674e-05, + "loss": 1.2009, + "step": 4074 + }, + { + "epoch": 0.75, + "learning_rate": 3.079271988882997e-05, + "loss": 1.1013, + "step": 4075 + }, + { + "epoch": 0.75, + "learning_rate": 3.074964484475004e-05, + "loss": 1.0393, + "step": 4076 + }, + { + "epoch": 0.75, + "learning_rate": 3.070659447554719e-05, + "loss": 1.0915, + "step": 4077 + }, + { + "epoch": 0.75, + "learning_rate": 3.066356879656087e-05, + "loss": 1.172, + "step": 4078 + }, + { + "epoch": 0.75, + "learning_rate": 3.06205678231217e-05, + "loss": 0.9634, + "step": 4079 + }, + { + "epoch": 0.75, + "learning_rate": 3.057759157055153e-05, + "loss": 0.9433, + "step": 4080 + }, + { + "epoch": 0.75, + "learning_rate": 3.053464005416347e-05, + "loss": 1.1977, + "step": 4081 + }, + { + "epoch": 0.75, + "learning_rate": 3.0491713289261657e-05, + "loss": 0.9426, + "step": 4082 + }, + { + "epoch": 0.75, + "learning_rate": 3.0448811291141577e-05, + "loss": 1.0548, + "step": 4083 + }, + { + "epoch": 0.75, + "learning_rate": 3.0405934075089802e-05, + "loss": 1.1745, + "step": 4084 + }, + { + "epoch": 0.75, + "learning_rate": 3.0363081656384075e-05, + "loss": 1.0448, + "step": 4085 + }, + { + "epoch": 0.75, + "learning_rate": 3.032025405029327e-05, + "loss": 1.0903, + "step": 4086 + }, + { + "epoch": 0.75, + "learning_rate": 3.0277451272077562e-05, + "loss": 1.1961, + "step": 4087 + }, + { + "epoch": 0.75, + "learning_rate": 3.0234673336988094e-05, + "loss": 1.2333, + "step": 4088 + }, + { + "epoch": 0.75, + "learning_rate": 3.0191920260267325e-05, + "loss": 1.1165, + "step": 4089 + }, + { + "epoch": 0.75, + "learning_rate": 3.0149192057148734e-05, + "loss": 1.1327, + "step": 4090 + }, + { + "epoch": 0.75, + "learning_rate": 3.0106488742856996e-05, + "loss": 1.0277, + "step": 4091 + }, + { + "epoch": 0.75, + "learning_rate": 3.0063810332607865e-05, + "loss": 1.1368, + "step": 4092 + }, + { + "epoch": 0.75, + "learning_rate": 3.002115684160832e-05, + "loss": 0.9614, + "step": 4093 + }, + { + "epoch": 0.75, + "learning_rate": 2.997852828505634e-05, + "loss": 1.049, + "step": 4094 + }, + { + "epoch": 0.75, + "learning_rate": 2.993592467814115e-05, + "loss": 1.2416, + "step": 4095 + }, + { + "epoch": 0.75, + "learning_rate": 2.989334603604297e-05, + "loss": 1.135, + "step": 4096 + }, + { + "epoch": 0.75, + "learning_rate": 2.9850792373933178e-05, + "loss": 1.1255, + "step": 4097 + }, + { + "epoch": 0.76, + "learning_rate": 2.9808263706974216e-05, + "loss": 1.0261, + "step": 4098 + }, + { + "epoch": 0.76, + "learning_rate": 2.9765760050319703e-05, + "loss": 1.2663, + "step": 4099 + }, + { + "epoch": 0.76, + "learning_rate": 2.9723281419114225e-05, + "loss": 1.2087, + "step": 4100 + }, + { + "epoch": 0.76, + "learning_rate": 2.9680827828493618e-05, + "loss": 1.1019, + "step": 4101 + }, + { + "epoch": 0.76, + "learning_rate": 2.9638399293584572e-05, + "loss": 1.0079, + "step": 4102 + }, + { + "epoch": 0.76, + "learning_rate": 2.9595995829505053e-05, + "loss": 1.0628, + "step": 4103 + }, + { + "epoch": 0.76, + "learning_rate": 2.955361745136397e-05, + "loss": 0.9799, + "step": 4104 + }, + { + "epoch": 0.76, + "learning_rate": 2.9511264174261377e-05, + "loss": 1.143, + "step": 4105 + }, + { + "epoch": 0.76, + "learning_rate": 2.9468936013288284e-05, + "loss": 1.1997, + "step": 4106 + }, + { + "epoch": 0.76, + "learning_rate": 2.9426632983526924e-05, + "loss": 1.0896, + "step": 4107 + }, + { + "epoch": 0.76, + "learning_rate": 2.9384355100050322e-05, + "loss": 1.0908, + "step": 4108 + }, + { + "epoch": 0.76, + "learning_rate": 2.9342102377922774e-05, + "loss": 1.1977, + "step": 4109 + }, + { + "epoch": 0.76, + "learning_rate": 2.9299874832199458e-05, + "loss": 1.2401, + "step": 4110 + }, + { + "epoch": 0.76, + "learning_rate": 2.9257672477926722e-05, + "loss": 1.1556, + "step": 4111 + }, + { + "epoch": 0.76, + "learning_rate": 2.9215495330141806e-05, + "loss": 1.0213, + "step": 4112 + }, + { + "epoch": 0.76, + "learning_rate": 2.9173343403873043e-05, + "loss": 0.9492, + "step": 4113 + }, + { + "epoch": 0.76, + "learning_rate": 2.913121671413971e-05, + "loss": 1.1066, + "step": 4114 + }, + { + "epoch": 0.76, + "learning_rate": 2.9089115275952218e-05, + "loss": 1.0366, + "step": 4115 + }, + { + "epoch": 0.76, + "learning_rate": 2.9047039104311824e-05, + "loss": 1.0275, + "step": 4116 + }, + { + "epoch": 0.76, + "learning_rate": 2.9004988214210927e-05, + "loss": 1.0602, + "step": 4117 + }, + { + "epoch": 0.76, + "learning_rate": 2.8962962620632827e-05, + "loss": 1.1819, + "step": 4118 + }, + { + "epoch": 0.76, + "learning_rate": 2.8920962338551838e-05, + "loss": 1.0267, + "step": 4119 + }, + { + "epoch": 0.76, + "learning_rate": 2.8878987382933198e-05, + "loss": 1.1702, + "step": 4120 + }, + { + "epoch": 0.76, + "learning_rate": 2.8837037768733265e-05, + "loss": 1.1104, + "step": 4121 + }, + { + "epoch": 0.76, + "learning_rate": 2.879511351089921e-05, + "loss": 1.0961, + "step": 4122 + }, + { + "epoch": 0.76, + "learning_rate": 2.8753214624369316e-05, + "loss": 1.0863, + "step": 4123 + }, + { + "epoch": 0.76, + "learning_rate": 2.8711341124072633e-05, + "loss": 1.1373, + "step": 4124 + }, + { + "epoch": 0.76, + "learning_rate": 2.866949302492936e-05, + "loss": 1.1661, + "step": 4125 + }, + { + "epoch": 0.76, + "learning_rate": 2.8627670341850522e-05, + "loss": 1.1561, + "step": 4126 + }, + { + "epoch": 0.76, + "learning_rate": 2.858587308973818e-05, + "loss": 1.1237, + "step": 4127 + }, + { + "epoch": 0.76, + "learning_rate": 2.8544101283485223e-05, + "loss": 1.1094, + "step": 4128 + }, + { + "epoch": 0.76, + "learning_rate": 2.850235493797563e-05, + "loss": 1.1171, + "step": 4129 + }, + { + "epoch": 0.76, + "learning_rate": 2.846063406808408e-05, + "loss": 1.2339, + "step": 4130 + }, + { + "epoch": 0.76, + "learning_rate": 2.841893868867641e-05, + "loss": 1.1503, + "step": 4131 + }, + { + "epoch": 0.76, + "learning_rate": 2.8377268814609203e-05, + "loss": 1.1379, + "step": 4132 + }, + { + "epoch": 0.76, + "learning_rate": 2.8335624460730083e-05, + "loss": 1.0068, + "step": 4133 + }, + { + "epoch": 0.76, + "learning_rate": 2.8294005641877486e-05, + "loss": 1.009, + "step": 4134 + }, + { + "epoch": 0.76, + "learning_rate": 2.82524123728808e-05, + "loss": 1.063, + "step": 4135 + }, + { + "epoch": 0.76, + "learning_rate": 2.8210844668560244e-05, + "loss": 1.0135, + "step": 4136 + }, + { + "epoch": 0.76, + "learning_rate": 2.816930254372705e-05, + "loss": 1.0616, + "step": 4137 + }, + { + "epoch": 0.76, + "learning_rate": 2.8127786013183187e-05, + "loss": 1.0898, + "step": 4138 + }, + { + "epoch": 0.76, + "learning_rate": 2.808629509172165e-05, + "loss": 1.233, + "step": 4139 + }, + { + "epoch": 0.76, + "learning_rate": 2.8044829794126215e-05, + "loss": 1.1632, + "step": 4140 + }, + { + "epoch": 0.76, + "learning_rate": 2.8003390135171537e-05, + "loss": 1.3135, + "step": 4141 + }, + { + "epoch": 0.76, + "learning_rate": 2.7961976129623134e-05, + "loss": 1.0983, + "step": 4142 + }, + { + "epoch": 0.76, + "learning_rate": 2.792058779223744e-05, + "loss": 1.0675, + "step": 4143 + }, + { + "epoch": 0.76, + "learning_rate": 2.7879225137761666e-05, + "loss": 1.0494, + "step": 4144 + }, + { + "epoch": 0.76, + "learning_rate": 2.783788818093399e-05, + "loss": 1.0686, + "step": 4145 + }, + { + "epoch": 0.76, + "learning_rate": 2.7796576936483233e-05, + "loss": 1.1424, + "step": 4146 + }, + { + "epoch": 0.76, + "learning_rate": 2.7755291419129247e-05, + "loss": 1.1846, + "step": 4147 + }, + { + "epoch": 0.76, + "learning_rate": 2.7714031643582607e-05, + "loss": 1.1444, + "step": 4148 + }, + { + "epoch": 0.76, + "learning_rate": 2.76727976245448e-05, + "loss": 1.0493, + "step": 4149 + }, + { + "epoch": 0.76, + "learning_rate": 2.7631589376708035e-05, + "loss": 0.9427, + "step": 4150 + }, + { + "epoch": 0.76, + "learning_rate": 2.7590406914755463e-05, + "loss": 1.1571, + "step": 4151 + }, + { + "epoch": 0.77, + "learning_rate": 2.754925025336088e-05, + "loss": 1.0743, + "step": 4152 + }, + { + "epoch": 0.77, + "learning_rate": 2.750811940718906e-05, + "loss": 1.1467, + "step": 4153 + }, + { + "epoch": 0.77, + "learning_rate": 2.746701439089544e-05, + "loss": 1.1467, + "step": 4154 + }, + { + "epoch": 0.77, + "learning_rate": 2.7425935219126387e-05, + "loss": 1.0198, + "step": 4155 + }, + { + "epoch": 0.77, + "learning_rate": 2.7384881906518957e-05, + "loss": 1.0293, + "step": 4156 + }, + { + "epoch": 0.77, + "learning_rate": 2.7343854467701014e-05, + "loss": 0.9827, + "step": 4157 + }, + { + "epoch": 0.77, + "learning_rate": 2.7302852917291187e-05, + "loss": 1.145, + "step": 4158 + }, + { + "epoch": 0.77, + "learning_rate": 2.7261877269898972e-05, + "loss": 1.1868, + "step": 4159 + }, + { + "epoch": 0.77, + "learning_rate": 2.7220927540124507e-05, + "loss": 1.1033, + "step": 4160 + }, + { + "epoch": 0.77, + "learning_rate": 2.718000374255881e-05, + "loss": 1.083, + "step": 4161 + }, + { + "epoch": 0.77, + "learning_rate": 2.7139105891783588e-05, + "loss": 1.0114, + "step": 4162 + }, + { + "epoch": 0.77, + "learning_rate": 2.7098234002371313e-05, + "loss": 1.0034, + "step": 4163 + }, + { + "epoch": 0.77, + "learning_rate": 2.705738808888518e-05, + "loss": 1.1612, + "step": 4164 + }, + { + "epoch": 0.77, + "learning_rate": 2.701656816587924e-05, + "loss": 1.2265, + "step": 4165 + }, + { + "epoch": 0.77, + "learning_rate": 2.6975774247898133e-05, + "loss": 1.2942, + "step": 4166 + }, + { + "epoch": 0.77, + "learning_rate": 2.693500634947741e-05, + "loss": 1.2067, + "step": 4167 + }, + { + "epoch": 0.77, + "learning_rate": 2.6894264485143107e-05, + "loss": 1.1523, + "step": 4168 + }, + { + "epoch": 0.77, + "learning_rate": 2.6853548669412233e-05, + "loss": 1.1097, + "step": 4169 + }, + { + "epoch": 0.77, + "learning_rate": 2.6812858916792328e-05, + "loss": 1.1583, + "step": 4170 + }, + { + "epoch": 0.77, + "learning_rate": 2.6772195241781805e-05, + "loss": 1.088, + "step": 4171 + }, + { + "epoch": 0.77, + "learning_rate": 2.6731557658869666e-05, + "loss": 1.1223, + "step": 4172 + }, + { + "epoch": 0.77, + "learning_rate": 2.669094618253565e-05, + "loss": 1.2614, + "step": 4173 + }, + { + "epoch": 0.77, + "learning_rate": 2.6650360827250166e-05, + "loss": 1.067, + "step": 4174 + }, + { + "epoch": 0.77, + "learning_rate": 2.6609801607474415e-05, + "loss": 1.0934, + "step": 4175 + }, + { + "epoch": 0.77, + "learning_rate": 2.6569268537660163e-05, + "loss": 1.034, + "step": 4176 + }, + { + "epoch": 0.77, + "learning_rate": 2.6528761632249965e-05, + "loss": 1.043, + "step": 4177 + }, + { + "epoch": 0.77, + "learning_rate": 2.6488280905676965e-05, + "loss": 1.013, + "step": 4178 + }, + { + "epoch": 0.77, + "learning_rate": 2.6447826372365037e-05, + "loss": 1.1152, + "step": 4179 + }, + { + "epoch": 0.77, + "learning_rate": 2.6407398046728648e-05, + "loss": 1.1616, + "step": 4180 + }, + { + "epoch": 0.77, + "learning_rate": 2.6366995943173057e-05, + "loss": 1.1169, + "step": 4181 + }, + { + "epoch": 0.77, + "learning_rate": 2.6326620076094033e-05, + "loss": 1.0443, + "step": 4182 + }, + { + "epoch": 0.77, + "learning_rate": 2.628627045987817e-05, + "loss": 1.0118, + "step": 4183 + }, + { + "epoch": 0.77, + "learning_rate": 2.6245947108902468e-05, + "loss": 1.0001, + "step": 4184 + }, + { + "epoch": 0.77, + "learning_rate": 2.62056500375348e-05, + "loss": 1.1085, + "step": 4185 + }, + { + "epoch": 0.77, + "learning_rate": 2.6165379260133516e-05, + "loss": 1.1914, + "step": 4186 + }, + { + "epoch": 0.77, + "learning_rate": 2.6125134791047733e-05, + "loss": 0.9617, + "step": 4187 + }, + { + "epoch": 0.77, + "learning_rate": 2.6084916644617062e-05, + "loss": 1.1158, + "step": 4188 + }, + { + "epoch": 0.77, + "learning_rate": 2.6044724835171874e-05, + "loss": 1.0361, + "step": 4189 + }, + { + "epoch": 0.77, + "learning_rate": 2.6004559377032955e-05, + "loss": 1.0057, + "step": 4190 + }, + { + "epoch": 0.77, + "learning_rate": 2.5964420284511936e-05, + "loss": 1.0281, + "step": 4191 + }, + { + "epoch": 0.77, + "learning_rate": 2.5924307571910857e-05, + "loss": 1.1368, + "step": 4192 + }, + { + "epoch": 0.77, + "learning_rate": 2.588422125352251e-05, + "loss": 1.1279, + "step": 4193 + }, + { + "epoch": 0.77, + "learning_rate": 2.584416134363019e-05, + "loss": 1.1715, + "step": 4194 + }, + { + "epoch": 0.77, + "learning_rate": 2.5804127856507796e-05, + "loss": 1.1862, + "step": 4195 + }, + { + "epoch": 0.77, + "learning_rate": 2.5764120806419812e-05, + "loss": 1.1994, + "step": 4196 + }, + { + "epoch": 0.77, + "learning_rate": 2.572414020762136e-05, + "loss": 1.1222, + "step": 4197 + }, + { + "epoch": 0.77, + "learning_rate": 2.568418607435803e-05, + "loss": 1.1184, + "step": 4198 + }, + { + "epoch": 0.77, + "learning_rate": 2.5644258420866107e-05, + "loss": 1.1373, + "step": 4199 + }, + { + "epoch": 0.77, + "learning_rate": 2.5604357261372335e-05, + "loss": 0.9658, + "step": 4200 + }, + { + "epoch": 0.77, + "learning_rate": 2.5564482610094088e-05, + "loss": 1.1367, + "step": 4201 + }, + { + "epoch": 0.77, + "learning_rate": 2.5524634481239197e-05, + "loss": 1.0883, + "step": 4202 + }, + { + "epoch": 0.77, + "learning_rate": 2.5484812889006205e-05, + "loss": 0.8986, + "step": 4203 + }, + { + "epoch": 0.77, + "learning_rate": 2.5445017847584028e-05, + "loss": 1.1434, + "step": 4204 + }, + { + "epoch": 0.77, + "learning_rate": 2.5405249371152273e-05, + "loss": 1.1032, + "step": 4205 + }, + { + "epoch": 0.78, + "learning_rate": 2.5365507473880957e-05, + "loss": 1.2246, + "step": 4206 + }, + { + "epoch": 0.78, + "learning_rate": 2.5325792169930705e-05, + "loss": 0.9815, + "step": 4207 + }, + { + "epoch": 0.78, + "learning_rate": 2.5286103473452583e-05, + "loss": 1.1005, + "step": 4208 + }, + { + "epoch": 0.78, + "learning_rate": 2.5246441398588316e-05, + "loss": 1.176, + "step": 4209 + }, + { + "epoch": 0.78, + "learning_rate": 2.5206805959469982e-05, + "loss": 1.0249, + "step": 4210 + }, + { + "epoch": 0.78, + "learning_rate": 2.5167197170220314e-05, + "loss": 1.0971, + "step": 4211 + }, + { + "epoch": 0.78, + "learning_rate": 2.5127615044952446e-05, + "loss": 1.0398, + "step": 4212 + }, + { + "epoch": 0.78, + "learning_rate": 2.5088059597770062e-05, + "loss": 1.1619, + "step": 4213 + }, + { + "epoch": 0.78, + "learning_rate": 2.504853084276728e-05, + "loss": 1.2046, + "step": 4214 + }, + { + "epoch": 0.78, + "learning_rate": 2.500902879402881e-05, + "loss": 1.1076, + "step": 4215 + }, + { + "epoch": 0.78, + "learning_rate": 2.496955346562978e-05, + "loss": 1.0954, + "step": 4216 + }, + { + "epoch": 0.78, + "learning_rate": 2.4930104871635752e-05, + "loss": 1.0814, + "step": 4217 + }, + { + "epoch": 0.78, + "learning_rate": 2.4890683026102878e-05, + "loss": 1.1664, + "step": 4218 + }, + { + "epoch": 0.78, + "learning_rate": 2.4851287943077706e-05, + "loss": 1.0219, + "step": 4219 + }, + { + "epoch": 0.78, + "learning_rate": 2.4811919636597214e-05, + "loss": 1.2661, + "step": 4220 + }, + { + "epoch": 0.78, + "learning_rate": 2.4772578120688938e-05, + "loss": 1.1392, + "step": 4221 + }, + { + "epoch": 0.78, + "learning_rate": 2.4733263409370798e-05, + "loss": 1.0788, + "step": 4222 + }, + { + "epoch": 0.78, + "learning_rate": 2.4693975516651147e-05, + "loss": 1.277, + "step": 4223 + }, + { + "epoch": 0.78, + "learning_rate": 2.4654714456528873e-05, + "loss": 1.0426, + "step": 4224 + }, + { + "epoch": 0.78, + "learning_rate": 2.4615480242993217e-05, + "loss": 1.138, + "step": 4225 + }, + { + "epoch": 0.78, + "learning_rate": 2.4576272890023844e-05, + "loss": 1.132, + "step": 4226 + }, + { + "epoch": 0.78, + "learning_rate": 2.4537092411590945e-05, + "loss": 1.0454, + "step": 4227 + }, + { + "epoch": 0.78, + "learning_rate": 2.4497938821655063e-05, + "loss": 0.9764, + "step": 4228 + }, + { + "epoch": 0.78, + "learning_rate": 2.4458812134167132e-05, + "loss": 1.0024, + "step": 4229 + }, + { + "epoch": 0.78, + "learning_rate": 2.4419712363068593e-05, + "loss": 1.1076, + "step": 4230 + }, + { + "epoch": 0.78, + "learning_rate": 2.4380639522291237e-05, + "loss": 1.1181, + "step": 4231 + }, + { + "epoch": 0.78, + "learning_rate": 2.434159362575722e-05, + "loss": 1.141, + "step": 4232 + }, + { + "epoch": 0.78, + "learning_rate": 2.430257468737921e-05, + "loss": 1.2729, + "step": 4233 + }, + { + "epoch": 0.78, + "learning_rate": 2.4263582721060174e-05, + "loss": 1.11, + "step": 4234 + }, + { + "epoch": 0.78, + "learning_rate": 2.4224617740693478e-05, + "loss": 1.1655, + "step": 4235 + }, + { + "epoch": 0.78, + "learning_rate": 2.418567976016294e-05, + "loss": 1.0366, + "step": 4236 + }, + { + "epoch": 0.78, + "learning_rate": 2.414676879334269e-05, + "loss": 1.0359, + "step": 4237 + }, + { + "epoch": 0.78, + "learning_rate": 2.410788485409724e-05, + "loss": 1.0018, + "step": 4238 + }, + { + "epoch": 0.78, + "learning_rate": 2.4069027956281475e-05, + "loss": 1.0979, + "step": 4239 + }, + { + "epoch": 0.78, + "learning_rate": 2.40301981137407e-05, + "loss": 1.0648, + "step": 4240 + }, + { + "epoch": 0.78, + "learning_rate": 2.3991395340310484e-05, + "loss": 1.1131, + "step": 4241 + }, + { + "epoch": 0.78, + "learning_rate": 2.3952619649816864e-05, + "loss": 1.1267, + "step": 4242 + }, + { + "epoch": 0.78, + "learning_rate": 2.391387105607612e-05, + "loss": 1.0809, + "step": 4243 + }, + { + "epoch": 0.78, + "learning_rate": 2.3875149572894927e-05, + "loss": 1.1709, + "step": 4244 + }, + { + "epoch": 0.78, + "learning_rate": 2.3836455214070276e-05, + "loss": 1.2498, + "step": 4245 + }, + { + "epoch": 0.78, + "learning_rate": 2.3797787993389563e-05, + "loss": 1.1342, + "step": 4246 + }, + { + "epoch": 0.78, + "learning_rate": 2.375914792463041e-05, + "loss": 1.1245, + "step": 4247 + }, + { + "epoch": 0.78, + "learning_rate": 2.3720535021560865e-05, + "loss": 1.2619, + "step": 4248 + }, + { + "epoch": 0.78, + "learning_rate": 2.3681949297939233e-05, + "loss": 1.2297, + "step": 4249 + }, + { + "epoch": 0.78, + "learning_rate": 2.364339076751414e-05, + "loss": 1.1778, + "step": 4250 + }, + { + "epoch": 0.78, + "learning_rate": 2.360485944402452e-05, + "loss": 1.0304, + "step": 4251 + }, + { + "epoch": 0.78, + "learning_rate": 2.3566355341199663e-05, + "loss": 1.0399, + "step": 4252 + }, + { + "epoch": 0.78, + "learning_rate": 2.3527878472759078e-05, + "loss": 1.0736, + "step": 4253 + }, + { + "epoch": 0.78, + "learning_rate": 2.3489428852412676e-05, + "loss": 1.1242, + "step": 4254 + }, + { + "epoch": 0.78, + "learning_rate": 2.345100649386056e-05, + "loss": 1.1274, + "step": 4255 + }, + { + "epoch": 0.78, + "learning_rate": 2.341261141079316e-05, + "loss": 1.2116, + "step": 4256 + }, + { + "epoch": 0.78, + "learning_rate": 2.3374243616891166e-05, + "loss": 1.0445, + "step": 4257 + }, + { + "epoch": 0.78, + "learning_rate": 2.333590312582561e-05, + "loss": 1.1256, + "step": 4258 + }, + { + "epoch": 0.78, + "learning_rate": 2.3297589951257694e-05, + "loss": 1.0186, + "step": 4259 + }, + { + "epoch": 0.79, + "learning_rate": 2.325930410683903e-05, + "loss": 1.121, + "step": 4260 + }, + { + "epoch": 0.79, + "learning_rate": 2.322104560621129e-05, + "loss": 1.0446, + "step": 4261 + }, + { + "epoch": 0.79, + "learning_rate": 2.3182814463006596e-05, + "loss": 1.052, + "step": 4262 + }, + { + "epoch": 0.79, + "learning_rate": 2.3144610690847203e-05, + "loss": 1.1521, + "step": 4263 + }, + { + "epoch": 0.79, + "learning_rate": 2.3106434303345683e-05, + "loss": 1.1353, + "step": 4264 + }, + { + "epoch": 0.79, + "learning_rate": 2.3068285314104787e-05, + "loss": 1.1106, + "step": 4265 + }, + { + "epoch": 0.79, + "learning_rate": 2.3030163736717613e-05, + "loss": 1.2656, + "step": 4266 + }, + { + "epoch": 0.79, + "learning_rate": 2.2992069584767306e-05, + "loss": 1.2125, + "step": 4267 + }, + { + "epoch": 0.79, + "learning_rate": 2.2954002871827417e-05, + "loss": 1.0285, + "step": 4268 + }, + { + "epoch": 0.79, + "learning_rate": 2.291596361146162e-05, + "loss": 1.0393, + "step": 4269 + }, + { + "epoch": 0.79, + "learning_rate": 2.2877951817223896e-05, + "loss": 1.1407, + "step": 4270 + }, + { + "epoch": 0.79, + "learning_rate": 2.2839967502658334e-05, + "loss": 1.0589, + "step": 4271 + }, + { + "epoch": 0.79, + "learning_rate": 2.280201068129929e-05, + "loss": 1.1144, + "step": 4272 + }, + { + "epoch": 0.79, + "learning_rate": 2.2764081366671285e-05, + "loss": 1.0612, + "step": 4273 + }, + { + "epoch": 0.79, + "learning_rate": 2.272617957228913e-05, + "loss": 0.9605, + "step": 4274 + }, + { + "epoch": 0.79, + "learning_rate": 2.268830531165771e-05, + "loss": 1.1535, + "step": 4275 + }, + { + "epoch": 0.79, + "learning_rate": 2.2650458598272206e-05, + "loss": 1.1314, + "step": 4276 + }, + { + "epoch": 0.79, + "learning_rate": 2.2612639445617912e-05, + "loss": 1.1242, + "step": 4277 + }, + { + "epoch": 0.79, + "learning_rate": 2.2574847867170334e-05, + "loss": 1.004, + "step": 4278 + }, + { + "epoch": 0.79, + "learning_rate": 2.2537083876395105e-05, + "loss": 1.1339, + "step": 4279 + }, + { + "epoch": 0.79, + "learning_rate": 2.2499347486748112e-05, + "loss": 1.1065, + "step": 4280 + }, + { + "epoch": 0.79, + "learning_rate": 2.2461638711675337e-05, + "loss": 1.1869, + "step": 4281 + }, + { + "epoch": 0.79, + "learning_rate": 2.2423957564613006e-05, + "loss": 1.0223, + "step": 4282 + }, + { + "epoch": 0.79, + "learning_rate": 2.238630405898734e-05, + "loss": 1.0554, + "step": 4283 + }, + { + "epoch": 0.79, + "learning_rate": 2.2348678208214903e-05, + "loss": 1.1672, + "step": 4284 + }, + { + "epoch": 0.79, + "learning_rate": 2.2311080025702235e-05, + "loss": 1.2218, + "step": 4285 + }, + { + "epoch": 0.79, + "learning_rate": 2.2273509524846192e-05, + "loss": 1.0779, + "step": 4286 + }, + { + "epoch": 0.79, + "learning_rate": 2.2235966719033586e-05, + "loss": 1.0472, + "step": 4287 + }, + { + "epoch": 0.79, + "learning_rate": 2.2198451621641546e-05, + "loss": 1.0488, + "step": 4288 + }, + { + "epoch": 0.79, + "learning_rate": 2.216096424603711e-05, + "loss": 1.1172, + "step": 4289 + }, + { + "epoch": 0.79, + "learning_rate": 2.212350460557765e-05, + "loss": 1.074, + "step": 4290 + }, + { + "epoch": 0.79, + "learning_rate": 2.2086072713610505e-05, + "loss": 1.1081, + "step": 4291 + }, + { + "epoch": 0.79, + "learning_rate": 2.2048668583473232e-05, + "loss": 1.0054, + "step": 4292 + }, + { + "epoch": 0.79, + "learning_rate": 2.201129222849344e-05, + "loss": 1.0691, + "step": 4293 + }, + { + "epoch": 0.79, + "learning_rate": 2.197394366198884e-05, + "loss": 1.0983, + "step": 4294 + }, + { + "epoch": 0.79, + "learning_rate": 2.193662289726721e-05, + "loss": 1.1393, + "step": 4295 + }, + { + "epoch": 0.79, + "learning_rate": 2.1899329947626544e-05, + "loss": 1.0063, + "step": 4296 + }, + { + "epoch": 0.79, + "learning_rate": 2.186206482635479e-05, + "loss": 0.9719, + "step": 4297 + }, + { + "epoch": 0.79, + "learning_rate": 2.1824827546730066e-05, + "loss": 1.1626, + "step": 4298 + }, + { + "epoch": 0.79, + "learning_rate": 2.1787618122020538e-05, + "loss": 1.0711, + "step": 4299 + }, + { + "epoch": 0.79, + "learning_rate": 2.1750436565484433e-05, + "loss": 1.0344, + "step": 4300 + }, + { + "epoch": 0.79, + "learning_rate": 2.1713282890370034e-05, + "loss": 1.0874, + "step": 4301 + }, + { + "epoch": 0.79, + "learning_rate": 2.1676157109915786e-05, + "loss": 1.1651, + "step": 4302 + }, + { + "epoch": 0.79, + "learning_rate": 2.163905923735007e-05, + "loss": 1.1628, + "step": 4303 + }, + { + "epoch": 0.79, + "learning_rate": 2.1601989285891456e-05, + "loss": 0.9838, + "step": 4304 + }, + { + "epoch": 0.79, + "learning_rate": 2.156494726874838e-05, + "loss": 1.0984, + "step": 4305 + }, + { + "epoch": 0.79, + "learning_rate": 2.152793319911952e-05, + "loss": 1.1336, + "step": 4306 + }, + { + "epoch": 0.79, + "learning_rate": 2.1490947090193457e-05, + "loss": 1.0897, + "step": 4307 + }, + { + "epoch": 0.79, + "learning_rate": 2.145398895514892e-05, + "loss": 1.0782, + "step": 4308 + }, + { + "epoch": 0.79, + "learning_rate": 2.1417058807154555e-05, + "loss": 1.1334, + "step": 4309 + }, + { + "epoch": 0.79, + "learning_rate": 2.1380156659369178e-05, + "loss": 1.1153, + "step": 4310 + }, + { + "epoch": 0.79, + "learning_rate": 2.1343282524941422e-05, + "loss": 1.1319, + "step": 4311 + }, + { + "epoch": 0.79, + "learning_rate": 2.1306436417010168e-05, + "loss": 1.1638, + "step": 4312 + }, + { + "epoch": 0.79, + "learning_rate": 2.1269618348704125e-05, + "loss": 1.0933, + "step": 4313 + }, + { + "epoch": 0.79, + "learning_rate": 2.1232828333142152e-05, + "loss": 1.1332, + "step": 4314 + }, + { + "epoch": 0.8, + "learning_rate": 2.1196066383433024e-05, + "loss": 1.0257, + "step": 4315 + }, + { + "epoch": 0.8, + "learning_rate": 2.1159332512675534e-05, + "loss": 1.0487, + "step": 4316 + }, + { + "epoch": 0.8, + "learning_rate": 2.112262673395847e-05, + "loss": 1.0834, + "step": 4317 + }, + { + "epoch": 0.8, + "learning_rate": 2.1085949060360654e-05, + "loss": 1.0034, + "step": 4318 + }, + { + "epoch": 0.8, + "learning_rate": 2.1049299504950803e-05, + "loss": 1.1796, + "step": 4319 + }, + { + "epoch": 0.8, + "learning_rate": 2.101267808078774e-05, + "loss": 1.1904, + "step": 4320 + }, + { + "epoch": 0.8, + "learning_rate": 2.097608480092016e-05, + "loss": 1.1033, + "step": 4321 + }, + { + "epoch": 0.8, + "learning_rate": 2.0939519678386753e-05, + "loss": 1.2439, + "step": 4322 + }, + { + "epoch": 0.8, + "learning_rate": 2.090298272621617e-05, + "loss": 0.9343, + "step": 4323 + }, + { + "epoch": 0.8, + "learning_rate": 2.086647395742709e-05, + "loss": 1.1613, + "step": 4324 + }, + { + "epoch": 0.8, + "learning_rate": 2.082999338502806e-05, + "loss": 1.0808, + "step": 4325 + }, + { + "epoch": 0.8, + "learning_rate": 2.0793541022017692e-05, + "loss": 1.1723, + "step": 4326 + }, + { + "epoch": 0.8, + "learning_rate": 2.0757116881384374e-05, + "loss": 1.1608, + "step": 4327 + }, + { + "epoch": 0.8, + "learning_rate": 2.072072097610661e-05, + "loss": 1.1708, + "step": 4328 + }, + { + "epoch": 0.8, + "learning_rate": 2.0684353319152737e-05, + "loss": 0.9553, + "step": 4329 + }, + { + "epoch": 0.8, + "learning_rate": 2.0648013923481115e-05, + "loss": 1.0479, + "step": 4330 + }, + { + "epoch": 0.8, + "learning_rate": 2.0611702802039912e-05, + "loss": 1.2434, + "step": 4331 + }, + { + "epoch": 0.8, + "learning_rate": 2.0575419967767385e-05, + "loss": 1.1878, + "step": 4332 + }, + { + "epoch": 0.8, + "learning_rate": 2.0539165433591513e-05, + "loss": 1.0348, + "step": 4333 + }, + { + "epoch": 0.8, + "learning_rate": 2.050293921243036e-05, + "loss": 1.1361, + "step": 4334 + }, + { + "epoch": 0.8, + "learning_rate": 2.0466741317191816e-05, + "loss": 1.279, + "step": 4335 + }, + { + "epoch": 0.8, + "learning_rate": 2.0430571760773742e-05, + "loss": 1.0074, + "step": 4336 + }, + { + "epoch": 0.8, + "learning_rate": 2.0394430556063816e-05, + "loss": 1.0559, + "step": 4337 + }, + { + "epoch": 0.8, + "learning_rate": 2.035831771593968e-05, + "loss": 1.1377, + "step": 4338 + }, + { + "epoch": 0.8, + "learning_rate": 2.0322233253268818e-05, + "loss": 0.9873, + "step": 4339 + }, + { + "epoch": 0.8, + "learning_rate": 2.0286177180908673e-05, + "loss": 1.0825, + "step": 4340 + }, + { + "epoch": 0.8, + "learning_rate": 2.0250149511706483e-05, + "loss": 1.1851, + "step": 4341 + }, + { + "epoch": 0.8, + "learning_rate": 2.0214150258499487e-05, + "loss": 1.0325, + "step": 4342 + }, + { + "epoch": 0.8, + "learning_rate": 2.0178179434114674e-05, + "loss": 1.08, + "step": 4343 + }, + { + "epoch": 0.8, + "learning_rate": 2.0142237051368963e-05, + "loss": 1.1093, + "step": 4344 + }, + { + "epoch": 0.8, + "learning_rate": 2.0106323123069104e-05, + "loss": 1.0713, + "step": 4345 + }, + { + "epoch": 0.8, + "learning_rate": 2.0070437662011798e-05, + "loss": 1.1025, + "step": 4346 + }, + { + "epoch": 0.8, + "learning_rate": 2.0034580680983473e-05, + "loss": 1.0426, + "step": 4347 + }, + { + "epoch": 0.8, + "learning_rate": 1.999875219276054e-05, + "loss": 0.8875, + "step": 4348 + }, + { + "epoch": 0.8, + "learning_rate": 1.9962952210109166e-05, + "loss": 1.1241, + "step": 4349 + }, + { + "epoch": 0.8, + "learning_rate": 1.992718074578539e-05, + "loss": 1.2039, + "step": 4350 + }, + { + "epoch": 0.8, + "learning_rate": 1.9891437812535052e-05, + "loss": 1.0333, + "step": 4351 + }, + { + "epoch": 0.8, + "learning_rate": 1.985572342309393e-05, + "loss": 1.1065, + "step": 4352 + }, + { + "epoch": 0.8, + "learning_rate": 1.98200375901875e-05, + "loss": 0.9877, + "step": 4353 + }, + { + "epoch": 0.8, + "learning_rate": 1.9784380326531183e-05, + "loss": 1.0509, + "step": 4354 + }, + { + "epoch": 0.8, + "learning_rate": 1.9748751644830142e-05, + "loss": 1.091, + "step": 4355 + }, + { + "epoch": 0.8, + "learning_rate": 1.9713151557779374e-05, + "loss": 1.018, + "step": 4356 + }, + { + "epoch": 0.8, + "learning_rate": 1.9677580078063662e-05, + "loss": 1.0932, + "step": 4357 + }, + { + "epoch": 0.8, + "learning_rate": 1.9642037218357688e-05, + "loss": 1.0799, + "step": 4358 + }, + { + "epoch": 0.8, + "learning_rate": 1.960652299132585e-05, + "loss": 0.9539, + "step": 4359 + }, + { + "epoch": 0.8, + "learning_rate": 1.957103740962232e-05, + "loss": 1.2344, + "step": 4360 + }, + { + "epoch": 0.8, + "learning_rate": 1.9535580485891182e-05, + "loss": 1.0193, + "step": 4361 + }, + { + "epoch": 0.8, + "learning_rate": 1.95001522327662e-05, + "loss": 1.1512, + "step": 4362 + }, + { + "epoch": 0.8, + "learning_rate": 1.9464752662870945e-05, + "loss": 1.0773, + "step": 4363 + }, + { + "epoch": 0.8, + "learning_rate": 1.9429381788818824e-05, + "loss": 1.1246, + "step": 4364 + }, + { + "epoch": 0.8, + "learning_rate": 1.9394039623212945e-05, + "loss": 1.1288, + "step": 4365 + }, + { + "epoch": 0.8, + "learning_rate": 1.9358726178646224e-05, + "loss": 1.1292, + "step": 4366 + }, + { + "epoch": 0.8, + "learning_rate": 1.9323441467701352e-05, + "loss": 1.2633, + "step": 4367 + }, + { + "epoch": 0.8, + "learning_rate": 1.9288185502950775e-05, + "loss": 1.0899, + "step": 4368 + }, + { + "epoch": 0.81, + "learning_rate": 1.9252958296956648e-05, + "loss": 1.032, + "step": 4369 + }, + { + "epoch": 0.81, + "learning_rate": 1.9217759862270977e-05, + "loss": 1.0682, + "step": 4370 + }, + { + "epoch": 0.81, + "learning_rate": 1.9182590211435423e-05, + "loss": 1.0267, + "step": 4371 + }, + { + "epoch": 0.81, + "learning_rate": 1.914744935698143e-05, + "loss": 1.09, + "step": 4372 + }, + { + "epoch": 0.81, + "learning_rate": 1.9112337311430194e-05, + "loss": 1.1402, + "step": 4373 + }, + { + "epoch": 0.81, + "learning_rate": 1.907725408729263e-05, + "loss": 1.1302, + "step": 4374 + }, + { + "epoch": 0.81, + "learning_rate": 1.904219969706935e-05, + "loss": 1.077, + "step": 4375 + }, + { + "epoch": 0.81, + "learning_rate": 1.9007174153250797e-05, + "loss": 1.0526, + "step": 4376 + }, + { + "epoch": 0.81, + "learning_rate": 1.897217746831701e-05, + "loss": 1.1469, + "step": 4377 + }, + { + "epoch": 0.81, + "learning_rate": 1.8937209654737796e-05, + "loss": 1.0512, + "step": 4378 + }, + { + "epoch": 0.81, + "learning_rate": 1.8902270724972726e-05, + "loss": 0.9796, + "step": 4379 + }, + { + "epoch": 0.81, + "learning_rate": 1.8867360691471002e-05, + "loss": 1.0097, + "step": 4380 + }, + { + "epoch": 0.81, + "learning_rate": 1.883247956667157e-05, + "loss": 1.1034, + "step": 4381 + }, + { + "epoch": 0.81, + "learning_rate": 1.8797627363003022e-05, + "loss": 1.1056, + "step": 4382 + }, + { + "epoch": 0.81, + "learning_rate": 1.8762804092883766e-05, + "loss": 1.0854, + "step": 4383 + }, + { + "epoch": 0.81, + "learning_rate": 1.8728009768721765e-05, + "loss": 1.0703, + "step": 4384 + }, + { + "epoch": 0.81, + "learning_rate": 1.869324440291477e-05, + "loss": 1.0318, + "step": 4385 + }, + { + "epoch": 0.81, + "learning_rate": 1.8658508007850138e-05, + "loss": 0.9658, + "step": 4386 + }, + { + "epoch": 0.81, + "learning_rate": 1.862380059590495e-05, + "loss": 0.9022, + "step": 4387 + }, + { + "epoch": 0.81, + "learning_rate": 1.8589122179445917e-05, + "loss": 1.2719, + "step": 4388 + }, + { + "epoch": 0.81, + "learning_rate": 1.85544727708295e-05, + "loss": 1.0378, + "step": 4389 + }, + { + "epoch": 0.81, + "learning_rate": 1.8519852382401715e-05, + "loss": 1.1178, + "step": 4390 + }, + { + "epoch": 0.81, + "learning_rate": 1.8485261026498356e-05, + "loss": 1.2466, + "step": 4391 + }, + { + "epoch": 0.81, + "learning_rate": 1.845069871544477e-05, + "loss": 0.9731, + "step": 4392 + }, + { + "epoch": 0.81, + "learning_rate": 1.841616546155601e-05, + "loss": 1.0814, + "step": 4393 + }, + { + "epoch": 0.81, + "learning_rate": 1.838166127713672e-05, + "loss": 1.1395, + "step": 4394 + }, + { + "epoch": 0.81, + "learning_rate": 1.834718617448129e-05, + "loss": 1.097, + "step": 4395 + }, + { + "epoch": 0.81, + "learning_rate": 1.831274016587362e-05, + "loss": 1.1164, + "step": 4396 + }, + { + "epoch": 0.81, + "learning_rate": 1.8278323263587404e-05, + "loss": 1.0972, + "step": 4397 + }, + { + "epoch": 0.81, + "learning_rate": 1.8243935479885753e-05, + "loss": 1.2387, + "step": 4398 + }, + { + "epoch": 0.81, + "learning_rate": 1.82095768270216e-05, + "loss": 1.2001, + "step": 4399 + }, + { + "epoch": 0.81, + "learning_rate": 1.8175247317237366e-05, + "loss": 1.1527, + "step": 4400 + }, + { + "epoch": 0.81, + "learning_rate": 1.8140946962765194e-05, + "loss": 1.0786, + "step": 4401 + }, + { + "epoch": 0.81, + "learning_rate": 1.810667577582672e-05, + "loss": 1.1728, + "step": 4402 + }, + { + "epoch": 0.81, + "learning_rate": 1.8072433768633333e-05, + "loss": 1.026, + "step": 4403 + }, + { + "epoch": 0.81, + "learning_rate": 1.8038220953385853e-05, + "loss": 1.2197, + "step": 4404 + }, + { + "epoch": 0.81, + "learning_rate": 1.800403734227485e-05, + "loss": 1.1577, + "step": 4405 + }, + { + "epoch": 0.81, + "learning_rate": 1.7969882947480375e-05, + "loss": 1.1287, + "step": 4406 + }, + { + "epoch": 0.81, + "learning_rate": 1.793575778117218e-05, + "loss": 1.1311, + "step": 4407 + }, + { + "epoch": 0.81, + "learning_rate": 1.790166185550951e-05, + "loss": 1.0482, + "step": 4408 + }, + { + "epoch": 0.81, + "learning_rate": 1.7867595182641226e-05, + "loss": 1.0689, + "step": 4409 + }, + { + "epoch": 0.81, + "learning_rate": 1.7833557774705733e-05, + "loss": 0.997, + "step": 4410 + }, + { + "epoch": 0.81, + "learning_rate": 1.7799549643831104e-05, + "loss": 1.0839, + "step": 4411 + }, + { + "epoch": 0.81, + "learning_rate": 1.7765570802134844e-05, + "loss": 1.0842, + "step": 4412 + }, + { + "epoch": 0.81, + "learning_rate": 1.7731621261724164e-05, + "loss": 1.092, + "step": 4413 + }, + { + "epoch": 0.81, + "learning_rate": 1.7697701034695724e-05, + "loss": 1.0757, + "step": 4414 + }, + { + "epoch": 0.81, + "learning_rate": 1.7663810133135784e-05, + "loss": 1.161, + "step": 4415 + }, + { + "epoch": 0.81, + "learning_rate": 1.7629948569120126e-05, + "loss": 1.022, + "step": 4416 + }, + { + "epoch": 0.81, + "learning_rate": 1.7596116354714155e-05, + "loss": 1.1229, + "step": 4417 + }, + { + "epoch": 0.81, + "learning_rate": 1.7562313501972692e-05, + "loss": 0.9922, + "step": 4418 + }, + { + "epoch": 0.81, + "learning_rate": 1.7528540022940288e-05, + "loss": 1.0911, + "step": 4419 + }, + { + "epoch": 0.81, + "learning_rate": 1.7494795929650766e-05, + "loss": 0.9982, + "step": 4420 + }, + { + "epoch": 0.81, + "learning_rate": 1.746108123412773e-05, + "loss": 1.1743, + "step": 4421 + }, + { + "epoch": 0.81, + "learning_rate": 1.7427395948384117e-05, + "loss": 0.998, + "step": 4422 + }, + { + "epoch": 0.82, + "learning_rate": 1.739374008442256e-05, + "loss": 1.0741, + "step": 4423 + }, + { + "epoch": 0.82, + "learning_rate": 1.7360113654235034e-05, + "loss": 1.0517, + "step": 4424 + }, + { + "epoch": 0.82, + "learning_rate": 1.7326516669803193e-05, + "loss": 1.1076, + "step": 4425 + }, + { + "epoch": 0.82, + "learning_rate": 1.7292949143098026e-05, + "loss": 1.0897, + "step": 4426 + }, + { + "epoch": 0.82, + "learning_rate": 1.725941108608019e-05, + "loss": 1.0976, + "step": 4427 + }, + { + "epoch": 0.82, + "learning_rate": 1.7225902510699697e-05, + "loss": 1.1106, + "step": 4428 + }, + { + "epoch": 0.82, + "learning_rate": 1.7192423428896198e-05, + "loss": 1.1403, + "step": 4429 + }, + { + "epoch": 0.82, + "learning_rate": 1.7158973852598725e-05, + "loss": 1.1808, + "step": 4430 + }, + { + "epoch": 0.82, + "learning_rate": 1.7125553793725836e-05, + "loss": 1.1345, + "step": 4431 + }, + { + "epoch": 0.82, + "learning_rate": 1.7092163264185545e-05, + "loss": 1.0442, + "step": 4432 + }, + { + "epoch": 0.82, + "learning_rate": 1.7058802275875408e-05, + "loss": 1.0614, + "step": 4433 + }, + { + "epoch": 0.82, + "learning_rate": 1.7025470840682378e-05, + "loss": 1.0956, + "step": 4434 + }, + { + "epoch": 0.82, + "learning_rate": 1.699216897048297e-05, + "loss": 1.0534, + "step": 4435 + }, + { + "epoch": 0.82, + "learning_rate": 1.6958896677143065e-05, + "loss": 1.1523, + "step": 4436 + }, + { + "epoch": 0.82, + "learning_rate": 1.692565397251805e-05, + "loss": 1.0553, + "step": 4437 + }, + { + "epoch": 0.82, + "learning_rate": 1.6892440868452764e-05, + "loss": 0.9781, + "step": 4438 + }, + { + "epoch": 0.82, + "learning_rate": 1.6859257376781545e-05, + "loss": 0.9909, + "step": 4439 + }, + { + "epoch": 0.82, + "learning_rate": 1.6826103509328083e-05, + "loss": 0.9957, + "step": 4440 + }, + { + "epoch": 0.82, + "learning_rate": 1.679297927790565e-05, + "loss": 1.0365, + "step": 4441 + }, + { + "epoch": 0.82, + "learning_rate": 1.6759884694316774e-05, + "loss": 1.0476, + "step": 4442 + }, + { + "epoch": 0.82, + "learning_rate": 1.6726819770353585e-05, + "loss": 1.0473, + "step": 4443 + }, + { + "epoch": 0.82, + "learning_rate": 1.669378451779755e-05, + "loss": 1.0801, + "step": 4444 + }, + { + "epoch": 0.82, + "learning_rate": 1.666077894841964e-05, + "loss": 1.1323, + "step": 4445 + }, + { + "epoch": 0.82, + "learning_rate": 1.662780307398014e-05, + "loss": 1.0239, + "step": 4446 + }, + { + "epoch": 0.82, + "learning_rate": 1.6594856906228918e-05, + "loss": 1.0349, + "step": 4447 + }, + { + "epoch": 0.82, + "learning_rate": 1.6561940456905033e-05, + "loss": 1.1051, + "step": 4448 + }, + { + "epoch": 0.82, + "learning_rate": 1.6529053737737164e-05, + "loss": 1.0683, + "step": 4449 + }, + { + "epoch": 0.82, + "learning_rate": 1.649619676044327e-05, + "loss": 0.957, + "step": 4450 + }, + { + "epoch": 0.82, + "learning_rate": 1.64633695367308e-05, + "loss": 1.1026, + "step": 4451 + }, + { + "epoch": 0.82, + "learning_rate": 1.6430572078296525e-05, + "loss": 0.9712, + "step": 4452 + }, + { + "epoch": 0.82, + "learning_rate": 1.6397804396826643e-05, + "loss": 1.0689, + "step": 4453 + }, + { + "epoch": 0.82, + "learning_rate": 1.636506650399673e-05, + "loss": 1.2022, + "step": 4454 + }, + { + "epoch": 0.82, + "learning_rate": 1.6332358411471792e-05, + "loss": 0.916, + "step": 4455 + }, + { + "epoch": 0.82, + "learning_rate": 1.6299680130906138e-05, + "loss": 1.0738, + "step": 4456 + }, + { + "epoch": 0.82, + "learning_rate": 1.6267031673943543e-05, + "loss": 1.0925, + "step": 4457 + }, + { + "epoch": 0.82, + "learning_rate": 1.62344130522171e-05, + "loss": 1.1663, + "step": 4458 + }, + { + "epoch": 0.82, + "learning_rate": 1.6201824277349277e-05, + "loss": 1.0289, + "step": 4459 + }, + { + "epoch": 0.82, + "learning_rate": 1.616926536095189e-05, + "loss": 1.0351, + "step": 4460 + }, + { + "epoch": 0.82, + "learning_rate": 1.613673631462619e-05, + "loss": 1.2434, + "step": 4461 + }, + { + "epoch": 0.82, + "learning_rate": 1.6104237149962686e-05, + "loss": 1.1046, + "step": 4462 + }, + { + "epoch": 0.82, + "learning_rate": 1.6071767878541354e-05, + "loss": 1.1878, + "step": 4463 + }, + { + "epoch": 0.82, + "learning_rate": 1.6039328511931362e-05, + "loss": 1.1485, + "step": 4464 + }, + { + "epoch": 0.82, + "learning_rate": 1.6006919061691384e-05, + "loss": 1.099, + "step": 4465 + }, + { + "epoch": 0.82, + "learning_rate": 1.5974539539369328e-05, + "loss": 1.0728, + "step": 4466 + }, + { + "epoch": 0.82, + "learning_rate": 1.5942189956502497e-05, + "loss": 1.1709, + "step": 4467 + }, + { + "epoch": 0.82, + "learning_rate": 1.5909870324617472e-05, + "loss": 0.96, + "step": 4468 + }, + { + "epoch": 0.82, + "learning_rate": 1.587758065523025e-05, + "loss": 1.0229, + "step": 4469 + }, + { + "epoch": 0.82, + "learning_rate": 1.5845320959846023e-05, + "loss": 1.0554, + "step": 4470 + }, + { + "epoch": 0.82, + "learning_rate": 1.5813091249959434e-05, + "loss": 1.0018, + "step": 4471 + }, + { + "epoch": 0.82, + "learning_rate": 1.578089153705433e-05, + "loss": 1.1365, + "step": 4472 + }, + { + "epoch": 0.82, + "learning_rate": 1.5748721832603973e-05, + "loss": 1.1171, + "step": 4473 + }, + { + "epoch": 0.82, + "learning_rate": 1.571658214807087e-05, + "loss": 1.0844, + "step": 4474 + }, + { + "epoch": 0.82, + "learning_rate": 1.568447249490682e-05, + "loss": 1.1338, + "step": 4475 + }, + { + "epoch": 0.82, + "learning_rate": 1.5652392884552947e-05, + "loss": 1.0958, + "step": 4476 + }, + { + "epoch": 0.82, + "learning_rate": 1.5620343328439703e-05, + "loss": 1.1146, + "step": 4477 + }, + { + "epoch": 0.83, + "learning_rate": 1.558832383798674e-05, + "loss": 1.0593, + "step": 4478 + }, + { + "epoch": 0.83, + "learning_rate": 1.5556334424603114e-05, + "loss": 1.1555, + "step": 4479 + }, + { + "epoch": 0.83, + "learning_rate": 1.5524375099687072e-05, + "loss": 0.8507, + "step": 4480 + }, + { + "epoch": 0.83, + "learning_rate": 1.549244587462618e-05, + "loss": 1.1032, + "step": 4481 + }, + { + "epoch": 0.83, + "learning_rate": 1.5460546760797236e-05, + "loss": 0.9478, + "step": 4482 + }, + { + "epoch": 0.83, + "learning_rate": 1.54286777695664e-05, + "loss": 1.1436, + "step": 4483 + }, + { + "epoch": 0.83, + "learning_rate": 1.5396838912289e-05, + "loss": 1.2092, + "step": 4484 + }, + { + "epoch": 0.83, + "learning_rate": 1.5365030200309727e-05, + "loss": 1.0472, + "step": 4485 + }, + { + "epoch": 0.83, + "learning_rate": 1.5333251644962376e-05, + "loss": 1.0801, + "step": 4486 + }, + { + "epoch": 0.83, + "learning_rate": 1.5301503257570184e-05, + "loss": 1.1871, + "step": 4487 + }, + { + "epoch": 0.83, + "learning_rate": 1.5269785049445484e-05, + "loss": 1.0638, + "step": 4488 + }, + { + "epoch": 0.83, + "learning_rate": 1.5238097031889987e-05, + "loss": 1.0299, + "step": 4489 + }, + { + "epoch": 0.83, + "learning_rate": 1.5206439216194513e-05, + "loss": 1.0426, + "step": 4490 + }, + { + "epoch": 0.83, + "learning_rate": 1.5174811613639262e-05, + "loss": 1.0361, + "step": 4491 + }, + { + "epoch": 0.83, + "learning_rate": 1.5143214235493497e-05, + "loss": 1.1369, + "step": 4492 + }, + { + "epoch": 0.83, + "learning_rate": 1.5111647093015901e-05, + "loss": 1.157, + "step": 4493 + }, + { + "epoch": 0.83, + "learning_rate": 1.5080110197454223e-05, + "loss": 1.1365, + "step": 4494 + }, + { + "epoch": 0.83, + "learning_rate": 1.5048603560045549e-05, + "loss": 1.0744, + "step": 4495 + }, + { + "epoch": 0.83, + "learning_rate": 1.5017127192016134e-05, + "loss": 1.0166, + "step": 4496 + }, + { + "epoch": 0.83, + "learning_rate": 1.4985681104581428e-05, + "loss": 1.1754, + "step": 4497 + }, + { + "epoch": 0.83, + "learning_rate": 1.4954265308946114e-05, + "loss": 1.0248, + "step": 4498 + }, + { + "epoch": 0.83, + "learning_rate": 1.4922879816304113e-05, + "loss": 1.0108, + "step": 4499 + }, + { + "epoch": 0.83, + "learning_rate": 1.4891524637838473e-05, + "loss": 1.1446, + "step": 4500 + }, + { + "epoch": 0.83, + "learning_rate": 1.486019978472154e-05, + "loss": 1.0096, + "step": 4501 + }, + { + "epoch": 0.83, + "learning_rate": 1.482890526811478e-05, + "loss": 1.2132, + "step": 4502 + }, + { + "epoch": 0.83, + "learning_rate": 1.4797641099168858e-05, + "loss": 1.0349, + "step": 4503 + }, + { + "epoch": 0.83, + "learning_rate": 1.4766407289023609e-05, + "loss": 1.015, + "step": 4504 + }, + { + "epoch": 0.83, + "learning_rate": 1.4735203848808155e-05, + "loss": 1.1787, + "step": 4505 + }, + { + "epoch": 0.83, + "learning_rate": 1.4704030789640643e-05, + "loss": 1.1605, + "step": 4506 + }, + { + "epoch": 0.83, + "learning_rate": 1.4672888122628536e-05, + "loss": 1.1635, + "step": 4507 + }, + { + "epoch": 0.83, + "learning_rate": 1.4641775858868379e-05, + "loss": 1.1243, + "step": 4508 + }, + { + "epoch": 0.83, + "learning_rate": 1.4610694009445914e-05, + "loss": 1.0431, + "step": 4509 + }, + { + "epoch": 0.83, + "learning_rate": 1.4579642585436015e-05, + "loss": 1.2482, + "step": 4510 + }, + { + "epoch": 0.83, + "learning_rate": 1.4548621597902779e-05, + "loss": 1.1795, + "step": 4511 + }, + { + "epoch": 0.83, + "learning_rate": 1.4517631057899394e-05, + "loss": 1.1418, + "step": 4512 + }, + { + "epoch": 0.83, + "learning_rate": 1.4486670976468253e-05, + "loss": 1.1386, + "step": 4513 + }, + { + "epoch": 0.83, + "learning_rate": 1.4455741364640862e-05, + "loss": 1.0527, + "step": 4514 + }, + { + "epoch": 0.83, + "learning_rate": 1.4424842233437863e-05, + "loss": 1.2785, + "step": 4515 + }, + { + "epoch": 0.83, + "learning_rate": 1.4393973593869037e-05, + "loss": 0.8626, + "step": 4516 + }, + { + "epoch": 0.83, + "learning_rate": 1.4363135456933364e-05, + "loss": 1.1076, + "step": 4517 + }, + { + "epoch": 0.83, + "learning_rate": 1.4332327833618875e-05, + "loss": 1.109, + "step": 4518 + }, + { + "epoch": 0.83, + "learning_rate": 1.4301550734902736e-05, + "loss": 1.1147, + "step": 4519 + }, + { + "epoch": 0.83, + "learning_rate": 1.4270804171751317e-05, + "loss": 0.9804, + "step": 4520 + }, + { + "epoch": 0.83, + "learning_rate": 1.4240088155120023e-05, + "loss": 1.1061, + "step": 4521 + }, + { + "epoch": 0.83, + "learning_rate": 1.4209402695953388e-05, + "loss": 1.1665, + "step": 4522 + }, + { + "epoch": 0.83, + "learning_rate": 1.417874780518511e-05, + "loss": 1.1233, + "step": 4523 + }, + { + "epoch": 0.83, + "learning_rate": 1.414812349373793e-05, + "loss": 1.1522, + "step": 4524 + }, + { + "epoch": 0.83, + "learning_rate": 1.4117529772523707e-05, + "loss": 1.0337, + "step": 4525 + }, + { + "epoch": 0.83, + "learning_rate": 1.4086966652443468e-05, + "loss": 0.9545, + "step": 4526 + }, + { + "epoch": 0.83, + "learning_rate": 1.4056434144387253e-05, + "loss": 1.1805, + "step": 4527 + }, + { + "epoch": 0.83, + "learning_rate": 1.4025932259234198e-05, + "loss": 1.0216, + "step": 4528 + }, + { + "epoch": 0.83, + "learning_rate": 1.3995461007852605e-05, + "loss": 1.0598, + "step": 4529 + }, + { + "epoch": 0.83, + "learning_rate": 1.3965020401099781e-05, + "loss": 1.0633, + "step": 4530 + }, + { + "epoch": 0.83, + "learning_rate": 1.3934610449822128e-05, + "loss": 1.0314, + "step": 4531 + }, + { + "epoch": 0.84, + "learning_rate": 1.3904231164855186e-05, + "loss": 1.1477, + "step": 4532 + }, + { + "epoch": 0.84, + "learning_rate": 1.3873882557023488e-05, + "loss": 1.0451, + "step": 4533 + }, + { + "epoch": 0.84, + "learning_rate": 1.384356463714066e-05, + "loss": 1.2491, + "step": 4534 + }, + { + "epoch": 0.84, + "learning_rate": 1.3813277416009452e-05, + "loss": 1.2526, + "step": 4535 + }, + { + "epoch": 0.84, + "learning_rate": 1.3783020904421584e-05, + "loss": 1.0263, + "step": 4536 + }, + { + "epoch": 0.84, + "learning_rate": 1.3752795113157879e-05, + "loss": 0.973, + "step": 4537 + }, + { + "epoch": 0.84, + "learning_rate": 1.3722600052988243e-05, + "loss": 1.0481, + "step": 4538 + }, + { + "epoch": 0.84, + "learning_rate": 1.3692435734671583e-05, + "loss": 1.1587, + "step": 4539 + }, + { + "epoch": 0.84, + "learning_rate": 1.3662302168955877e-05, + "loss": 1.1702, + "step": 4540 + }, + { + "epoch": 0.84, + "learning_rate": 1.3632199366578103e-05, + "loss": 1.2457, + "step": 4541 + }, + { + "epoch": 0.84, + "learning_rate": 1.360212733826436e-05, + "loss": 1.106, + "step": 4542 + }, + { + "epoch": 0.84, + "learning_rate": 1.3572086094729709e-05, + "loss": 1.1156, + "step": 4543 + }, + { + "epoch": 0.84, + "learning_rate": 1.3542075646678286e-05, + "loss": 1.1323, + "step": 4544 + }, + { + "epoch": 0.84, + "learning_rate": 1.3512096004803232e-05, + "loss": 1.1451, + "step": 4545 + }, + { + "epoch": 0.84, + "learning_rate": 1.348214717978672e-05, + "loss": 0.9775, + "step": 4546 + }, + { + "epoch": 0.84, + "learning_rate": 1.3452229182299892e-05, + "loss": 1.1475, + "step": 4547 + }, + { + "epoch": 0.84, + "learning_rate": 1.3422342023003021e-05, + "loss": 1.0424, + "step": 4548 + }, + { + "epoch": 0.84, + "learning_rate": 1.3392485712545266e-05, + "loss": 0.9339, + "step": 4549 + }, + { + "epoch": 0.84, + "learning_rate": 1.3362660261564908e-05, + "loss": 1.1266, + "step": 4550 + }, + { + "epoch": 0.84, + "learning_rate": 1.3332865680689132e-05, + "loss": 1.023, + "step": 4551 + }, + { + "epoch": 0.84, + "learning_rate": 1.3303101980534184e-05, + "loss": 1.2101, + "step": 4552 + }, + { + "epoch": 0.84, + "learning_rate": 1.3273369171705263e-05, + "loss": 1.0938, + "step": 4553 + }, + { + "epoch": 0.84, + "learning_rate": 1.3243667264796633e-05, + "loss": 1.1656, + "step": 4554 + }, + { + "epoch": 0.84, + "learning_rate": 1.3213996270391448e-05, + "loss": 1.0657, + "step": 4555 + }, + { + "epoch": 0.84, + "learning_rate": 1.3184356199061965e-05, + "loss": 1.1322, + "step": 4556 + }, + { + "epoch": 0.84, + "learning_rate": 1.315474706136931e-05, + "loss": 0.9879, + "step": 4557 + }, + { + "epoch": 0.84, + "learning_rate": 1.3125168867863668e-05, + "loss": 1.0406, + "step": 4558 + }, + { + "epoch": 0.84, + "learning_rate": 1.3095621629084109e-05, + "loss": 0.9611, + "step": 4559 + }, + { + "epoch": 0.84, + "learning_rate": 1.306610535555881e-05, + "loss": 1.008, + "step": 4560 + }, + { + "epoch": 0.84, + "learning_rate": 1.303662005780476e-05, + "loss": 1.2059, + "step": 4561 + }, + { + "epoch": 0.84, + "learning_rate": 1.3007165746328064e-05, + "loss": 0.9282, + "step": 4562 + }, + { + "epoch": 0.84, + "learning_rate": 1.297774243162363e-05, + "loss": 1.1274, + "step": 4563 + }, + { + "epoch": 0.84, + "learning_rate": 1.2948350124175456e-05, + "loss": 1.0291, + "step": 4564 + }, + { + "epoch": 0.84, + "learning_rate": 1.2918988834456392e-05, + "loss": 1.0925, + "step": 4565 + }, + { + "epoch": 0.84, + "learning_rate": 1.288965857292832e-05, + "loss": 1.0822, + "step": 4566 + }, + { + "epoch": 0.84, + "learning_rate": 1.2860359350041984e-05, + "loss": 0.9658, + "step": 4567 + }, + { + "epoch": 0.84, + "learning_rate": 1.2831091176237175e-05, + "loss": 1.1759, + "step": 4568 + }, + { + "epoch": 0.84, + "learning_rate": 1.2801854061942475e-05, + "loss": 1.1405, + "step": 4569 + }, + { + "epoch": 0.84, + "learning_rate": 1.2772648017575539e-05, + "loss": 1.1029, + "step": 4570 + }, + { + "epoch": 0.84, + "learning_rate": 1.2743473053542842e-05, + "loss": 1.2343, + "step": 4571 + }, + { + "epoch": 0.84, + "learning_rate": 1.2714329180239892e-05, + "loss": 1.1688, + "step": 4572 + }, + { + "epoch": 0.84, + "learning_rate": 1.2685216408051026e-05, + "loss": 0.9445, + "step": 4573 + }, + { + "epoch": 0.84, + "learning_rate": 1.2656134747349557e-05, + "loss": 1.0808, + "step": 4574 + }, + { + "epoch": 0.84, + "learning_rate": 1.2627084208497642e-05, + "loss": 1.1188, + "step": 4575 + }, + { + "epoch": 0.84, + "learning_rate": 1.2598064801846454e-05, + "loss": 1.0032, + "step": 4576 + }, + { + "epoch": 0.84, + "learning_rate": 1.2569076537735969e-05, + "loss": 1.0894, + "step": 4577 + }, + { + "epoch": 0.84, + "learning_rate": 1.2540119426495156e-05, + "loss": 1.1801, + "step": 4578 + }, + { + "epoch": 0.84, + "learning_rate": 1.251119347844183e-05, + "loss": 1.2062, + "step": 4579 + }, + { + "epoch": 0.84, + "learning_rate": 1.24822987038827e-05, + "loss": 1.2462, + "step": 4580 + }, + { + "epoch": 0.84, + "learning_rate": 1.2453435113113377e-05, + "loss": 1.1142, + "step": 4581 + }, + { + "epoch": 0.84, + "learning_rate": 1.2424602716418398e-05, + "loss": 1.0634, + "step": 4582 + }, + { + "epoch": 0.84, + "learning_rate": 1.2395801524071104e-05, + "loss": 0.9893, + "step": 4583 + }, + { + "epoch": 0.84, + "learning_rate": 1.2367031546333829e-05, + "loss": 1.0788, + "step": 4584 + }, + { + "epoch": 0.84, + "learning_rate": 1.233829279345765e-05, + "loss": 1.1759, + "step": 4585 + }, + { + "epoch": 0.85, + "learning_rate": 1.2309585275682645e-05, + "loss": 1.1992, + "step": 4586 + }, + { + "epoch": 0.85, + "learning_rate": 1.2280909003237662e-05, + "loss": 1.1148, + "step": 4587 + }, + { + "epoch": 0.85, + "learning_rate": 1.2252263986340495e-05, + "loss": 1.0865, + "step": 4588 + }, + { + "epoch": 0.85, + "learning_rate": 1.2223650235197747e-05, + "loss": 1.204, + "step": 4589 + }, + { + "epoch": 0.85, + "learning_rate": 1.2195067760004953e-05, + "loss": 1.0609, + "step": 4590 + }, + { + "epoch": 0.85, + "learning_rate": 1.2166516570946363e-05, + "loss": 0.9064, + "step": 4591 + }, + { + "epoch": 0.85, + "learning_rate": 1.213799667819524e-05, + "loss": 1.1236, + "step": 4592 + }, + { + "epoch": 0.85, + "learning_rate": 1.2109508091913568e-05, + "loss": 1.1401, + "step": 4593 + }, + { + "epoch": 0.85, + "learning_rate": 1.2081050822252294e-05, + "loss": 1.2443, + "step": 4594 + }, + { + "epoch": 0.85, + "learning_rate": 1.2052624879351104e-05, + "loss": 1.1383, + "step": 4595 + }, + { + "epoch": 0.85, + "learning_rate": 1.2024230273338567e-05, + "loss": 1.0192, + "step": 4596 + }, + { + "epoch": 0.85, + "learning_rate": 1.1995867014332063e-05, + "loss": 1.0027, + "step": 4597 + }, + { + "epoch": 0.85, + "learning_rate": 1.1967535112437855e-05, + "loss": 1.0695, + "step": 4598 + }, + { + "epoch": 0.85, + "learning_rate": 1.1939234577750979e-05, + "loss": 1.0195, + "step": 4599 + }, + { + "epoch": 0.85, + "learning_rate": 1.1910965420355324e-05, + "loss": 1.0874, + "step": 4600 + }, + { + "epoch": 0.85, + "learning_rate": 1.18827276503236e-05, + "loss": 1.1043, + "step": 4601 + }, + { + "epoch": 0.85, + "learning_rate": 1.185452127771729e-05, + "loss": 1.0545, + "step": 4602 + }, + { + "epoch": 0.85, + "learning_rate": 1.1826346312586733e-05, + "loss": 1.0288, + "step": 4603 + }, + { + "epoch": 0.85, + "learning_rate": 1.179820276497109e-05, + "loss": 1.0004, + "step": 4604 + }, + { + "epoch": 0.85, + "learning_rate": 1.1770090644898257e-05, + "loss": 1.1198, + "step": 4605 + }, + { + "epoch": 0.85, + "learning_rate": 1.1742009962385048e-05, + "loss": 1.0906, + "step": 4606 + }, + { + "epoch": 0.85, + "learning_rate": 1.1713960727436924e-05, + "loss": 1.0811, + "step": 4607 + }, + { + "epoch": 0.85, + "learning_rate": 1.168594295004829e-05, + "loss": 1.2069, + "step": 4608 + }, + { + "epoch": 0.85, + "learning_rate": 1.1657956640202217e-05, + "loss": 1.0471, + "step": 4609 + }, + { + "epoch": 0.85, + "learning_rate": 1.1630001807870684e-05, + "loss": 0.9602, + "step": 4610 + }, + { + "epoch": 0.85, + "learning_rate": 1.1602078463014332e-05, + "loss": 1.079, + "step": 4611 + }, + { + "epoch": 0.85, + "learning_rate": 1.1574186615582706e-05, + "loss": 1.0714, + "step": 4612 + }, + { + "epoch": 0.85, + "learning_rate": 1.1546326275513996e-05, + "loss": 1.2122, + "step": 4613 + }, + { + "epoch": 0.85, + "learning_rate": 1.1518497452735277e-05, + "loss": 1.1043, + "step": 4614 + }, + { + "epoch": 0.85, + "learning_rate": 1.1490700157162327e-05, + "loss": 1.122, + "step": 4615 + }, + { + "epoch": 0.85, + "learning_rate": 1.1462934398699743e-05, + "loss": 0.9868, + "step": 4616 + }, + { + "epoch": 0.85, + "learning_rate": 1.1435200187240836e-05, + "loss": 1.1501, + "step": 4617 + }, + { + "epoch": 0.85, + "learning_rate": 1.140749753266771e-05, + "loss": 0.9767, + "step": 4618 + }, + { + "epoch": 0.85, + "learning_rate": 1.1379826444851172e-05, + "loss": 1.1661, + "step": 4619 + }, + { + "epoch": 0.85, + "learning_rate": 1.1352186933650866e-05, + "loss": 1.0787, + "step": 4620 + }, + { + "epoch": 0.85, + "learning_rate": 1.1324579008915104e-05, + "loss": 1.0201, + "step": 4621 + }, + { + "epoch": 0.85, + "learning_rate": 1.1297002680481017e-05, + "loss": 1.1781, + "step": 4622 + }, + { + "epoch": 0.85, + "learning_rate": 1.1269457958174412e-05, + "loss": 1.0644, + "step": 4623 + }, + { + "epoch": 0.85, + "learning_rate": 1.1241944851809883e-05, + "loss": 0.9305, + "step": 4624 + }, + { + "epoch": 0.85, + "learning_rate": 1.1214463371190686e-05, + "loss": 1.0575, + "step": 4625 + }, + { + "epoch": 0.85, + "learning_rate": 1.1187013526108925e-05, + "loss": 1.0814, + "step": 4626 + }, + { + "epoch": 0.85, + "learning_rate": 1.1159595326345318e-05, + "loss": 1.0631, + "step": 4627 + }, + { + "epoch": 0.85, + "learning_rate": 1.1132208781669417e-05, + "loss": 1.1386, + "step": 4628 + }, + { + "epoch": 0.85, + "learning_rate": 1.110485390183934e-05, + "loss": 1.1486, + "step": 4629 + }, + { + "epoch": 0.85, + "learning_rate": 1.10775306966021e-05, + "loss": 1.0878, + "step": 4630 + }, + { + "epoch": 0.85, + "learning_rate": 1.1050239175693288e-05, + "loss": 1.0271, + "step": 4631 + }, + { + "epoch": 0.85, + "learning_rate": 1.1022979348837292e-05, + "loss": 1.0401, + "step": 4632 + }, + { + "epoch": 0.85, + "learning_rate": 1.0995751225747164e-05, + "loss": 1.1617, + "step": 4633 + }, + { + "epoch": 0.85, + "learning_rate": 1.0968554816124653e-05, + "loss": 1.1252, + "step": 4634 + }, + { + "epoch": 0.85, + "learning_rate": 1.0941390129660211e-05, + "loss": 0.9315, + "step": 4635 + }, + { + "epoch": 0.85, + "learning_rate": 1.0914257176033049e-05, + "loss": 1.1999, + "step": 4636 + }, + { + "epoch": 0.85, + "learning_rate": 1.0887155964910967e-05, + "loss": 1.0759, + "step": 4637 + }, + { + "epoch": 0.85, + "learning_rate": 1.0860086505950551e-05, + "loss": 1.2932, + "step": 4638 + }, + { + "epoch": 0.85, + "learning_rate": 1.0833048808797008e-05, + "loss": 1.0347, + "step": 4639 + }, + { + "epoch": 0.86, + "learning_rate": 1.0806042883084255e-05, + "loss": 1.1041, + "step": 4640 + }, + { + "epoch": 0.86, + "learning_rate": 1.077906873843486e-05, + "loss": 1.0715, + "step": 4641 + }, + { + "epoch": 0.86, + "learning_rate": 1.0752126384460138e-05, + "loss": 1.0909, + "step": 4642 + }, + { + "epoch": 0.86, + "learning_rate": 1.0725215830759982e-05, + "loss": 1.1472, + "step": 4643 + }, + { + "epoch": 0.86, + "learning_rate": 1.0698337086923038e-05, + "loss": 1.1227, + "step": 4644 + }, + { + "epoch": 0.86, + "learning_rate": 1.0671490162526577e-05, + "loss": 1.0824, + "step": 4645 + }, + { + "epoch": 0.86, + "learning_rate": 1.0644675067136522e-05, + "loss": 1.0921, + "step": 4646 + }, + { + "epoch": 0.86, + "learning_rate": 1.0617891810307456e-05, + "loss": 0.9835, + "step": 4647 + }, + { + "epoch": 0.86, + "learning_rate": 1.0591140401582666e-05, + "loss": 1.1538, + "step": 4648 + }, + { + "epoch": 0.86, + "learning_rate": 1.0564420850494017e-05, + "loss": 1.0945, + "step": 4649 + }, + { + "epoch": 0.86, + "learning_rate": 1.0537733166562103e-05, + "loss": 1.1268, + "step": 4650 + }, + { + "epoch": 0.86, + "learning_rate": 1.0511077359296096e-05, + "loss": 1.1565, + "step": 4651 + }, + { + "epoch": 0.86, + "learning_rate": 1.0484453438193843e-05, + "loss": 1.0317, + "step": 4652 + }, + { + "epoch": 0.86, + "learning_rate": 1.045786141274181e-05, + "loss": 1.085, + "step": 4653 + }, + { + "epoch": 0.86, + "learning_rate": 1.043130129241513e-05, + "loss": 1.1702, + "step": 4654 + }, + { + "epoch": 0.86, + "learning_rate": 1.0404773086677532e-05, + "loss": 1.1101, + "step": 4655 + }, + { + "epoch": 0.86, + "learning_rate": 1.0378276804981391e-05, + "loss": 1.0383, + "step": 4656 + }, + { + "epoch": 0.86, + "learning_rate": 1.0351812456767718e-05, + "loss": 1.2931, + "step": 4657 + }, + { + "epoch": 0.86, + "learning_rate": 1.0325380051466126e-05, + "loss": 1.0755, + "step": 4658 + }, + { + "epoch": 0.86, + "learning_rate": 1.029897959849484e-05, + "loss": 1.1735, + "step": 4659 + }, + { + "epoch": 0.86, + "learning_rate": 1.0272611107260743e-05, + "loss": 1.0943, + "step": 4660 + }, + { + "epoch": 0.86, + "learning_rate": 1.0246274587159278e-05, + "loss": 1.0228, + "step": 4661 + }, + { + "epoch": 0.86, + "learning_rate": 1.0219970047574512e-05, + "loss": 1.1152, + "step": 4662 + }, + { + "epoch": 0.86, + "learning_rate": 1.019369749787914e-05, + "loss": 1.1612, + "step": 4663 + }, + { + "epoch": 0.86, + "learning_rate": 1.016745694743444e-05, + "loss": 1.071, + "step": 4664 + }, + { + "epoch": 0.86, + "learning_rate": 1.0141248405590275e-05, + "loss": 1.0229, + "step": 4665 + }, + { + "epoch": 0.86, + "learning_rate": 1.0115071881685134e-05, + "loss": 1.0829, + "step": 4666 + }, + { + "epoch": 0.86, + "learning_rate": 1.0088927385046077e-05, + "loss": 1.1291, + "step": 4667 + }, + { + "epoch": 0.86, + "learning_rate": 1.006281492498874e-05, + "loss": 1.1198, + "step": 4668 + }, + { + "epoch": 0.86, + "learning_rate": 1.0036734510817391e-05, + "loss": 0.9113, + "step": 4669 + }, + { + "epoch": 0.86, + "learning_rate": 1.0010686151824832e-05, + "loss": 1.0496, + "step": 4670 + }, + { + "epoch": 0.86, + "learning_rate": 9.98466985729245e-06, + "loss": 1.1001, + "step": 4671 + }, + { + "epoch": 0.86, + "learning_rate": 9.958685636490239e-06, + "loss": 1.0645, + "step": 4672 + }, + { + "epoch": 0.86, + "learning_rate": 9.932733498676727e-06, + "loss": 1.0529, + "step": 4673 + }, + { + "epoch": 0.86, + "learning_rate": 9.906813453099018e-06, + "loss": 0.9616, + "step": 4674 + }, + { + "epoch": 0.86, + "learning_rate": 9.880925508992822e-06, + "loss": 0.981, + "step": 4675 + }, + { + "epoch": 0.86, + "learning_rate": 9.855069675582362e-06, + "loss": 1.0335, + "step": 4676 + }, + { + "epoch": 0.86, + "learning_rate": 9.829245962080425e-06, + "loss": 1.0724, + "step": 4677 + }, + { + "epoch": 0.86, + "learning_rate": 9.803454377688359e-06, + "loss": 1.1379, + "step": 4678 + }, + { + "epoch": 0.86, + "learning_rate": 9.777694931596093e-06, + "loss": 1.0464, + "step": 4679 + }, + { + "epoch": 0.86, + "learning_rate": 9.751967632982062e-06, + "loss": 1.1679, + "step": 4680 + }, + { + "epoch": 0.86, + "learning_rate": 9.726272491013277e-06, + "loss": 1.024, + "step": 4681 + }, + { + "epoch": 0.86, + "learning_rate": 9.700609514845282e-06, + "loss": 1.1703, + "step": 4682 + }, + { + "epoch": 0.86, + "learning_rate": 9.67497871362214e-06, + "loss": 1.1177, + "step": 4683 + }, + { + "epoch": 0.86, + "learning_rate": 9.649380096476468e-06, + "loss": 1.1347, + "step": 4684 + }, + { + "epoch": 0.86, + "learning_rate": 9.623813672529435e-06, + "loss": 1.1728, + "step": 4685 + }, + { + "epoch": 0.86, + "learning_rate": 9.59827945089068e-06, + "loss": 1.2297, + "step": 4686 + }, + { + "epoch": 0.86, + "learning_rate": 9.572777440658453e-06, + "loss": 1.1337, + "step": 4687 + }, + { + "epoch": 0.86, + "learning_rate": 9.547307650919456e-06, + "loss": 1.0346, + "step": 4688 + }, + { + "epoch": 0.86, + "learning_rate": 9.52187009074893e-06, + "loss": 1.1072, + "step": 4689 + }, + { + "epoch": 0.86, + "learning_rate": 9.496464769210622e-06, + "loss": 1.1352, + "step": 4690 + }, + { + "epoch": 0.86, + "learning_rate": 9.471091695356848e-06, + "loss": 1.0404, + "step": 4691 + }, + { + "epoch": 0.86, + "learning_rate": 9.445750878228354e-06, + "loss": 1.0812, + "step": 4692 + }, + { + "epoch": 0.86, + "learning_rate": 9.420442326854451e-06, + "loss": 1.0816, + "step": 4693 + }, + { + "epoch": 0.86, + "learning_rate": 9.395166050252935e-06, + "loss": 1.0467, + "step": 4694 + }, + { + "epoch": 0.87, + "learning_rate": 9.36992205743008e-06, + "loss": 1.1059, + "step": 4695 + }, + { + "epoch": 0.87, + "learning_rate": 9.344710357380671e-06, + "loss": 1.1982, + "step": 4696 + }, + { + "epoch": 0.87, + "learning_rate": 9.31953095908803e-06, + "loss": 0.9536, + "step": 4697 + }, + { + "epoch": 0.87, + "learning_rate": 9.294383871523882e-06, + "loss": 1.1094, + "step": 4698 + }, + { + "epoch": 0.87, + "learning_rate": 9.269269103648558e-06, + "loss": 1.1066, + "step": 4699 + }, + { + "epoch": 0.87, + "learning_rate": 9.244186664410715e-06, + "loss": 0.982, + "step": 4700 + }, + { + "epoch": 0.87, + "learning_rate": 9.219136562747655e-06, + "loss": 1.278, + "step": 4701 + }, + { + "epoch": 0.87, + "learning_rate": 9.194118807585029e-06, + "loss": 1.0392, + "step": 4702 + }, + { + "epoch": 0.87, + "learning_rate": 9.169133407837049e-06, + "loss": 1.2486, + "step": 4703 + }, + { + "epoch": 0.87, + "learning_rate": 9.144180372406341e-06, + "loss": 0.9807, + "step": 4704 + }, + { + "epoch": 0.87, + "learning_rate": 9.11925971018408e-06, + "loss": 1.0851, + "step": 4705 + }, + { + "epoch": 0.87, + "learning_rate": 9.094371430049764e-06, + "loss": 0.9396, + "step": 4706 + }, + { + "epoch": 0.87, + "learning_rate": 9.069515540871499e-06, + "loss": 1.0928, + "step": 4707 + }, + { + "epoch": 0.87, + "learning_rate": 9.044692051505744e-06, + "loss": 1.0755, + "step": 4708 + }, + { + "epoch": 0.87, + "learning_rate": 9.019900970797502e-06, + "loss": 1.1545, + "step": 4709 + }, + { + "epoch": 0.87, + "learning_rate": 8.995142307580162e-06, + "loss": 1.0463, + "step": 4710 + }, + { + "epoch": 0.87, + "learning_rate": 8.970416070675603e-06, + "loss": 1.2609, + "step": 4711 + }, + { + "epoch": 0.87, + "learning_rate": 8.945722268894085e-06, + "loss": 1.1596, + "step": 4712 + }, + { + "epoch": 0.87, + "learning_rate": 8.921060911034407e-06, + "loss": 1.0385, + "step": 4713 + }, + { + "epoch": 0.87, + "learning_rate": 8.896432005883726e-06, + "loss": 1.1232, + "step": 4714 + }, + { + "epoch": 0.87, + "learning_rate": 8.87183556221769e-06, + "loss": 1.0386, + "step": 4715 + }, + { + "epoch": 0.87, + "learning_rate": 8.847271588800354e-06, + "loss": 1.1724, + "step": 4716 + }, + { + "epoch": 0.87, + "learning_rate": 8.822740094384208e-06, + "loss": 1.0903, + "step": 4717 + }, + { + "epoch": 0.87, + "learning_rate": 8.798241087710147e-06, + "loss": 1.1044, + "step": 4718 + }, + { + "epoch": 0.87, + "learning_rate": 8.773774577507542e-06, + "loss": 1.1522, + "step": 4719 + }, + { + "epoch": 0.87, + "learning_rate": 8.749340572494125e-06, + "loss": 1.0552, + "step": 4720 + }, + { + "epoch": 0.87, + "learning_rate": 8.724939081376127e-06, + "loss": 1.0534, + "step": 4721 + }, + { + "epoch": 0.87, + "learning_rate": 8.700570112848083e-06, + "loss": 1.1445, + "step": 4722 + }, + { + "epoch": 0.87, + "learning_rate": 8.676233675593037e-06, + "loss": 1.1305, + "step": 4723 + }, + { + "epoch": 0.87, + "learning_rate": 8.651929778282387e-06, + "loss": 1.0612, + "step": 4724 + }, + { + "epoch": 0.87, + "learning_rate": 8.627658429575968e-06, + "loss": 1.1627, + "step": 4725 + }, + { + "epoch": 0.87, + "learning_rate": 8.60341963812199e-06, + "loss": 0.9738, + "step": 4726 + }, + { + "epoch": 0.87, + "learning_rate": 8.579213412557107e-06, + "loss": 1.0835, + "step": 4727 + }, + { + "epoch": 0.87, + "learning_rate": 8.555039761506277e-06, + "loss": 1.1519, + "step": 4728 + }, + { + "epoch": 0.87, + "learning_rate": 8.530898693582979e-06, + "loss": 1.0958, + "step": 4729 + }, + { + "epoch": 0.87, + "learning_rate": 8.50679021738896e-06, + "loss": 1.0079, + "step": 4730 + }, + { + "epoch": 0.87, + "learning_rate": 8.482714341514453e-06, + "loss": 1.0504, + "step": 4731 + }, + { + "epoch": 0.87, + "learning_rate": 8.458671074538017e-06, + "loss": 1.0999, + "step": 4732 + }, + { + "epoch": 0.87, + "learning_rate": 8.434660425026597e-06, + "loss": 1.0899, + "step": 4733 + }, + { + "epoch": 0.87, + "learning_rate": 8.41068240153552e-06, + "loss": 1.13, + "step": 4734 + }, + { + "epoch": 0.87, + "learning_rate": 8.386737012608526e-06, + "loss": 1.1519, + "step": 4735 + }, + { + "epoch": 0.87, + "learning_rate": 8.362824266777658e-06, + "loss": 1.0573, + "step": 4736 + }, + { + "epoch": 0.87, + "learning_rate": 8.33894417256339e-06, + "loss": 1.0904, + "step": 4737 + }, + { + "epoch": 0.87, + "learning_rate": 8.315096738474537e-06, + "loss": 1.1048, + "step": 4738 + }, + { + "epoch": 0.87, + "learning_rate": 8.291281973008259e-06, + "loss": 1.0745, + "step": 4739 + }, + { + "epoch": 0.87, + "learning_rate": 8.267499884650076e-06, + "loss": 1.1991, + "step": 4740 + }, + { + "epoch": 0.87, + "learning_rate": 8.243750481873924e-06, + "loss": 1.0751, + "step": 4741 + }, + { + "epoch": 0.87, + "learning_rate": 8.220033773142023e-06, + "loss": 1.1404, + "step": 4742 + }, + { + "epoch": 0.87, + "learning_rate": 8.196349766904998e-06, + "loss": 1.1171, + "step": 4743 + }, + { + "epoch": 0.87, + "learning_rate": 8.172698471601748e-06, + "loss": 1.1011, + "step": 4744 + }, + { + "epoch": 0.87, + "learning_rate": 8.149079895659595e-06, + "loss": 1.1715, + "step": 4745 + }, + { + "epoch": 0.87, + "learning_rate": 8.125494047494153e-06, + "loss": 0.982, + "step": 4746 + }, + { + "epoch": 0.87, + "learning_rate": 8.101940935509412e-06, + "loss": 1.0897, + "step": 4747 + }, + { + "epoch": 0.87, + "learning_rate": 8.078420568097656e-06, + "loss": 1.0506, + "step": 4748 + }, + { + "epoch": 0.88, + "learning_rate": 8.054932953639571e-06, + "loss": 1.1725, + "step": 4749 + }, + { + "epoch": 0.88, + "learning_rate": 8.031478100504053e-06, + "loss": 1.0432, + "step": 4750 + }, + { + "epoch": 0.88, + "learning_rate": 8.008056017048449e-06, + "loss": 1.009, + "step": 4751 + }, + { + "epoch": 0.88, + "learning_rate": 7.984666711618337e-06, + "loss": 1.0975, + "step": 4752 + }, + { + "epoch": 0.88, + "learning_rate": 7.961310192547711e-06, + "loss": 1.1056, + "step": 4753 + }, + { + "epoch": 0.88, + "learning_rate": 7.937986468158787e-06, + "loss": 1.1234, + "step": 4754 + }, + { + "epoch": 0.88, + "learning_rate": 7.914695546762141e-06, + "loss": 0.9648, + "step": 4755 + }, + { + "epoch": 0.88, + "learning_rate": 7.891437436656646e-06, + "loss": 1.0199, + "step": 4756 + }, + { + "epoch": 0.88, + "learning_rate": 7.86821214612954e-06, + "loss": 1.1264, + "step": 4757 + }, + { + "epoch": 0.88, + "learning_rate": 7.845019683456256e-06, + "loss": 1.0433, + "step": 4758 + }, + { + "epoch": 0.88, + "learning_rate": 7.821860056900654e-06, + "loss": 1.1119, + "step": 4759 + }, + { + "epoch": 0.88, + "learning_rate": 7.79873327471482e-06, + "loss": 1.0488, + "step": 4760 + }, + { + "epoch": 0.88, + "learning_rate": 7.77563934513913e-06, + "loss": 1.2036, + "step": 4761 + }, + { + "epoch": 0.88, + "learning_rate": 7.752578276402278e-06, + "loss": 1.028, + "step": 4762 + }, + { + "epoch": 0.88, + "learning_rate": 7.72955007672127e-06, + "loss": 1.1706, + "step": 4763 + }, + { + "epoch": 0.88, + "learning_rate": 7.706554754301365e-06, + "loss": 1.0833, + "step": 4764 + }, + { + "epoch": 0.88, + "learning_rate": 7.683592317336142e-06, + "loss": 1.095, + "step": 4765 + }, + { + "epoch": 0.88, + "learning_rate": 7.660662774007398e-06, + "loss": 1.0097, + "step": 4766 + }, + { + "epoch": 0.88, + "learning_rate": 7.63776613248528e-06, + "loss": 1.0721, + "step": 4767 + }, + { + "epoch": 0.88, + "learning_rate": 7.614902400928181e-06, + "loss": 1.0151, + "step": 4768 + }, + { + "epoch": 0.88, + "learning_rate": 7.592071587482774e-06, + "loss": 1.0896, + "step": 4769 + }, + { + "epoch": 0.88, + "learning_rate": 7.569273700283974e-06, + "loss": 1.0892, + "step": 4770 + }, + { + "epoch": 0.88, + "learning_rate": 7.54650874745505e-06, + "loss": 1.104, + "step": 4771 + }, + { + "epoch": 0.88, + "learning_rate": 7.523776737107402e-06, + "loss": 1.1255, + "step": 4772 + }, + { + "epoch": 0.88, + "learning_rate": 7.5010776773408176e-06, + "loss": 1.1362, + "step": 4773 + }, + { + "epoch": 0.88, + "learning_rate": 7.478411576243249e-06, + "loss": 0.9929, + "step": 4774 + }, + { + "epoch": 0.88, + "learning_rate": 7.455778441890993e-06, + "loss": 1.2026, + "step": 4775 + }, + { + "epoch": 0.88, + "learning_rate": 7.4331782823485295e-06, + "loss": 1.1838, + "step": 4776 + }, + { + "epoch": 0.88, + "learning_rate": 7.410611105668619e-06, + "loss": 1.0532, + "step": 4777 + }, + { + "epoch": 0.88, + "learning_rate": 7.38807691989225e-06, + "loss": 1.1568, + "step": 4778 + }, + { + "epoch": 0.88, + "learning_rate": 7.365575733048691e-06, + "loss": 1.1277, + "step": 4779 + }, + { + "epoch": 0.88, + "learning_rate": 7.343107553155404e-06, + "loss": 1.0359, + "step": 4780 + }, + { + "epoch": 0.88, + "learning_rate": 7.320672388218164e-06, + "loss": 0.9714, + "step": 4781 + }, + { + "epoch": 0.88, + "learning_rate": 7.2982702462308986e-06, + "loss": 1.042, + "step": 4782 + }, + { + "epoch": 0.88, + "learning_rate": 7.2759011351758225e-06, + "loss": 1.1202, + "step": 4783 + }, + { + "epoch": 0.88, + "learning_rate": 7.253565063023349e-06, + "loss": 1.0842, + "step": 4784 + }, + { + "epoch": 0.88, + "learning_rate": 7.231262037732156e-06, + "loss": 1.1149, + "step": 4785 + }, + { + "epoch": 0.88, + "learning_rate": 7.208992067249099e-06, + "loss": 1.0811, + "step": 4786 + }, + { + "epoch": 0.88, + "learning_rate": 7.1867551595093396e-06, + "loss": 1.2003, + "step": 4787 + }, + { + "epoch": 0.88, + "learning_rate": 7.16455132243612e-06, + "loss": 1.1016, + "step": 4788 + }, + { + "epoch": 0.88, + "learning_rate": 7.142380563941042e-06, + "loss": 1.1024, + "step": 4789 + }, + { + "epoch": 0.88, + "learning_rate": 7.120242891923823e-06, + "loss": 0.9457, + "step": 4790 + }, + { + "epoch": 0.88, + "learning_rate": 7.098138314272451e-06, + "loss": 1.1389, + "step": 4791 + }, + { + "epoch": 0.88, + "learning_rate": 7.076066838863083e-06, + "loss": 1.053, + "step": 4792 + }, + { + "epoch": 0.88, + "learning_rate": 7.054028473560126e-06, + "loss": 0.9613, + "step": 4793 + }, + { + "epoch": 0.88, + "learning_rate": 7.032023226216111e-06, + "loss": 1.1645, + "step": 4794 + }, + { + "epoch": 0.88, + "learning_rate": 7.010051104671866e-06, + "loss": 1.1669, + "step": 4795 + }, + { + "epoch": 0.88, + "learning_rate": 6.988112116756318e-06, + "loss": 1.1529, + "step": 4796 + }, + { + "epoch": 0.88, + "learning_rate": 6.9662062702866906e-06, + "loss": 1.1762, + "step": 4797 + }, + { + "epoch": 0.88, + "learning_rate": 6.944333573068307e-06, + "loss": 0.9973, + "step": 4798 + }, + { + "epoch": 0.88, + "learning_rate": 6.922494032894744e-06, + "loss": 1.061, + "step": 4799 + }, + { + "epoch": 0.88, + "learning_rate": 6.900687657547699e-06, + "loss": 1.0916, + "step": 4800 + }, + { + "epoch": 0.88, + "learning_rate": 6.878914454797136e-06, + "loss": 1.2637, + "step": 4801 + }, + { + "epoch": 0.88, + "learning_rate": 6.857174432401115e-06, + "loss": 1.145, + "step": 4802 + }, + { + "epoch": 0.89, + "learning_rate": 6.835467598105949e-06, + "loss": 1.0736, + "step": 4803 + }, + { + "epoch": 0.89, + "learning_rate": 6.813793959646064e-06, + "loss": 1.0571, + "step": 4804 + }, + { + "epoch": 0.89, + "learning_rate": 6.792153524744093e-06, + "loss": 1.1893, + "step": 4805 + }, + { + "epoch": 0.89, + "learning_rate": 6.770546301110803e-06, + "loss": 1.0011, + "step": 4806 + }, + { + "epoch": 0.89, + "learning_rate": 6.748972296445199e-06, + "loss": 1.129, + "step": 4807 + }, + { + "epoch": 0.89, + "learning_rate": 6.727431518434357e-06, + "loss": 1.039, + "step": 4808 + }, + { + "epoch": 0.89, + "learning_rate": 6.705923974753592e-06, + "loss": 1.227, + "step": 4809 + }, + { + "epoch": 0.89, + "learning_rate": 6.684449673066329e-06, + "loss": 1.1145, + "step": 4810 + }, + { + "epoch": 0.89, + "learning_rate": 6.6630086210241695e-06, + "loss": 1.2019, + "step": 4811 + }, + { + "epoch": 0.89, + "learning_rate": 6.641600826266847e-06, + "loss": 1.2063, + "step": 4812 + }, + { + "epoch": 0.89, + "learning_rate": 6.620226296422294e-06, + "loss": 0.8993, + "step": 4813 + }, + { + "epoch": 0.89, + "learning_rate": 6.598885039106517e-06, + "loss": 1.1393, + "step": 4814 + }, + { + "epoch": 0.89, + "learning_rate": 6.577577061923734e-06, + "loss": 1.1965, + "step": 4815 + }, + { + "epoch": 0.89, + "learning_rate": 6.556302372466272e-06, + "loss": 1.042, + "step": 4816 + }, + { + "epoch": 0.89, + "learning_rate": 6.535060978314611e-06, + "loss": 1.0066, + "step": 4817 + }, + { + "epoch": 0.89, + "learning_rate": 6.51385288703732e-06, + "loss": 1.1331, + "step": 4818 + }, + { + "epoch": 0.89, + "learning_rate": 6.492678106191186e-06, + "loss": 1.1569, + "step": 4819 + }, + { + "epoch": 0.89, + "learning_rate": 6.4715366433210634e-06, + "loss": 1.0841, + "step": 4820 + }, + { + "epoch": 0.89, + "learning_rate": 6.450428505959938e-06, + "loss": 0.9742, + "step": 4821 + }, + { + "epoch": 0.89, + "learning_rate": 6.4293537016289706e-06, + "loss": 1.0743, + "step": 4822 + }, + { + "epoch": 0.89, + "learning_rate": 6.408312237837399e-06, + "loss": 1.1183, + "step": 4823 + }, + { + "epoch": 0.89, + "learning_rate": 6.3873041220825605e-06, + "loss": 0.9797, + "step": 4824 + }, + { + "epoch": 0.89, + "learning_rate": 6.366329361849988e-06, + "loss": 1.1207, + "step": 4825 + }, + { + "epoch": 0.89, + "learning_rate": 6.345387964613259e-06, + "loss": 1.1538, + "step": 4826 + }, + { + "epoch": 0.89, + "learning_rate": 6.324479937834093e-06, + "loss": 0.9652, + "step": 4827 + }, + { + "epoch": 0.89, + "learning_rate": 6.303605288962322e-06, + "loss": 1.1432, + "step": 4828 + }, + { + "epoch": 0.89, + "learning_rate": 6.282764025435872e-06, + "loss": 1.1167, + "step": 4829 + }, + { + "epoch": 0.89, + "learning_rate": 6.261956154680759e-06, + "loss": 1.1158, + "step": 4830 + }, + { + "epoch": 0.89, + "learning_rate": 6.2411816841111634e-06, + "loss": 0.9381, + "step": 4831 + }, + { + "epoch": 0.89, + "learning_rate": 6.2204406211292865e-06, + "loss": 0.9171, + "step": 4832 + }, + { + "epoch": 0.89, + "learning_rate": 6.199732973125472e-06, + "loss": 0.9437, + "step": 4833 + }, + { + "epoch": 0.89, + "learning_rate": 6.17905874747815e-06, + "loss": 1.2455, + "step": 4834 + }, + { + "epoch": 0.89, + "learning_rate": 6.158417951553841e-06, + "loss": 1.0436, + "step": 4835 + }, + { + "epoch": 0.89, + "learning_rate": 6.137810592707127e-06, + "loss": 1.0285, + "step": 4836 + }, + { + "epoch": 0.89, + "learning_rate": 6.1172366782807354e-06, + "loss": 1.0889, + "step": 4837 + }, + { + "epoch": 0.89, + "learning_rate": 6.096696215605424e-06, + "loss": 0.9833, + "step": 4838 + }, + { + "epoch": 0.89, + "learning_rate": 6.076189212000039e-06, + "loss": 1.0073, + "step": 4839 + }, + { + "epoch": 0.89, + "learning_rate": 6.055715674771534e-06, + "loss": 1.0962, + "step": 4840 + }, + { + "epoch": 0.89, + "learning_rate": 6.035275611214919e-06, + "loss": 1.0485, + "step": 4841 + }, + { + "epoch": 0.89, + "learning_rate": 6.0148690286132685e-06, + "loss": 1.117, + "step": 4842 + }, + { + "epoch": 0.89, + "learning_rate": 5.9944959342377205e-06, + "loss": 0.9238, + "step": 4843 + }, + { + "epoch": 0.89, + "learning_rate": 5.974156335347514e-06, + "loss": 1.2221, + "step": 4844 + }, + { + "epoch": 0.89, + "learning_rate": 5.953850239189929e-06, + "loss": 1.1863, + "step": 4845 + }, + { + "epoch": 0.89, + "learning_rate": 5.933577653000321e-06, + "loss": 0.9824, + "step": 4846 + }, + { + "epoch": 0.89, + "learning_rate": 5.913338584002093e-06, + "loss": 1.2653, + "step": 4847 + }, + { + "epoch": 0.89, + "learning_rate": 5.893133039406718e-06, + "loss": 1.0207, + "step": 4848 + }, + { + "epoch": 0.89, + "learning_rate": 5.872961026413693e-06, + "loss": 1.2159, + "step": 4849 + }, + { + "epoch": 0.89, + "learning_rate": 5.852822552210613e-06, + "loss": 0.9972, + "step": 4850 + }, + { + "epoch": 0.89, + "learning_rate": 5.832717623973072e-06, + "loss": 0.9726, + "step": 4851 + }, + { + "epoch": 0.89, + "learning_rate": 5.8126462488647835e-06, + "loss": 1.0867, + "step": 4852 + }, + { + "epoch": 0.89, + "learning_rate": 5.792608434037439e-06, + "loss": 0.9762, + "step": 4853 + }, + { + "epoch": 0.89, + "learning_rate": 5.772604186630792e-06, + "loss": 1.2168, + "step": 4854 + }, + { + "epoch": 0.89, + "learning_rate": 5.752633513772621e-06, + "loss": 1.0071, + "step": 4855 + }, + { + "epoch": 0.89, + "learning_rate": 5.732696422578787e-06, + "loss": 1.0645, + "step": 4856 + }, + { + "epoch": 0.9, + "learning_rate": 5.712792920153132e-06, + "loss": 1.1659, + "step": 4857 + }, + { + "epoch": 0.9, + "learning_rate": 5.692923013587603e-06, + "loss": 1.1427, + "step": 4858 + }, + { + "epoch": 0.9, + "learning_rate": 5.67308670996205e-06, + "loss": 1.126, + "step": 4859 + }, + { + "epoch": 0.9, + "learning_rate": 5.653284016344495e-06, + "loss": 1.0574, + "step": 4860 + }, + { + "epoch": 0.9, + "learning_rate": 5.633514939790862e-06, + "loss": 1.0219, + "step": 4861 + }, + { + "epoch": 0.9, + "learning_rate": 5.613779487345205e-06, + "loss": 1.1544, + "step": 4862 + }, + { + "epoch": 0.9, + "learning_rate": 5.59407766603951e-06, + "loss": 1.2507, + "step": 4863 + }, + { + "epoch": 0.9, + "learning_rate": 5.574409482893839e-06, + "loss": 1.036, + "step": 4864 + }, + { + "epoch": 0.9, + "learning_rate": 5.554774944916197e-06, + "loss": 1.2427, + "step": 4865 + }, + { + "epoch": 0.9, + "learning_rate": 5.535174059102699e-06, + "loss": 0.9983, + "step": 4866 + }, + { + "epoch": 0.9, + "learning_rate": 5.51560683243737e-06, + "loss": 1.1266, + "step": 4867 + }, + { + "epoch": 0.9, + "learning_rate": 5.496073271892332e-06, + "loss": 1.0747, + "step": 4868 + }, + { + "epoch": 0.9, + "learning_rate": 5.476573384427652e-06, + "loss": 1.2108, + "step": 4869 + }, + { + "epoch": 0.9, + "learning_rate": 5.457107176991405e-06, + "loss": 1.0939, + "step": 4870 + }, + { + "epoch": 0.9, + "learning_rate": 5.437674656519665e-06, + "loss": 1.0136, + "step": 4871 + }, + { + "epoch": 0.9, + "learning_rate": 5.418275829936537e-06, + "loss": 1.0175, + "step": 4872 + }, + { + "epoch": 0.9, + "learning_rate": 5.398910704154081e-06, + "loss": 1.1995, + "step": 4873 + }, + { + "epoch": 0.9, + "learning_rate": 5.379579286072378e-06, + "loss": 1.0973, + "step": 4874 + }, + { + "epoch": 0.9, + "learning_rate": 5.3602815825794735e-06, + "loss": 1.1887, + "step": 4875 + }, + { + "epoch": 0.9, + "learning_rate": 5.341017600551423e-06, + "loss": 1.078, + "step": 4876 + }, + { + "epoch": 0.9, + "learning_rate": 5.321787346852236e-06, + "loss": 1.1666, + "step": 4877 + }, + { + "epoch": 0.9, + "learning_rate": 5.302590828333931e-06, + "loss": 1.1517, + "step": 4878 + }, + { + "epoch": 0.9, + "learning_rate": 5.283428051836503e-06, + "loss": 1.0541, + "step": 4879 + }, + { + "epoch": 0.9, + "learning_rate": 5.264299024187935e-06, + "loss": 0.9948, + "step": 4880 + }, + { + "epoch": 0.9, + "learning_rate": 5.24520375220412e-06, + "loss": 1.1788, + "step": 4881 + }, + { + "epoch": 0.9, + "learning_rate": 5.226142242689014e-06, + "loss": 1.0109, + "step": 4882 + }, + { + "epoch": 0.9, + "learning_rate": 5.207114502434485e-06, + "loss": 1.0714, + "step": 4883 + }, + { + "epoch": 0.9, + "learning_rate": 5.188120538220398e-06, + "loss": 1.0359, + "step": 4884 + }, + { + "epoch": 0.9, + "learning_rate": 5.16916035681454e-06, + "loss": 1.1743, + "step": 4885 + }, + { + "epoch": 0.9, + "learning_rate": 5.150233964972751e-06, + "loss": 1.0476, + "step": 4886 + }, + { + "epoch": 0.9, + "learning_rate": 5.131341369438703e-06, + "loss": 1.1591, + "step": 4887 + }, + { + "epoch": 0.9, + "learning_rate": 5.1124825769441335e-06, + "loss": 1.1728, + "step": 4888 + }, + { + "epoch": 0.9, + "learning_rate": 5.093657594208679e-06, + "loss": 1.072, + "step": 4889 + }, + { + "epoch": 0.9, + "learning_rate": 5.0748664279399615e-06, + "loss": 1.1053, + "step": 4890 + }, + { + "epoch": 0.9, + "learning_rate": 5.056109084833527e-06, + "loss": 0.9939, + "step": 4891 + }, + { + "epoch": 0.9, + "learning_rate": 5.037385571572884e-06, + "loss": 1.0986, + "step": 4892 + }, + { + "epoch": 0.9, + "learning_rate": 5.018695894829473e-06, + "loss": 1.1285, + "step": 4893 + }, + { + "epoch": 0.9, + "learning_rate": 5.0000400612627114e-06, + "loss": 1.0665, + "step": 4894 + }, + { + "epoch": 0.9, + "learning_rate": 4.981418077519906e-06, + "loss": 1.0922, + "step": 4895 + }, + { + "epoch": 0.9, + "learning_rate": 4.962829950236369e-06, + "loss": 1.0443, + "step": 4896 + }, + { + "epoch": 0.9, + "learning_rate": 4.944275686035294e-06, + "loss": 1.1203, + "step": 4897 + }, + { + "epoch": 0.9, + "learning_rate": 4.925755291527823e-06, + "loss": 1.1114, + "step": 4898 + }, + { + "epoch": 0.9, + "learning_rate": 4.907268773313023e-06, + "loss": 1.0453, + "step": 4899 + }, + { + "epoch": 0.9, + "learning_rate": 4.888816137977925e-06, + "loss": 1.1058, + "step": 4900 + }, + { + "epoch": 0.9, + "learning_rate": 4.870397392097437e-06, + "loss": 1.1254, + "step": 4901 + }, + { + "epoch": 0.9, + "learning_rate": 4.852012542234463e-06, + "loss": 1.1478, + "step": 4902 + }, + { + "epoch": 0.9, + "learning_rate": 4.833661594939732e-06, + "loss": 0.9834, + "step": 4903 + }, + { + "epoch": 0.9, + "learning_rate": 4.815344556751966e-06, + "loss": 1.1143, + "step": 4904 + }, + { + "epoch": 0.9, + "learning_rate": 4.79706143419778e-06, + "loss": 1.0477, + "step": 4905 + }, + { + "epoch": 0.9, + "learning_rate": 4.778812233791719e-06, + "loss": 1.171, + "step": 4906 + }, + { + "epoch": 0.9, + "learning_rate": 4.760596962036212e-06, + "loss": 1.001, + "step": 4907 + }, + { + "epoch": 0.9, + "learning_rate": 4.742415625421659e-06, + "loss": 1.0514, + "step": 4908 + }, + { + "epoch": 0.9, + "learning_rate": 4.724268230426265e-06, + "loss": 1.1909, + "step": 4909 + }, + { + "epoch": 0.9, + "learning_rate": 4.706154783516248e-06, + "loss": 1.0229, + "step": 4910 + }, + { + "epoch": 0.9, + "learning_rate": 4.688075291145655e-06, + "loss": 1.1239, + "step": 4911 + }, + { + "epoch": 0.91, + "learning_rate": 4.6700297597565e-06, + "loss": 1.1357, + "step": 4912 + }, + { + "epoch": 0.91, + "learning_rate": 4.652018195778629e-06, + "loss": 1.1818, + "step": 4913 + }, + { + "epoch": 0.91, + "learning_rate": 4.63404060562983e-06, + "loss": 1.176, + "step": 4914 + }, + { + "epoch": 0.91, + "learning_rate": 4.616096995715746e-06, + "loss": 1.2285, + "step": 4915 + }, + { + "epoch": 0.91, + "learning_rate": 4.598187372429974e-06, + "loss": 1.1004, + "step": 4916 + }, + { + "epoch": 0.91, + "learning_rate": 4.580311742153942e-06, + "loss": 1.0686, + "step": 4917 + }, + { + "epoch": 0.91, + "learning_rate": 4.5624701112570004e-06, + "loss": 1.278, + "step": 4918 + }, + { + "epoch": 0.91, + "learning_rate": 4.544662486096374e-06, + "loss": 1.1184, + "step": 4919 + }, + { + "epoch": 0.91, + "learning_rate": 4.5268888730171766e-06, + "loss": 1.0896, + "step": 4920 + }, + { + "epoch": 0.91, + "learning_rate": 4.509149278352365e-06, + "loss": 1.086, + "step": 4921 + }, + { + "epoch": 0.91, + "learning_rate": 4.491443708422838e-06, + "loss": 1.0237, + "step": 4922 + }, + { + "epoch": 0.91, + "learning_rate": 4.473772169537316e-06, + "loss": 1.1065, + "step": 4923 + }, + { + "epoch": 0.91, + "learning_rate": 4.456134667992462e-06, + "loss": 1.1581, + "step": 4924 + }, + { + "epoch": 0.91, + "learning_rate": 4.438531210072716e-06, + "loss": 1.0191, + "step": 4925 + }, + { + "epoch": 0.91, + "learning_rate": 4.420961802050461e-06, + "loss": 1.0143, + "step": 4926 + }, + { + "epoch": 0.91, + "learning_rate": 4.40342645018591e-06, + "loss": 1.209, + "step": 4927 + }, + { + "epoch": 0.91, + "learning_rate": 4.385925160727178e-06, + "loss": 0.9196, + "step": 4928 + }, + { + "epoch": 0.91, + "learning_rate": 4.368457939910209e-06, + "loss": 1.1442, + "step": 4929 + }, + { + "epoch": 0.91, + "learning_rate": 4.351024793958835e-06, + "loss": 1.152, + "step": 4930 + }, + { + "epoch": 0.91, + "learning_rate": 4.33362572908469e-06, + "loss": 0.8875, + "step": 4931 + }, + { + "epoch": 0.91, + "learning_rate": 4.316260751487356e-06, + "loss": 1.1221, + "step": 4932 + }, + { + "epoch": 0.91, + "learning_rate": 4.298929867354174e-06, + "loss": 1.0307, + "step": 4933 + }, + { + "epoch": 0.91, + "learning_rate": 4.2816330828604145e-06, + "loss": 1.1306, + "step": 4934 + }, + { + "epoch": 0.91, + "learning_rate": 4.26437040416916e-06, + "loss": 1.0708, + "step": 4935 + }, + { + "epoch": 0.91, + "learning_rate": 4.247141837431345e-06, + "loss": 1.2254, + "step": 4936 + }, + { + "epoch": 0.91, + "learning_rate": 4.229947388785738e-06, + "loss": 1.0179, + "step": 4937 + }, + { + "epoch": 0.91, + "learning_rate": 4.21278706435898e-06, + "loss": 1.0309, + "step": 4938 + }, + { + "epoch": 0.91, + "learning_rate": 4.195660870265516e-06, + "loss": 1.1924, + "step": 4939 + }, + { + "epoch": 0.91, + "learning_rate": 4.178568812607675e-06, + "loss": 1.03, + "step": 4940 + }, + { + "epoch": 0.91, + "learning_rate": 4.161510897475584e-06, + "loss": 1.188, + "step": 4941 + }, + { + "epoch": 0.91, + "learning_rate": 4.144487130947217e-06, + "loss": 1.0727, + "step": 4942 + }, + { + "epoch": 0.91, + "learning_rate": 4.127497519088375e-06, + "loss": 1.0481, + "step": 4943 + }, + { + "epoch": 0.91, + "learning_rate": 4.1105420679527275e-06, + "loss": 1.2158, + "step": 4944 + }, + { + "epoch": 0.91, + "learning_rate": 4.093620783581698e-06, + "loss": 0.9981, + "step": 4945 + }, + { + "epoch": 0.91, + "learning_rate": 4.076733672004618e-06, + "loss": 1.0804, + "step": 4946 + }, + { + "epoch": 0.91, + "learning_rate": 4.059880739238575e-06, + "loss": 1.0426, + "step": 4947 + }, + { + "epoch": 0.91, + "learning_rate": 4.043061991288522e-06, + "loss": 1.0445, + "step": 4948 + }, + { + "epoch": 0.91, + "learning_rate": 4.0262774341471965e-06, + "loss": 0.9691, + "step": 4949 + }, + { + "epoch": 0.91, + "learning_rate": 4.009527073795205e-06, + "loss": 1.0642, + "step": 4950 + }, + { + "epoch": 0.91, + "learning_rate": 3.992810916200895e-06, + "loss": 1.1034, + "step": 4951 + }, + { + "epoch": 0.91, + "learning_rate": 3.9761289673205135e-06, + "loss": 1.1797, + "step": 4952 + }, + { + "epoch": 0.91, + "learning_rate": 3.95948123309805e-06, + "loss": 1.186, + "step": 4953 + }, + { + "epoch": 0.91, + "learning_rate": 3.942867719465326e-06, + "loss": 0.9731, + "step": 4954 + }, + { + "epoch": 0.91, + "learning_rate": 3.926288432341963e-06, + "loss": 0.9351, + "step": 4955 + }, + { + "epoch": 0.91, + "learning_rate": 3.909743377635422e-06, + "loss": 1.0956, + "step": 4956 + }, + { + "epoch": 0.91, + "learning_rate": 3.8932325612409115e-06, + "loss": 1.0054, + "step": 4957 + }, + { + "epoch": 0.91, + "learning_rate": 3.876755989041469e-06, + "loss": 1.092, + "step": 4958 + }, + { + "epoch": 0.91, + "learning_rate": 3.860313666907955e-06, + "loss": 1.1788, + "step": 4959 + }, + { + "epoch": 0.91, + "learning_rate": 3.843905600698982e-06, + "loss": 1.0505, + "step": 4960 + }, + { + "epoch": 0.91, + "learning_rate": 3.827531796260975e-06, + "loss": 1.166, + "step": 4961 + }, + { + "epoch": 0.91, + "learning_rate": 3.8111922594281536e-06, + "loss": 0.9974, + "step": 4962 + }, + { + "epoch": 0.91, + "learning_rate": 3.7948869960225287e-06, + "loss": 0.9994, + "step": 4963 + }, + { + "epoch": 0.91, + "learning_rate": 3.778616011853886e-06, + "loss": 1.1241, + "step": 4964 + }, + { + "epoch": 0.91, + "learning_rate": 3.7623793127198104e-06, + "loss": 1.0722, + "step": 4965 + }, + { + "epoch": 0.92, + "learning_rate": 3.746176904405685e-06, + "loss": 1.1558, + "step": 4966 + }, + { + "epoch": 0.92, + "learning_rate": 3.730008792684614e-06, + "loss": 0.9818, + "step": 4967 + }, + { + "epoch": 0.92, + "learning_rate": 3.7138749833175668e-06, + "loss": 1.0539, + "step": 4968 + }, + { + "epoch": 0.92, + "learning_rate": 3.697775482053223e-06, + "loss": 1.0997, + "step": 4969 + }, + { + "epoch": 0.92, + "learning_rate": 3.6817102946280602e-06, + "loss": 1.1308, + "step": 4970 + }, + { + "epoch": 0.92, + "learning_rate": 3.665679426766355e-06, + "loss": 0.9806, + "step": 4971 + }, + { + "epoch": 0.92, + "learning_rate": 3.6496828841801056e-06, + "loss": 1.0561, + "step": 4972 + }, + { + "epoch": 0.92, + "learning_rate": 3.6337206725691075e-06, + "loss": 1.1438, + "step": 4973 + }, + { + "epoch": 0.92, + "learning_rate": 3.6177927976209335e-06, + "loss": 1.1778, + "step": 4974 + }, + { + "epoch": 0.92, + "learning_rate": 3.6018992650109106e-06, + "loss": 1.1048, + "step": 4975 + }, + { + "epoch": 0.92, + "learning_rate": 3.5860400804021198e-06, + "loss": 1.1025, + "step": 4976 + }, + { + "epoch": 0.92, + "learning_rate": 3.570215249445419e-06, + "loss": 1.0408, + "step": 4977 + }, + { + "epoch": 0.92, + "learning_rate": 3.554424777779408e-06, + "loss": 1.0478, + "step": 4978 + }, + { + "epoch": 0.92, + "learning_rate": 3.538668671030465e-06, + "loss": 1.0621, + "step": 4979 + }, + { + "epoch": 0.92, + "learning_rate": 3.522946934812699e-06, + "loss": 1.0937, + "step": 4980 + }, + { + "epoch": 0.92, + "learning_rate": 3.5072595747280057e-06, + "loss": 1.0604, + "step": 4981 + }, + { + "epoch": 0.92, + "learning_rate": 3.4916065963659927e-06, + "loss": 1.0492, + "step": 4982 + }, + { + "epoch": 0.92, + "learning_rate": 3.4759880053040538e-06, + "loss": 1.0799, + "step": 4983 + }, + { + "epoch": 0.92, + "learning_rate": 3.460403807107304e-06, + "loss": 1.1953, + "step": 4984 + }, + { + "epoch": 0.92, + "learning_rate": 3.4448540073286238e-06, + "loss": 1.1565, + "step": 4985 + }, + { + "epoch": 0.92, + "learning_rate": 3.4293386115085924e-06, + "loss": 1.0645, + "step": 4986 + }, + { + "epoch": 0.92, + "learning_rate": 3.4138576251756094e-06, + "loss": 1.0587, + "step": 4987 + }, + { + "epoch": 0.92, + "learning_rate": 3.398411053845729e-06, + "loss": 1.1291, + "step": 4988 + }, + { + "epoch": 0.92, + "learning_rate": 3.382998903022816e-06, + "loss": 1.0108, + "step": 4989 + }, + { + "epoch": 0.92, + "learning_rate": 3.36762117819841e-06, + "loss": 0.9893, + "step": 4990 + }, + { + "epoch": 0.92, + "learning_rate": 3.3522778848518287e-06, + "loss": 1.1613, + "step": 4991 + }, + { + "epoch": 0.92, + "learning_rate": 3.3369690284500878e-06, + "loss": 1.0803, + "step": 4992 + }, + { + "epoch": 0.92, + "learning_rate": 3.321694614447968e-06, + "loss": 1.1458, + "step": 4993 + }, + { + "epoch": 0.92, + "learning_rate": 3.3064546482879377e-06, + "loss": 1.0997, + "step": 4994 + }, + { + "epoch": 0.92, + "learning_rate": 3.2912491354002205e-06, + "loss": 1.1329, + "step": 4995 + }, + { + "epoch": 0.92, + "learning_rate": 3.2760780812027715e-06, + "loss": 1.0355, + "step": 4996 + }, + { + "epoch": 0.92, + "learning_rate": 3.2609414911012215e-06, + "loss": 0.9484, + "step": 4997 + }, + { + "epoch": 0.92, + "learning_rate": 3.2458393704889568e-06, + "loss": 0.9982, + "step": 4998 + }, + { + "epoch": 0.92, + "learning_rate": 3.230771724747106e-06, + "loss": 1.0008, + "step": 4999 + }, + { + "epoch": 0.92, + "learning_rate": 3.215738559244441e-06, + "loss": 0.9984, + "step": 5000 + }, + { + "epoch": 0.92, + "learning_rate": 3.200739879337522e-06, + "loss": 1.1602, + "step": 5001 + }, + { + "epoch": 0.92, + "learning_rate": 3.1857756903705737e-06, + "loss": 0.9704, + "step": 5002 + }, + { + "epoch": 0.92, + "learning_rate": 3.1708459976755533e-06, + "loss": 1.1174, + "step": 5003 + }, + { + "epoch": 0.92, + "learning_rate": 3.1559508065721055e-06, + "loss": 1.2723, + "step": 5004 + }, + { + "epoch": 0.92, + "learning_rate": 3.141090122367629e-06, + "loss": 1.0781, + "step": 5005 + }, + { + "epoch": 0.92, + "learning_rate": 3.1262639503571666e-06, + "loss": 1.2147, + "step": 5006 + }, + { + "epoch": 0.92, + "learning_rate": 3.1114722958235144e-06, + "loss": 1.1479, + "step": 5007 + }, + { + "epoch": 0.92, + "learning_rate": 3.096715164037123e-06, + "loss": 1.0927, + "step": 5008 + }, + { + "epoch": 0.92, + "learning_rate": 3.081992560256186e-06, + "loss": 1.0892, + "step": 5009 + }, + { + "epoch": 0.92, + "learning_rate": 3.067304489726552e-06, + "loss": 1.0388, + "step": 5010 + }, + { + "epoch": 0.92, + "learning_rate": 3.052650957681813e-06, + "loss": 1.15, + "step": 5011 + }, + { + "epoch": 0.92, + "learning_rate": 3.0380319693432247e-06, + "loss": 1.0364, + "step": 5012 + }, + { + "epoch": 0.92, + "learning_rate": 3.0234475299197317e-06, + "loss": 1.1205, + "step": 5013 + }, + { + "epoch": 0.92, + "learning_rate": 3.0088976446079554e-06, + "loss": 1.1037, + "step": 5014 + }, + { + "epoch": 0.92, + "learning_rate": 2.994382318592259e-06, + "loss": 1.0989, + "step": 5015 + }, + { + "epoch": 0.92, + "learning_rate": 2.97990155704464e-06, + "loss": 1.1885, + "step": 5016 + }, + { + "epoch": 0.92, + "learning_rate": 2.965455365124803e-06, + "loss": 1.1514, + "step": 5017 + }, + { + "epoch": 0.92, + "learning_rate": 2.9510437479801312e-06, + "loss": 1.0882, + "step": 5018 + }, + { + "epoch": 0.92, + "learning_rate": 2.9366667107456837e-06, + "loss": 1.0325, + "step": 5019 + }, + { + "epoch": 0.93, + "learning_rate": 2.922324258544196e-06, + "loss": 0.9147, + "step": 5020 + }, + { + "epoch": 0.93, + "learning_rate": 2.9080163964861017e-06, + "loss": 1.1736, + "step": 5021 + }, + { + "epoch": 0.93, + "learning_rate": 2.8937431296694686e-06, + "loss": 0.9818, + "step": 5022 + }, + { + "epoch": 0.93, + "learning_rate": 2.879504463180094e-06, + "loss": 1.0715, + "step": 5023 + }, + { + "epoch": 0.93, + "learning_rate": 2.8653004020913753e-06, + "loss": 1.2085, + "step": 5024 + }, + { + "epoch": 0.93, + "learning_rate": 2.8511309514644644e-06, + "loss": 1.0781, + "step": 5025 + }, + { + "epoch": 0.93, + "learning_rate": 2.836996116348101e-06, + "loss": 1.1885, + "step": 5026 + }, + { + "epoch": 0.93, + "learning_rate": 2.822895901778744e-06, + "loss": 1.1961, + "step": 5027 + }, + { + "epoch": 0.93, + "learning_rate": 2.808830312780486e-06, + "loss": 1.1805, + "step": 5028 + }, + { + "epoch": 0.93, + "learning_rate": 2.7947993543651295e-06, + "loss": 1.0032, + "step": 5029 + }, + { + "epoch": 0.93, + "learning_rate": 2.7808030315320534e-06, + "loss": 1.096, + "step": 5030 + }, + { + "epoch": 0.93, + "learning_rate": 2.7668413492683674e-06, + "loss": 1.167, + "step": 5031 + }, + { + "epoch": 0.93, + "learning_rate": 2.7529143125488157e-06, + "loss": 1.1298, + "step": 5032 + }, + { + "epoch": 0.93, + "learning_rate": 2.7390219263358054e-06, + "loss": 1.1451, + "step": 5033 + }, + { + "epoch": 0.93, + "learning_rate": 2.7251641955793773e-06, + "loss": 1.0965, + "step": 5034 + }, + { + "epoch": 0.93, + "learning_rate": 2.711341125217237e-06, + "loss": 1.0075, + "step": 5035 + }, + { + "epoch": 0.93, + "learning_rate": 2.6975527201747342e-06, + "loss": 1.0196, + "step": 5036 + }, + { + "epoch": 0.93, + "learning_rate": 2.683798985364894e-06, + "loss": 1.015, + "step": 5037 + }, + { + "epoch": 0.93, + "learning_rate": 2.67007992568834e-06, + "loss": 1.0298, + "step": 5038 + }, + { + "epoch": 0.93, + "learning_rate": 2.6563955460333856e-06, + "loss": 1.0345, + "step": 5039 + }, + { + "epoch": 0.93, + "learning_rate": 2.642745851275963e-06, + "loss": 1.0552, + "step": 5040 + }, + { + "epoch": 0.93, + "learning_rate": 2.629130846279648e-06, + "loss": 1.0295, + "step": 5041 + }, + { + "epoch": 0.93, + "learning_rate": 2.61555053589565e-06, + "loss": 1.0309, + "step": 5042 + }, + { + "epoch": 0.93, + "learning_rate": 2.602004924962842e-06, + "loss": 0.9245, + "step": 5043 + }, + { + "epoch": 0.93, + "learning_rate": 2.5884940183076966e-06, + "loss": 1.0318, + "step": 5044 + }, + { + "epoch": 0.93, + "learning_rate": 2.5750178207443744e-06, + "loss": 1.1024, + "step": 5045 + }, + { + "epoch": 0.93, + "learning_rate": 2.5615763370745895e-06, + "loss": 1.1694, + "step": 5046 + }, + { + "epoch": 0.93, + "learning_rate": 2.5481695720877663e-06, + "loss": 0.9674, + "step": 5047 + }, + { + "epoch": 0.93, + "learning_rate": 2.534797530560895e-06, + "loss": 1.0522, + "step": 5048 + }, + { + "epoch": 0.93, + "learning_rate": 2.5214602172586533e-06, + "loss": 1.0363, + "step": 5049 + }, + { + "epoch": 0.93, + "learning_rate": 2.508157636933284e-06, + "loss": 1.0639, + "step": 5050 + }, + { + "epoch": 0.93, + "learning_rate": 2.4948897943247284e-06, + "loss": 1.1392, + "step": 5051 + }, + { + "epoch": 0.93, + "learning_rate": 2.48165669416045e-06, + "loss": 1.2711, + "step": 5052 + }, + { + "epoch": 0.93, + "learning_rate": 2.4684583411556217e-06, + "loss": 1.0962, + "step": 5053 + }, + { + "epoch": 0.93, + "learning_rate": 2.4552947400129922e-06, + "loss": 1.1312, + "step": 5054 + }, + { + "epoch": 0.93, + "learning_rate": 2.442165895422954e-06, + "loss": 0.9876, + "step": 5055 + }, + { + "epoch": 0.93, + "learning_rate": 2.429071812063488e-06, + "loss": 1.1495, + "step": 5056 + }, + { + "epoch": 0.93, + "learning_rate": 2.416012494600195e-06, + "loss": 1.1044, + "step": 5057 + }, + { + "epoch": 0.93, + "learning_rate": 2.402987947686286e-06, + "loss": 1.1287, + "step": 5058 + }, + { + "epoch": 0.93, + "learning_rate": 2.3899981759626155e-06, + "loss": 1.0715, + "step": 5059 + }, + { + "epoch": 0.93, + "learning_rate": 2.3770431840576036e-06, + "loss": 1.1431, + "step": 5060 + }, + { + "epoch": 0.93, + "learning_rate": 2.364122976587313e-06, + "loss": 0.9982, + "step": 5061 + }, + { + "epoch": 0.93, + "learning_rate": 2.3512375581553837e-06, + "loss": 1.1659, + "step": 5062 + }, + { + "epoch": 0.93, + "learning_rate": 2.338386933353065e-06, + "loss": 0.981, + "step": 5063 + }, + { + "epoch": 0.93, + "learning_rate": 2.325571106759228e-06, + "loss": 1.1387, + "step": 5064 + }, + { + "epoch": 0.93, + "learning_rate": 2.3127900829403306e-06, + "loss": 1.3083, + "step": 5065 + }, + { + "epoch": 0.93, + "learning_rate": 2.3000438664504188e-06, + "loss": 1.204, + "step": 5066 + }, + { + "epoch": 0.93, + "learning_rate": 2.287332461831182e-06, + "loss": 1.1022, + "step": 5067 + }, + { + "epoch": 0.93, + "learning_rate": 2.2746558736118305e-06, + "loss": 1.0263, + "step": 5068 + }, + { + "epoch": 0.93, + "learning_rate": 2.2620141063092405e-06, + "loss": 1.0285, + "step": 5069 + }, + { + "epoch": 0.93, + "learning_rate": 2.24940716442783e-06, + "loss": 1.2277, + "step": 5070 + }, + { + "epoch": 0.93, + "learning_rate": 2.2368350524596513e-06, + "loss": 1.0792, + "step": 5071 + }, + { + "epoch": 0.93, + "learning_rate": 2.22429777488431e-06, + "loss": 1.1459, + "step": 5072 + }, + { + "epoch": 0.93, + "learning_rate": 2.2117953361690425e-06, + "loss": 1.1945, + "step": 5073 + }, + { + "epoch": 0.93, + "learning_rate": 2.1993277407685974e-06, + "loss": 0.9891, + "step": 5074 + }, + { + "epoch": 0.94, + "learning_rate": 2.186894993125388e-06, + "loss": 1.2296, + "step": 5075 + }, + { + "epoch": 0.94, + "learning_rate": 2.174497097669359e-06, + "loss": 1.1288, + "step": 5076 + }, + { + "epoch": 0.94, + "learning_rate": 2.1621340588180772e-06, + "loss": 1.0588, + "step": 5077 + }, + { + "epoch": 0.94, + "learning_rate": 2.1498058809766517e-06, + "loss": 1.1508, + "step": 5078 + }, + { + "epoch": 0.94, + "learning_rate": 2.1375125685377915e-06, + "loss": 1.0477, + "step": 5079 + }, + { + "epoch": 0.94, + "learning_rate": 2.1252541258817703e-06, + "loss": 1.0072, + "step": 5080 + }, + { + "epoch": 0.94, + "learning_rate": 2.1130305573764495e-06, + "loss": 0.982, + "step": 5081 + }, + { + "epoch": 0.94, + "learning_rate": 2.100841867377257e-06, + "loss": 1.0752, + "step": 5082 + }, + { + "epoch": 0.94, + "learning_rate": 2.0886880602272063e-06, + "loss": 1.0752, + "step": 5083 + }, + { + "epoch": 0.94, + "learning_rate": 2.0765691402568454e-06, + "loss": 1.0647, + "step": 5084 + }, + { + "epoch": 0.94, + "learning_rate": 2.064485111784342e-06, + "loss": 1.0203, + "step": 5085 + }, + { + "epoch": 0.94, + "learning_rate": 2.0524359791153746e-06, + "loss": 0.9323, + "step": 5086 + }, + { + "epoch": 0.94, + "learning_rate": 2.040421746543253e-06, + "loss": 1.2125, + "step": 5087 + }, + { + "epoch": 0.94, + "learning_rate": 2.028442418348797e-06, + "loss": 1.0169, + "step": 5088 + }, + { + "epoch": 0.94, + "learning_rate": 2.016497998800426e-06, + "loss": 1.1081, + "step": 5089 + }, + { + "epoch": 0.94, + "learning_rate": 2.0045884921540804e-06, + "loss": 1.0779, + "step": 5090 + }, + { + "epoch": 0.94, + "learning_rate": 1.992713902653309e-06, + "loss": 1.0951, + "step": 5091 + }, + { + "epoch": 0.94, + "learning_rate": 1.980874234529173e-06, + "loss": 1.0798, + "step": 5092 + }, + { + "epoch": 0.94, + "learning_rate": 1.9690694920003416e-06, + "loss": 1.1386, + "step": 5093 + }, + { + "epoch": 0.94, + "learning_rate": 1.9572996792729836e-06, + "loss": 1.1497, + "step": 5094 + }, + { + "epoch": 0.94, + "learning_rate": 1.945564800540867e-06, + "loss": 1.0507, + "step": 5095 + }, + { + "epoch": 0.94, + "learning_rate": 1.9338648599852792e-06, + "loss": 1.1859, + "step": 5096 + }, + { + "epoch": 0.94, + "learning_rate": 1.922199861775087e-06, + "loss": 1.1431, + "step": 5097 + }, + { + "epoch": 0.94, + "learning_rate": 1.910569810066687e-06, + "loss": 1.183, + "step": 5098 + }, + { + "epoch": 0.94, + "learning_rate": 1.898974709004031e-06, + "loss": 1.1329, + "step": 5099 + }, + { + "epoch": 0.94, + "learning_rate": 1.8874145627186257e-06, + "loss": 1.16, + "step": 5100 + }, + { + "epoch": 0.94, + "learning_rate": 1.8758893753295092e-06, + "loss": 1.0495, + "step": 5101 + }, + { + "epoch": 0.94, + "learning_rate": 1.8643991509432523e-06, + "loss": 1.0592, + "step": 5102 + }, + { + "epoch": 0.94, + "learning_rate": 1.8529438936540021e-06, + "loss": 1.273, + "step": 5103 + }, + { + "epoch": 0.94, + "learning_rate": 1.8415236075434162e-06, + "loss": 1.0229, + "step": 5104 + }, + { + "epoch": 0.94, + "learning_rate": 1.8301382966807278e-06, + "loss": 1.0721, + "step": 5105 + }, + { + "epoch": 0.94, + "learning_rate": 1.8187879651226592e-06, + "loss": 1.1227, + "step": 5106 + }, + { + "epoch": 0.94, + "learning_rate": 1.807472616913497e-06, + "loss": 1.0432, + "step": 5107 + }, + { + "epoch": 0.94, + "learning_rate": 1.7961922560850609e-06, + "loss": 1.1071, + "step": 5108 + }, + { + "epoch": 0.94, + "learning_rate": 1.784946886656702e-06, + "loss": 1.1478, + "step": 5109 + }, + { + "epoch": 0.94, + "learning_rate": 1.7737365126353045e-06, + "loss": 1.1612, + "step": 5110 + }, + { + "epoch": 0.94, + "learning_rate": 1.762561138015284e-06, + "loss": 1.1226, + "step": 5111 + }, + { + "epoch": 0.94, + "learning_rate": 1.7514207667785997e-06, + "loss": 1.2875, + "step": 5112 + }, + { + "epoch": 0.94, + "learning_rate": 1.7403154028946877e-06, + "loss": 1.1155, + "step": 5113 + }, + { + "epoch": 0.94, + "learning_rate": 1.7292450503205715e-06, + "loss": 1.1022, + "step": 5114 + }, + { + "epoch": 0.94, + "learning_rate": 1.7182097130007734e-06, + "loss": 1.1877, + "step": 5115 + }, + { + "epoch": 0.94, + "learning_rate": 1.7072093948673263e-06, + "loss": 1.0387, + "step": 5116 + }, + { + "epoch": 0.94, + "learning_rate": 1.6962440998397944e-06, + "loss": 1.1317, + "step": 5117 + }, + { + "epoch": 0.94, + "learning_rate": 1.685313831825297e-06, + "loss": 1.2064, + "step": 5118 + }, + { + "epoch": 0.94, + "learning_rate": 1.6744185947184077e-06, + "loss": 1.0981, + "step": 5119 + }, + { + "epoch": 0.94, + "learning_rate": 1.6635583924012765e-06, + "loss": 1.0213, + "step": 5120 + }, + { + "epoch": 0.94, + "learning_rate": 1.6527332287435305e-06, + "loss": 1.1555, + "step": 5121 + }, + { + "epoch": 0.94, + "learning_rate": 1.6419431076023506e-06, + "loss": 1.1812, + "step": 5122 + }, + { + "epoch": 0.94, + "learning_rate": 1.6311880328223728e-06, + "loss": 1.057, + "step": 5123 + }, + { + "epoch": 0.94, + "learning_rate": 1.6204680082358203e-06, + "loss": 1.0278, + "step": 5124 + }, + { + "epoch": 0.94, + "learning_rate": 1.6097830376623713e-06, + "loss": 1.0467, + "step": 5125 + }, + { + "epoch": 0.94, + "learning_rate": 1.5991331249092246e-06, + "loss": 1.0334, + "step": 5126 + }, + { + "epoch": 0.94, + "learning_rate": 1.5885182737711003e-06, + "loss": 1.1094, + "step": 5127 + }, + { + "epoch": 0.94, + "learning_rate": 1.5779384880302283e-06, + "loss": 1.0795, + "step": 5128 + }, + { + "epoch": 0.95, + "learning_rate": 1.567393771456316e-06, + "loss": 0.9051, + "step": 5129 + }, + { + "epoch": 0.95, + "learning_rate": 1.5568841278066237e-06, + "loss": 1.0881, + "step": 5130 + }, + { + "epoch": 0.95, + "learning_rate": 1.5464095608258566e-06, + "loss": 1.113, + "step": 5131 + }, + { + "epoch": 0.95, + "learning_rate": 1.5359700742462512e-06, + "loss": 1.0202, + "step": 5132 + }, + { + "epoch": 0.95, + "learning_rate": 1.5255656717875655e-06, + "loss": 1.2327, + "step": 5133 + }, + { + "epoch": 0.95, + "learning_rate": 1.5151963571570227e-06, + "loss": 1.1244, + "step": 5134 + }, + { + "epoch": 0.95, + "learning_rate": 1.5048621340493562e-06, + "loss": 1.1094, + "step": 5135 + }, + { + "epoch": 0.95, + "learning_rate": 1.4945630061467985e-06, + "loss": 1.0016, + "step": 5136 + }, + { + "epoch": 0.95, + "learning_rate": 1.4842989771190584e-06, + "loss": 1.1182, + "step": 5137 + }, + { + "epoch": 0.95, + "learning_rate": 1.4740700506233773e-06, + "loss": 1.1291, + "step": 5138 + }, + { + "epoch": 0.95, + "learning_rate": 1.4638762303044506e-06, + "loss": 0.9794, + "step": 5139 + }, + { + "epoch": 0.95, + "learning_rate": 1.4537175197944842e-06, + "loss": 1.1072, + "step": 5140 + }, + { + "epoch": 0.95, + "learning_rate": 1.4435939227131713e-06, + "loss": 0.984, + "step": 5141 + }, + { + "epoch": 0.95, + "learning_rate": 1.433505442667682e-06, + "loss": 1.0934, + "step": 5142 + }, + { + "epoch": 0.95, + "learning_rate": 1.4234520832527076e-06, + "loss": 1.0764, + "step": 5143 + }, + { + "epoch": 0.95, + "learning_rate": 1.4134338480503829e-06, + "loss": 1.2295, + "step": 5144 + }, + { + "epoch": 0.95, + "learning_rate": 1.403450740630341e-06, + "loss": 1.1393, + "step": 5145 + }, + { + "epoch": 0.95, + "learning_rate": 1.3935027645497146e-06, + "loss": 1.0448, + "step": 5146 + }, + { + "epoch": 0.95, + "learning_rate": 1.3835899233531013e-06, + "loss": 1.1514, + "step": 5147 + }, + { + "epoch": 0.95, + "learning_rate": 1.3737122205725982e-06, + "loss": 1.0138, + "step": 5148 + }, + { + "epoch": 0.95, + "learning_rate": 1.3638696597277679e-06, + "loss": 1.1058, + "step": 5149 + }, + { + "epoch": 0.95, + "learning_rate": 1.354062244325638e-06, + "loss": 1.0483, + "step": 5150 + }, + { + "epoch": 0.95, + "learning_rate": 1.3442899778607353e-06, + "loss": 1.121, + "step": 5151 + }, + { + "epoch": 0.95, + "learning_rate": 1.3345528638150751e-06, + "loss": 1.1022, + "step": 5152 + }, + { + "epoch": 0.95, + "learning_rate": 1.3248509056580926e-06, + "loss": 1.187, + "step": 5153 + }, + { + "epoch": 0.95, + "learning_rate": 1.3151841068467675e-06, + "loss": 0.9274, + "step": 5154 + }, + { + "epoch": 0.95, + "learning_rate": 1.3055524708254885e-06, + "loss": 1.0737, + "step": 5155 + }, + { + "epoch": 0.95, + "learning_rate": 1.2959560010261662e-06, + "loss": 1.1858, + "step": 5156 + }, + { + "epoch": 0.95, + "learning_rate": 1.2863947008681433e-06, + "loss": 1.2378, + "step": 5157 + }, + { + "epoch": 0.95, + "learning_rate": 1.2768685737582497e-06, + "loss": 1.1711, + "step": 5158 + }, + { + "epoch": 0.95, + "learning_rate": 1.2673776230907708e-06, + "loss": 0.9905, + "step": 5159 + }, + { + "epoch": 0.95, + "learning_rate": 1.25792185224749e-06, + "loss": 1.1081, + "step": 5160 + }, + { + "epoch": 0.95, + "learning_rate": 1.2485012645976014e-06, + "loss": 1.0604, + "step": 5161 + }, + { + "epoch": 0.95, + "learning_rate": 1.2391158634978194e-06, + "loss": 0.9934, + "step": 5162 + }, + { + "epoch": 0.95, + "learning_rate": 1.2297656522922695e-06, + "loss": 1.1166, + "step": 5163 + }, + { + "epoch": 0.95, + "learning_rate": 1.2204506343125866e-06, + "loss": 0.9062, + "step": 5164 + }, + { + "epoch": 0.95, + "learning_rate": 1.2111708128778266e-06, + "loss": 1.1013, + "step": 5165 + }, + { + "epoch": 0.95, + "learning_rate": 1.2019261912945334e-06, + "loss": 1.0317, + "step": 5166 + }, + { + "epoch": 0.95, + "learning_rate": 1.1927167728566835e-06, + "loss": 1.1802, + "step": 5167 + }, + { + "epoch": 0.95, + "learning_rate": 1.1835425608457296e-06, + "loss": 1.1185, + "step": 5168 + }, + { + "epoch": 0.95, + "learning_rate": 1.174403558530568e-06, + "loss": 1.1468, + "step": 5169 + }, + { + "epoch": 0.95, + "learning_rate": 1.1652997691675605e-06, + "loss": 1.077, + "step": 5170 + }, + { + "epoch": 0.95, + "learning_rate": 1.1562311960005234e-06, + "loss": 1.068, + "step": 5171 + }, + { + "epoch": 0.95, + "learning_rate": 1.1471978422606943e-06, + "loss": 1.0773, + "step": 5172 + }, + { + "epoch": 0.95, + "learning_rate": 1.1381997111667874e-06, + "loss": 1.0322, + "step": 5173 + }, + { + "epoch": 0.95, + "learning_rate": 1.1292368059249936e-06, + "loss": 1.1575, + "step": 5174 + }, + { + "epoch": 0.95, + "learning_rate": 1.1203091297288804e-06, + "loss": 1.0104, + "step": 5175 + }, + { + "epoch": 0.95, + "learning_rate": 1.1114166857595365e-06, + "loss": 1.0404, + "step": 5176 + }, + { + "epoch": 0.95, + "learning_rate": 1.1025594771854497e-06, + "loss": 1.1183, + "step": 5177 + }, + { + "epoch": 0.95, + "learning_rate": 1.0937375071625844e-06, + "loss": 1.1172, + "step": 5178 + }, + { + "epoch": 0.95, + "learning_rate": 1.084950778834304e-06, + "loss": 0.9908, + "step": 5179 + }, + { + "epoch": 0.95, + "learning_rate": 1.0761992953314703e-06, + "loss": 1.082, + "step": 5180 + }, + { + "epoch": 0.95, + "learning_rate": 1.067483059772334e-06, + "loss": 1.0515, + "step": 5181 + }, + { + "epoch": 0.95, + "learning_rate": 1.058802075262655e-06, + "loss": 1.124, + "step": 5182 + }, + { + "epoch": 0.96, + "learning_rate": 1.0501563448955365e-06, + "loss": 1.1516, + "step": 5183 + }, + { + "epoch": 0.96, + "learning_rate": 1.0415458717516146e-06, + "loss": 1.0563, + "step": 5184 + }, + { + "epoch": 0.96, + "learning_rate": 1.032970658898913e-06, + "loss": 1.093, + "step": 5185 + }, + { + "epoch": 0.96, + "learning_rate": 1.0244307093928873e-06, + "loss": 1.2772, + "step": 5186 + }, + { + "epoch": 0.96, + "learning_rate": 1.0159260262764591e-06, + "loss": 0.967, + "step": 5187 + }, + { + "epoch": 0.96, + "learning_rate": 1.0074566125799601e-06, + "loss": 1.1734, + "step": 5188 + }, + { + "epoch": 0.96, + "learning_rate": 9.990224713211538e-07, + "loss": 1.0395, + "step": 5189 + }, + { + "epoch": 0.96, + "learning_rate": 9.906236055052586e-07, + "loss": 1.2157, + "step": 5190 + }, + { + "epoch": 0.96, + "learning_rate": 9.82260018124881e-07, + "loss": 1.0924, + "step": 5191 + }, + { + "epoch": 0.96, + "learning_rate": 9.739317121601254e-07, + "loss": 1.0267, + "step": 5192 + }, + { + "epoch": 0.96, + "learning_rate": 9.656386905784632e-07, + "loss": 1.1127, + "step": 5193 + }, + { + "epoch": 0.96, + "learning_rate": 9.573809563348079e-07, + "loss": 1.2254, + "step": 5194 + }, + { + "epoch": 0.96, + "learning_rate": 9.491585123715063e-07, + "loss": 1.0336, + "step": 5195 + }, + { + "epoch": 0.96, + "learning_rate": 9.40971361618348e-07, + "loss": 1.1747, + "step": 5196 + }, + { + "epoch": 0.96, + "learning_rate": 9.328195069925217e-07, + "loss": 1.146, + "step": 5197 + }, + { + "epoch": 0.96, + "learning_rate": 9.247029513986482e-07, + "loss": 1.1022, + "step": 5198 + }, + { + "epoch": 0.96, + "learning_rate": 9.16621697728759e-07, + "loss": 1.1704, + "step": 5199 + }, + { + "epoch": 0.96, + "learning_rate": 9.085757488623392e-07, + "loss": 1.0252, + "step": 5200 + }, + { + "epoch": 0.96, + "learning_rate": 9.005651076662624e-07, + "loss": 1.112, + "step": 5201 + }, + { + "epoch": 0.96, + "learning_rate": 8.925897769948344e-07, + "loss": 1.0904, + "step": 5202 + }, + { + "epoch": 0.96, + "learning_rate": 8.846497596897707e-07, + "loss": 1.1268, + "step": 5203 + }, + { + "epoch": 0.96, + "learning_rate": 8.767450585802306e-07, + "loss": 1.1993, + "step": 5204 + }, + { + "epoch": 0.96, + "learning_rate": 8.688756764827388e-07, + "loss": 0.9887, + "step": 5205 + }, + { + "epoch": 0.96, + "learning_rate": 8.610416162013079e-07, + "loss": 1.0434, + "step": 5206 + }, + { + "epoch": 0.96, + "learning_rate": 8.532428805272829e-07, + "loss": 0.9683, + "step": 5207 + }, + { + "epoch": 0.96, + "learning_rate": 8.454794722394965e-07, + "loss": 0.9725, + "step": 5208 + }, + { + "epoch": 0.96, + "learning_rate": 8.377513941041359e-07, + "loss": 1.1331, + "step": 5209 + }, + { + "epoch": 0.96, + "learning_rate": 8.300586488748541e-07, + "loss": 1.1535, + "step": 5210 + }, + { + "epoch": 0.96, + "learning_rate": 8.224012392926472e-07, + "loss": 1.2134, + "step": 5211 + }, + { + "epoch": 0.96, + "learning_rate": 8.147791680859773e-07, + "loss": 1.1166, + "step": 5212 + }, + { + "epoch": 0.96, + "learning_rate": 8.071924379706941e-07, + "loss": 1.1223, + "step": 5213 + }, + { + "epoch": 0.96, + "learning_rate": 7.996410516500685e-07, + "loss": 1.2418, + "step": 5214 + }, + { + "epoch": 0.96, + "learning_rate": 7.921250118147483e-07, + "loss": 1.2182, + "step": 5215 + }, + { + "epoch": 0.96, + "learning_rate": 7.846443211428023e-07, + "loss": 1.1497, + "step": 5216 + }, + { + "epoch": 0.96, + "learning_rate": 7.771989822997206e-07, + "loss": 1.1815, + "step": 5217 + }, + { + "epoch": 0.96, + "learning_rate": 7.69788997938381e-07, + "loss": 1.145, + "step": 5218 + }, + { + "epoch": 0.96, + "learning_rate": 7.624143706990494e-07, + "loss": 1.0281, + "step": 5219 + }, + { + "epoch": 0.96, + "learning_rate": 7.550751032094239e-07, + "loss": 1.042, + "step": 5220 + }, + { + "epoch": 0.96, + "learning_rate": 7.477711980845903e-07, + "loss": 1.0341, + "step": 5221 + }, + { + "epoch": 0.96, + "learning_rate": 7.405026579270224e-07, + "loss": 1.1248, + "step": 5222 + }, + { + "epoch": 0.96, + "learning_rate": 7.332694853266042e-07, + "loss": 1.1705, + "step": 5223 + }, + { + "epoch": 0.96, + "learning_rate": 7.260716828606295e-07, + "loss": 0.9899, + "step": 5224 + }, + { + "epoch": 0.96, + "learning_rate": 7.18909253093758e-07, + "loss": 1.0665, + "step": 5225 + }, + { + "epoch": 0.96, + "learning_rate": 7.117821985780926e-07, + "loss": 1.066, + "step": 5226 + }, + { + "epoch": 0.96, + "learning_rate": 7.046905218530686e-07, + "loss": 1.1695, + "step": 5227 + }, + { + "epoch": 0.96, + "learning_rate": 6.976342254455759e-07, + "loss": 1.1695, + "step": 5228 + }, + { + "epoch": 0.96, + "learning_rate": 6.906133118698588e-07, + "loss": 1.0596, + "step": 5229 + }, + { + "epoch": 0.96, + "learning_rate": 6.836277836275828e-07, + "loss": 1.0463, + "step": 5230 + }, + { + "epoch": 0.96, + "learning_rate": 6.766776432077682e-07, + "loss": 1.147, + "step": 5231 + }, + { + "epoch": 0.96, + "learning_rate": 6.697628930868671e-07, + "loss": 0.9972, + "step": 5232 + }, + { + "epoch": 0.96, + "learning_rate": 6.628835357286978e-07, + "loss": 1.1396, + "step": 5233 + }, + { + "epoch": 0.96, + "learning_rate": 6.560395735844549e-07, + "loss": 1.0292, + "step": 5234 + }, + { + "epoch": 0.96, + "learning_rate": 6.492310090927656e-07, + "loss": 1.1397, + "step": 5235 + }, + { + "epoch": 0.96, + "learning_rate": 6.424578446796003e-07, + "loss": 1.1494, + "step": 5236 + }, + { + "epoch": 0.97, + "learning_rate": 6.357200827583398e-07, + "loss": 0.9675, + "step": 5237 + }, + { + "epoch": 0.97, + "learning_rate": 6.290177257297414e-07, + "loss": 1.1603, + "step": 5238 + }, + { + "epoch": 0.97, + "learning_rate": 6.223507759819392e-07, + "loss": 1.0995, + "step": 5239 + }, + { + "epoch": 0.97, + "learning_rate": 6.157192358904774e-07, + "loss": 1.1194, + "step": 5240 + }, + { + "epoch": 0.97, + "learning_rate": 6.091231078182547e-07, + "loss": 1.1495, + "step": 5241 + }, + { + "epoch": 0.97, + "learning_rate": 6.0256239411558e-07, + "loss": 1.0307, + "step": 5242 + }, + { + "epoch": 0.97, + "learning_rate": 5.960370971201168e-07, + "loss": 1.0493, + "step": 5243 + }, + { + "epoch": 0.97, + "learning_rate": 5.895472191569274e-07, + "loss": 1.0051, + "step": 5244 + }, + { + "epoch": 0.97, + "learning_rate": 5.830927625384286e-07, + "loss": 1.0457, + "step": 5245 + }, + { + "epoch": 0.97, + "learning_rate": 5.766737295644586e-07, + "loss": 1.0513, + "step": 5246 + }, + { + "epoch": 0.97, + "learning_rate": 5.702901225221879e-07, + "loss": 1.0672, + "step": 5247 + }, + { + "epoch": 0.97, + "learning_rate": 5.63941943686197e-07, + "loss": 1.1393, + "step": 5248 + }, + { + "epoch": 0.97, + "learning_rate": 5.576291953184321e-07, + "loss": 1.1236, + "step": 5249 + }, + { + "epoch": 0.97, + "learning_rate": 5.51351879668216e-07, + "loss": 1.1455, + "step": 5250 + }, + { + "epoch": 0.97, + "learning_rate": 5.451099989722375e-07, + "loss": 1.0831, + "step": 5251 + }, + { + "epoch": 0.97, + "learning_rate": 5.38903555454573e-07, + "loss": 1.097, + "step": 5252 + }, + { + "epoch": 0.97, + "learning_rate": 5.327325513266534e-07, + "loss": 1.1015, + "step": 5253 + }, + { + "epoch": 0.97, + "learning_rate": 5.26596988787309e-07, + "loss": 1.0808, + "step": 5254 + }, + { + "epoch": 0.97, + "learning_rate": 5.204968700227242e-07, + "loss": 1.0423, + "step": 5255 + }, + { + "epoch": 0.97, + "learning_rate": 5.144321972064603e-07, + "loss": 1.0624, + "step": 5256 + }, + { + "epoch": 0.97, + "learning_rate": 5.084029724994332e-07, + "loss": 1.0655, + "step": 5257 + }, + { + "epoch": 0.97, + "learning_rate": 5.024091980499468e-07, + "loss": 1.0891, + "step": 5258 + }, + { + "epoch": 0.97, + "learning_rate": 4.964508759936704e-07, + "loss": 1.0787, + "step": 5259 + }, + { + "epoch": 0.97, + "learning_rate": 4.905280084536279e-07, + "loss": 1.0543, + "step": 5260 + }, + { + "epoch": 0.97, + "learning_rate": 4.846405975402424e-07, + "loss": 1.2689, + "step": 5261 + }, + { + "epoch": 0.97, + "learning_rate": 4.78788645351258e-07, + "loss": 1.0734, + "step": 5262 + }, + { + "epoch": 0.97, + "learning_rate": 4.7297215397181795e-07, + "loss": 1.1216, + "step": 5263 + }, + { + "epoch": 0.97, + "learning_rate": 4.671911254744199e-07, + "loss": 1.0562, + "step": 5264 + }, + { + "epoch": 0.97, + "learning_rate": 4.614455619189273e-07, + "loss": 0.9704, + "step": 5265 + }, + { + "epoch": 0.97, + "learning_rate": 4.557354653525581e-07, + "loss": 1.0406, + "step": 5266 + }, + { + "epoch": 0.97, + "learning_rate": 4.5006083780991804e-07, + "loss": 1.1506, + "step": 5267 + }, + { + "epoch": 0.97, + "learning_rate": 4.4442168131293427e-07, + "loss": 1.2013, + "step": 5268 + }, + { + "epoch": 0.97, + "learning_rate": 4.388179978709217e-07, + "loss": 1.1047, + "step": 5269 + }, + { + "epoch": 0.97, + "learning_rate": 4.3324978948057205e-07, + "loss": 1.0833, + "step": 5270 + }, + { + "epoch": 0.97, + "learning_rate": 4.2771705812588714e-07, + "loss": 1.0509, + "step": 5271 + }, + { + "epoch": 0.97, + "learning_rate": 4.222198057782789e-07, + "loss": 1.1156, + "step": 5272 + }, + { + "epoch": 0.97, + "learning_rate": 4.167580343964916e-07, + "loss": 1.0356, + "step": 5273 + }, + { + "epoch": 0.97, + "learning_rate": 4.113317459266242e-07, + "loss": 1.0635, + "step": 5274 + }, + { + "epoch": 0.97, + "learning_rate": 4.0594094230214096e-07, + "loss": 1.1529, + "step": 5275 + }, + { + "epoch": 0.97, + "learning_rate": 4.0058562544387224e-07, + "loss": 0.9414, + "step": 5276 + }, + { + "epoch": 0.97, + "learning_rate": 3.952657972599805e-07, + "loss": 0.9776, + "step": 5277 + }, + { + "epoch": 0.97, + "learning_rate": 3.8998145964599386e-07, + "loss": 1.134, + "step": 5278 + }, + { + "epoch": 0.97, + "learning_rate": 3.8473261448480623e-07, + "loss": 0.9647, + "step": 5279 + }, + { + "epoch": 0.97, + "learning_rate": 3.795192636466549e-07, + "loss": 1.0463, + "step": 5280 + }, + { + "epoch": 0.97, + "learning_rate": 3.7434140898912063e-07, + "loss": 1.1119, + "step": 5281 + }, + { + "epoch": 0.97, + "learning_rate": 3.691990523571498e-07, + "loss": 1.0147, + "step": 5282 + }, + { + "epoch": 0.97, + "learning_rate": 3.640921955830434e-07, + "loss": 1.175, + "step": 5283 + }, + { + "epoch": 0.97, + "learning_rate": 3.590208404864348e-07, + "loss": 1.1244, + "step": 5284 + }, + { + "epoch": 0.97, + "learning_rate": 3.539849888743341e-07, + "loss": 1.0701, + "step": 5285 + }, + { + "epoch": 0.97, + "learning_rate": 3.4898464254107257e-07, + "loss": 0.9664, + "step": 5286 + }, + { + "epoch": 0.97, + "learning_rate": 3.4401980326835836e-07, + "loss": 1.1356, + "step": 5287 + }, + { + "epoch": 0.97, + "learning_rate": 3.390904728252098e-07, + "loss": 1.0698, + "step": 5288 + }, + { + "epoch": 0.97, + "learning_rate": 3.3419665296804404e-07, + "loss": 1.0924, + "step": 5289 + }, + { + "epoch": 0.97, + "learning_rate": 3.293383454405774e-07, + "loss": 0.9682, + "step": 5290 + }, + { + "epoch": 0.97, + "learning_rate": 3.245155519739029e-07, + "loss": 1.1202, + "step": 5291 + }, + { + "epoch": 0.98, + "learning_rate": 3.197282742864571e-07, + "loss": 1.0619, + "step": 5292 + }, + { + "epoch": 0.98, + "learning_rate": 3.1497651408399776e-07, + "loss": 1.1743, + "step": 5293 + }, + { + "epoch": 0.98, + "learning_rate": 3.1026027305964823e-07, + "loss": 1.0713, + "step": 5294 + }, + { + "epoch": 0.98, + "learning_rate": 3.0557955289387543e-07, + "loss": 0.9807, + "step": 5295 + }, + { + "epoch": 0.98, + "learning_rate": 3.009343552544897e-07, + "loss": 1.1561, + "step": 5296 + }, + { + "epoch": 0.98, + "learning_rate": 2.9632468179664474e-07, + "loss": 0.9726, + "step": 5297 + }, + { + "epoch": 0.98, + "learning_rate": 2.917505341628157e-07, + "loss": 1.0475, + "step": 5298 + }, + { + "epoch": 0.98, + "learning_rate": 2.872119139828433e-07, + "loss": 1.1383, + "step": 5299 + }, + { + "epoch": 0.98, + "learning_rate": 2.8270882287390055e-07, + "loss": 1.0857, + "step": 5300 + }, + { + "epoch": 0.98, + "learning_rate": 2.78241262440504e-07, + "loss": 1.1581, + "step": 5301 + }, + { + "epoch": 0.98, + "learning_rate": 2.7380923427451355e-07, + "loss": 1.0199, + "step": 5302 + }, + { + "epoch": 0.98, + "learning_rate": 2.694127399551216e-07, + "loss": 1.1851, + "step": 5303 + }, + { + "epoch": 0.98, + "learning_rate": 2.6505178104885285e-07, + "loss": 1.0126, + "step": 5304 + }, + { + "epoch": 0.98, + "learning_rate": 2.607263591095754e-07, + "loss": 1.0344, + "step": 5305 + }, + { + "epoch": 0.98, + "learning_rate": 2.5643647567851205e-07, + "loss": 1.0242, + "step": 5306 + }, + { + "epoch": 0.98, + "learning_rate": 2.5218213228420664e-07, + "loss": 1.0795, + "step": 5307 + }, + { + "epoch": 0.98, + "learning_rate": 2.479633304425355e-07, + "loss": 0.976, + "step": 5308 + }, + { + "epoch": 0.98, + "learning_rate": 2.4378007165671843e-07, + "loss": 1.1856, + "step": 5309 + }, + { + "epoch": 0.98, + "learning_rate": 2.396323574173076e-07, + "loss": 1.2325, + "step": 5310 + }, + { + "epoch": 0.98, + "learning_rate": 2.3552018920219855e-07, + "loss": 1.118, + "step": 5311 + }, + { + "epoch": 0.98, + "learning_rate": 2.314435684766081e-07, + "loss": 1.0427, + "step": 5312 + }, + { + "epoch": 0.98, + "learning_rate": 2.2740249669309655e-07, + "loss": 1.016, + "step": 5313 + }, + { + "epoch": 0.98, + "learning_rate": 2.2339697529155656e-07, + "loss": 1.1043, + "step": 5314 + }, + { + "epoch": 0.98, + "learning_rate": 2.1942700569921314e-07, + "loss": 1.0229, + "step": 5315 + }, + { + "epoch": 0.98, + "learning_rate": 2.1549258933061255e-07, + "loss": 1.0361, + "step": 5316 + }, + { + "epoch": 0.98, + "learning_rate": 2.115937275876445e-07, + "loss": 1.2238, + "step": 5317 + }, + { + "epoch": 0.98, + "learning_rate": 2.0773042185954218e-07, + "loss": 1.0374, + "step": 5318 + }, + { + "epoch": 0.98, + "learning_rate": 2.0390267352284888e-07, + "loss": 1.2312, + "step": 5319 + }, + { + "epoch": 0.98, + "learning_rate": 2.0011048394142917e-07, + "loss": 0.9094, + "step": 5320 + }, + { + "epoch": 0.98, + "learning_rate": 1.9635385446652442e-07, + "loss": 1.0541, + "step": 5321 + }, + { + "epoch": 0.98, + "learning_rate": 1.9263278643664175e-07, + "loss": 1.0582, + "step": 5322 + }, + { + "epoch": 0.98, + "learning_rate": 1.88947281177676e-07, + "loss": 1.157, + "step": 5323 + }, + { + "epoch": 0.98, + "learning_rate": 1.8529734000281017e-07, + "loss": 1.0366, + "step": 5324 + }, + { + "epoch": 0.98, + "learning_rate": 1.8168296421258169e-07, + "loss": 1.0346, + "step": 5325 + }, + { + "epoch": 0.98, + "learning_rate": 1.7810415509483812e-07, + "loss": 1.0187, + "step": 5326 + }, + { + "epoch": 0.98, + "learning_rate": 1.7456091392474838e-07, + "loss": 1.0703, + "step": 5327 + }, + { + "epoch": 0.98, + "learning_rate": 1.7105324196482475e-07, + "loss": 1.1149, + "step": 5328 + }, + { + "epoch": 0.98, + "learning_rate": 1.675811404649119e-07, + "loss": 1.0587, + "step": 5329 + }, + { + "epoch": 0.98, + "learning_rate": 1.6414461066216468e-07, + "loss": 1.2965, + "step": 5330 + }, + { + "epoch": 0.98, + "learning_rate": 1.6074365378105915e-07, + "loss": 0.957, + "step": 5331 + }, + { + "epoch": 0.98, + "learning_rate": 1.5737827103340375e-07, + "loss": 1.0557, + "step": 5332 + }, + { + "epoch": 0.98, + "learning_rate": 1.5404846361833926e-07, + "loss": 1.2173, + "step": 5333 + }, + { + "epoch": 0.98, + "learning_rate": 1.5075423272231658e-07, + "loss": 1.0139, + "step": 5334 + }, + { + "epoch": 0.98, + "learning_rate": 1.4749557951911906e-07, + "loss": 1.1486, + "step": 5335 + }, + { + "epoch": 0.98, + "learning_rate": 1.4427250516985124e-07, + "loss": 1.0918, + "step": 5336 + }, + { + "epoch": 0.98, + "learning_rate": 1.4108501082295e-07, + "loss": 0.9742, + "step": 5337 + }, + { + "epoch": 0.98, + "learning_rate": 1.3793309761414018e-07, + "loss": 1.0624, + "step": 5338 + }, + { + "epoch": 0.98, + "learning_rate": 1.3481676666651234e-07, + "loss": 1.0353, + "step": 5339 + }, + { + "epoch": 0.98, + "learning_rate": 1.3173601909045597e-07, + "loss": 0.9924, + "step": 5340 + }, + { + "epoch": 0.98, + "learning_rate": 1.2869085598368191e-07, + "loss": 1.1221, + "step": 5341 + }, + { + "epoch": 0.98, + "learning_rate": 1.2568127843122223e-07, + "loss": 0.9961, + "step": 5342 + }, + { + "epoch": 0.98, + "learning_rate": 1.2270728750544137e-07, + "loss": 1.0793, + "step": 5343 + }, + { + "epoch": 0.98, + "learning_rate": 1.1976888426600275e-07, + "loss": 1.1007, + "step": 5344 + }, + { + "epoch": 0.98, + "learning_rate": 1.1686606975991333e-07, + "loss": 1.0587, + "step": 5345 + }, + { + "epoch": 0.99, + "learning_rate": 1.1399884502146796e-07, + "loss": 1.2819, + "step": 5346 + }, + { + "epoch": 0.99, + "learning_rate": 1.111672110723272e-07, + "loss": 1.0973, + "step": 5347 + }, + { + "epoch": 0.99, + "learning_rate": 1.083711689214062e-07, + "loss": 1.0853, + "step": 5348 + }, + { + "epoch": 0.99, + "learning_rate": 1.0561071956500801e-07, + "loss": 1.051, + "step": 5349 + }, + { + "epoch": 0.99, + "learning_rate": 1.0288586398670142e-07, + "loss": 1.145, + "step": 5350 + }, + { + "epoch": 0.99, + "learning_rate": 1.0019660315738755e-07, + "loss": 1.0119, + "step": 5351 + }, + { + "epoch": 0.99, + "learning_rate": 9.75429380352999e-08, + "loss": 1.0718, + "step": 5352 + }, + { + "epoch": 0.99, + "learning_rate": 9.492486956597102e-08, + "loss": 1.1555, + "step": 5353 + }, + { + "epoch": 0.99, + "learning_rate": 9.23423986822547e-08, + "loss": 1.1669, + "step": 5354 + }, + { + "epoch": 0.99, + "learning_rate": 8.979552630433708e-08, + "loss": 1.0573, + "step": 5355 + }, + { + "epoch": 0.99, + "learning_rate": 8.728425333967005e-08, + "loss": 0.9589, + "step": 5356 + }, + { + "epoch": 0.99, + "learning_rate": 8.480858068309339e-08, + "loss": 1.0101, + "step": 5357 + }, + { + "epoch": 0.99, + "learning_rate": 8.236850921670148e-08, + "loss": 1.012, + "step": 5358 + }, + { + "epoch": 0.99, + "learning_rate": 7.996403980993217e-08, + "loss": 1.0173, + "step": 5359 + }, + { + "epoch": 0.99, + "learning_rate": 7.759517331952238e-08, + "loss": 1.1157, + "step": 5360 + }, + { + "epoch": 0.99, + "learning_rate": 7.526191058955245e-08, + "loss": 1.1358, + "step": 5361 + }, + { + "epoch": 0.99, + "learning_rate": 7.296425245137961e-08, + "loss": 1.1957, + "step": 5362 + }, + { + "epoch": 0.99, + "learning_rate": 7.070219972370451e-08, + "loss": 1.0647, + "step": 5363 + }, + { + "epoch": 0.99, + "learning_rate": 6.847575321251576e-08, + "loss": 1.2166, + "step": 5364 + }, + { + "epoch": 0.99, + "learning_rate": 6.628491371114543e-08, + "loss": 1.2078, + "step": 5365 + }, + { + "epoch": 0.99, + "learning_rate": 6.412968200019131e-08, + "loss": 1.0642, + "step": 5366 + }, + { + "epoch": 0.99, + "learning_rate": 6.201005884762801e-08, + "loss": 0.9913, + "step": 5367 + }, + { + "epoch": 0.99, + "learning_rate": 5.99260450086736e-08, + "loss": 1.0939, + "step": 5368 + }, + { + "epoch": 0.99, + "learning_rate": 5.787764122592298e-08, + "loss": 1.1051, + "step": 5369 + }, + { + "epoch": 0.99, + "learning_rate": 5.5864848229225666e-08, + "loss": 1.1082, + "step": 5370 + }, + { + "epoch": 0.99, + "learning_rate": 5.3887666735785716e-08, + "loss": 1.1491, + "step": 5371 + }, + { + "epoch": 0.99, + "learning_rate": 5.1946097450084054e-08, + "loss": 1.0936, + "step": 5372 + }, + { + "epoch": 0.99, + "learning_rate": 5.004014106394506e-08, + "loss": 1.0384, + "step": 5373 + }, + { + "epoch": 0.99, + "learning_rate": 4.816979825648105e-08, + "loss": 1.0223, + "step": 5374 + }, + { + "epoch": 0.99, + "learning_rate": 4.6335069694125597e-08, + "loss": 0.9985, + "step": 5375 + }, + { + "epoch": 0.99, + "learning_rate": 4.4535956030611336e-08, + "loss": 1.1231, + "step": 5376 + }, + { + "epoch": 0.99, + "learning_rate": 4.277245790698103e-08, + "loss": 1.3097, + "step": 5377 + }, + { + "epoch": 0.99, + "learning_rate": 4.1044575951620924e-08, + "loss": 1.1772, + "step": 5378 + }, + { + "epoch": 0.99, + "learning_rate": 3.935231078017187e-08, + "loss": 1.1011, + "step": 5379 + }, + { + "epoch": 0.99, + "learning_rate": 3.7695662995618216e-08, + "loss": 1.1034, + "step": 5380 + }, + { + "epoch": 0.99, + "learning_rate": 3.6074633188265536e-08, + "loss": 1.0844, + "step": 5381 + }, + { + "epoch": 0.99, + "learning_rate": 3.4489221935685156e-08, + "loss": 1.113, + "step": 5382 + }, + { + "epoch": 0.99, + "learning_rate": 3.2939429802791856e-08, + "loss": 1.0624, + "step": 5383 + }, + { + "epoch": 0.99, + "learning_rate": 3.1425257341799464e-08, + "loss": 1.0436, + "step": 5384 + }, + { + "epoch": 0.99, + "learning_rate": 2.994670509223196e-08, + "loss": 1.1982, + "step": 5385 + }, + { + "epoch": 0.99, + "learning_rate": 2.8503773580912386e-08, + "loss": 1.1944, + "step": 5386 + }, + { + "epoch": 0.99, + "learning_rate": 2.7096463321973908e-08, + "loss": 1.0543, + "step": 5387 + }, + { + "epoch": 0.99, + "learning_rate": 2.5724774816870965e-08, + "loss": 1.2243, + "step": 5388 + }, + { + "epoch": 0.99, + "learning_rate": 2.438870855434594e-08, + "loss": 1.1814, + "step": 5389 + }, + { + "epoch": 0.99, + "learning_rate": 2.3088265010473565e-08, + "loss": 1.054, + "step": 5390 + }, + { + "epoch": 0.99, + "learning_rate": 2.1823444648605416e-08, + "loss": 1.2051, + "step": 5391 + }, + { + "epoch": 0.99, + "learning_rate": 2.0594247919414333e-08, + "loss": 1.1925, + "step": 5392 + }, + { + "epoch": 0.99, + "learning_rate": 1.9400675260883294e-08, + "loss": 1.2997, + "step": 5393 + }, + { + "epoch": 0.99, + "learning_rate": 1.8242727098305435e-08, + "loss": 1.005, + "step": 5394 + }, + { + "epoch": 0.99, + "learning_rate": 1.7120403844272937e-08, + "loss": 1.0594, + "step": 5395 + }, + { + "epoch": 0.99, + "learning_rate": 1.603370589867703e-08, + "loss": 1.108, + "step": 5396 + }, + { + "epoch": 0.99, + "learning_rate": 1.4982633648730204e-08, + "loss": 1.1185, + "step": 5397 + }, + { + "epoch": 0.99, + "learning_rate": 1.3967187468932885e-08, + "loss": 1.1278, + "step": 5398 + }, + { + "epoch": 0.99, + "learning_rate": 1.2987367721128963e-08, + "loss": 1.0359, + "step": 5399 + }, + { + "epoch": 1.0, + "learning_rate": 1.2043174754405861e-08, + "loss": 1.045, + "step": 5400 + }, + { + "epoch": 1.0, + "learning_rate": 1.1134608905227773e-08, + "loss": 1.131, + "step": 5401 + }, + { + "epoch": 1.0, + "learning_rate": 1.0261670497313525e-08, + "loss": 1.0967, + "step": 5402 + }, + { + "epoch": 1.0, + "learning_rate": 9.4243598417032e-09, + "loss": 1.1691, + "step": 5403 + }, + { + "epoch": 1.0, + "learning_rate": 8.622677236735932e-09, + "loss": 1.1894, + "step": 5404 + }, + { + "epoch": 1.0, + "learning_rate": 7.856622968072103e-09, + "loss": 1.0784, + "step": 5405 + }, + { + "epoch": 1.0, + "learning_rate": 7.126197308671145e-09, + "loss": 0.9691, + "step": 5406 + }, + { + "epoch": 1.0, + "learning_rate": 6.4314005187804394e-09, + "loss": 1.1681, + "step": 5407 + }, + { + "epoch": 1.0, + "learning_rate": 5.7722328459797195e-09, + "loss": 0.9371, + "step": 5408 + }, + { + "epoch": 1.0, + "learning_rate": 5.148694525125564e-09, + "loss": 1.1532, + "step": 5409 + }, + { + "epoch": 1.0, + "learning_rate": 4.5607857784069065e-09, + "loss": 1.0514, + "step": 5410 + }, + { + "epoch": 1.0, + "learning_rate": 4.0085068153006274e-09, + "loss": 0.9596, + "step": 5411 + }, + { + "epoch": 1.0, + "learning_rate": 3.491857832593759e-09, + "loss": 1.0504, + "step": 5412 + }, + { + "epoch": 1.0, + "learning_rate": 3.0108390143612774e-09, + "loss": 1.0813, + "step": 5413 + }, + { + "epoch": 1.0, + "learning_rate": 2.565450532010516e-09, + "loss": 1.1206, + "step": 5414 + }, + { + "epoch": 1.0, + "learning_rate": 2.1556925442367535e-09, + "loss": 1.0285, + "step": 5415 + }, + { + "epoch": 1.0, + "learning_rate": 1.7815651970343182e-09, + "loss": 1.1434, + "step": 5416 + }, + { + "epoch": 1.0, + "learning_rate": 1.443068623729893e-09, + "loss": 1.1967, + "step": 5417 + }, + { + "epoch": 1.0, + "learning_rate": 1.1402029449159024e-09, + "loss": 1.0487, + "step": 5418 + }, + { + "epoch": 1.0, + "learning_rate": 8.729682685171269e-10, + "loss": 1.0647, + "step": 5419 + }, + { + "epoch": 1.0, + "learning_rate": 6.413646897462933e-10, + "loss": 1.1945, + "step": 5420 + }, + { + "epoch": 1.0, + "learning_rate": 4.453922911262787e-10, + "loss": 1.0537, + "step": 5421 + }, + { + "epoch": 1.0, + "learning_rate": 2.8505114250121367e-10, + "loss": 1.1131, + "step": 5422 + }, + { + "epoch": 1.0, + "learning_rate": 1.6034130098097067e-10, + "loss": 1.135, + "step": 5423 + }, + { + "epoch": 1.0, + "learning_rate": 7.126281101887955e-11, + "loss": 0.9845, + "step": 5424 + }, + { + "epoch": 1.0, + "learning_rate": 1.781570434511437e-11, + "loss": 1.0743, + "step": 5425 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 0.9744, + "step": 5426 + }, + { + "epoch": 1.0, + "step": 5426, + "total_flos": 4836567874666496.0, + "train_loss": 1.177212331827127, + "train_runtime": 28954.0997, + "train_samples_per_second": 23.99, + "train_steps_per_second": 0.187 + } + ], + "logging_steps": 1.0, + "max_steps": 5426, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 500, + "total_flos": 4836567874666496.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}