diff --git "a/last-checkpoint/trainer_state.json" "b/last-checkpoint/trainer_state.json" new file mode 100644--- /dev/null +++ "b/last-checkpoint/trainer_state.json" @@ -0,0 +1,90256 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.015, + "global_step": 15000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5.000000000000001e-07, + "loss": 0.931, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 1.0000000000000002e-06, + "loss": 0.6816, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 1.5e-06, + "loss": 1.2796, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 2.0000000000000003e-06, + "loss": 1.119, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 2.5e-06, + "loss": 0.8744, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 0.1561, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 3.5000000000000004e-06, + "loss": 1.0062, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 4.000000000000001e-06, + "loss": 1.1478, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 4.5e-06, + "loss": 0.5617, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 5e-06, + "loss": 0.5931, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 5.500000000000001e-06, + "loss": 0.6567, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 6e-06, + "loss": 0.6785, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 6.5000000000000004e-06, + "loss": 1.0113, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 7.000000000000001e-06, + "loss": 0.8829, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 7.5e-06, + "loss": 0.8225, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 8.000000000000001e-06, + "loss": 1.1247, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 8.500000000000002e-06, + "loss": 1.168, + "step": 17 + }, + { + "epoch": 0.0, + "learning_rate": 9e-06, + "loss": 1.0633, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 9.5e-06, + "loss": 1.0441, + "step": 19 + }, + { + "epoch": 0.0, + "learning_rate": 1e-05, + "loss": 1.2362, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 1.05e-05, + "loss": 1.1747, + "step": 21 + }, + { + "epoch": 0.0, + "learning_rate": 1.1000000000000001e-05, + "loss": 0.9891, + "step": 22 + }, + { + "epoch": 0.0, + "learning_rate": 1.1500000000000002e-05, + "loss": 0.8767, + "step": 23 + }, + { + "epoch": 0.0, + "learning_rate": 1.2e-05, + "loss": 0.9538, + "step": 24 + }, + { + "epoch": 0.0, + "learning_rate": 1.25e-05, + "loss": 0.9734, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 1.3000000000000001e-05, + "loss": 0.839, + "step": 26 + }, + { + "epoch": 0.0, + "learning_rate": 1.3500000000000001e-05, + "loss": 0.7487, + "step": 27 + }, + { + "epoch": 0.0, + "learning_rate": 1.4000000000000001e-05, + "loss": 0.9824, + "step": 28 + }, + { + "epoch": 0.0, + "learning_rate": 1.45e-05, + "loss": 1.0037, + "step": 29 + }, + { + "epoch": 0.0, + "learning_rate": 1.5e-05, + "loss": 1.32, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 1.55e-05, + "loss": 0.9389, + "step": 31 + }, + { + "epoch": 0.0, + "learning_rate": 1.6000000000000003e-05, + "loss": 0.9922, + "step": 32 + }, + { + "epoch": 0.0, + "learning_rate": 1.65e-05, + "loss": 0.9843, + "step": 33 + }, + { + "epoch": 0.0, + "learning_rate": 1.7000000000000003e-05, + "loss": 1.0919, + "step": 34 + }, + { + "epoch": 0.0, + "learning_rate": 1.75e-05, + "loss": 0.8617, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 1.8e-05, + "loss": 1.0183, + "step": 36 + }, + { + "epoch": 0.0, + "learning_rate": 1.85e-05, + "loss": 0.9565, + "step": 37 + }, + { + "epoch": 0.0, + "learning_rate": 1.9e-05, + "loss": 0.79, + "step": 38 + }, + { + "epoch": 0.0, + "learning_rate": 1.9500000000000003e-05, + "loss": 1.2117, + "step": 39 + }, + { + "epoch": 0.0, + "learning_rate": 2e-05, + "loss": 1.111, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 2.05e-05, + "loss": 1.8326, + "step": 41 + }, + { + "epoch": 0.0, + "learning_rate": 2.1e-05, + "loss": 1.7969, + "step": 42 + }, + { + "epoch": 0.0, + "learning_rate": 2.15e-05, + "loss": 1.0872, + "step": 43 + }, + { + "epoch": 0.0, + "learning_rate": 2.2000000000000003e-05, + "loss": 1.0279, + "step": 44 + }, + { + "epoch": 0.0, + "learning_rate": 2.25e-05, + "loss": 1.2225, + "step": 45 + }, + { + "epoch": 0.0, + "learning_rate": 2.3000000000000003e-05, + "loss": 0.9716, + "step": 46 + }, + { + "epoch": 0.0, + "learning_rate": 2.35e-05, + "loss": 1.2187, + "step": 47 + }, + { + "epoch": 0.0, + "learning_rate": 2.4e-05, + "loss": 0.871, + "step": 48 + }, + { + "epoch": 0.0, + "learning_rate": 2.45e-05, + "loss": 0.8476, + "step": 49 + }, + { + "epoch": 0.0, + "learning_rate": 2.5e-05, + "loss": 0.8047, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 2.5500000000000003e-05, + "loss": 1.2494, + "step": 51 + }, + { + "epoch": 0.0, + "learning_rate": 2.6000000000000002e-05, + "loss": 1.0532, + "step": 52 + }, + { + "epoch": 0.0, + "learning_rate": 2.6500000000000004e-05, + "loss": 1.0323, + "step": 53 + }, + { + "epoch": 0.0, + "learning_rate": 2.7000000000000002e-05, + "loss": 1.0223, + "step": 54 + }, + { + "epoch": 0.0, + "learning_rate": 2.7500000000000004e-05, + "loss": 0.9494, + "step": 55 + }, + { + "epoch": 0.0, + "learning_rate": 2.8000000000000003e-05, + "loss": 1.0272, + "step": 56 + }, + { + "epoch": 0.0, + "learning_rate": 2.8499999999999998e-05, + "loss": 1.2829, + "step": 57 + }, + { + "epoch": 0.0, + "learning_rate": 2.9e-05, + "loss": 1.2682, + "step": 58 + }, + { + "epoch": 0.0, + "learning_rate": 2.95e-05, + "loss": 1.6736, + "step": 59 + }, + { + "epoch": 0.0, + "learning_rate": 3e-05, + "loss": 2.3082, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 3.05e-05, + "loss": 1.1302, + "step": 61 + }, + { + "epoch": 0.0, + "learning_rate": 3.1e-05, + "loss": 0.7822, + "step": 62 + }, + { + "epoch": 0.0, + "learning_rate": 3.15e-05, + "loss": 0.9861, + "step": 63 + }, + { + "epoch": 0.0, + "learning_rate": 3.2000000000000005e-05, + "loss": 0.9721, + "step": 64 + }, + { + "epoch": 0.0, + "learning_rate": 3.2500000000000004e-05, + "loss": 0.6753, + "step": 65 + }, + { + "epoch": 0.0, + "learning_rate": 3.3e-05, + "loss": 1.047, + "step": 66 + }, + { + "epoch": 0.0, + "learning_rate": 3.35e-05, + "loss": 1.0422, + "step": 67 + }, + { + "epoch": 0.0, + "learning_rate": 3.4000000000000007e-05, + "loss": 0.9134, + "step": 68 + }, + { + "epoch": 0.0, + "learning_rate": 3.45e-05, + "loss": 1.5718, + "step": 69 + }, + { + "epoch": 0.0, + "learning_rate": 3.5e-05, + "loss": 1.0625, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 3.55e-05, + "loss": 1.0779, + "step": 71 + }, + { + "epoch": 0.0, + "learning_rate": 3.6e-05, + "loss": 1.1702, + "step": 72 + }, + { + "epoch": 0.0, + "learning_rate": 3.65e-05, + "loss": 1.1024, + "step": 73 + }, + { + "epoch": 0.0, + "learning_rate": 3.7e-05, + "loss": 1.0382, + "step": 74 + }, + { + "epoch": 0.0, + "learning_rate": 3.7500000000000003e-05, + "loss": 1.0676, + "step": 75 + }, + { + "epoch": 0.0, + "learning_rate": 3.8e-05, + "loss": 1.2035, + "step": 76 + }, + { + "epoch": 0.0, + "learning_rate": 3.85e-05, + "loss": 1.0576, + "step": 77 + }, + { + "epoch": 0.0, + "learning_rate": 3.9000000000000006e-05, + "loss": 1.1175, + "step": 78 + }, + { + "epoch": 0.0, + "learning_rate": 3.9500000000000005e-05, + "loss": 1.0663, + "step": 79 + }, + { + "epoch": 0.0, + "learning_rate": 4e-05, + "loss": 1.2545, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 4.05e-05, + "loss": 0.7244, + "step": 81 + }, + { + "epoch": 0.0, + "learning_rate": 4.1e-05, + "loss": 1.1455, + "step": 82 + }, + { + "epoch": 0.0, + "learning_rate": 4.15e-05, + "loss": 1.1514, + "step": 83 + }, + { + "epoch": 0.0, + "learning_rate": 4.2e-05, + "loss": 1.0883, + "step": 84 + }, + { + "epoch": 0.0, + "learning_rate": 4.25e-05, + "loss": 0.8759, + "step": 85 + }, + { + "epoch": 0.0, + "learning_rate": 4.3e-05, + "loss": 1.0519, + "step": 86 + }, + { + "epoch": 0.0, + "learning_rate": 4.35e-05, + "loss": 1.1014, + "step": 87 + }, + { + "epoch": 0.0, + "learning_rate": 4.4000000000000006e-05, + "loss": 0.918, + "step": 88 + }, + { + "epoch": 0.0, + "learning_rate": 4.4500000000000004e-05, + "loss": 1.0174, + "step": 89 + }, + { + "epoch": 0.0, + "learning_rate": 4.5e-05, + "loss": 1.0974, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 4.55e-05, + "loss": 1.0889, + "step": 91 + }, + { + "epoch": 0.0, + "learning_rate": 4.600000000000001e-05, + "loss": 1.0881, + "step": 92 + }, + { + "epoch": 0.0, + "learning_rate": 4.6500000000000005e-05, + "loss": 0.8923, + "step": 93 + }, + { + "epoch": 0.0, + "learning_rate": 4.7e-05, + "loss": 1.0121, + "step": 94 + }, + { + "epoch": 0.0, + "learning_rate": 4.75e-05, + "loss": 1.3273, + "step": 95 + }, + { + "epoch": 0.0, + "learning_rate": 4.8e-05, + "loss": 1.0579, + "step": 96 + }, + { + "epoch": 0.0, + "learning_rate": 4.85e-05, + "loss": 1.149, + "step": 97 + }, + { + "epoch": 0.0, + "learning_rate": 4.9e-05, + "loss": 1.4682, + "step": 98 + }, + { + "epoch": 0.0, + "learning_rate": 4.9500000000000004e-05, + "loss": 1.4034, + "step": 99 + }, + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 0.8914, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999999987661e-05, + "loss": 1.0526, + "step": 101 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999999950642e-05, + "loss": 1.2849, + "step": 102 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999999888945e-05, + "loss": 1.2292, + "step": 103 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999999802569e-05, + "loss": 1.0693, + "step": 104 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999996915136e-05, + "loss": 0.952, + "step": 105 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999999555779e-05, + "loss": 1.1392, + "step": 106 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999993953664e-05, + "loss": 1.0248, + "step": 107 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999999210274e-05, + "loss": 1.2939, + "step": 108 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999990005034e-05, + "loss": 1.2429, + "step": 109 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999998766053e-05, + "loss": 1.1394, + "step": 110 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999998506924e-05, + "loss": 1.3022, + "step": 111 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999998223116e-05, + "loss": 0.9869, + "step": 112 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999979146295e-05, + "loss": 1.2054, + "step": 113 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999997581464e-05, + "loss": 1.1226, + "step": 114 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999997223619e-05, + "loss": 1.0491, + "step": 115 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999996841095e-05, + "loss": 0.8425, + "step": 116 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999964338926e-05, + "loss": 1.305, + "step": 117 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999996002011e-05, + "loss": 1.0233, + "step": 118 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999999554545e-05, + "loss": 0.5656, + "step": 119 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999950642105e-05, + "loss": 1.0478, + "step": 120 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999945582926e-05, + "loss": 1.191, + "step": 121 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999940276956e-05, + "loss": 1.3638, + "step": 122 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999993472419e-05, + "loss": 0.355, + "step": 123 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999928924634e-05, + "loss": 1.095, + "step": 124 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999922878296e-05, + "loss": 1.151, + "step": 125 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999991658516e-05, + "loss": 1.4362, + "step": 126 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999991004524e-05, + "loss": 1.0988, + "step": 127 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999990325853e-05, + "loss": 0.8029, + "step": 128 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999896225036e-05, + "loss": 1.2147, + "step": 129 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999988894474e-05, + "loss": 1.1023, + "step": 130 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999881417667e-05, + "loss": 0.9649, + "step": 131 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999998736438e-05, + "loss": 1.059, + "step": 132 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999986562314e-05, + "loss": 1.4863, + "step": 133 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999998573557e-05, + "loss": 1.4881, + "step": 134 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999984884146e-05, + "loss": 1.0374, + "step": 135 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999840080426e-05, + "loss": 1.0476, + "step": 136 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999983107262e-05, + "loss": 1.0538, + "step": 137 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999982181801e-05, + "loss": 0.8671, + "step": 138 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999981231661e-05, + "loss": 1.1986, + "step": 139 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999802568433e-05, + "loss": 1.2, + "step": 140 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999979257346e-05, + "loss": 1.1648, + "step": 141 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999997823317e-05, + "loss": 1.0579, + "step": 142 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999977184314e-05, + "loss": 1.0379, + "step": 143 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999997611078e-05, + "loss": 0.5544, + "step": 144 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999975012567e-05, + "loss": 0.9992, + "step": 145 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999973889675e-05, + "loss": 1.2599, + "step": 146 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999972742104e-05, + "loss": 1.3262, + "step": 147 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999715698544e-05, + "loss": 1.3029, + "step": 148 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999970372925e-05, + "loss": 1.238, + "step": 149 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999969151317e-05, + "loss": 1.1567, + "step": 150 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999996790503e-05, + "loss": 1.1385, + "step": 151 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999666340646e-05, + "loss": 1.068, + "step": 152 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999996533842e-05, + "loss": 1.2784, + "step": 153 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999964018096e-05, + "loss": 0.7641, + "step": 154 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999962673094e-05, + "loss": 0.6886, + "step": 155 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999961303412e-05, + "loss": 1.1172, + "step": 156 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999959909052e-05, + "loss": 0.847, + "step": 157 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999958490012e-05, + "loss": 0.8203, + "step": 158 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999957046294e-05, + "loss": 1.3131, + "step": 159 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999955577897e-05, + "loss": 1.4838, + "step": 160 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999995408482e-05, + "loss": 1.1396, + "step": 161 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999525670646e-05, + "loss": 1.3872, + "step": 162 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999951024631e-05, + "loss": 1.9415, + "step": 163 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999949457518e-05, + "loss": 1.5973, + "step": 164 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999478657254e-05, + "loss": 1.2385, + "step": 165 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999462492545e-05, + "loss": 1.2302, + "step": 166 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999446081045e-05, + "loss": 1.08, + "step": 167 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999942942276e-05, + "loss": 1.3935, + "step": 168 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999941251768e-05, + "loss": 1.2663, + "step": 169 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999395365814e-05, + "loss": 1.3094, + "step": 170 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999377967155e-05, + "loss": 1.2008, + "step": 171 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999936032172e-05, + "loss": 0.8156, + "step": 172 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999342429474e-05, + "loss": 4.8589, + "step": 173 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999324290446e-05, + "loss": 6.0248, + "step": 174 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999305904634e-05, + "loss": 5.9313, + "step": 175 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999928727202e-05, + "loss": 5.9243, + "step": 176 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999926839264e-05, + "loss": 5.7681, + "step": 177 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999924926645e-05, + "loss": 5.6746, + "step": 178 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999229893476e-05, + "loss": 5.7433, + "step": 179 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999921027372e-05, + "loss": 5.7031, + "step": 180 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999190407165e-05, + "loss": 5.5989, + "step": 181 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999917029382e-05, + "loss": 5.24, + "step": 182 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999149933695e-05, + "loss": 5.0161, + "step": 183 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999912932677e-05, + "loss": 4.415, + "step": 184 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999910847306e-05, + "loss": 5.2371, + "step": 185 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999908737256e-05, + "loss": 5.5717, + "step": 186 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999066025276e-05, + "loss": 5.6388, + "step": 187 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999990444312e-05, + "loss": 5.6072, + "step": 188 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999902259033e-05, + "loss": 5.6016, + "step": 189 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999999000502676e-05, + "loss": 5.5708, + "step": 190 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999897816823e-05, + "loss": 5.5852, + "step": 191 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999895558699e-05, + "loss": 5.572, + "step": 192 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999893275897e-05, + "loss": 5.529, + "step": 193 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999998909684156e-05, + "loss": 5.5539, + "step": 194 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999888636255e-05, + "loss": 5.563, + "step": 195 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999998862794154e-05, + "loss": 2.3107, + "step": 196 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999883897897e-05, + "loss": 1.4454, + "step": 197 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999998814917e-05, + "loss": 1.4917, + "step": 198 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999879060824e-05, + "loss": 1.0891, + "step": 199 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999998766052684e-05, + "loss": 0.7431, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999874125034e-05, + "loss": 1.0081, + "step": 201 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999871620121e-05, + "loss": 1.4269, + "step": 202 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999869090529e-05, + "loss": 1.1632, + "step": 203 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999998665362585e-05, + "loss": 0.9132, + "step": 204 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999863957309e-05, + "loss": 0.708, + "step": 205 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999986135368e-05, + "loss": 1.159, + "step": 206 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999858725372e-05, + "loss": 1.2212, + "step": 207 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999998560723855e-05, + "loss": 1.1131, + "step": 208 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999985339472e-05, + "loss": 0.9072, + "step": 209 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999850692375e-05, + "loss": 1.3964, + "step": 210 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999847965352e-05, + "loss": 1.2253, + "step": 211 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999845213649e-05, + "loss": 0.8362, + "step": 212 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999842437267e-05, + "loss": 0.6663, + "step": 213 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999839636207e-05, + "loss": 1.006, + "step": 214 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999998368104686e-05, + "loss": 1.6042, + "step": 215 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999998339600496e-05, + "loss": 1.1701, + "step": 216 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999831084953e-05, + "loss": 1.1874, + "step": 217 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999828185177e-05, + "loss": 1.1069, + "step": 218 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999825260722e-05, + "loss": 0.9765, + "step": 219 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999998223115875e-05, + "loss": 0.8295, + "step": 220 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999819337774e-05, + "loss": 1.012, + "step": 221 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999816339282e-05, + "loss": 1.1255, + "step": 222 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999813316111e-05, + "loss": 1.2156, + "step": 223 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999998102682616e-05, + "loss": 1.0133, + "step": 224 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999807195733e-05, + "loss": 1.111, + "step": 225 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999804098525e-05, + "loss": 1.0167, + "step": 226 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999800976639e-05, + "loss": 1.2424, + "step": 227 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999797830073e-05, + "loss": 1.1881, + "step": 228 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999794658828e-05, + "loss": 1.2338, + "step": 229 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999791462905e-05, + "loss": 1.277, + "step": 230 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999788242302e-05, + "loss": 1.1273, + "step": 231 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999784997021e-05, + "loss": 1.1403, + "step": 232 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999978172706e-05, + "loss": 1.1259, + "step": 233 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999778432421e-05, + "loss": 1.1319, + "step": 234 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999775113103e-05, + "loss": 1.1755, + "step": 235 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999771769106e-05, + "loss": 0.8809, + "step": 236 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999768400429e-05, + "loss": 0.8313, + "step": 237 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999765007075e-05, + "loss": 0.4808, + "step": 238 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999761589041e-05, + "loss": 0.3866, + "step": 239 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999758146328e-05, + "loss": 0.4625, + "step": 240 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999754678936e-05, + "loss": 1.005, + "step": 241 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999751186866e-05, + "loss": 0.478, + "step": 242 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999747670115e-05, + "loss": 0.4184, + "step": 243 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999744128687e-05, + "loss": 0.3674, + "step": 244 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999740562579e-05, + "loss": 0.3536, + "step": 245 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999736971793e-05, + "loss": 0.3212, + "step": 246 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999733356327e-05, + "loss": 0.3954, + "step": 247 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999997297161825e-05, + "loss": 0.5134, + "step": 248 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999726051359e-05, + "loss": 0.2877, + "step": 249 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999722361857e-05, + "loss": 0.3694, + "step": 250 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999718647676e-05, + "loss": 0.4496, + "step": 251 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999997149088154e-05, + "loss": 0.2565, + "step": 252 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999711145276e-05, + "loss": 0.3561, + "step": 253 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999707357058e-05, + "loss": 0.3123, + "step": 254 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999970354416e-05, + "loss": 0.2779, + "step": 255 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999996997065845e-05, + "loss": 0.6054, + "step": 256 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999969584433e-05, + "loss": 1.317, + "step": 257 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999691957395e-05, + "loss": 1.1632, + "step": 258 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999996880457834e-05, + "loss": 1.1206, + "step": 259 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999684109491e-05, + "loss": 1.2338, + "step": 260 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999996801485205e-05, + "loss": 1.0508, + "step": 261 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999676162871e-05, + "loss": 1.3887, + "step": 262 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999996721525424e-05, + "loss": 1.1023, + "step": 263 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999668117534e-05, + "loss": 1.3006, + "step": 264 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999996640578476e-05, + "loss": 1.1455, + "step": 265 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999659973482e-05, + "loss": 1.1464, + "step": 266 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999996558644376e-05, + "loss": 1.1131, + "step": 267 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999651730715e-05, + "loss": 1.2394, + "step": 268 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999996475723124e-05, + "loss": 1.2348, + "step": 269 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999996433892314e-05, + "loss": 1.2294, + "step": 270 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999996391814705e-05, + "loss": 0.9849, + "step": 271 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999634949032e-05, + "loss": 1.1667, + "step": 272 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999630691914e-05, + "loss": 1.28, + "step": 273 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999996264101164e-05, + "loss": 1.1602, + "step": 274 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999962210364e-05, + "loss": 1.2452, + "step": 275 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999996177724864e-05, + "loss": 1.4849, + "step": 276 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999613416652e-05, + "loss": 1.1768, + "step": 277 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999609036139e-05, + "loss": 1.0437, + "step": 278 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999604630947e-05, + "loss": 0.8973, + "step": 279 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999996002010766e-05, + "loss": 1.0841, + "step": 280 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999995957465276e-05, + "loss": 0.7193, + "step": 281 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999591267299e-05, + "loss": 1.3937, + "step": 282 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999586763391e-05, + "loss": 1.2132, + "step": 283 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999995822348044e-05, + "loss": 1.2343, + "step": 284 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999995776815395e-05, + "loss": 1.1092, + "step": 285 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999995731035955e-05, + "loss": 1.2969, + "step": 286 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999995685009716e-05, + "loss": 0.8917, + "step": 287 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999956387367e-05, + "loss": 0.9051, + "step": 288 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999995592216884e-05, + "loss": 1.0114, + "step": 289 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999995545450284e-05, + "loss": 0.9757, + "step": 290 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999549843689e-05, + "loss": 1.237, + "step": 291 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999545117671e-05, + "loss": 1.1462, + "step": 292 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999540366974e-05, + "loss": 1.1132, + "step": 293 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999535591598e-05, + "loss": 1.1785, + "step": 294 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999530791544e-05, + "loss": 1.0634, + "step": 295 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999995259668096e-05, + "loss": 1.3001, + "step": 296 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999521117397e-05, + "loss": 1.7609, + "step": 297 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999516243306e-05, + "loss": 0.939, + "step": 298 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999995113445354e-05, + "loss": 1.0552, + "step": 299 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999506421086e-05, + "loss": 0.9572, + "step": 300 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999501472957e-05, + "loss": 0.925, + "step": 301 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999994965001495e-05, + "loss": 0.8796, + "step": 302 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999491502664e-05, + "loss": 0.9126, + "step": 303 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999486480498e-05, + "loss": 1.2216, + "step": 304 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999481433654e-05, + "loss": 1.1434, + "step": 305 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999947636213e-05, + "loss": 1.1265, + "step": 306 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999471265929e-05, + "loss": 1.1126, + "step": 307 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999466145048e-05, + "loss": 1.0995, + "step": 308 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999994609994876e-05, + "loss": 0.9912, + "step": 309 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999994558292486e-05, + "loss": 1.1196, + "step": 310 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999994506343305e-05, + "loss": 1.2063, + "step": 311 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999445414734e-05, + "loss": 1.1179, + "step": 312 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999440170459e-05, + "loss": 1.292, + "step": 313 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999434901503e-05, + "loss": 1.1431, + "step": 314 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999942960787e-05, + "loss": 1.0945, + "step": 315 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999994242895574e-05, + "loss": 1.1347, + "step": 316 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999418946566e-05, + "loss": 1.0005, + "step": 317 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999994135788956e-05, + "loss": 1.0945, + "step": 318 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999994081865456e-05, + "loss": 1.1316, + "step": 319 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999402769517e-05, + "loss": 1.1023, + "step": 320 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999939732781e-05, + "loss": 1.2254, + "step": 321 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999391861424e-05, + "loss": 1.1501, + "step": 322 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999386370359e-05, + "loss": 0.8656, + "step": 323 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999380854615e-05, + "loss": 1.197, + "step": 324 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999993753141917e-05, + "loss": 1.109, + "step": 325 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999993697490896e-05, + "loss": 1.1346, + "step": 326 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999364159309e-05, + "loss": 1.0394, + "step": 327 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999358544849e-05, + "loss": 1.27, + "step": 328 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999935290571e-05, + "loss": 1.2734, + "step": 329 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999347241893e-05, + "loss": 1.0557, + "step": 330 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999341553396e-05, + "loss": 1.1114, + "step": 331 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999933584022e-05, + "loss": 1.4432, + "step": 332 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999993301023654e-05, + "loss": 0.9807, + "step": 333 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999324339832e-05, + "loss": 1.1951, + "step": 334 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999318552619e-05, + "loss": 1.0012, + "step": 335 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999993127407284e-05, + "loss": 1.1595, + "step": 336 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999993069041584e-05, + "loss": 1.1289, + "step": 337 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999301042909e-05, + "loss": 1.2733, + "step": 338 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999295156981e-05, + "loss": 1.1768, + "step": 339 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999289246373e-05, + "loss": 1.2367, + "step": 340 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999283311088e-05, + "loss": 1.0992, + "step": 341 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999277351122e-05, + "loss": 1.0069, + "step": 342 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999992713664784e-05, + "loss": 0.9432, + "step": 343 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999265357155e-05, + "loss": 1.1886, + "step": 344 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999259323154e-05, + "loss": 1.1029, + "step": 345 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999253264473e-05, + "loss": 0.7799, + "step": 346 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999247181113e-05, + "loss": 1.2012, + "step": 347 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999241073074e-05, + "loss": 0.8971, + "step": 348 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999234940357e-05, + "loss": 1.1842, + "step": 349 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999922878296e-05, + "loss": 1.114, + "step": 350 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999222600885e-05, + "loss": 1.3876, + "step": 351 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999921639413e-05, + "loss": 1.2368, + "step": 352 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999992101626966e-05, + "loss": 1.2026, + "step": 353 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999992039065844e-05, + "loss": 1.1897, + "step": 354 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999197625794e-05, + "loss": 1.0612, + "step": 355 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999191320324e-05, + "loss": 1.0526, + "step": 356 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999184990174e-05, + "loss": 1.1144, + "step": 357 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999178635346e-05, + "loss": 1.3094, + "step": 358 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999172255839e-05, + "loss": 0.4662, + "step": 359 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999165851653e-05, + "loss": 0.2525, + "step": 360 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999991594227885e-05, + "loss": 0.8735, + "step": 361 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999991529692444e-05, + "loss": 1.0231, + "step": 362 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999991464910226e-05, + "loss": 1.2666, + "step": 363 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999139988121e-05, + "loss": 1.1313, + "step": 364 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999913346054e-05, + "loss": 1.2996, + "step": 365 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999912690828e-05, + "loss": 1.1301, + "step": 366 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999120331342e-05, + "loss": 1.1243, + "step": 367 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999113729724e-05, + "loss": 1.3049, + "step": 368 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999107103428e-05, + "loss": 1.3117, + "step": 369 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999100452453e-05, + "loss": 1.1549, + "step": 370 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999093776798e-05, + "loss": 1.0233, + "step": 371 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999087076465e-05, + "loss": 1.3463, + "step": 372 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999080351453e-05, + "loss": 1.0907, + "step": 373 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999073601762e-05, + "loss": 0.9733, + "step": 374 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999066827392e-05, + "loss": 1.4368, + "step": 375 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999060028343e-05, + "loss": 5.9099, + "step": 376 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999990532046156e-05, + "loss": 3.7277, + "step": 377 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999990463562086e-05, + "loss": 1.1872, + "step": 378 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999039483123e-05, + "loss": 1.0093, + "step": 379 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999990325853577e-05, + "loss": 1.0568, + "step": 380 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999990256629145e-05, + "loss": 0.831, + "step": 381 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999018715792e-05, + "loss": 0.9143, + "step": 382 + }, + { + "epoch": 0.0, + "learning_rate": 4.999999011743991e-05, + "loss": 1.6355, + "step": 383 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999900474751e-05, + "loss": 1.4826, + "step": 384 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999899772635e-05, + "loss": 1.0458, + "step": 385 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998990680512e-05, + "loss": 1.1318, + "step": 386 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999989836099946e-05, + "loss": 1.0718, + "step": 387 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999989765147994e-05, + "loss": 0.8531, + "step": 388 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998969394924e-05, + "loss": 0.9618, + "step": 389 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999989622503695e-05, + "loss": 1.2255, + "step": 390 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998955081137e-05, + "loss": 1.3624, + "step": 391 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998947887225e-05, + "loss": 1.1627, + "step": 392 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998940668634e-05, + "loss": 1.2721, + "step": 393 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998933425365e-05, + "loss": 1.1621, + "step": 394 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999989261574154e-05, + "loss": 1.2259, + "step": 395 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998918864788e-05, + "loss": 1.1092, + "step": 396 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998911547481e-05, + "loss": 1.205, + "step": 397 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999989042054954e-05, + "loss": 1.164, + "step": 398 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998896838831e-05, + "loss": 1.083, + "step": 399 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998889447488e-05, + "loss": 1.0954, + "step": 400 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998882031466e-05, + "loss": 0.9265, + "step": 401 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999988745907645e-05, + "loss": 0.8312, + "step": 402 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998867125384e-05, + "loss": 0.8345, + "step": 403 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998859635325e-05, + "loss": 0.7232, + "step": 404 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999988521205864e-05, + "loss": 0.6289, + "step": 405 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999884458117e-05, + "loss": 0.9681, + "step": 406 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999988370170735e-05, + "loss": 1.2402, + "step": 407 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998829428299e-05, + "loss": 1.1044, + "step": 408 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998821814845e-05, + "loss": 0.9407, + "step": 409 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998814176713e-05, + "loss": 1.0668, + "step": 410 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998806513901e-05, + "loss": 1.1896, + "step": 411 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999987988264104e-05, + "loss": 1.1691, + "step": 412 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998791114241e-05, + "loss": 1.2159, + "step": 413 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998783377392e-05, + "loss": 1.3894, + "step": 414 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998775615865e-05, + "loss": 1.329, + "step": 415 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999987678296585e-05, + "loss": 1.0454, + "step": 416 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998760018773e-05, + "loss": 0.9511, + "step": 417 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998752183209e-05, + "loss": 0.4759, + "step": 418 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999987443229654e-05, + "loss": 0.8928, + "step": 419 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999987364380435e-05, + "loss": 0.9391, + "step": 420 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999987285284424e-05, + "loss": 1.1387, + "step": 421 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998720594162e-05, + "loss": 1.1293, + "step": 422 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999987126352034e-05, + "loss": 1.2704, + "step": 423 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998704651566e-05, + "loss": 1.137, + "step": 424 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998696643249e-05, + "loss": 0.9722, + "step": 425 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999986886102536e-05, + "loss": 1.2422, + "step": 426 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998680552579e-05, + "loss": 1.0776, + "step": 427 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998672470225e-05, + "loss": 0.8233, + "step": 428 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998664363192e-05, + "loss": 1.2957, + "step": 429 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998656231481e-05, + "loss": 1.0972, + "step": 430 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998648075091e-05, + "loss": 1.2265, + "step": 431 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998639894021e-05, + "loss": 1.3584, + "step": 432 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998631688273e-05, + "loss": 1.9795, + "step": 433 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998623457846e-05, + "loss": 1.4203, + "step": 434 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999861520274e-05, + "loss": 1.595, + "step": 435 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998606922955e-05, + "loss": 0.6731, + "step": 436 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998598618491e-05, + "loss": 1.2655, + "step": 437 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999985902893484e-05, + "loss": 1.1741, + "step": 438 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999985819355266e-05, + "loss": 1.0423, + "step": 439 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998573557026e-05, + "loss": 1.1478, + "step": 440 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998565153846e-05, + "loss": 1.044, + "step": 441 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998556725987e-05, + "loss": 0.7539, + "step": 442 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998548273449e-05, + "loss": 0.685, + "step": 443 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998539796233e-05, + "loss": 5.6911, + "step": 444 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998531294338e-05, + "loss": 5.9401, + "step": 445 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998522767764e-05, + "loss": 6.034, + "step": 446 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999851421651e-05, + "loss": 5.9444, + "step": 447 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998505640578e-05, + "loss": 5.9153, + "step": 448 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998497039967e-05, + "loss": 5.8, + "step": 449 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999984884146765e-05, + "loss": 5.7652, + "step": 450 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999984797647076e-05, + "loss": 5.5309, + "step": 451 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999984710900596e-05, + "loss": 5.6267, + "step": 452 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999984623907324e-05, + "loss": 1.8665, + "step": 453 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999984536667274e-05, + "loss": 1.5152, + "step": 454 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999984449180425e-05, + "loss": 0.5595, + "step": 455 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999984361446786e-05, + "loss": 1.2043, + "step": 456 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998427346636e-05, + "loss": 1.1112, + "step": 457 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999984185239145e-05, + "loss": 1.4293, + "step": 458 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998409676514e-05, + "loss": 1.0783, + "step": 459 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999984008044345e-05, + "loss": 1.049, + "step": 460 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998391907677e-05, + "loss": 1.029, + "step": 461 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998382986239e-05, + "loss": 0.8213, + "step": 462 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998374040123e-05, + "loss": 1.2395, + "step": 463 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999983650693274e-05, + "loss": 1.3614, + "step": 464 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998356073854e-05, + "loss": 1.1796, + "step": 465 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999983470537e-05, + "loss": 1.2114, + "step": 466 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998338008869e-05, + "loss": 0.973, + "step": 467 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998328939358e-05, + "loss": 0.9211, + "step": 468 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999983198451684e-05, + "loss": 1.0192, + "step": 469 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998310726299e-05, + "loss": 1.1802, + "step": 470 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998301582752e-05, + "loss": 0.9868, + "step": 471 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998292414525e-05, + "loss": 1.195, + "step": 472 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999828322162e-05, + "loss": 1.1628, + "step": 473 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998274004035e-05, + "loss": 0.9806, + "step": 474 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998264761772e-05, + "loss": 1.0956, + "step": 475 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999825549483e-05, + "loss": 1.3023, + "step": 476 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999982462032085e-05, + "loss": 1.203, + "step": 477 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998236886908e-05, + "loss": 1.5488, + "step": 478 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998227545929e-05, + "loss": 1.3148, + "step": 479 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998218180271e-05, + "loss": 1.2602, + "step": 480 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999982087899346e-05, + "loss": 1.1743, + "step": 481 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998199374918e-05, + "loss": 0.9299, + "step": 482 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998189935224e-05, + "loss": 0.7564, + "step": 483 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999981804708504e-05, + "loss": 1.0182, + "step": 484 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998170981798e-05, + "loss": 1.1082, + "step": 485 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998161468066e-05, + "loss": 1.3019, + "step": 486 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998151929656e-05, + "loss": 0.8937, + "step": 487 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998142366566e-05, + "loss": 0.8715, + "step": 488 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998132778798e-05, + "loss": 0.6235, + "step": 489 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998123166351e-05, + "loss": 0.5427, + "step": 490 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998113529224e-05, + "loss": 0.9652, + "step": 491 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998103867419e-05, + "loss": 1.0241, + "step": 492 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998094180935e-05, + "loss": 1.2874, + "step": 493 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998084469772e-05, + "loss": 1.1236, + "step": 494 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999980747339305e-05, + "loss": 1.2312, + "step": 495 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998064973409e-05, + "loss": 1.1679, + "step": 496 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999980551882095e-05, + "loss": 0.7927, + "step": 497 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999980453783306e-05, + "loss": 0.7081, + "step": 498 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998035543773e-05, + "loss": 1.2299, + "step": 499 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998025684536e-05, + "loss": 1.1899, + "step": 500 + }, + { + "epoch": 0.0, + "eval_loss": 1.1330914497375488, + "eval_runtime": 85.2447, + "eval_samples_per_second": 16.247, + "eval_steps_per_second": 4.071, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998015800621e-05, + "loss": 0.9488, + "step": 501 + }, + { + "epoch": 0.0, + "learning_rate": 4.999998005892026e-05, + "loss": 0.9233, + "step": 502 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997995958753e-05, + "loss": 1.0061, + "step": 503 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999979860008006e-05, + "loss": 0.8307, + "step": 504 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999797601817e-05, + "loss": 0.9544, + "step": 505 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999979660108597e-05, + "loss": 0.5917, + "step": 506 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999979559788704e-05, + "loss": 0.5971, + "step": 507 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997945922202e-05, + "loss": 1.3349, + "step": 508 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997935840855e-05, + "loss": 1.0993, + "step": 509 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999792573483e-05, + "loss": 1.0986, + "step": 510 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999979156041255e-05, + "loss": 1.0709, + "step": 511 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997905448741e-05, + "loss": 1.1506, + "step": 512 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997895268679e-05, + "loss": 0.9141, + "step": 513 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997885063937e-05, + "loss": 1.5013, + "step": 514 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997874834517e-05, + "loss": 0.9547, + "step": 515 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997864580418e-05, + "loss": 1.09, + "step": 516 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997854301639e-05, + "loss": 1.1179, + "step": 517 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997843998182e-05, + "loss": 1.0737, + "step": 518 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997833670046e-05, + "loss": 0.7466, + "step": 519 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997823317231e-05, + "loss": 0.9024, + "step": 520 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997812939737e-05, + "loss": 0.9933, + "step": 521 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997802537564e-05, + "loss": 0.8115, + "step": 522 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997792110712e-05, + "loss": 0.8077, + "step": 523 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999977816591816e-05, + "loss": 0.6865, + "step": 524 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997771182972e-05, + "loss": 0.5826, + "step": 525 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999977606820834e-05, + "loss": 1.1181, + "step": 526 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997750156515e-05, + "loss": 1.4692, + "step": 527 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997739606269e-05, + "loss": 1.0833, + "step": 528 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997729031344e-05, + "loss": 1.1319, + "step": 529 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997718431739e-05, + "loss": 1.252, + "step": 530 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997707807456e-05, + "loss": 1.0724, + "step": 531 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997697158494e-05, + "loss": 1.2738, + "step": 532 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997686484853e-05, + "loss": 0.9913, + "step": 533 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997675786533e-05, + "loss": 1.1738, + "step": 534 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997665063534e-05, + "loss": 1.3109, + "step": 535 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997654315855e-05, + "loss": 1.3323, + "step": 536 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997643543499e-05, + "loss": 1.1117, + "step": 537 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999976327464635e-05, + "loss": 1.0758, + "step": 538 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997621924749e-05, + "loss": 1.029, + "step": 539 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997611078355e-05, + "loss": 1.2414, + "step": 540 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997600207282e-05, + "loss": 1.1829, + "step": 541 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999975893115315e-05, + "loss": 1.037, + "step": 542 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999975783911016e-05, + "loss": 0.9917, + "step": 543 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997567445992e-05, + "loss": 0.9406, + "step": 544 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997556476204e-05, + "loss": 1.1532, + "step": 545 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997545481736e-05, + "loss": 0.9999, + "step": 546 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999975344625906e-05, + "loss": 1.0536, + "step": 547 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999975234187656e-05, + "loss": 1.1719, + "step": 548 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999975123502615e-05, + "loss": 1.0755, + "step": 549 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997501257079e-05, + "loss": 0.1063, + "step": 550 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997490139217e-05, + "loss": 0.0709, + "step": 551 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997478996676e-05, + "loss": 0.2467, + "step": 552 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997467829457e-05, + "loss": 0.2245, + "step": 553 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999974566375585e-05, + "loss": 0.1978, + "step": 554 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997445420981e-05, + "loss": 0.1696, + "step": 555 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999974341797254e-05, + "loss": 0.1546, + "step": 556 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999974229137894e-05, + "loss": 0.1919, + "step": 557 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997411623175e-05, + "loss": 0.2029, + "step": 558 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997400307882e-05, + "loss": 0.1537, + "step": 559 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999738896791e-05, + "loss": 0.1449, + "step": 560 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999973776032594e-05, + "loss": 0.7279, + "step": 561 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999736621393e-05, + "loss": 0.6953, + "step": 562 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997354799921e-05, + "loss": 0.6295, + "step": 563 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997343361233e-05, + "loss": 0.5171, + "step": 564 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999973318978664e-05, + "loss": 0.486, + "step": 565 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997320409821e-05, + "loss": 1.2449, + "step": 566 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997308897097e-05, + "loss": 1.4136, + "step": 567 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999972973596935e-05, + "loss": 1.2475, + "step": 568 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997285797611e-05, + "loss": 1.3221, + "step": 569 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999972742108495e-05, + "loss": 1.1538, + "step": 570 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999972625994095e-05, + "loss": 0.8241, + "step": 571 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997250963291e-05, + "loss": 1.1044, + "step": 572 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999972393024926e-05, + "loss": 1.2559, + "step": 573 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997227617016e-05, + "loss": 1.0469, + "step": 574 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999721590686e-05, + "loss": 1.2542, + "step": 575 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997204172025e-05, + "loss": 1.1862, + "step": 576 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997192412512e-05, + "loss": 1.3234, + "step": 577 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999971806283185e-05, + "loss": 0.8016, + "step": 578 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997168819447e-05, + "loss": 1.3645, + "step": 579 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997156985897e-05, + "loss": 1.1961, + "step": 580 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997145127668e-05, + "loss": 1.2315, + "step": 581 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999971332447594e-05, + "loss": 1.0403, + "step": 582 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997121337172e-05, + "loss": 1.1351, + "step": 583 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999971094049066e-05, + "loss": 1.1116, + "step": 584 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997097447961e-05, + "loss": 1.2778, + "step": 585 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997085466337e-05, + "loss": 1.1626, + "step": 586 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997073460034e-05, + "loss": 1.0976, + "step": 587 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999970614290527e-05, + "loss": 1.3518, + "step": 588 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997049373392e-05, + "loss": 1.1483, + "step": 589 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997037293053e-05, + "loss": 1.1493, + "step": 590 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997025188034e-05, + "loss": 1.1073, + "step": 591 + }, + { + "epoch": 0.0, + "learning_rate": 4.999997013058337e-05, + "loss": 1.3351, + "step": 592 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999970009039604e-05, + "loss": 1.0137, + "step": 593 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996988724905e-05, + "loss": 1.159, + "step": 594 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996976521171e-05, + "loss": 1.0414, + "step": 595 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996964292758e-05, + "loss": 1.2767, + "step": 596 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999969520396664e-05, + "loss": 1.0204, + "step": 597 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999969397618954e-05, + "loss": 0.8231, + "step": 598 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996927459445e-05, + "loss": 1.2339, + "step": 599 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999969151323165e-05, + "loss": 1.1286, + "step": 600 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996902780509e-05, + "loss": 1.3251, + "step": 601 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999968904040224e-05, + "loss": 1.1275, + "step": 602 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996878002857e-05, + "loss": 0.9398, + "step": 603 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999968655770124e-05, + "loss": 1.1224, + "step": 604 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996853126489e-05, + "loss": 1.0039, + "step": 605 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999968406512865e-05, + "loss": 1.0953, + "step": 606 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996828151405e-05, + "loss": 1.0682, + "step": 607 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996815626846e-05, + "loss": 0.981, + "step": 608 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996803077607e-05, + "loss": 1.135, + "step": 609 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996790503689e-05, + "loss": 2.0639, + "step": 610 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999967779050915e-05, + "loss": 1.208, + "step": 611 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996765281816e-05, + "loss": 0.9878, + "step": 612 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999967526338615e-05, + "loss": 1.0357, + "step": 613 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996739961228e-05, + "loss": 1.0447, + "step": 614 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999967272639156e-05, + "loss": 1.0037, + "step": 615 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996714541924e-05, + "loss": 1.256, + "step": 616 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999967017952544e-05, + "loss": 1.0181, + "step": 617 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996689023905e-05, + "loss": 0.8774, + "step": 618 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999966762278766e-05, + "loss": 1.0415, + "step": 619 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999966634071696e-05, + "loss": 1.1236, + "step": 620 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999966505617835e-05, + "loss": 1.1345, + "step": 621 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996637691718e-05, + "loss": 1.103, + "step": 622 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999966247969745e-05, + "loss": 0.8407, + "step": 623 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996611877552e-05, + "loss": 1.2822, + "step": 624 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996598933451e-05, + "loss": 1.3973, + "step": 625 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999965859646704e-05, + "loss": 1.1597, + "step": 626 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996572971211e-05, + "loss": 1.9025, + "step": 627 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999965599530726e-05, + "loss": 1.2152, + "step": 628 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996546910255e-05, + "loss": 1.157, + "step": 629 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996533842759e-05, + "loss": 1.3811, + "step": 630 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996520750584e-05, + "loss": 1.2883, + "step": 631 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999650763373e-05, + "loss": 1.4924, + "step": 632 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999964944921974e-05, + "loss": 1.0566, + "step": 633 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996481325985e-05, + "loss": 1.3231, + "step": 634 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996468135094e-05, + "loss": 1.3814, + "step": 635 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996454919524e-05, + "loss": 1.0348, + "step": 636 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999964416792764e-05, + "loss": 1.1868, + "step": 637 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996428414349e-05, + "loss": 1.2078, + "step": 638 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996415124743e-05, + "loss": 1.1857, + "step": 639 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999964018104574e-05, + "loss": 1.083, + "step": 640 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996388471493e-05, + "loss": 1.2147, + "step": 641 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999637510785e-05, + "loss": 3.2249, + "step": 642 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996361719528e-05, + "loss": 3.0262, + "step": 643 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996348306527e-05, + "loss": 1.0029, + "step": 644 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996334868847e-05, + "loss": 1.5653, + "step": 645 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999963214064885e-05, + "loss": 1.0408, + "step": 646 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999963079194505e-05, + "loss": 1.037, + "step": 647 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996294407734e-05, + "loss": 1.2011, + "step": 648 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999962808713385e-05, + "loss": 1.1739, + "step": 649 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996267310264e-05, + "loss": 1.3935, + "step": 650 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996253724511e-05, + "loss": 1.3371, + "step": 651 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996240114079e-05, + "loss": 1.3247, + "step": 652 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996226478968e-05, + "loss": 1.2976, + "step": 653 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999962128191774e-05, + "loss": 1.1112, + "step": 654 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996199134709e-05, + "loss": 1.1392, + "step": 655 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999961854255606e-05, + "loss": 0.9464, + "step": 656 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999961716917345e-05, + "loss": 1.1398, + "step": 657 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996157933228e-05, + "loss": 1.2702, + "step": 658 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999961441500436e-05, + "loss": 1.046, + "step": 659 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996130342181e-05, + "loss": 1.203, + "step": 660 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996116509637e-05, + "loss": 1.1421, + "step": 661 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996102652417e-05, + "loss": 0.8796, + "step": 662 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999960887705165e-05, + "loss": 1.0829, + "step": 663 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996074863938e-05, + "loss": 1.2028, + "step": 664 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996060932679e-05, + "loss": 1.2499, + "step": 665 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999960469767427e-05, + "loss": 1.5727, + "step": 666 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999960329961264e-05, + "loss": 1.4672, + "step": 667 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996018990832e-05, + "loss": 1.3084, + "step": 668 + }, + { + "epoch": 0.0, + "learning_rate": 4.999996004960858e-05, + "loss": 1.1889, + "step": 669 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999959909062054e-05, + "loss": 1.1393, + "step": 670 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995976826874e-05, + "loss": 1.2297, + "step": 671 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995962722864e-05, + "loss": 1.2911, + "step": 672 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995948594174e-05, + "loss": 1.0032, + "step": 673 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999959344408066e-05, + "loss": 0.9387, + "step": 674 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995920262759e-05, + "loss": 0.9681, + "step": 675 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995906060033e-05, + "loss": 0.9953, + "step": 676 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995891832628e-05, + "loss": 1.0853, + "step": 677 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999958775805446e-05, + "loss": 0.9147, + "step": 678 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995863303781e-05, + "loss": 1.2149, + "step": 679 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999584900234e-05, + "loss": 0.9156, + "step": 680 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999583467622e-05, + "loss": 1.2145, + "step": 681 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999582032542e-05, + "loss": 1.2697, + "step": 682 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995805949942e-05, + "loss": 1.5116, + "step": 683 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995791549784e-05, + "loss": 1.0923, + "step": 684 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999957771249483e-05, + "loss": 1.0256, + "step": 685 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999957626754336e-05, + "loss": 1.2579, + "step": 686 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995748201239e-05, + "loss": 1.8896, + "step": 687 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995733702366e-05, + "loss": 1.9751, + "step": 688 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995719178815e-05, + "loss": 2.1633, + "step": 689 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995704630584e-05, + "loss": 1.6439, + "step": 690 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995690057674e-05, + "loss": 0.7501, + "step": 691 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999956754600865e-05, + "loss": 0.9558, + "step": 692 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999956608378184e-05, + "loss": 1.0196, + "step": 693 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999956461908725e-05, + "loss": 1.3408, + "step": 694 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999956315192475e-05, + "loss": 1.1365, + "step": 695 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995616822943e-05, + "loss": 1.2901, + "step": 696 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999560210196e-05, + "loss": 1.348, + "step": 697 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995587356298e-05, + "loss": 1.3301, + "step": 698 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995572585957e-05, + "loss": 1.2014, + "step": 699 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999955577909376e-05, + "loss": 1.1904, + "step": 700 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995542971238e-05, + "loss": 1.4346, + "step": 701 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995528126862e-05, + "loss": 1.1291, + "step": 702 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995513257805e-05, + "loss": 1.0225, + "step": 703 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999954983640696e-05, + "loss": 1.902, + "step": 704 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995483445655e-05, + "loss": 1.111, + "step": 705 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995468502562e-05, + "loss": 1.0966, + "step": 706 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999954535347906e-05, + "loss": 0.9121, + "step": 707 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999543854234e-05, + "loss": 0.9493, + "step": 708 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999542352521e-05, + "loss": 0.9592, + "step": 709 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995408483401e-05, + "loss": 0.8201, + "step": 710 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995393416913e-05, + "loss": 1.1763, + "step": 711 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995378325747e-05, + "loss": 1.1213, + "step": 712 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999953632099015e-05, + "loss": 1.0797, + "step": 713 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995348069377e-05, + "loss": 0.9418, + "step": 714 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995332904174e-05, + "loss": 1.2825, + "step": 715 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999953177142915e-05, + "loss": 1.2253, + "step": 716 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999953024997305e-05, + "loss": 1.0951, + "step": 717 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999952872604903e-05, + "loss": 1.2137, + "step": 718 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995271996572e-05, + "loss": 1.1174, + "step": 719 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995256707975e-05, + "loss": 1.2376, + "step": 720 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995241394698e-05, + "loss": 1.2504, + "step": 721 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995226056742e-05, + "loss": 1.1348, + "step": 722 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995210694107e-05, + "loss": 1.1022, + "step": 723 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995195306794e-05, + "loss": 1.1405, + "step": 724 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995179894802e-05, + "loss": 1.237, + "step": 725 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999951644581314e-05, + "loss": 1.009, + "step": 726 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995148996781e-05, + "loss": 1.4083, + "step": 727 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995133510752e-05, + "loss": 1.8452, + "step": 728 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995118000044e-05, + "loss": 1.0214, + "step": 729 + }, + { + "epoch": 0.0, + "learning_rate": 4.999995102464657e-05, + "loss": 1.0336, + "step": 730 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999950869045916e-05, + "loss": 1.1379, + "step": 731 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999950713198474e-05, + "loss": 1.0476, + "step": 732 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999950557104234e-05, + "loss": 1.3398, + "step": 733 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999950400763216e-05, + "loss": 1.0744, + "step": 734 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999950244175406e-05, + "loss": 1.0007, + "step": 735 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999950087340805e-05, + "loss": 1.3092, + "step": 736 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994993025941e-05, + "loss": 1.1256, + "step": 737 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999949772931235e-05, + "loss": 1.6756, + "step": 738 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999949615356266e-05, + "loss": 1.9275, + "step": 739 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994945753451e-05, + "loss": 0.9195, + "step": 740 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994929946597e-05, + "loss": 1.3474, + "step": 741 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999949141150624e-05, + "loss": 1.4102, + "step": 742 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994898258851e-05, + "loss": 1.3585, + "step": 743 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994882377959e-05, + "loss": 1.2699, + "step": 744 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994866472389e-05, + "loss": 1.0139, + "step": 745 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999485054214e-05, + "loss": 0.8573, + "step": 746 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999948345872114e-05, + "loss": 0.7978, + "step": 747 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994818607605e-05, + "loss": 0.9753, + "step": 748 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994802603319e-05, + "loss": 1.3202, + "step": 749 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994786574354e-05, + "loss": 0.3981, + "step": 750 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999477052071e-05, + "loss": 0.3395, + "step": 751 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999947544423885e-05, + "loss": 0.3216, + "step": 752 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999947383393876e-05, + "loss": 0.215, + "step": 753 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999947222117074e-05, + "loss": 0.2558, + "step": 754 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999947060593475e-05, + "loss": 0.1257, + "step": 755 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999468988231e-05, + "loss": 0.096, + "step": 756 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994673680592e-05, + "loss": 0.0708, + "step": 757 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994657454197e-05, + "loss": 0.0575, + "step": 758 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994641203122e-05, + "loss": 0.0431, + "step": 759 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999946249273687e-05, + "loss": 0.1335, + "step": 760 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999946086269365e-05, + "loss": 0.1145, + "step": 761 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999945923018245e-05, + "loss": 0.3766, + "step": 762 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994575952035e-05, + "loss": 1.2983, + "step": 763 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994559577565e-05, + "loss": 1.3596, + "step": 764 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994543178417e-05, + "loss": 1.3108, + "step": 765 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999452675459e-05, + "loss": 1.0224, + "step": 766 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994510306085e-05, + "loss": 1.2363, + "step": 767 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999944938329e-05, + "loss": 1.2409, + "step": 768 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994477335036e-05, + "loss": 1.1756, + "step": 769 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999944608124935e-05, + "loss": 0.8086, + "step": 770 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999944442652726e-05, + "loss": 1.2566, + "step": 771 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994427693372e-05, + "loss": 1.3554, + "step": 772 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999944110967925e-05, + "loss": 1.1814, + "step": 773 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994394475535e-05, + "loss": 1.1509, + "step": 774 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994377829598e-05, + "loss": 0.5566, + "step": 775 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994361158982e-05, + "loss": 1.0299, + "step": 776 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999943444636874e-05, + "loss": 1.093, + "step": 777 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994327743713e-05, + "loss": 1.3153, + "step": 778 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994310999061e-05, + "loss": 1.3283, + "step": 779 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999942942297296e-05, + "loss": 1.1775, + "step": 780 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994277435719e-05, + "loss": 1.3292, + "step": 781 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999942606170296e-05, + "loss": 1.416, + "step": 782 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999942437736616e-05, + "loss": 1.2101, + "step": 783 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999942269056143e-05, + "loss": 1.0418, + "step": 784 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999942100128887e-05, + "loss": 1.1471, + "step": 785 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994193095484e-05, + "loss": 1.1467, + "step": 786 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999941761534005e-05, + "loss": 1.0736, + "step": 787 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994159186638e-05, + "loss": 1.2616, + "step": 788 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994142195197e-05, + "loss": 1.2858, + "step": 789 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994125179076e-05, + "loss": 1.3641, + "step": 790 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994108138277e-05, + "loss": 1.2877, + "step": 791 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994091072799e-05, + "loss": 0.8191, + "step": 792 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994073982642e-05, + "loss": 0.6352, + "step": 793 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994056867806e-05, + "loss": 1.0221, + "step": 794 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999940397282906e-05, + "loss": 0.8679, + "step": 795 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999940225640976e-05, + "loss": 1.6629, + "step": 796 + }, + { + "epoch": 0.0, + "learning_rate": 4.999994005375225e-05, + "loss": 1.2877, + "step": 797 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993988161673e-05, + "loss": 1.117, + "step": 798 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993970923443e-05, + "loss": 1.016, + "step": 799 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993953660534e-05, + "loss": 1.0511, + "step": 800 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993936372946e-05, + "loss": 1.0768, + "step": 801 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993919060679e-05, + "loss": 1.0474, + "step": 802 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993901723733e-05, + "loss": 0.9152, + "step": 803 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993884362108e-05, + "loss": 1.1368, + "step": 804 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938669758045e-05, + "loss": 1.0106, + "step": 805 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993849564822e-05, + "loss": 1.1477, + "step": 806 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938321291604e-05, + "loss": 1.2261, + "step": 807 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999381466882e-05, + "loss": 0.8828, + "step": 808 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999937971838004e-05, + "loss": 0.9267, + "step": 809 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999937796741033e-05, + "loss": 0.9857, + "step": 810 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999937621397265e-05, + "loss": 0.7243, + "step": 811 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999374458067e-05, + "loss": 0.2533, + "step": 812 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993726996935e-05, + "loss": 0.1383, + "step": 813 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999937093885217e-05, + "loss": 0.1169, + "step": 814 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993691755429e-05, + "loss": 0.5441, + "step": 815 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993674097658e-05, + "loss": 1.1295, + "step": 816 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993656415208e-05, + "loss": 1.2738, + "step": 817 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993638708079e-05, + "loss": 1.1248, + "step": 818 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999362097627e-05, + "loss": 1.1047, + "step": 819 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993603219784e-05, + "loss": 1.0343, + "step": 820 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993585438618e-05, + "loss": 1.3274, + "step": 821 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993567632773e-05, + "loss": 1.233, + "step": 822 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999354980225e-05, + "loss": 0.9655, + "step": 823 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993531947047e-05, + "loss": 1.2446, + "step": 824 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993514067166e-05, + "loss": 1.204, + "step": 825 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993496162606e-05, + "loss": 1.334, + "step": 826 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999934782333666e-05, + "loss": 1.5279, + "step": 827 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993460279448e-05, + "loss": 0.6742, + "step": 828 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993442300851e-05, + "loss": 0.4579, + "step": 829 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993424297576e-05, + "loss": 0.55, + "step": 830 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999934062696215e-05, + "loss": 0.8013, + "step": 831 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993388216988e-05, + "loss": 0.9383, + "step": 832 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993370139675e-05, + "loss": 1.1955, + "step": 833 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993352037684e-05, + "loss": 1.0137, + "step": 834 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993333911014e-05, + "loss": 0.9991, + "step": 835 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999933157596645e-05, + "loss": 0.9199, + "step": 836 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993297583637e-05, + "loss": 0.4115, + "step": 837 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999327938293e-05, + "loss": 0.3562, + "step": 838 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993261157544e-05, + "loss": 1.0957, + "step": 839 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999324290748e-05, + "loss": 0.9675, + "step": 840 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993224632736e-05, + "loss": 0.9914, + "step": 841 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999932063333144e-05, + "loss": 0.9066, + "step": 842 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999931880092135e-05, + "loss": 1.0886, + "step": 843 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993169660433e-05, + "loss": 0.969, + "step": 844 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993151286974e-05, + "loss": 1.0533, + "step": 845 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993132888836e-05, + "loss": 1.0183, + "step": 846 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999311446602e-05, + "loss": 1.1548, + "step": 847 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993096018524e-05, + "loss": 0.987, + "step": 848 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999307754635e-05, + "loss": 1.4268, + "step": 849 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999930590494956e-05, + "loss": 1.5397, + "step": 850 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999930405279635e-05, + "loss": 0.9337, + "step": 851 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993021981752e-05, + "loss": 1.3076, + "step": 852 + }, + { + "epoch": 0.0, + "learning_rate": 4.999993003410863e-05, + "loss": 1.1003, + "step": 853 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999929848152936e-05, + "loss": 1.0851, + "step": 854 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992966195046e-05, + "loss": 1.2169, + "step": 855 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999294755012e-05, + "loss": 1.2069, + "step": 856 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992928880514e-05, + "loss": 1.184, + "step": 857 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999291018623e-05, + "loss": 1.4656, + "step": 858 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992891467267e-05, + "loss": 1.2775, + "step": 859 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992872723624e-05, + "loss": 1.6209, + "step": 860 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999928539553034e-05, + "loss": 1.8363, + "step": 861 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992835162304e-05, + "loss": 1.6992, + "step": 862 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999928163446254e-05, + "loss": 2.3404, + "step": 863 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992797502268e-05, + "loss": 1.5144, + "step": 864 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992778635231e-05, + "loss": 1.5439, + "step": 865 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999927597435156e-05, + "loss": 1.3937, + "step": 866 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992740827121e-05, + "loss": 1.6777, + "step": 867 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992721886048e-05, + "loss": 1.1788, + "step": 868 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992702920296e-05, + "loss": 1.2119, + "step": 869 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992683929865e-05, + "loss": 1.3642, + "step": 870 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992664914755e-05, + "loss": 0.9385, + "step": 871 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992645874967e-05, + "loss": 1.0575, + "step": 872 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992626810499e-05, + "loss": 1.091, + "step": 873 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992607721353e-05, + "loss": 1.205, + "step": 874 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999925886075275e-05, + "loss": 0.9686, + "step": 875 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999925694690233e-05, + "loss": 1.0848, + "step": 876 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992550305841e-05, + "loss": 1.1814, + "step": 877 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992531117978e-05, + "loss": 0.8181, + "step": 878 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992511905438e-05, + "loss": 1.0436, + "step": 879 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992492668219e-05, + "loss": 1.1664, + "step": 880 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999924734063195e-05, + "loss": 1.5344, + "step": 881 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992454119743e-05, + "loss": 1.1005, + "step": 882 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999924348084864e-05, + "loss": 1.3829, + "step": 883 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992415472551e-05, + "loss": 1.0473, + "step": 884 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992396111938e-05, + "loss": 0.9251, + "step": 885 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999923767266444e-05, + "loss": 1.0935, + "step": 886 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992357316673e-05, + "loss": 1.082, + "step": 887 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999923378820224e-05, + "loss": 0.3298, + "step": 888 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992318422693e-05, + "loss": 0.5366, + "step": 889 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992298938685e-05, + "loss": 1.022, + "step": 890 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992279429997e-05, + "loss": 0.8881, + "step": 891 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999922598966313e-05, + "loss": 0.8658, + "step": 892 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999922403385864e-05, + "loss": 1.2105, + "step": 893 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992220755862e-05, + "loss": 1.1332, + "step": 894 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999922011484604e-05, + "loss": 5.3122, + "step": 895 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999921815163786e-05, + "loss": 3.7951, + "step": 896 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992161859618e-05, + "loss": 5.5103, + "step": 897 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999921421781784e-05, + "loss": 3.4476, + "step": 898 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999921224720605e-05, + "loss": 2.2574, + "step": 899 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999921027412635e-05, + "loss": 1.302, + "step": 900 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999920829857874e-05, + "loss": 1.1378, + "step": 901 + }, + { + "epoch": 0.0, + "learning_rate": 4.999992063205633e-05, + "loss": 1.1153, + "step": 902 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999920434008e-05, + "loss": 1.1502, + "step": 903 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999920235712874e-05, + "loss": 1.266, + "step": 904 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999920037170953e-05, + "loss": 1.4186, + "step": 905 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999919838382255e-05, + "loss": 0.9976, + "step": 906 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991963934676e-05, + "loss": 1.2569, + "step": 907 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991944006448e-05, + "loss": 1.2904, + "step": 908 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999919240535416e-05, + "loss": 0.6151, + "step": 909 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991904075956e-05, + "loss": 1.2937, + "step": 910 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999918840736915e-05, + "loss": 1.1849, + "step": 911 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991864046748e-05, + "loss": 1.1805, + "step": 912 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999918439951255e-05, + "loss": 1.2076, + "step": 913 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999918239188245e-05, + "loss": 1.1705, + "step": 914 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999918038178436e-05, + "loss": 0.8461, + "step": 915 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991783692185e-05, + "loss": 1.127, + "step": 916 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991763541848e-05, + "loss": 1.2225, + "step": 917 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991743366831e-05, + "loss": 1.0974, + "step": 918 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991723167135e-05, + "loss": 0.53, + "step": 919 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999917029427614e-05, + "loss": 0.7431, + "step": 920 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999916826937076e-05, + "loss": 1.4294, + "step": 921 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991662419976e-05, + "loss": 1.2934, + "step": 922 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991642121565e-05, + "loss": 0.9691, + "step": 923 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999916217984755e-05, + "loss": 0.9543, + "step": 924 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999916014507065e-05, + "loss": 1.2091, + "step": 925 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991581078259e-05, + "loss": 1.0221, + "step": 926 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999915606811323e-05, + "loss": 1.0449, + "step": 927 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991540259327e-05, + "loss": 0.9845, + "step": 928 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999915198128436e-05, + "loss": 1.1451, + "step": 929 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999149934168e-05, + "loss": 1.0454, + "step": 930 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999914788458376e-05, + "loss": 1.2146, + "step": 931 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991458325318e-05, + "loss": 1.3721, + "step": 932 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999914377801184e-05, + "loss": 1.1491, + "step": 933 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999141721024e-05, + "loss": 0.996, + "step": 934 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991396615682e-05, + "loss": 1.0118, + "step": 935 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991375996446e-05, + "loss": 0.6254, + "step": 936 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999913553525315e-05, + "loss": 0.995, + "step": 937 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991334683937e-05, + "loss": 1.1517, + "step": 938 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999913139906645e-05, + "loss": 1.1732, + "step": 939 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991293272713e-05, + "loss": 0.7338, + "step": 940 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991272530082e-05, + "loss": 1.2936, + "step": 941 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991251762773e-05, + "loss": 1.0981, + "step": 942 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999912309707855e-05, + "loss": 1.2161, + "step": 943 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991210154118e-05, + "loss": 1.5329, + "step": 944 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991189312772e-05, + "loss": 1.1148, + "step": 945 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991168446747e-05, + "loss": 1.5379, + "step": 946 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991147556043e-05, + "loss": 1.166, + "step": 947 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999911266406606e-05, + "loss": 1.0611, + "step": 948 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999911057006e-05, + "loss": 1.2562, + "step": 949 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999108473586e-05, + "loss": 1.1501, + "step": 950 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991063746441e-05, + "loss": 1.2442, + "step": 951 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991042732343e-05, + "loss": 0.8504, + "step": 952 + }, + { + "epoch": 0.0, + "learning_rate": 4.999991021693566e-05, + "loss": 0.6655, + "step": 953 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999100063011e-05, + "loss": 0.8098, + "step": 954 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990979541976e-05, + "loss": 0.6979, + "step": 955 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990958429163e-05, + "loss": 0.6262, + "step": 956 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999093729167e-05, + "loss": 0.9136, + "step": 957 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990916129499e-05, + "loss": 0.6292, + "step": 958 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990894942649e-05, + "loss": 0.7384, + "step": 959 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990873731121e-05, + "loss": 0.6144, + "step": 960 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990852494913e-05, + "loss": 0.8134, + "step": 961 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990831234026e-05, + "loss": 0.6764, + "step": 962 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990809948461e-05, + "loss": 1.2664, + "step": 963 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990788638217e-05, + "loss": 1.2815, + "step": 964 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990767303293e-05, + "loss": 1.1905, + "step": 965 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990745943692e-05, + "loss": 1.2767, + "step": 966 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999907245594114e-05, + "loss": 1.1545, + "step": 967 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990703150451e-05, + "loss": 1.0269, + "step": 968 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990681716813e-05, + "loss": 1.0386, + "step": 969 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999906602584956e-05, + "loss": 1.2325, + "step": 970 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990638775499e-05, + "loss": 1.1714, + "step": 971 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990617267824e-05, + "loss": 1.1213, + "step": 972 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999059573547e-05, + "loss": 1.087, + "step": 973 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990574178438e-05, + "loss": 0.8446, + "step": 974 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990552596726e-05, + "loss": 1.1204, + "step": 975 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990530990336e-05, + "loss": 1.1727, + "step": 976 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999905093592664e-05, + "loss": 1.0612, + "step": 977 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990487703518e-05, + "loss": 1.2293, + "step": 978 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990466023091e-05, + "loss": 1.2555, + "step": 979 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999904443179855e-05, + "loss": 1.0958, + "step": 980 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999904225882e-05, + "loss": 1.0161, + "step": 981 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990400833737e-05, + "loss": 1.1351, + "step": 982 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990379054594e-05, + "loss": 1.3182, + "step": 983 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999903572507736e-05, + "loss": 1.3075, + "step": 984 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990335422273e-05, + "loss": 1.2306, + "step": 985 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990313569094e-05, + "loss": 1.2427, + "step": 986 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990291691237e-05, + "loss": 0.4983, + "step": 987 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999902697886994e-05, + "loss": 0.4873, + "step": 988 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990247861484e-05, + "loss": 0.4938, + "step": 989 + }, + { + "epoch": 0.0, + "learning_rate": 4.99999022590959e-05, + "loss": 0.9492, + "step": 990 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999902039330167e-05, + "loss": 1.1465, + "step": 991 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990181931765e-05, + "loss": 1.27, + "step": 992 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990159905834e-05, + "loss": 0.4938, + "step": 993 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999901378552236e-05, + "loss": 1.0486, + "step": 994 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999901157799356e-05, + "loss": 1.1557, + "step": 995 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990093679968e-05, + "loss": 1.3443, + "step": 996 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999900715553216e-05, + "loss": 1.1414, + "step": 997 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990049405996e-05, + "loss": 1.1585, + "step": 998 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990027231992e-05, + "loss": 1.1172, + "step": 999 + }, + { + "epoch": 0.0, + "learning_rate": 4.999990005033309e-05, + "loss": 1.2645, + "step": 1000 + }, + { + "epoch": 0.0, + "eval_loss": 1.1082977056503296, + "eval_runtime": 84.9058, + "eval_samples_per_second": 16.312, + "eval_steps_per_second": 4.087, + "step": 1000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989982809948e-05, + "loss": 1.1184, + "step": 1001 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989960561907e-05, + "loss": 1.1318, + "step": 1002 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989938289188e-05, + "loss": 1.0316, + "step": 1003 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999899159917895e-05, + "loss": 1.1357, + "step": 1004 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989893669712e-05, + "loss": 1.1337, + "step": 1005 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989871322956e-05, + "loss": 0.9274, + "step": 1006 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989848951522e-05, + "loss": 0.9013, + "step": 1007 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989826555408e-05, + "loss": 0.8428, + "step": 1008 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999898041346155e-05, + "loss": 0.8037, + "step": 1009 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999897816891436e-05, + "loss": 0.7398, + "step": 1010 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989759218994e-05, + "loss": 0.6844, + "step": 1011 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989736724165e-05, + "loss": 0.577, + "step": 1012 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989714204657e-05, + "loss": 0.6148, + "step": 1013 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989691660471e-05, + "loss": 0.8637, + "step": 1014 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999896690916045e-05, + "loss": 1.3844, + "step": 1015 + }, + { + "epoch": 0.0, + "learning_rate": 4.99998964649806e-05, + "loss": 1.4286, + "step": 1016 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989623879837e-05, + "loss": 0.8812, + "step": 1017 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989601236935e-05, + "loss": 0.8998, + "step": 1018 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989578569354e-05, + "loss": 1.0472, + "step": 1019 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989555877094e-05, + "loss": 1.1919, + "step": 1020 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999895331601556e-05, + "loss": 1.1963, + "step": 1021 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989510418538e-05, + "loss": 1.198, + "step": 1022 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989487652242e-05, + "loss": 1.0706, + "step": 1023 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999894648612666e-05, + "loss": 1.2298, + "step": 1024 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989442045613e-05, + "loss": 1.3413, + "step": 1025 + }, + { + "epoch": 0.0, + "learning_rate": 4.99998941920528e-05, + "loss": 0.837, + "step": 1026 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989396340268e-05, + "loss": 1.1898, + "step": 1027 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999893734505775e-05, + "loss": 1.1632, + "step": 1028 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999893505362086e-05, + "loss": 1.014, + "step": 1029 + }, + { + "epoch": 0.0, + "learning_rate": 4.99998932759716e-05, + "loss": 0.9097, + "step": 1030 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989304633433e-05, + "loss": 1.3816, + "step": 1031 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999892816450274e-05, + "loss": 1.1644, + "step": 1032 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999892586319425e-05, + "loss": 1.205, + "step": 1033 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989235594179e-05, + "loss": 1.3464, + "step": 1034 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999892125317366e-05, + "loss": 1.2883, + "step": 1035 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989189444615e-05, + "loss": 1.085, + "step": 1036 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999891663328155e-05, + "loss": 1.1615, + "step": 1037 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989143196336e-05, + "loss": 1.2407, + "step": 1038 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999891200351784e-05, + "loss": 1.1057, + "step": 1039 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989096849342e-05, + "loss": 1.2035, + "step": 1040 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989073638827e-05, + "loss": 1.2299, + "step": 1041 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999890504036315e-05, + "loss": 1.3304, + "step": 1042 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989027143759e-05, + "loss": 1.0313, + "step": 1043 + }, + { + "epoch": 0.0, + "learning_rate": 4.999989003859207e-05, + "loss": 1.3824, + "step": 1044 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988980549976e-05, + "loss": 1.0822, + "step": 1045 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999889572160665e-05, + "loss": 1.9536, + "step": 1046 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988933857478e-05, + "loss": 1.9319, + "step": 1047 + }, + { + "epoch": 0.0, + "learning_rate": 4.99998891047421e-05, + "loss": 1.1924, + "step": 1048 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988887066265e-05, + "loss": 0.978, + "step": 1049 + }, + { + "epoch": 0.0, + "learning_rate": 4.99998886363364e-05, + "loss": 1.021, + "step": 1050 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988840176336e-05, + "loss": 1.1141, + "step": 1051 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988816694353e-05, + "loss": 1.2393, + "step": 1052 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988793187692e-05, + "loss": 1.2058, + "step": 1053 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999887696563514e-05, + "loss": 1.3326, + "step": 1054 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999887461003326e-05, + "loss": 1.24, + "step": 1055 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988722519634e-05, + "loss": 1.3852, + "step": 1056 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999886989142576e-05, + "loss": 0.7316, + "step": 1057 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988675284202e-05, + "loss": 1.0525, + "step": 1058 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999886516294666e-05, + "loss": 1.064, + "step": 1059 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988627950054e-05, + "loss": 1.0082, + "step": 1060 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988604245962e-05, + "loss": 0.9874, + "step": 1061 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988580517191e-05, + "loss": 0.9579, + "step": 1062 + }, + { + "epoch": 0.0, + "learning_rate": 4.99998855676374e-05, + "loss": 1.0129, + "step": 1063 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999885329856125e-05, + "loss": 1.2541, + "step": 1064 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988509182805e-05, + "loss": 1.1398, + "step": 1065 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988485355318e-05, + "loss": 0.6827, + "step": 1066 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999884615031536e-05, + "loss": 0.9613, + "step": 1067 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988437626309e-05, + "loss": 1.0136, + "step": 1068 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999884137247864e-05, + "loss": 0.9416, + "step": 1069 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999883897985844e-05, + "loss": 1.2144, + "step": 1070 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999883658477046e-05, + "loss": 1.235, + "step": 1071 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988341872145e-05, + "loss": 0.8769, + "step": 1072 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988317871906e-05, + "loss": 0.8905, + "step": 1073 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999882938469896e-05, + "loss": 1.2377, + "step": 1074 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988269797394e-05, + "loss": 1.2858, + "step": 1075 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988245723119e-05, + "loss": 1.1109, + "step": 1076 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999882216241656e-05, + "loss": 1.2624, + "step": 1077 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988197500533e-05, + "loss": 0.8667, + "step": 1078 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988173352222e-05, + "loss": 1.0818, + "step": 1079 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988149179232e-05, + "loss": 0.8727, + "step": 1080 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999881249815634e-05, + "loss": 1.1173, + "step": 1081 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999881007592156e-05, + "loss": 1.2649, + "step": 1082 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988076512189e-05, + "loss": 1.3832, + "step": 1083 + }, + { + "epoch": 0.0, + "learning_rate": 4.999988052240484e-05, + "loss": 1.2634, + "step": 1084 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999880279440995e-05, + "loss": 1.2364, + "step": 1085 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999880036230365e-05, + "loss": 1.3662, + "step": 1086 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987979277295e-05, + "loss": 1.0146, + "step": 1087 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999879549068737e-05, + "loss": 1.1297, + "step": 1088 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999879305117745e-05, + "loss": 1.1345, + "step": 1089 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987906091996e-05, + "loss": 0.7416, + "step": 1090 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999878816475395e-05, + "loss": 0.8146, + "step": 1091 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999878571784037e-05, + "loss": 0.7677, + "step": 1092 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999878326845886e-05, + "loss": 0.7257, + "step": 1093 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987808166096e-05, + "loss": 0.7448, + "step": 1094 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999877836229224e-05, + "loss": 0.7, + "step": 1095 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987759055071e-05, + "loss": 0.6121, + "step": 1096 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987734462541e-05, + "loss": 0.7789, + "step": 1097 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987709845333e-05, + "loss": 0.649, + "step": 1098 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987685203445e-05, + "loss": 0.8089, + "step": 1099 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999876605368786e-05, + "loss": 0.7264, + "step": 1100 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987635845633e-05, + "loss": 0.6354, + "step": 1101 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987611129709e-05, + "loss": 0.7173, + "step": 1102 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987586389106e-05, + "loss": 0.7339, + "step": 1103 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987561623824e-05, + "loss": 0.7132, + "step": 1104 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999875368338634e-05, + "loss": 1.0778, + "step": 1105 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987512019224e-05, + "loss": 1.1789, + "step": 1106 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987487179906e-05, + "loss": 1.0053, + "step": 1107 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987462315908e-05, + "loss": 1.1175, + "step": 1108 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987437427233e-05, + "loss": 1.2642, + "step": 1109 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987412513878e-05, + "loss": 1.1858, + "step": 1110 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987387575844e-05, + "loss": 1.1409, + "step": 1111 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999873626131323e-05, + "loss": 1.4153, + "step": 1112 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999873376257404e-05, + "loss": 1.3314, + "step": 1113 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987312613671e-05, + "loss": 1.2541, + "step": 1114 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999872875769216e-05, + "loss": 1.2764, + "step": 1115 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987262515494e-05, + "loss": 1.5257, + "step": 1116 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987237429388e-05, + "loss": 1.2683, + "step": 1117 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987212318602e-05, + "loss": 0.6983, + "step": 1118 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999871871831385e-05, + "loss": 1.0439, + "step": 1119 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987162022995e-05, + "loss": 1.1931, + "step": 1120 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987136838173e-05, + "loss": 0.7782, + "step": 1121 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999871116286724e-05, + "loss": 1.1135, + "step": 1122 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987086394493e-05, + "loss": 1.1923, + "step": 1123 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987061135635e-05, + "loss": 1.0822, + "step": 1124 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987035852098e-05, + "loss": 1.1745, + "step": 1125 + }, + { + "epoch": 0.0, + "learning_rate": 4.999987010543882e-05, + "loss": 0.9733, + "step": 1126 + }, + { + "epoch": 0.0, + "learning_rate": 4.999986985210987e-05, + "loss": 0.8489, + "step": 1127 + }, + { + "epoch": 0.0, + "learning_rate": 4.999986959853414e-05, + "loss": 1.1524, + "step": 1128 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999869344711615e-05, + "loss": 1.1779, + "step": 1129 + }, + { + "epoch": 0.0, + "learning_rate": 4.99998690906423e-05, + "loss": 1.19, + "step": 1130 + }, + { + "epoch": 0.0, + "learning_rate": 4.99998688363262e-05, + "loss": 1.0589, + "step": 1131 + }, + { + "epoch": 0.0, + "learning_rate": 4.999986858176332e-05, + "loss": 1.1153, + "step": 1132 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999868326953635e-05, + "loss": 1.199, + "step": 1133 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999868071897174e-05, + "loss": 0.846, + "step": 1134 + }, + { + "epoch": 0.0, + "learning_rate": 4.999986781659392e-05, + "loss": 0.9576, + "step": 1135 + }, + { + "epoch": 0.0, + "learning_rate": 4.999986756104388e-05, + "loss": 0.5289, + "step": 1136 + }, + { + "epoch": 0.0, + "learning_rate": 4.999986730524705e-05, + "loss": 0.4011, + "step": 1137 + }, + { + "epoch": 0.0, + "learning_rate": 4.999986704920343e-05, + "loss": 0.2992, + "step": 1138 + }, + { + "epoch": 0.0, + "learning_rate": 4.999986679291303e-05, + "loss": 0.2196, + "step": 1139 + }, + { + "epoch": 0.0, + "learning_rate": 4.999986653637584e-05, + "loss": 0.1587, + "step": 1140 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999866279591856e-05, + "loss": 0.2478, + "step": 1141 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999866022561084e-05, + "loss": 0.2585, + "step": 1142 + }, + { + "epoch": 0.0, + "learning_rate": 4.999986576528353e-05, + "loss": 0.2091, + "step": 1143 + }, + { + "epoch": 0.0, + "learning_rate": 4.999986550775918e-05, + "loss": 0.3503, + "step": 1144 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999865249988045e-05, + "loss": 0.1784, + "step": 1145 + }, + { + "epoch": 0.0, + "learning_rate": 4.999986499197013e-05, + "loss": 0.2116, + "step": 1146 + }, + { + "epoch": 0.0, + "learning_rate": 4.999986473370542e-05, + "loss": 0.1828, + "step": 1147 + }, + { + "epoch": 0.0, + "learning_rate": 4.999986447519392e-05, + "loss": 0.2068, + "step": 1148 + }, + { + "epoch": 0.0, + "learning_rate": 4.999986421643563e-05, + "loss": 0.2104, + "step": 1149 + }, + { + "epoch": 0.0, + "learning_rate": 4.999986395743056e-05, + "loss": 1.1287, + "step": 1150 + }, + { + "epoch": 0.0, + "learning_rate": 4.999986369817869e-05, + "loss": 1.2888, + "step": 1151 + }, + { + "epoch": 0.0, + "learning_rate": 4.999986343868004e-05, + "loss": 1.1025, + "step": 1152 + }, + { + "epoch": 0.0, + "learning_rate": 4.99998631789346e-05, + "loss": 1.352, + "step": 1153 + }, + { + "epoch": 0.0, + "learning_rate": 4.999986291894238e-05, + "loss": 1.4697, + "step": 1154 + }, + { + "epoch": 0.0, + "learning_rate": 4.999986265870336e-05, + "loss": 1.2039, + "step": 1155 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999862398217556e-05, + "loss": 1.298, + "step": 1156 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999862137484965e-05, + "loss": 1.1438, + "step": 1157 + }, + { + "epoch": 0.0, + "learning_rate": 4.999986187650558e-05, + "loss": 1.1965, + "step": 1158 + }, + { + "epoch": 0.0, + "learning_rate": 4.999986161527942e-05, + "loss": 1.2826, + "step": 1159 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999861353806464e-05, + "loss": 0.9909, + "step": 1160 + }, + { + "epoch": 0.0, + "learning_rate": 4.999986109208672e-05, + "loss": 1.1301, + "step": 1161 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999860830120186e-05, + "loss": 1.2068, + "step": 1162 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999860567906866e-05, + "loss": 1.205, + "step": 1163 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999860305446755e-05, + "loss": 0.9465, + "step": 1164 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999860042739866e-05, + "loss": 1.0319, + "step": 1165 + }, + { + "epoch": 0.0, + "learning_rate": 4.999985977978618e-05, + "loss": 1.2249, + "step": 1166 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999859516585706e-05, + "loss": 1.1404, + "step": 1167 + }, + { + "epoch": 0.0, + "learning_rate": 4.999985925313845e-05, + "loss": 1.1021, + "step": 1168 + }, + { + "epoch": 0.0, + "learning_rate": 4.99998589894444e-05, + "loss": 0.7697, + "step": 1169 + }, + { + "epoch": 0.0, + "learning_rate": 4.999985872550356e-05, + "loss": 1.2475, + "step": 1170 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999858461315936e-05, + "loss": 1.346, + "step": 1171 + }, + { + "epoch": 0.0, + "learning_rate": 4.999985819688152e-05, + "loss": 1.2817, + "step": 1172 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999857932200326e-05, + "loss": 0.905, + "step": 1173 + }, + { + "epoch": 0.0, + "learning_rate": 4.999985766727233e-05, + "loss": 0.8015, + "step": 1174 + }, + { + "epoch": 0.0, + "learning_rate": 4.999985740209756e-05, + "loss": 0.9413, + "step": 1175 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999857136675994e-05, + "loss": 1.1069, + "step": 1176 + }, + { + "epoch": 0.0, + "learning_rate": 4.999985687100764e-05, + "loss": 1.0106, + "step": 1177 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999856605092496e-05, + "loss": 1.1865, + "step": 1178 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999856338930566e-05, + "loss": 1.0042, + "step": 1179 + }, + { + "epoch": 0.0, + "learning_rate": 4.999985607252186e-05, + "loss": 1.2633, + "step": 1180 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999855805866345e-05, + "loss": 1.1735, + "step": 1181 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999855538964055e-05, + "loss": 1.2769, + "step": 1182 + }, + { + "epoch": 0.0, + "learning_rate": 4.999985527181497e-05, + "loss": 1.1381, + "step": 1183 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999855004419105e-05, + "loss": 1.0372, + "step": 1184 + }, + { + "epoch": 0.0, + "learning_rate": 4.999985473677644e-05, + "loss": 1.5588, + "step": 1185 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999854468887e-05, + "loss": 1.1077, + "step": 1186 + }, + { + "epoch": 0.0, + "learning_rate": 4.999985420075077e-05, + "loss": 1.1749, + "step": 1187 + }, + { + "epoch": 0.0, + "learning_rate": 4.999985393236775e-05, + "loss": 0.679, + "step": 1188 + }, + { + "epoch": 0.0, + "learning_rate": 4.999985366373794e-05, + "loss": 1.0461, + "step": 1189 + }, + { + "epoch": 0.0, + "learning_rate": 4.999985339486134e-05, + "loss": 1.2824, + "step": 1190 + }, + { + "epoch": 0.0, + "learning_rate": 4.999985312573795e-05, + "loss": 1.0176, + "step": 1191 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999852856367776e-05, + "loss": 1.2261, + "step": 1192 + }, + { + "epoch": 0.0, + "learning_rate": 4.999985258675082e-05, + "loss": 1.4413, + "step": 1193 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999852316887064e-05, + "loss": 1.1552, + "step": 1194 + }, + { + "epoch": 0.0, + "learning_rate": 4.999985204677653e-05, + "loss": 1.1012, + "step": 1195 + }, + { + "epoch": 0.0, + "learning_rate": 4.999985177641921e-05, + "loss": 1.3743, + "step": 1196 + }, + { + "epoch": 0.0, + "learning_rate": 4.999985150581509e-05, + "loss": 1.2877, + "step": 1197 + }, + { + "epoch": 0.0, + "learning_rate": 4.999985123496419e-05, + "loss": 1.0601, + "step": 1198 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999850963866504e-05, + "loss": 1.0229, + "step": 1199 + }, + { + "epoch": 0.0, + "learning_rate": 4.999985069252202e-05, + "loss": 0.5861, + "step": 1200 + }, + { + "epoch": 0.0, + "learning_rate": 4.999985042093076e-05, + "loss": 1.0952, + "step": 1201 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999850149092705e-05, + "loss": 0.8716, + "step": 1202 + }, + { + "epoch": 0.0, + "learning_rate": 4.999984987700787e-05, + "loss": 1.3299, + "step": 1203 + }, + { + "epoch": 0.0, + "learning_rate": 4.999984960467623e-05, + "loss": 1.3952, + "step": 1204 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999849332097816e-05, + "loss": 1.201, + "step": 1205 + }, + { + "epoch": 0.0, + "learning_rate": 4.999984905927261e-05, + "loss": 0.9686, + "step": 1206 + }, + { + "epoch": 0.0, + "learning_rate": 4.999984878620061e-05, + "loss": 1.1564, + "step": 1207 + }, + { + "epoch": 0.0, + "learning_rate": 4.999984851288183e-05, + "loss": 0.7878, + "step": 1208 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999848239316264e-05, + "loss": 1.445, + "step": 1209 + }, + { + "epoch": 0.0, + "learning_rate": 4.999984796550391e-05, + "loss": 1.3462, + "step": 1210 + }, + { + "epoch": 0.0, + "learning_rate": 4.999984769144476e-05, + "loss": 1.3696, + "step": 1211 + }, + { + "epoch": 0.0, + "learning_rate": 4.999984741713883e-05, + "loss": 1.0053, + "step": 1212 + }, + { + "epoch": 0.0, + "learning_rate": 4.999984714258611e-05, + "loss": 1.0261, + "step": 1213 + }, + { + "epoch": 0.0, + "learning_rate": 4.99998468677866e-05, + "loss": 1.2284, + "step": 1214 + }, + { + "epoch": 0.0, + "learning_rate": 4.99998465927403e-05, + "loss": 1.2134, + "step": 1215 + }, + { + "epoch": 0.0, + "learning_rate": 4.999984631744722e-05, + "loss": 1.3078, + "step": 1216 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999846041907336e-05, + "loss": 1.2526, + "step": 1217 + }, + { + "epoch": 0.0, + "learning_rate": 4.999984576612068e-05, + "loss": 1.0028, + "step": 1218 + }, + { + "epoch": 0.0, + "learning_rate": 4.999984549008723e-05, + "loss": 0.9113, + "step": 1219 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999845213807e-05, + "loss": 0.8816, + "step": 1220 + }, + { + "epoch": 0.0, + "learning_rate": 4.999984493727997e-05, + "loss": 0.8277, + "step": 1221 + }, + { + "epoch": 0.0, + "learning_rate": 4.999984466050616e-05, + "loss": 0.8075, + "step": 1222 + }, + { + "epoch": 0.0, + "learning_rate": 4.999984438348556e-05, + "loss": 0.7724, + "step": 1223 + }, + { + "epoch": 0.0, + "learning_rate": 4.999984410621817e-05, + "loss": 0.9919, + "step": 1224 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999843828703994e-05, + "loss": 1.1758, + "step": 1225 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999843550943026e-05, + "loss": 1.1239, + "step": 1226 + }, + { + "epoch": 0.0, + "learning_rate": 4.999984327293528e-05, + "loss": 1.1041, + "step": 1227 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999842994680734e-05, + "loss": 1.1451, + "step": 1228 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999842716179405e-05, + "loss": 1.1521, + "step": 1229 + }, + { + "epoch": 0.0, + "learning_rate": 4.999984243743129e-05, + "loss": 1.0867, + "step": 1230 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999842158436385e-05, + "loss": 1.0515, + "step": 1231 + }, + { + "epoch": 0.0, + "learning_rate": 4.99998418791947e-05, + "loss": 1.0314, + "step": 1232 + }, + { + "epoch": 0.0, + "learning_rate": 4.999984159970621e-05, + "loss": 0.9818, + "step": 1233 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999841319970945e-05, + "loss": 1.0535, + "step": 1234 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999841039988894e-05, + "loss": 1.0195, + "step": 1235 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999840759760044e-05, + "loss": 1.1967, + "step": 1236 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999840479284416e-05, + "loss": 1.082, + "step": 1237 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999840198562e-05, + "loss": 1.3885, + "step": 1238 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999839917592786e-05, + "loss": 0.8606, + "step": 1239 + }, + { + "epoch": 0.0, + "learning_rate": 4.99998396363768e-05, + "loss": 1.3506, + "step": 1240 + }, + { + "epoch": 0.0, + "learning_rate": 4.999983935491401e-05, + "loss": 1.1122, + "step": 1241 + }, + { + "epoch": 0.0, + "learning_rate": 4.999983907320444e-05, + "loss": 1.3343, + "step": 1242 + }, + { + "epoch": 0.0, + "learning_rate": 4.999983879124808e-05, + "loss": 1.3739, + "step": 1243 + }, + { + "epoch": 0.0, + "learning_rate": 4.999983850904494e-05, + "loss": 1.3176, + "step": 1244 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999838226594994e-05, + "loss": 0.8282, + "step": 1245 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999837943898276e-05, + "loss": 0.6075, + "step": 1246 + }, + { + "epoch": 0.0, + "learning_rate": 4.999983766095477e-05, + "loss": 0.7719, + "step": 1247 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999837377764467e-05, + "loss": 0.5532, + "step": 1248 + }, + { + "epoch": 0.0, + "learning_rate": 4.999983709432738e-05, + "loss": 0.5228, + "step": 1249 + }, + { + "epoch": 0.0, + "learning_rate": 4.999983681064351e-05, + "loss": 0.6887, + "step": 1250 + }, + { + "epoch": 0.0, + "learning_rate": 4.999983652671285e-05, + "loss": 1.1635, + "step": 1251 + }, + { + "epoch": 0.0, + "learning_rate": 4.99998362425354e-05, + "loss": 1.174, + "step": 1252 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999835958111165e-05, + "loss": 1.3225, + "step": 1253 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999835673440136e-05, + "loss": 1.2051, + "step": 1254 + }, + { + "epoch": 0.0, + "learning_rate": 4.999983538852232e-05, + "loss": 1.1052, + "step": 1255 + }, + { + "epoch": 0.0, + "learning_rate": 4.999983510335772e-05, + "loss": 1.0838, + "step": 1256 + }, + { + "epoch": 0.0, + "learning_rate": 4.999983481794633e-05, + "loss": 0.8987, + "step": 1257 + }, + { + "epoch": 0.0, + "learning_rate": 4.999983453228816e-05, + "loss": 0.5425, + "step": 1258 + }, + { + "epoch": 0.0, + "learning_rate": 4.999983424638319e-05, + "loss": 0.5464, + "step": 1259 + }, + { + "epoch": 0.0, + "learning_rate": 4.999983396023144e-05, + "loss": 0.5096, + "step": 1260 + }, + { + "epoch": 0.0, + "learning_rate": 4.99998336738329e-05, + "loss": 1.0058, + "step": 1261 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999833387187575e-05, + "loss": 1.2524, + "step": 1262 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999833100295456e-05, + "loss": 1.0138, + "step": 1263 + }, + { + "epoch": 0.0, + "learning_rate": 4.999983281315656e-05, + "loss": 1.1427, + "step": 1264 + }, + { + "epoch": 0.0, + "learning_rate": 4.999983252577086e-05, + "loss": 1.548, + "step": 1265 + }, + { + "epoch": 0.0, + "learning_rate": 4.999983223813838e-05, + "loss": 1.1719, + "step": 1266 + }, + { + "epoch": 0.0, + "learning_rate": 4.999983195025911e-05, + "loss": 1.4455, + "step": 1267 + }, + { + "epoch": 0.0, + "learning_rate": 4.999983166213306e-05, + "loss": 1.4792, + "step": 1268 + }, + { + "epoch": 0.0, + "learning_rate": 4.999983137376022e-05, + "loss": 1.1389, + "step": 1269 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999831085140594e-05, + "loss": 1.0004, + "step": 1270 + }, + { + "epoch": 0.0, + "learning_rate": 4.999983079627417e-05, + "loss": 0.4547, + "step": 1271 + }, + { + "epoch": 0.0, + "learning_rate": 4.999983050716096e-05, + "loss": 0.8922, + "step": 1272 + }, + { + "epoch": 0.0, + "learning_rate": 4.999983021780097e-05, + "loss": 0.8002, + "step": 1273 + }, + { + "epoch": 0.0, + "learning_rate": 4.999982992819419e-05, + "loss": 1.1497, + "step": 1274 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999829638340625e-05, + "loss": 0.9905, + "step": 1275 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999829348240264e-05, + "loss": 1.1683, + "step": 1276 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999829057893124e-05, + "loss": 1.0706, + "step": 1277 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999828767299187e-05, + "loss": 1.1382, + "step": 1278 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999828476458464e-05, + "loss": 1.2152, + "step": 1279 + }, + { + "epoch": 0.0, + "learning_rate": 4.999982818537096e-05, + "loss": 1.0906, + "step": 1280 + }, + { + "epoch": 0.0, + "learning_rate": 4.999982789403666e-05, + "loss": 1.0177, + "step": 1281 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999827602455574e-05, + "loss": 1.2553, + "step": 1282 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999827310627706e-05, + "loss": 0.9395, + "step": 1283 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999827018553046e-05, + "loss": 1.0483, + "step": 1284 + }, + { + "epoch": 0.0, + "learning_rate": 4.99998267262316e-05, + "loss": 1.2686, + "step": 1285 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999826433663366e-05, + "loss": 1.2582, + "step": 1286 + }, + { + "epoch": 0.0, + "learning_rate": 4.999982614084834e-05, + "loss": 1.1988, + "step": 1287 + }, + { + "epoch": 0.0, + "learning_rate": 4.999982584778653e-05, + "loss": 1.1987, + "step": 1288 + }, + { + "epoch": 0.0, + "learning_rate": 4.999982555447793e-05, + "loss": 0.9949, + "step": 1289 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999825260922554e-05, + "loss": 1.3115, + "step": 1290 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999824967120373e-05, + "loss": 1.182, + "step": 1291 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999824673071415e-05, + "loss": 1.107, + "step": 1292 + }, + { + "epoch": 0.0, + "learning_rate": 4.999982437877566e-05, + "loss": 1.0447, + "step": 1293 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999824084233125e-05, + "loss": 0.838, + "step": 1294 + }, + { + "epoch": 0.0, + "learning_rate": 4.99998237894438e-05, + "loss": 1.0267, + "step": 1295 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999823494407695e-05, + "loss": 0.9289, + "step": 1296 + }, + { + "epoch": 0.0, + "learning_rate": 4.999982319912479e-05, + "loss": 1.1514, + "step": 1297 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999822903595106e-05, + "loss": 1.3189, + "step": 1298 + }, + { + "epoch": 0.0, + "learning_rate": 4.999982260781863e-05, + "loss": 1.1155, + "step": 1299 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999822311795364e-05, + "loss": 1.1044, + "step": 1300 + }, + { + "epoch": 0.0, + "learning_rate": 4.999982201552531e-05, + "loss": 1.2472, + "step": 1301 + }, + { + "epoch": 0.0, + "learning_rate": 4.999982171900848e-05, + "loss": 0.9781, + "step": 1302 + }, + { + "epoch": 0.0, + "learning_rate": 4.999982142224485e-05, + "loss": 0.716, + "step": 1303 + }, + { + "epoch": 0.0, + "learning_rate": 4.999982112523444e-05, + "loss": 0.5798, + "step": 1304 + }, + { + "epoch": 0.0, + "learning_rate": 4.999982082797724e-05, + "loss": 1.3287, + "step": 1305 + }, + { + "epoch": 0.0, + "learning_rate": 4.999982053047324e-05, + "loss": 1.0971, + "step": 1306 + }, + { + "epoch": 0.0, + "learning_rate": 4.999982023272247e-05, + "loss": 1.2485, + "step": 1307 + }, + { + "epoch": 0.0, + "learning_rate": 4.99998199347249e-05, + "loss": 0.9496, + "step": 1308 + }, + { + "epoch": 0.0, + "learning_rate": 4.999981963648055e-05, + "loss": 1.0331, + "step": 1309 + }, + { + "epoch": 0.0, + "learning_rate": 4.999981933798941e-05, + "loss": 1.045, + "step": 1310 + }, + { + "epoch": 0.0, + "learning_rate": 4.999981903925148e-05, + "loss": 0.9116, + "step": 1311 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999818740266766e-05, + "loss": 1.256, + "step": 1312 + }, + { + "epoch": 0.0, + "learning_rate": 4.999981844103526e-05, + "loss": 1.1436, + "step": 1313 + }, + { + "epoch": 0.0, + "learning_rate": 4.999981814155698e-05, + "loss": 1.1256, + "step": 1314 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999817841831896e-05, + "loss": 1.0737, + "step": 1315 + }, + { + "epoch": 0.0, + "learning_rate": 4.999981754186003e-05, + "loss": 1.2136, + "step": 1316 + }, + { + "epoch": 0.0, + "learning_rate": 4.999981724164138e-05, + "loss": 1.008, + "step": 1317 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999816941175935e-05, + "loss": 0.9908, + "step": 1318 + }, + { + "epoch": 0.0, + "learning_rate": 4.99998166404637e-05, + "loss": 1.1998, + "step": 1319 + }, + { + "epoch": 0.0, + "learning_rate": 4.999981633950469e-05, + "loss": 1.0553, + "step": 1320 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999816038298885e-05, + "loss": 1.2117, + "step": 1321 + }, + { + "epoch": 0.0, + "learning_rate": 4.999981573684629e-05, + "loss": 0.8267, + "step": 1322 + }, + { + "epoch": 0.0, + "learning_rate": 4.999981543514691e-05, + "loss": 1.5073, + "step": 1323 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999815133200745e-05, + "loss": 1.2936, + "step": 1324 + }, + { + "epoch": 0.0, + "learning_rate": 4.999981483100778e-05, + "loss": 1.1269, + "step": 1325 + }, + { + "epoch": 0.0, + "learning_rate": 4.999981452856805e-05, + "loss": 1.1422, + "step": 1326 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999814225881515e-05, + "loss": 1.0733, + "step": 1327 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999813922948205e-05, + "loss": 1.1965, + "step": 1328 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999813619768096e-05, + "loss": 1.5187, + "step": 1329 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999813316341196e-05, + "loss": 1.0381, + "step": 1330 + }, + { + "epoch": 0.0, + "learning_rate": 4.999981301266752e-05, + "loss": 1.0388, + "step": 1331 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999812708747054e-05, + "loss": 1.3295, + "step": 1332 + }, + { + "epoch": 0.0, + "learning_rate": 4.999981240457979e-05, + "loss": 1.1589, + "step": 1333 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999812100165754e-05, + "loss": 1.2435, + "step": 1334 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999811795504916e-05, + "loss": 1.0928, + "step": 1335 + }, + { + "epoch": 0.0, + "learning_rate": 4.99998114905973e-05, + "loss": 1.1308, + "step": 1336 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999811185442894e-05, + "loss": 1.06, + "step": 1337 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999810880041695e-05, + "loss": 1.6996, + "step": 1338 + }, + { + "epoch": 0.0, + "learning_rate": 4.999981057439372e-05, + "loss": 2.2645, + "step": 1339 + }, + { + "epoch": 0.0, + "learning_rate": 4.999981026849895e-05, + "loss": 2.2996, + "step": 1340 + }, + { + "epoch": 0.0, + "learning_rate": 4.999980996235739e-05, + "loss": 1.1277, + "step": 1341 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999809655969047e-05, + "loss": 1.1166, + "step": 1342 + }, + { + "epoch": 0.0, + "learning_rate": 4.999980934933392e-05, + "loss": 1.1923, + "step": 1343 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999809042452e-05, + "loss": 1.2633, + "step": 1344 + }, + { + "epoch": 0.0, + "learning_rate": 4.999980873532329e-05, + "loss": 0.8925, + "step": 1345 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999808427947795e-05, + "loss": 0.9157, + "step": 1346 + }, + { + "epoch": 0.0, + "learning_rate": 4.999980812032551e-05, + "loss": 1.1102, + "step": 1347 + }, + { + "epoch": 0.0, + "learning_rate": 4.999980781245645e-05, + "loss": 1.0015, + "step": 1348 + }, + { + "epoch": 0.0, + "learning_rate": 4.999980750434058e-05, + "loss": 1.0523, + "step": 1349 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999807195977946e-05, + "loss": 1.1134, + "step": 1350 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999806887368505e-05, + "loss": 0.9844, + "step": 1351 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999806578512287e-05, + "loss": 1.3819, + "step": 1352 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999806269409276e-05, + "loss": 1.3509, + "step": 1353 + }, + { + "epoch": 0.0, + "learning_rate": 4.999980596005949e-05, + "loss": 0.937, + "step": 1354 + }, + { + "epoch": 0.0, + "learning_rate": 4.999980565046291e-05, + "loss": 1.0986, + "step": 1355 + }, + { + "epoch": 0.0, + "learning_rate": 4.999980534061953e-05, + "loss": 0.9307, + "step": 1356 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999805030529374e-05, + "loss": 1.0425, + "step": 1357 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999804720192426e-05, + "loss": 1.0921, + "step": 1358 + }, + { + "epoch": 0.0, + "learning_rate": 4.99998044096087e-05, + "loss": 1.4279, + "step": 1359 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999804098778176e-05, + "loss": 1.0246, + "step": 1360 + }, + { + "epoch": 0.0, + "learning_rate": 4.999980378770087e-05, + "loss": 1.2811, + "step": 1361 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999803476376774e-05, + "loss": 1.2623, + "step": 1362 + }, + { + "epoch": 0.0, + "learning_rate": 4.999980316480589e-05, + "loss": 1.1786, + "step": 1363 + }, + { + "epoch": 0.0, + "learning_rate": 4.999980285298822e-05, + "loss": 1.7484, + "step": 1364 + }, + { + "epoch": 0.0, + "learning_rate": 4.999980254092376e-05, + "loss": 1.5065, + "step": 1365 + }, + { + "epoch": 0.0, + "learning_rate": 4.999980222861251e-05, + "loss": 1.063, + "step": 1366 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999801916054475e-05, + "loss": 1.1653, + "step": 1367 + }, + { + "epoch": 0.0, + "learning_rate": 4.999980160324966e-05, + "loss": 1.2328, + "step": 1368 + }, + { + "epoch": 0.0, + "learning_rate": 4.999980129019805e-05, + "loss": 1.1489, + "step": 1369 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999800976899654e-05, + "loss": 1.5534, + "step": 1370 + }, + { + "epoch": 0.0, + "learning_rate": 4.999980066335447e-05, + "loss": 2.7763, + "step": 1371 + }, + { + "epoch": 0.0, + "learning_rate": 4.99998003495625e-05, + "loss": 2.337, + "step": 1372 + }, + { + "epoch": 0.0, + "learning_rate": 4.999980003552375e-05, + "loss": 2.4659, + "step": 1373 + }, + { + "epoch": 0.0, + "learning_rate": 4.99997997212382e-05, + "loss": 2.3945, + "step": 1374 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999799406705864e-05, + "loss": 2.1941, + "step": 1375 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999799091926743e-05, + "loss": 1.6053, + "step": 1376 + }, + { + "epoch": 0.0, + "learning_rate": 4.999979877690084e-05, + "loss": 1.0788, + "step": 1377 + }, + { + "epoch": 0.0, + "learning_rate": 4.999979846162814e-05, + "loss": 1.3246, + "step": 1378 + }, + { + "epoch": 0.0, + "learning_rate": 4.999979814610865e-05, + "loss": 1.4813, + "step": 1379 + }, + { + "epoch": 0.0, + "learning_rate": 4.999979783034239e-05, + "loss": 1.3126, + "step": 1380 + }, + { + "epoch": 0.0, + "learning_rate": 4.999979751432933e-05, + "loss": 1.178, + "step": 1381 + }, + { + "epoch": 0.0, + "learning_rate": 4.999979719806949e-05, + "loss": 1.5922, + "step": 1382 + }, + { + "epoch": 0.0, + "learning_rate": 4.999979688156285e-05, + "loss": 1.3641, + "step": 1383 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999796564809434e-05, + "loss": 1.3358, + "step": 1384 + }, + { + "epoch": 0.0, + "learning_rate": 4.999979624780923e-05, + "loss": 0.9263, + "step": 1385 + }, + { + "epoch": 0.0, + "learning_rate": 4.999979593056223e-05, + "loss": 1.2803, + "step": 1386 + }, + { + "epoch": 0.0, + "learning_rate": 4.999979561306844e-05, + "loss": 0.8805, + "step": 1387 + }, + { + "epoch": 0.0, + "learning_rate": 4.999979529532788e-05, + "loss": 1.5963, + "step": 1388 + }, + { + "epoch": 0.0, + "learning_rate": 4.999979497734052e-05, + "loss": 1.3765, + "step": 1389 + }, + { + "epoch": 0.0, + "learning_rate": 4.999979465910637e-05, + "loss": 1.1817, + "step": 1390 + }, + { + "epoch": 0.0, + "learning_rate": 4.999979434062544e-05, + "loss": 1.3105, + "step": 1391 + }, + { + "epoch": 0.0, + "learning_rate": 4.999979402189772e-05, + "loss": 1.043, + "step": 1392 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999793702923216e-05, + "loss": 1.0101, + "step": 1393 + }, + { + "epoch": 0.0, + "learning_rate": 4.999979338370192e-05, + "loss": 1.3187, + "step": 1394 + }, + { + "epoch": 0.0, + "learning_rate": 4.999979306423384e-05, + "loss": 1.1142, + "step": 1395 + }, + { + "epoch": 0.0, + "learning_rate": 4.999979274451897e-05, + "loss": 0.9806, + "step": 1396 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999792424557315e-05, + "loss": 1.1195, + "step": 1397 + }, + { + "epoch": 0.0, + "learning_rate": 4.999979210434886e-05, + "loss": 1.2506, + "step": 1398 + }, + { + "epoch": 0.0, + "learning_rate": 4.999979178389363e-05, + "loss": 1.13, + "step": 1399 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999791463191616e-05, + "loss": 1.2353, + "step": 1400 + }, + { + "epoch": 0.0, + "learning_rate": 4.99997911422428e-05, + "loss": 1.1788, + "step": 1401 + }, + { + "epoch": 0.0, + "learning_rate": 4.999979082104721e-05, + "loss": 1.0935, + "step": 1402 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999790499604835e-05, + "loss": 1.0114, + "step": 1403 + }, + { + "epoch": 0.0, + "learning_rate": 4.999979017791566e-05, + "loss": 1.1398, + "step": 1404 + }, + { + "epoch": 0.0, + "learning_rate": 4.999978985597971e-05, + "loss": 1.2195, + "step": 1405 + }, + { + "epoch": 0.0, + "learning_rate": 4.999978953379696e-05, + "loss": 1.1132, + "step": 1406 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999789211367435e-05, + "loss": 1.2819, + "step": 1407 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999788888691114e-05, + "loss": 0.621, + "step": 1408 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999788565768e-05, + "loss": 0.3403, + "step": 1409 + }, + { + "epoch": 0.0, + "learning_rate": 4.999978824259811e-05, + "loss": 0.3945, + "step": 1410 + }, + { + "epoch": 0.0, + "learning_rate": 4.999978791918143e-05, + "loss": 1.0303, + "step": 1411 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999787595517965e-05, + "loss": 1.1808, + "step": 1412 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999787271607714e-05, + "loss": 1.3936, + "step": 1413 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999786947450665e-05, + "loss": 2.0301, + "step": 1414 + }, + { + "epoch": 0.0, + "learning_rate": 4.999978662304684e-05, + "loss": 1.1428, + "step": 1415 + }, + { + "epoch": 0.0, + "learning_rate": 4.999978629839622e-05, + "loss": 0.9583, + "step": 1416 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999785973498816e-05, + "loss": 1.1777, + "step": 1417 + }, + { + "epoch": 0.0, + "learning_rate": 4.999978564835462e-05, + "loss": 1.103, + "step": 1418 + }, + { + "epoch": 0.0, + "learning_rate": 4.999978532296364e-05, + "loss": 1.017, + "step": 1419 + }, + { + "epoch": 0.0, + "learning_rate": 4.999978499732587e-05, + "loss": 1.0472, + "step": 1420 + }, + { + "epoch": 0.0, + "learning_rate": 4.999978467144132e-05, + "loss": 0.9334, + "step": 1421 + }, + { + "epoch": 0.0, + "learning_rate": 4.999978434530998e-05, + "loss": 1.0641, + "step": 1422 + }, + { + "epoch": 0.0, + "learning_rate": 4.999978401893185e-05, + "loss": 1.1676, + "step": 1423 + }, + { + "epoch": 0.0, + "learning_rate": 4.999978369230693e-05, + "loss": 0.9384, + "step": 1424 + }, + { + "epoch": 0.0, + "learning_rate": 4.999978336543523e-05, + "loss": 0.9575, + "step": 1425 + }, + { + "epoch": 0.0, + "learning_rate": 4.999978303831674e-05, + "loss": 1.1896, + "step": 1426 + }, + { + "epoch": 0.0, + "learning_rate": 4.999978271095146e-05, + "loss": 1.1317, + "step": 1427 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999782383339396e-05, + "loss": 1.0979, + "step": 1428 + }, + { + "epoch": 0.0, + "learning_rate": 4.999978205548054e-05, + "loss": 1.0988, + "step": 1429 + }, + { + "epoch": 0.0, + "learning_rate": 4.99997817273749e-05, + "loss": 1.1114, + "step": 1430 + }, + { + "epoch": 0.0, + "learning_rate": 4.999978139902247e-05, + "loss": 2.2097, + "step": 1431 + }, + { + "epoch": 0.0, + "learning_rate": 4.999978107042326e-05, + "loss": 1.0582, + "step": 1432 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999780741577265e-05, + "loss": 0.8381, + "step": 1433 + }, + { + "epoch": 0.0, + "learning_rate": 4.999978041248447e-05, + "loss": 0.8595, + "step": 1434 + }, + { + "epoch": 0.0, + "learning_rate": 4.999978008314489e-05, + "loss": 0.8939, + "step": 1435 + }, + { + "epoch": 0.0, + "learning_rate": 4.999977975355853e-05, + "loss": 1.2282, + "step": 1436 + }, + { + "epoch": 0.0, + "learning_rate": 4.999977942372538e-05, + "loss": 1.3273, + "step": 1437 + }, + { + "epoch": 0.0, + "learning_rate": 4.999977909364544e-05, + "loss": 1.1084, + "step": 1438 + }, + { + "epoch": 0.0, + "learning_rate": 4.999977876331872e-05, + "loss": 1.0852, + "step": 1439 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999778432745204e-05, + "loss": 1.3899, + "step": 1440 + }, + { + "epoch": 0.0, + "learning_rate": 4.99997781019249e-05, + "loss": 1.2291, + "step": 1441 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999777770857816e-05, + "loss": 1.1377, + "step": 1442 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999777439543935e-05, + "loss": 1.0589, + "step": 1443 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999777107983276e-05, + "loss": 1.0669, + "step": 1444 + }, + { + "epoch": 0.0, + "learning_rate": 4.999977677617582e-05, + "loss": 0.9086, + "step": 1445 + }, + { + "epoch": 0.0, + "learning_rate": 4.999977644412159e-05, + "loss": 1.2046, + "step": 1446 + }, + { + "epoch": 0.0, + "learning_rate": 4.999977611182056e-05, + "loss": 1.195, + "step": 1447 + }, + { + "epoch": 0.0, + "learning_rate": 4.999977577927275e-05, + "loss": 1.3642, + "step": 1448 + }, + { + "epoch": 0.0, + "learning_rate": 4.999977544647816e-05, + "loss": 1.078, + "step": 1449 + }, + { + "epoch": 0.0, + "learning_rate": 4.999977511343677e-05, + "loss": 1.2531, + "step": 1450 + }, + { + "epoch": 0.0, + "learning_rate": 4.99997747801486e-05, + "loss": 1.2787, + "step": 1451 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999774446613637e-05, + "loss": 0.9946, + "step": 1452 + }, + { + "epoch": 0.0, + "learning_rate": 4.999977411283189e-05, + "loss": 1.0959, + "step": 1453 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999773778803354e-05, + "loss": 1.214, + "step": 1454 + }, + { + "epoch": 0.0, + "learning_rate": 4.999977344452803e-05, + "loss": 1.1483, + "step": 1455 + }, + { + "epoch": 0.0, + "learning_rate": 4.999977311000592e-05, + "loss": 1.084, + "step": 1456 + }, + { + "epoch": 0.0, + "learning_rate": 4.999977277523703e-05, + "loss": 0.9704, + "step": 1457 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999772440221344e-05, + "loss": 0.7158, + "step": 1458 + }, + { + "epoch": 0.0, + "learning_rate": 4.999977210495887e-05, + "loss": 1.0292, + "step": 1459 + }, + { + "epoch": 0.0, + "learning_rate": 4.999977176944961e-05, + "loss": 1.1296, + "step": 1460 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999771433693564e-05, + "loss": 1.0778, + "step": 1461 + }, + { + "epoch": 0.0, + "learning_rate": 4.999977109769074e-05, + "loss": 3.3233, + "step": 1462 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999770761441115e-05, + "loss": 3.9099, + "step": 1463 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999770424944707e-05, + "loss": 1.3, + "step": 1464 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999770088201514e-05, + "loss": 0.973, + "step": 1465 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999769751211536e-05, + "loss": 1.1086, + "step": 1466 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976941397476e-05, + "loss": 0.8508, + "step": 1467 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976907649121e-05, + "loss": 1.0619, + "step": 1468 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976873876087e-05, + "loss": 1.0714, + "step": 1469 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976840078374e-05, + "loss": 2.7562, + "step": 1470 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976806255982e-05, + "loss": 0.9887, + "step": 1471 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976772408912e-05, + "loss": 0.974, + "step": 1472 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999767385371624e-05, + "loss": 1.245, + "step": 1473 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976704640734e-05, + "loss": 1.5144, + "step": 1474 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976670719628e-05, + "loss": 1.0975, + "step": 1475 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976636773843e-05, + "loss": 1.8456, + "step": 1476 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976602803379e-05, + "loss": 1.646, + "step": 1477 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999765688082356e-05, + "loss": 1.3266, + "step": 1478 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976534788414e-05, + "loss": 1.3988, + "step": 1479 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999765007439144e-05, + "loss": 1.3974, + "step": 1480 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976466674735e-05, + "loss": 1.1976, + "step": 1481 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976432580878e-05, + "loss": 1.5675, + "step": 1482 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999763984623415e-05, + "loss": 1.0349, + "step": 1483 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999763643191264e-05, + "loss": 1.3132, + "step": 1484 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976330151233e-05, + "loss": 1.1919, + "step": 1485 + }, + { + "epoch": 0.0, + "learning_rate": 4.99997629595866e-05, + "loss": 0.9964, + "step": 1486 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976261741409e-05, + "loss": 1.2105, + "step": 1487 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999762274994794e-05, + "loss": 0.8005, + "step": 1488 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999761932328706e-05, + "loss": 1.0002, + "step": 1489 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999761589415834e-05, + "loss": 1.3317, + "step": 1490 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976124625617e-05, + "loss": 1.5958, + "step": 1491 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976090284972e-05, + "loss": 0.9418, + "step": 1492 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999760559196494e-05, + "loss": 1.1749, + "step": 1493 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976021529647e-05, + "loss": 1.041, + "step": 1494 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999759871149666e-05, + "loss": 1.3281, + "step": 1495 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999759526756064e-05, + "loss": 0.9078, + "step": 1496 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999759182115685e-05, + "loss": 1.1638, + "step": 1497 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999758837228514e-05, + "loss": 1.2198, + "step": 1498 + }, + { + "epoch": 0.0, + "learning_rate": 4.999975849209456e-05, + "loss": 1.4662, + "step": 1499 + }, + { + "epoch": 0.0, + "learning_rate": 4.999975814671382e-05, + "loss": 1.384, + "step": 1500 + }, + { + "epoch": 0.0, + "eval_loss": 1.1025360822677612, + "eval_runtime": 85.1844, + "eval_samples_per_second": 16.259, + "eval_steps_per_second": 4.074, + "step": 1500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999757801086286e-05, + "loss": 1.0703, + "step": 1501 + }, + { + "epoch": 0.0, + "learning_rate": 4.999975745521196e-05, + "loss": 1.1068, + "step": 1502 + }, + { + "epoch": 0.0, + "learning_rate": 4.999975710909086e-05, + "loss": 1.5193, + "step": 1503 + }, + { + "epoch": 0.0, + "learning_rate": 4.999975676272297e-05, + "loss": 1.0456, + "step": 1504 + }, + { + "epoch": 0.0, + "learning_rate": 4.999975641610829e-05, + "loss": 1.1337, + "step": 1505 + }, + { + "epoch": 0.0, + "learning_rate": 4.999975606924682e-05, + "loss": 1.0031, + "step": 1506 + }, + { + "epoch": 0.0, + "learning_rate": 4.999975572213856e-05, + "loss": 1.2249, + "step": 1507 + }, + { + "epoch": 0.0, + "learning_rate": 4.999975537478353e-05, + "loss": 1.0518, + "step": 1508 + }, + { + "epoch": 0.0, + "learning_rate": 4.99997550271817e-05, + "loss": 1.1407, + "step": 1509 + }, + { + "epoch": 0.0, + "learning_rate": 4.999975467933309e-05, + "loss": 1.0663, + "step": 1510 + }, + { + "epoch": 0.0, + "learning_rate": 4.999975433123768e-05, + "loss": 1.0735, + "step": 1511 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999753982895494e-05, + "loss": 1.2828, + "step": 1512 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999753634306525e-05, + "loss": 1.3558, + "step": 1513 + }, + { + "epoch": 0.0, + "learning_rate": 4.999975328547076e-05, + "loss": 1.0236, + "step": 1514 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999752936388205e-05, + "loss": 0.9801, + "step": 1515 + }, + { + "epoch": 0.0, + "learning_rate": 4.999975258705887e-05, + "loss": 1.0956, + "step": 1516 + }, + { + "epoch": 0.0, + "learning_rate": 4.999975223748275e-05, + "loss": 1.2429, + "step": 1517 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999751887659833e-05, + "loss": 1.0751, + "step": 1518 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999751537590136e-05, + "loss": 1.5672, + "step": 1519 + }, + { + "epoch": 0.0, + "learning_rate": 4.999975118727365e-05, + "loss": 1.2634, + "step": 1520 + }, + { + "epoch": 0.0, + "learning_rate": 4.999975083671038e-05, + "loss": 1.2086, + "step": 1521 + }, + { + "epoch": 0.0, + "learning_rate": 4.999975048590032e-05, + "loss": 1.0235, + "step": 1522 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999750134843476e-05, + "loss": 1.1494, + "step": 1523 + }, + { + "epoch": 0.0, + "learning_rate": 4.999974978353984e-05, + "loss": 1.0878, + "step": 1524 + }, + { + "epoch": 0.0, + "learning_rate": 4.999974943198942e-05, + "loss": 1.1574, + "step": 1525 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999749080192217e-05, + "loss": 1.267, + "step": 1526 + }, + { + "epoch": 0.0, + "learning_rate": 4.999974872814822e-05, + "loss": 1.0234, + "step": 1527 + }, + { + "epoch": 0.0, + "learning_rate": 4.999974837585744e-05, + "loss": 1.2157, + "step": 1528 + }, + { + "epoch": 0.0, + "learning_rate": 4.999974802331987e-05, + "loss": 1.3503, + "step": 1529 + }, + { + "epoch": 0.0, + "learning_rate": 4.999974767053552e-05, + "loss": 1.2035, + "step": 1530 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999747317504375e-05, + "loss": 1.1835, + "step": 1531 + }, + { + "epoch": 0.0, + "learning_rate": 4.999974696422645e-05, + "loss": 1.3205, + "step": 1532 + }, + { + "epoch": 0.0, + "learning_rate": 4.999974661070174e-05, + "loss": 1.2915, + "step": 1533 + }, + { + "epoch": 0.0, + "learning_rate": 4.999974625693023e-05, + "loss": 1.0228, + "step": 1534 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999745902911946e-05, + "loss": 1.0057, + "step": 1535 + }, + { + "epoch": 0.0, + "learning_rate": 4.999974554864687e-05, + "loss": 0.9307, + "step": 1536 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999745194135e-05, + "loss": 1.2788, + "step": 1537 + }, + { + "epoch": 0.0, + "learning_rate": 4.999974483937635e-05, + "loss": 1.3145, + "step": 1538 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999744484370914e-05, + "loss": 1.4141, + "step": 1539 + }, + { + "epoch": 0.0, + "learning_rate": 4.999974412911869e-05, + "loss": 0.8175, + "step": 1540 + }, + { + "epoch": 0.0, + "learning_rate": 4.999974377361968e-05, + "loss": 1.0975, + "step": 1541 + }, + { + "epoch": 0.0, + "learning_rate": 4.999974341787388e-05, + "loss": 1.3513, + "step": 1542 + }, + { + "epoch": 0.0, + "learning_rate": 4.999974306188129e-05, + "loss": 1.0626, + "step": 1543 + }, + { + "epoch": 0.0, + "learning_rate": 4.999974270564192e-05, + "loss": 1.0888, + "step": 1544 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999742349155766e-05, + "loss": 1.0065, + "step": 1545 + }, + { + "epoch": 0.0, + "learning_rate": 4.999974199242282e-05, + "loss": 1.0015, + "step": 1546 + }, + { + "epoch": 0.0, + "learning_rate": 4.999974163544309e-05, + "loss": 1.1223, + "step": 1547 + }, + { + "epoch": 0.0, + "learning_rate": 4.999974127821656e-05, + "loss": 1.1169, + "step": 1548 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999740920743256e-05, + "loss": 1.3053, + "step": 1549 + }, + { + "epoch": 0.0, + "learning_rate": 4.999974056302317e-05, + "loss": 1.1333, + "step": 1550 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999740205056286e-05, + "loss": 0.7307, + "step": 1551 + }, + { + "epoch": 0.0, + "learning_rate": 4.999973984684262e-05, + "loss": 1.3703, + "step": 1552 + }, + { + "epoch": 0.0, + "learning_rate": 4.999973948838216e-05, + "loss": 1.4943, + "step": 1553 + }, + { + "epoch": 0.0, + "learning_rate": 4.999973912967492e-05, + "loss": 1.1022, + "step": 1554 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999738770720895e-05, + "loss": 0.9436, + "step": 1555 + }, + { + "epoch": 0.0, + "learning_rate": 4.999973841152008e-05, + "loss": 1.1686, + "step": 1556 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999738052072474e-05, + "loss": 1.0699, + "step": 1557 + }, + { + "epoch": 0.0, + "learning_rate": 4.999973769237809e-05, + "loss": 1.023, + "step": 1558 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999737332436914e-05, + "loss": 1.1993, + "step": 1559 + }, + { + "epoch": 0.0, + "learning_rate": 4.999973697224895e-05, + "loss": 0.9739, + "step": 1560 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999736611814195e-05, + "loss": 1.2015, + "step": 1561 + }, + { + "epoch": 0.0, + "learning_rate": 4.999973625113266e-05, + "loss": 1.3037, + "step": 1562 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999735890204344e-05, + "loss": 1.1989, + "step": 1563 + }, + { + "epoch": 0.0, + "learning_rate": 4.999973552902923e-05, + "loss": 0.9803, + "step": 1564 + }, + { + "epoch": 0.0, + "learning_rate": 4.999973516760733e-05, + "loss": 1.1173, + "step": 1565 + }, + { + "epoch": 0.0, + "learning_rate": 4.999973480593865e-05, + "loss": 0.8269, + "step": 1566 + }, + { + "epoch": 0.0, + "learning_rate": 4.999973444402318e-05, + "loss": 1.2442, + "step": 1567 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999734081860925e-05, + "loss": 1.1248, + "step": 1568 + }, + { + "epoch": 0.0, + "learning_rate": 4.999973371945188e-05, + "loss": 1.2366, + "step": 1569 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999733356796046e-05, + "loss": 1.3797, + "step": 1570 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999732993893426e-05, + "loss": 1.4179, + "step": 1571 + }, + { + "epoch": 0.0, + "learning_rate": 4.999973263074402e-05, + "loss": 1.1726, + "step": 1572 + }, + { + "epoch": 0.0, + "learning_rate": 4.999973226734783e-05, + "loss": 0.4636, + "step": 1573 + }, + { + "epoch": 0.0, + "learning_rate": 4.999973190370485e-05, + "loss": 0.8854, + "step": 1574 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999731539815086e-05, + "loss": 1.2157, + "step": 1575 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999731175678536e-05, + "loss": 1.6862, + "step": 1576 + }, + { + "epoch": 0.0, + "learning_rate": 4.99997308112952e-05, + "loss": 0.8838, + "step": 1577 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999730446665075e-05, + "loss": 1.1561, + "step": 1578 + }, + { + "epoch": 0.0, + "learning_rate": 4.999973008178816e-05, + "loss": 0.9012, + "step": 1579 + }, + { + "epoch": 0.0, + "learning_rate": 4.999972971666446e-05, + "loss": 1.02, + "step": 1580 + }, + { + "epoch": 0.0, + "learning_rate": 4.999972935129398e-05, + "loss": 1.1196, + "step": 1581 + }, + { + "epoch": 0.0, + "learning_rate": 4.99997289856767e-05, + "loss": 1.2262, + "step": 1582 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999728619812644e-05, + "loss": 1.259, + "step": 1583 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999728253701796e-05, + "loss": 1.1746, + "step": 1584 + }, + { + "epoch": 0.0, + "learning_rate": 4.999972788734416e-05, + "loss": 1.0513, + "step": 1585 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999727520739744e-05, + "loss": 1.0383, + "step": 1586 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999727153888535e-05, + "loss": 1.0011, + "step": 1587 + }, + { + "epoch": 0.0, + "learning_rate": 4.999972678679055e-05, + "loss": 1.0647, + "step": 1588 + }, + { + "epoch": 0.0, + "learning_rate": 4.999972641944576e-05, + "loss": 0.817, + "step": 1589 + }, + { + "epoch": 0.0, + "learning_rate": 4.99997260518542e-05, + "loss": 1.0495, + "step": 1590 + }, + { + "epoch": 0.0, + "learning_rate": 4.999972568401584e-05, + "loss": 1.1049, + "step": 1591 + }, + { + "epoch": 0.0, + "learning_rate": 4.99997253159307e-05, + "loss": 0.9831, + "step": 1592 + }, + { + "epoch": 0.0, + "learning_rate": 4.999972494759878e-05, + "loss": 0.9951, + "step": 1593 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999724579020055e-05, + "loss": 0.8695, + "step": 1594 + }, + { + "epoch": 0.0, + "learning_rate": 4.999972421019456e-05, + "loss": 1.1208, + "step": 1595 + }, + { + "epoch": 0.0, + "learning_rate": 4.999972384112227e-05, + "loss": 1.3532, + "step": 1596 + }, + { + "epoch": 0.0, + "learning_rate": 4.99997234718032e-05, + "loss": 1.1476, + "step": 1597 + }, + { + "epoch": 0.0, + "learning_rate": 4.999972310223734e-05, + "loss": 1.2644, + "step": 1598 + }, + { + "epoch": 0.0, + "learning_rate": 4.999972273242469e-05, + "loss": 1.1528, + "step": 1599 + }, + { + "epoch": 0.0, + "learning_rate": 4.999972236236525e-05, + "loss": 1.3417, + "step": 1600 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999721992059034e-05, + "loss": 1.1045, + "step": 1601 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999721621506026e-05, + "loss": 1.084, + "step": 1602 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999721250706234e-05, + "loss": 1.1412, + "step": 1603 + }, + { + "epoch": 0.0, + "learning_rate": 4.999972087965965e-05, + "loss": 0.6463, + "step": 1604 + }, + { + "epoch": 0.0, + "learning_rate": 4.999972050836628e-05, + "loss": 0.5842, + "step": 1605 + }, + { + "epoch": 0.0, + "learning_rate": 4.999972013682612e-05, + "loss": 0.4859, + "step": 1606 + }, + { + "epoch": 0.0, + "learning_rate": 4.999971976503919e-05, + "loss": 1.1645, + "step": 1607 + }, + { + "epoch": 0.0, + "learning_rate": 4.999971939300546e-05, + "loss": 0.9786, + "step": 1608 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999719020724945e-05, + "loss": 1.294, + "step": 1609 + }, + { + "epoch": 0.0, + "learning_rate": 4.999971864819764e-05, + "loss": 1.2126, + "step": 1610 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999718275423555e-05, + "loss": 1.1962, + "step": 1611 + }, + { + "epoch": 0.0, + "learning_rate": 4.999971790240268e-05, + "loss": 1.073, + "step": 1612 + }, + { + "epoch": 0.0, + "learning_rate": 4.999971752913501e-05, + "loss": 1.1562, + "step": 1613 + }, + { + "epoch": 0.0, + "learning_rate": 4.999971715562057e-05, + "loss": 1.1068, + "step": 1614 + }, + { + "epoch": 0.0, + "learning_rate": 4.999971678185933e-05, + "loss": 1.1081, + "step": 1615 + }, + { + "epoch": 0.0, + "learning_rate": 4.999971640785131e-05, + "loss": 1.0775, + "step": 1616 + }, + { + "epoch": 0.0, + "learning_rate": 4.99997160335965e-05, + "loss": 1.083, + "step": 1617 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999715659094906e-05, + "loss": 1.0268, + "step": 1618 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999715284346524e-05, + "loss": 0.9439, + "step": 1619 + }, + { + "epoch": 0.0, + "learning_rate": 4.999971490935136e-05, + "loss": 0.7094, + "step": 1620 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999714534109405e-05, + "loss": 1.4053, + "step": 1621 + }, + { + "epoch": 0.0, + "learning_rate": 4.999971415862066e-05, + "loss": 0.924, + "step": 1622 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999713782885134e-05, + "loss": 1.1412, + "step": 1623 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999713406902814e-05, + "loss": 1.0323, + "step": 1624 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999713030673717e-05, + "loss": 1.2048, + "step": 1625 + }, + { + "epoch": 0.0, + "learning_rate": 4.999971265419783e-05, + "loss": 0.7828, + "step": 1626 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999712277475154e-05, + "loss": 0.9369, + "step": 1627 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999711900505695e-05, + "loss": 1.1362, + "step": 1628 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999711523289445e-05, + "loss": 1.134, + "step": 1629 + }, + { + "epoch": 0.0, + "learning_rate": 4.999971114582641e-05, + "loss": 1.2234, + "step": 1630 + }, + { + "epoch": 0.0, + "learning_rate": 4.999971076811659e-05, + "loss": 1.2499, + "step": 1631 + }, + { + "epoch": 0.0, + "learning_rate": 4.999971039015998e-05, + "loss": 1.2591, + "step": 1632 + }, + { + "epoch": 0.0, + "learning_rate": 4.999971001195659e-05, + "loss": 1.4315, + "step": 1633 + }, + { + "epoch": 0.0, + "learning_rate": 4.999970963350641e-05, + "loss": 1.0759, + "step": 1634 + }, + { + "epoch": 0.0, + "learning_rate": 4.999970925480944e-05, + "loss": 1.169, + "step": 1635 + }, + { + "epoch": 0.0, + "learning_rate": 4.999970887586568e-05, + "loss": 1.393, + "step": 1636 + }, + { + "epoch": 0.0, + "learning_rate": 4.999970849667515e-05, + "loss": 1.3423, + "step": 1637 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999708117237814e-05, + "loss": 1.0546, + "step": 1638 + }, + { + "epoch": 0.0, + "learning_rate": 4.99997077375537e-05, + "loss": 0.8396, + "step": 1639 + }, + { + "epoch": 0.0, + "learning_rate": 4.999970735762281e-05, + "loss": 1.1582, + "step": 1640 + }, + { + "epoch": 0.0, + "learning_rate": 4.999970697744511e-05, + "loss": 1.021, + "step": 1641 + }, + { + "epoch": 0.0, + "learning_rate": 4.999970659702064e-05, + "loss": 1.0423, + "step": 1642 + }, + { + "epoch": 0.0, + "learning_rate": 4.999970621634938e-05, + "loss": 0.8419, + "step": 1643 + }, + { + "epoch": 0.0, + "learning_rate": 4.999970583543133e-05, + "loss": 1.0889, + "step": 1644 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999705454266496e-05, + "loss": 1.2187, + "step": 1645 + }, + { + "epoch": 0.0, + "learning_rate": 4.999970507285488e-05, + "loss": 1.3234, + "step": 1646 + }, + { + "epoch": 0.0, + "learning_rate": 4.999970469119647e-05, + "loss": 1.1936, + "step": 1647 + }, + { + "epoch": 0.0, + "learning_rate": 4.999970430929128e-05, + "loss": 1.1664, + "step": 1648 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999703927139296e-05, + "loss": 0.8755, + "step": 1649 + }, + { + "epoch": 0.0, + "learning_rate": 4.999970354474053e-05, + "loss": 0.4793, + "step": 1650 + }, + { + "epoch": 0.0, + "learning_rate": 4.999970316209498e-05, + "loss": 0.4663, + "step": 1651 + }, + { + "epoch": 0.0, + "learning_rate": 4.999970277920264e-05, + "loss": 1.0177, + "step": 1652 + }, + { + "epoch": 0.0, + "learning_rate": 4.999970239606352e-05, + "loss": 0.8776, + "step": 1653 + }, + { + "epoch": 0.0, + "learning_rate": 4.999970201267761e-05, + "loss": 1.1567, + "step": 1654 + }, + { + "epoch": 0.0, + "learning_rate": 4.99997016290449e-05, + "loss": 1.1212, + "step": 1655 + }, + { + "epoch": 0.0, + "learning_rate": 4.999970124516542e-05, + "loss": 1.0755, + "step": 1656 + }, + { + "epoch": 0.0, + "learning_rate": 4.999970086103915e-05, + "loss": 1.2144, + "step": 1657 + }, + { + "epoch": 0.0, + "learning_rate": 4.999970047666609e-05, + "loss": 1.1866, + "step": 1658 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999700092046245e-05, + "loss": 0.9839, + "step": 1659 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999699707179614e-05, + "loss": 1.3086, + "step": 1660 + }, + { + "epoch": 0.0, + "learning_rate": 4.99996993220662e-05, + "loss": 1.105, + "step": 1661 + }, + { + "epoch": 0.0, + "learning_rate": 4.999969893670599e-05, + "loss": 1.253, + "step": 1662 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999698551099e-05, + "loss": 1.546, + "step": 1663 + }, + { + "epoch": 0.0, + "learning_rate": 4.999969816524522e-05, + "loss": 1.0515, + "step": 1664 + }, + { + "epoch": 0.0, + "learning_rate": 4.999969777914466e-05, + "loss": 1.2819, + "step": 1665 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999697392797306e-05, + "loss": 0.9853, + "step": 1666 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999697006203175e-05, + "loss": 1.0811, + "step": 1667 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999696619362245e-05, + "loss": 1.0727, + "step": 1668 + }, + { + "epoch": 0.0, + "learning_rate": 4.999969623227454e-05, + "loss": 1.1194, + "step": 1669 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999695844940045e-05, + "loss": 0.961, + "step": 1670 + }, + { + "epoch": 0.0, + "learning_rate": 4.999969545735876e-05, + "loss": 1.0931, + "step": 1671 + }, + { + "epoch": 0.0, + "learning_rate": 4.999969506953069e-05, + "loss": 1.2467, + "step": 1672 + }, + { + "epoch": 0.0, + "learning_rate": 4.999969468145583e-05, + "loss": 1.2632, + "step": 1673 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999694293134195e-05, + "loss": 1.3062, + "step": 1674 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999693904565765e-05, + "loss": 1.0255, + "step": 1675 + }, + { + "epoch": 0.0, + "learning_rate": 4.999969351575055e-05, + "loss": 0.4736, + "step": 1676 + }, + { + "epoch": 0.0, + "learning_rate": 4.999969312668855e-05, + "loss": 0.4632, + "step": 1677 + }, + { + "epoch": 0.0, + "learning_rate": 4.999969273737976e-05, + "loss": 1.1215, + "step": 1678 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999692347824186e-05, + "loss": 1.2863, + "step": 1679 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999691958021826e-05, + "loss": 1.2319, + "step": 1680 + }, + { + "epoch": 0.0, + "learning_rate": 4.999969156797268e-05, + "loss": 1.1239, + "step": 1681 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999691177676745e-05, + "loss": 1.1725, + "step": 1682 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999690787134024e-05, + "loss": 1.3194, + "step": 1683 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999690396344525e-05, + "loss": 1.767, + "step": 1684 + }, + { + "epoch": 0.0, + "learning_rate": 4.999969000530823e-05, + "loss": 1.1727, + "step": 1685 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999689614025146e-05, + "loss": 0.942, + "step": 1686 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999689222495286e-05, + "loss": 0.8949, + "step": 1687 + }, + { + "epoch": 0.0, + "learning_rate": 4.999968883071863e-05, + "loss": 1.0356, + "step": 1688 + }, + { + "epoch": 0.0, + "learning_rate": 4.999968843869519e-05, + "loss": 1.1545, + "step": 1689 + }, + { + "epoch": 0.0, + "learning_rate": 4.999968804642496e-05, + "loss": 1.0806, + "step": 1690 + }, + { + "epoch": 0.0, + "learning_rate": 4.999968765390796e-05, + "loss": 1.6976, + "step": 1691 + }, + { + "epoch": 0.0, + "learning_rate": 4.999968726114416e-05, + "loss": 1.4155, + "step": 1692 + }, + { + "epoch": 0.0, + "learning_rate": 4.999968686813358e-05, + "loss": 1.2434, + "step": 1693 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999686474876205e-05, + "loss": 1.0439, + "step": 1694 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999686081372046e-05, + "loss": 0.9256, + "step": 1695 + }, + { + "epoch": 0.0, + "learning_rate": 4.99996856876211e-05, + "loss": 1.2005, + "step": 1696 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999685293623376e-05, + "loss": 1.0603, + "step": 1697 + }, + { + "epoch": 0.0, + "learning_rate": 4.999968489937886e-05, + "loss": 1.3092, + "step": 1698 + }, + { + "epoch": 0.0, + "learning_rate": 4.999968450488756e-05, + "loss": 1.3336, + "step": 1699 + }, + { + "epoch": 0.0, + "learning_rate": 4.999968411014947e-05, + "loss": 1.0654, + "step": 1700 + }, + { + "epoch": 0.0, + "learning_rate": 4.999968371516459e-05, + "loss": 1.1481, + "step": 1701 + }, + { + "epoch": 0.0, + "learning_rate": 4.999968331993293e-05, + "loss": 0.9474, + "step": 1702 + }, + { + "epoch": 0.0, + "learning_rate": 4.999968292445448e-05, + "loss": 1.5143, + "step": 1703 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999682528729254e-05, + "loss": 2.0976, + "step": 1704 + }, + { + "epoch": 0.0, + "learning_rate": 4.999968213275723e-05, + "loss": 1.2388, + "step": 1705 + }, + { + "epoch": 0.0, + "learning_rate": 4.999968173653843e-05, + "loss": 1.3075, + "step": 1706 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999681340072835e-05, + "loss": 1.5414, + "step": 1707 + }, + { + "epoch": 0.0, + "learning_rate": 4.999968094336045e-05, + "loss": 0.8223, + "step": 1708 + }, + { + "epoch": 0.0, + "learning_rate": 4.999968054640128e-05, + "loss": 1.1105, + "step": 1709 + }, + { + "epoch": 0.0, + "learning_rate": 4.999968014919534e-05, + "loss": 1.3593, + "step": 1710 + }, + { + "epoch": 0.0, + "learning_rate": 4.99996797517426e-05, + "loss": 1.2691, + "step": 1711 + }, + { + "epoch": 0.0, + "learning_rate": 4.999967935404307e-05, + "loss": 0.9297, + "step": 1712 + }, + { + "epoch": 0.0, + "learning_rate": 4.999967895609676e-05, + "loss": 1.17, + "step": 1713 + }, + { + "epoch": 0.0, + "learning_rate": 4.999967855790366e-05, + "loss": 0.9769, + "step": 1714 + }, + { + "epoch": 0.0, + "learning_rate": 4.999967815946378e-05, + "loss": 0.8034, + "step": 1715 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999677760777114e-05, + "loss": 0.9674, + "step": 1716 + }, + { + "epoch": 0.0, + "learning_rate": 4.999967736184366e-05, + "loss": 0.9551, + "step": 1717 + }, + { + "epoch": 0.0, + "learning_rate": 4.999967696266341e-05, + "loss": 1.3977, + "step": 1718 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999676563236384e-05, + "loss": 0.9968, + "step": 1719 + }, + { + "epoch": 0.0, + "learning_rate": 4.999967616356257e-05, + "loss": 0.7371, + "step": 1720 + }, + { + "epoch": 0.0, + "learning_rate": 4.999967576364197e-05, + "loss": 0.9805, + "step": 1721 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999675363474584e-05, + "loss": 1.1072, + "step": 1722 + }, + { + "epoch": 0.0, + "learning_rate": 4.999967496306041e-05, + "loss": 1.0521, + "step": 1723 + }, + { + "epoch": 0.0, + "learning_rate": 4.999967456239945e-05, + "loss": 0.7374, + "step": 1724 + }, + { + "epoch": 0.0, + "learning_rate": 4.999967416149171e-05, + "loss": 1.0237, + "step": 1725 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999673760337175e-05, + "loss": 1.0984, + "step": 1726 + }, + { + "epoch": 0.0, + "learning_rate": 4.999967335893585e-05, + "loss": 1.1678, + "step": 1727 + }, + { + "epoch": 0.0, + "learning_rate": 4.999967295728775e-05, + "loss": 1.3822, + "step": 1728 + }, + { + "epoch": 0.0, + "learning_rate": 4.999967255539286e-05, + "loss": 1.2259, + "step": 1729 + }, + { + "epoch": 0.0, + "learning_rate": 4.999967215325118e-05, + "loss": 1.2165, + "step": 1730 + }, + { + "epoch": 0.0, + "learning_rate": 4.999967175086272e-05, + "loss": 1.2102, + "step": 1731 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999671348227465e-05, + "loss": 1.3913, + "step": 1732 + }, + { + "epoch": 0.0, + "learning_rate": 4.999967094534543e-05, + "loss": 0.938, + "step": 1733 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999670542216614e-05, + "loss": 1.6322, + "step": 1734 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999670138841005e-05, + "loss": 1.2323, + "step": 1735 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999669735218605e-05, + "loss": 1.0992, + "step": 1736 + }, + { + "epoch": 0.0, + "learning_rate": 4.999966933134943e-05, + "loss": 1.1555, + "step": 1737 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999668927233456e-05, + "loss": 1.0509, + "step": 1738 + }, + { + "epoch": 0.0, + "learning_rate": 4.999966852287071e-05, + "loss": 1.0388, + "step": 1739 + }, + { + "epoch": 0.0, + "learning_rate": 4.999966811826117e-05, + "loss": 1.2675, + "step": 1740 + }, + { + "epoch": 0.0, + "learning_rate": 4.999966771340484e-05, + "loss": 1.161, + "step": 1741 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999667308301736e-05, + "loss": 1.75, + "step": 1742 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999666902951835e-05, + "loss": 0.7144, + "step": 1743 + }, + { + "epoch": 0.0, + "learning_rate": 4.999966649735515e-05, + "loss": 0.7856, + "step": 1744 + }, + { + "epoch": 0.0, + "learning_rate": 4.999966609151168e-05, + "loss": 1.1215, + "step": 1745 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999665685421425e-05, + "loss": 1.0481, + "step": 1746 + }, + { + "epoch": 0.0, + "learning_rate": 4.999966527908438e-05, + "loss": 1.1639, + "step": 1747 + }, + { + "epoch": 0.0, + "learning_rate": 4.999966487250055e-05, + "loss": 0.5334, + "step": 1748 + }, + { + "epoch": 0.0, + "learning_rate": 4.999966446566994e-05, + "loss": 1.1725, + "step": 1749 + }, + { + "epoch": 0.0, + "learning_rate": 4.999966405859253e-05, + "loss": 1.1146, + "step": 1750 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999663651268346e-05, + "loss": 1.1789, + "step": 1751 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999663243697376e-05, + "loss": 1.1226, + "step": 1752 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999662835879615e-05, + "loss": 1.1025, + "step": 1753 + }, + { + "epoch": 0.0, + "learning_rate": 4.999966242781507e-05, + "loss": 1.0359, + "step": 1754 + }, + { + "epoch": 0.0, + "learning_rate": 4.999966201950374e-05, + "loss": 1.102, + "step": 1755 + }, + { + "epoch": 0.0, + "learning_rate": 4.999966161094562e-05, + "loss": 1.283, + "step": 1756 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999661202140715e-05, + "loss": 1.1886, + "step": 1757 + }, + { + "epoch": 0.0, + "learning_rate": 4.999966079308902e-05, + "loss": 1.2364, + "step": 1758 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999660383790546e-05, + "loss": 1.0793, + "step": 1759 + }, + { + "epoch": 0.0, + "learning_rate": 4.999965997424529e-05, + "loss": 0.7617, + "step": 1760 + }, + { + "epoch": 0.0, + "learning_rate": 4.999965956445324e-05, + "loss": 1.2649, + "step": 1761 + }, + { + "epoch": 0.0, + "learning_rate": 4.99996591544144e-05, + "loss": 1.3203, + "step": 1762 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999658744128785e-05, + "loss": 0.9644, + "step": 1763 + }, + { + "epoch": 0.0, + "learning_rate": 4.999965833359638e-05, + "loss": 1.1444, + "step": 1764 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999657922817186e-05, + "loss": 1.0832, + "step": 1765 + }, + { + "epoch": 0.0, + "learning_rate": 4.999965751179121e-05, + "loss": 1.0513, + "step": 1766 + }, + { + "epoch": 0.0, + "learning_rate": 4.999965710051844e-05, + "loss": 0.9997, + "step": 1767 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999656688998895e-05, + "loss": 1.2584, + "step": 1768 + }, + { + "epoch": 0.0, + "learning_rate": 4.999965627723256e-05, + "loss": 1.4774, + "step": 1769 + }, + { + "epoch": 0.0, + "learning_rate": 4.999965586521943e-05, + "loss": 1.3373, + "step": 1770 + }, + { + "epoch": 0.0, + "learning_rate": 4.999965545295952e-05, + "loss": 0.9803, + "step": 1771 + }, + { + "epoch": 0.0, + "learning_rate": 4.999965504045283e-05, + "loss": 0.8081, + "step": 1772 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999654627699345e-05, + "loss": 1.2502, + "step": 1773 + }, + { + "epoch": 0.0, + "learning_rate": 4.999965421469908e-05, + "loss": 0.9243, + "step": 1774 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999653801452024e-05, + "loss": 0.9536, + "step": 1775 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999653387958186e-05, + "loss": 1.033, + "step": 1776 + }, + { + "epoch": 0.0, + "learning_rate": 4.999965297421756e-05, + "loss": 1.0354, + "step": 1777 + }, + { + "epoch": 0.0, + "learning_rate": 4.999965256023015e-05, + "loss": 1.0212, + "step": 1778 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999652145995944e-05, + "loss": 1.3128, + "step": 1779 + }, + { + "epoch": 0.0, + "learning_rate": 4.999965173151496e-05, + "loss": 0.8684, + "step": 1780 + }, + { + "epoch": 0.0, + "learning_rate": 4.99996513167872e-05, + "loss": 1.0906, + "step": 1781 + }, + { + "epoch": 0.0, + "learning_rate": 4.999965090181264e-05, + "loss": 1.2967, + "step": 1782 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999650486591295e-05, + "loss": 1.1864, + "step": 1783 + }, + { + "epoch": 0.0, + "learning_rate": 4.999965007112317e-05, + "loss": 1.2853, + "step": 1784 + }, + { + "epoch": 0.0, + "learning_rate": 4.999964965540825e-05, + "loss": 1.3813, + "step": 1785 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999649239446553e-05, + "loss": 1.1605, + "step": 1786 + }, + { + "epoch": 0.0, + "learning_rate": 4.999964882323807e-05, + "loss": 1.12, + "step": 1787 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999648406782795e-05, + "loss": 1.1585, + "step": 1788 + }, + { + "epoch": 0.0, + "learning_rate": 4.999964799008074e-05, + "loss": 0.8351, + "step": 1789 + }, + { + "epoch": 0.0, + "learning_rate": 4.99996475731319e-05, + "loss": 1.0039, + "step": 1790 + }, + { + "epoch": 0.0, + "learning_rate": 4.999964715593627e-05, + "loss": 0.9833, + "step": 1791 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999646738493855e-05, + "loss": 1.0115, + "step": 1792 + }, + { + "epoch": 0.0, + "learning_rate": 4.999964632080465e-05, + "loss": 0.8991, + "step": 1793 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999645902868666e-05, + "loss": 0.9605, + "step": 1794 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999645484685884e-05, + "loss": 0.9477, + "step": 1795 + }, + { + "epoch": 0.0, + "learning_rate": 4.999964506625633e-05, + "loss": 0.8547, + "step": 1796 + }, + { + "epoch": 0.0, + "learning_rate": 4.999964464757998e-05, + "loss": 0.9197, + "step": 1797 + }, + { + "epoch": 0.0, + "learning_rate": 4.999964422865685e-05, + "loss": 1.047, + "step": 1798 + }, + { + "epoch": 0.0, + "learning_rate": 4.999964380948694e-05, + "loss": 0.7114, + "step": 1799 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999643390070225e-05, + "loss": 1.0012, + "step": 1800 + }, + { + "epoch": 0.0, + "learning_rate": 4.999964297040674e-05, + "loss": 1.1069, + "step": 1801 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999642550496467e-05, + "loss": 1.0559, + "step": 1802 + }, + { + "epoch": 0.0, + "learning_rate": 4.999964213033941e-05, + "loss": 1.1771, + "step": 1803 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999641709935556e-05, + "loss": 0.9095, + "step": 1804 + }, + { + "epoch": 0.0, + "learning_rate": 4.999964128928493e-05, + "loss": 1.3731, + "step": 1805 + }, + { + "epoch": 0.0, + "learning_rate": 4.999964086838751e-05, + "loss": 1.2707, + "step": 1806 + }, + { + "epoch": 0.0, + "learning_rate": 4.99996404472433e-05, + "loss": 1.3383, + "step": 1807 + }, + { + "epoch": 0.0, + "learning_rate": 4.999964002585231e-05, + "loss": 1.1379, + "step": 1808 + }, + { + "epoch": 0.0, + "learning_rate": 4.999963960421453e-05, + "loss": 1.5423, + "step": 1809 + }, + { + "epoch": 0.0, + "learning_rate": 4.999963918232997e-05, + "loss": 1.1511, + "step": 1810 + }, + { + "epoch": 0.0, + "learning_rate": 4.999963876019862e-05, + "loss": 1.3019, + "step": 1811 + }, + { + "epoch": 0.0, + "learning_rate": 4.999963833782049e-05, + "loss": 1.1668, + "step": 1812 + }, + { + "epoch": 0.0, + "learning_rate": 4.999963791519556e-05, + "loss": 0.9874, + "step": 1813 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999637492323856e-05, + "loss": 1.431, + "step": 1814 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999637069205366e-05, + "loss": 1.8638, + "step": 1815 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999636645840084e-05, + "loss": 1.1395, + "step": 1816 + }, + { + "epoch": 0.0, + "learning_rate": 4.999963622222803e-05, + "loss": 1.125, + "step": 1817 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999635798369174e-05, + "loss": 1.0607, + "step": 1818 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999635374263545e-05, + "loss": 1.1098, + "step": 1819 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999634949911124e-05, + "loss": 0.6851, + "step": 1820 + }, + { + "epoch": 0.0, + "learning_rate": 4.999963452531191e-05, + "loss": 1.1234, + "step": 1821 + }, + { + "epoch": 0.0, + "learning_rate": 4.999963410046592e-05, + "loss": 1.3048, + "step": 1822 + }, + { + "epoch": 0.0, + "learning_rate": 4.999963367537313e-05, + "loss": 1.1247, + "step": 1823 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999633250033574e-05, + "loss": 1.0778, + "step": 1824 + }, + { + "epoch": 0.0, + "learning_rate": 4.999963282444722e-05, + "loss": 0.8994, + "step": 1825 + }, + { + "epoch": 0.0, + "learning_rate": 4.999963239861409e-05, + "loss": 1.1417, + "step": 1826 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999631972534166e-05, + "loss": 1.0574, + "step": 1827 + }, + { + "epoch": 0.0, + "learning_rate": 4.999963154620746e-05, + "loss": 1.1599, + "step": 1828 + }, + { + "epoch": 0.0, + "learning_rate": 4.999963111963396e-05, + "loss": 1.238, + "step": 1829 + }, + { + "epoch": 0.0, + "learning_rate": 4.999963069281368e-05, + "loss": 1.6336, + "step": 1830 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999630265746616e-05, + "loss": 2.0859, + "step": 1831 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999629838432765e-05, + "loss": 1.2952, + "step": 1832 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962941087213e-05, + "loss": 1.384, + "step": 1833 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962898306471e-05, + "loss": 0.9765, + "step": 1834 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962855501049e-05, + "loss": 1.0935, + "step": 1835 + }, + { + "epoch": 0.0, + "learning_rate": 4.99996281267095e-05, + "loss": 1.1919, + "step": 1836 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999627698161725e-05, + "loss": 0.9998, + "step": 1837 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962726936715e-05, + "loss": 1.155, + "step": 1838 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999626840325794e-05, + "loss": 1.1796, + "step": 1839 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962641103766e-05, + "loss": 1.3332, + "step": 1840 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962598150274e-05, + "loss": 1.0477, + "step": 1841 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962555172103e-05, + "loss": 1.112, + "step": 1842 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999625121692536e-05, + "loss": 0.9866, + "step": 1843 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999624691417254e-05, + "loss": 0.7711, + "step": 1844 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962426089518e-05, + "loss": 0.9799, + "step": 1845 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962383012633e-05, + "loss": 0.6033, + "step": 1846 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999623399110694e-05, + "loss": 0.7475, + "step": 1847 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962296784827e-05, + "loss": 0.7878, + "step": 1848 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962253633906e-05, + "loss": 1.272, + "step": 1849 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999622104583065e-05, + "loss": 1.2431, + "step": 1850 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962167258029e-05, + "loss": 1.1266, + "step": 1851 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962124033072e-05, + "loss": 1.2129, + "step": 1852 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999620807834366e-05, + "loss": 1.1271, + "step": 1853 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962037509123e-05, + "loss": 1.1641, + "step": 1854 + }, + { + "epoch": 0.0, + "learning_rate": 4.99996199421013e-05, + "loss": 1.5963, + "step": 1855 + }, + { + "epoch": 0.0, + "learning_rate": 4.999961950886459e-05, + "loss": 1.5356, + "step": 1856 + }, + { + "epoch": 0.0, + "learning_rate": 4.99996190753811e-05, + "loss": 1.118, + "step": 1857 + }, + { + "epoch": 0.0, + "learning_rate": 4.999961864165081e-05, + "loss": 1.4149, + "step": 1858 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999618207673746e-05, + "loss": 0.9634, + "step": 1859 + }, + { + "epoch": 0.0, + "learning_rate": 4.999961777344989e-05, + "loss": 1.0974, + "step": 1860 + }, + { + "epoch": 0.0, + "learning_rate": 4.999961733897925e-05, + "loss": 1.1586, + "step": 1861 + }, + { + "epoch": 0.0, + "learning_rate": 4.999961690426183e-05, + "loss": 1.1899, + "step": 1862 + }, + { + "epoch": 0.0, + "learning_rate": 4.999961646929762e-05, + "loss": 1.4185, + "step": 1863 + }, + { + "epoch": 0.0, + "learning_rate": 4.999961603408663e-05, + "loss": 0.9067, + "step": 1864 + }, + { + "epoch": 0.0, + "learning_rate": 4.999961559862885e-05, + "loss": 1.2734, + "step": 1865 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999615162924276e-05, + "loss": 1.0768, + "step": 1866 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999614726972925e-05, + "loss": 1.469, + "step": 1867 + }, + { + "epoch": 0.0, + "learning_rate": 4.999961429077479e-05, + "loss": 1.3085, + "step": 1868 + }, + { + "epoch": 0.0, + "learning_rate": 4.999961385432986e-05, + "loss": 1.064, + "step": 1869 + }, + { + "epoch": 0.0, + "learning_rate": 4.999961341763816e-05, + "loss": 1.2152, + "step": 1870 + }, + { + "epoch": 0.0, + "learning_rate": 4.999961298069966e-05, + "loss": 0.6752, + "step": 1871 + }, + { + "epoch": 0.0, + "learning_rate": 4.999961254351438e-05, + "loss": 0.4043, + "step": 1872 + }, + { + "epoch": 0.0, + "learning_rate": 4.999961210608232e-05, + "loss": 0.7948, + "step": 1873 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999611668403455e-05, + "loss": 1.1583, + "step": 1874 + }, + { + "epoch": 0.0, + "learning_rate": 4.999961123047782e-05, + "loss": 1.4685, + "step": 1875 + }, + { + "epoch": 0.0, + "learning_rate": 4.99996107923054e-05, + "loss": 1.2818, + "step": 1876 + }, + { + "epoch": 0.0, + "learning_rate": 4.999961035388619e-05, + "loss": 1.0971, + "step": 1877 + }, + { + "epoch": 0.0, + "learning_rate": 4.99996099152202e-05, + "loss": 1.4049, + "step": 1878 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999609476307415e-05, + "loss": 1.1807, + "step": 1879 + }, + { + "epoch": 0.0, + "learning_rate": 4.999960903714785e-05, + "loss": 1.435, + "step": 1880 + }, + { + "epoch": 0.0, + "learning_rate": 4.99996085977415e-05, + "loss": 1.14, + "step": 1881 + }, + { + "epoch": 0.0, + "learning_rate": 4.999960815808836e-05, + "loss": 1.0288, + "step": 1882 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999607718188435e-05, + "loss": 0.9891, + "step": 1883 + }, + { + "epoch": 0.0, + "learning_rate": 4.999960727804173e-05, + "loss": 1.1035, + "step": 1884 + }, + { + "epoch": 0.0, + "learning_rate": 4.999960683764824e-05, + "loss": 1.2492, + "step": 1885 + }, + { + "epoch": 0.0, + "learning_rate": 4.999960639700796e-05, + "loss": 1.1905, + "step": 1886 + }, + { + "epoch": 0.0, + "learning_rate": 4.999960595612089e-05, + "loss": 1.0795, + "step": 1887 + }, + { + "epoch": 0.0, + "learning_rate": 4.999960551498704e-05, + "loss": 1.0844, + "step": 1888 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999605073606406e-05, + "loss": 1.2944, + "step": 1889 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999604631978986e-05, + "loss": 1.1076, + "step": 1890 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999604190104774e-05, + "loss": 1.2012, + "step": 1891 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999603747983784e-05, + "loss": 1.2852, + "step": 1892 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999603305616e-05, + "loss": 1.0921, + "step": 1893 + }, + { + "epoch": 0.0, + "learning_rate": 4.999960286300144e-05, + "loss": 1.2454, + "step": 1894 + }, + { + "epoch": 0.0, + "learning_rate": 4.999960242014009e-05, + "loss": 1.0453, + "step": 1895 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999601977031957e-05, + "loss": 1.1153, + "step": 1896 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999601533677036e-05, + "loss": 1.0698, + "step": 1897 + }, + { + "epoch": 0.0, + "learning_rate": 4.999960109007533e-05, + "loss": 1.2448, + "step": 1898 + }, + { + "epoch": 0.0, + "learning_rate": 4.999960064622684e-05, + "loss": 1.2356, + "step": 1899 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999600202131567e-05, + "loss": 0.9474, + "step": 1900 + }, + { + "epoch": 0.0, + "learning_rate": 4.99995997577895e-05, + "loss": 1.1506, + "step": 1901 + }, + { + "epoch": 0.0, + "learning_rate": 4.999959931320065e-05, + "loss": 1.3618, + "step": 1902 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999598868365014e-05, + "loss": 1.0169, + "step": 1903 + }, + { + "epoch": 0.0, + "learning_rate": 4.99995984232826e-05, + "loss": 1.2114, + "step": 1904 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999597977953395e-05, + "loss": 1.1541, + "step": 1905 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999597532377405e-05, + "loss": 0.8533, + "step": 1906 + }, + { + "epoch": 0.0, + "learning_rate": 4.999959708655463e-05, + "loss": 1.1301, + "step": 1907 + }, + { + "epoch": 0.0, + "learning_rate": 4.999959664048507e-05, + "loss": 0.9514, + "step": 1908 + }, + { + "epoch": 0.0, + "learning_rate": 4.999959619416872e-05, + "loss": 1.0316, + "step": 1909 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999595747605585e-05, + "loss": 1.1069, + "step": 1910 + }, + { + "epoch": 0.0, + "learning_rate": 4.999959530079567e-05, + "loss": 0.9771, + "step": 1911 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999594853738966e-05, + "loss": 0.8837, + "step": 1912 + }, + { + "epoch": 0.0, + "learning_rate": 4.999959440643548e-05, + "loss": 0.7769, + "step": 1913 + }, + { + "epoch": 0.0, + "learning_rate": 4.99995939588852e-05, + "loss": 0.7168, + "step": 1914 + }, + { + "epoch": 0.0, + "learning_rate": 4.999959351108815e-05, + "loss": 0.6889, + "step": 1915 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999593063044306e-05, + "loss": 0.5915, + "step": 1916 + }, + { + "epoch": 0.0, + "learning_rate": 4.999959261475367e-05, + "loss": 0.6089, + "step": 1917 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999592166216256e-05, + "loss": 0.6052, + "step": 1918 + }, + { + "epoch": 0.0, + "learning_rate": 4.999959171743205e-05, + "loss": 0.5808, + "step": 1919 + }, + { + "epoch": 0.0, + "learning_rate": 4.999959126840107e-05, + "loss": 0.5909, + "step": 1920 + }, + { + "epoch": 0.0, + "learning_rate": 4.99995908191233e-05, + "loss": 0.5403, + "step": 1921 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999590369598734e-05, + "loss": 0.6327, + "step": 1922 + }, + { + "epoch": 0.0, + "learning_rate": 4.99995899198274e-05, + "loss": 0.5225, + "step": 1923 + }, + { + "epoch": 0.0, + "learning_rate": 4.999958946980927e-05, + "loss": 1.0703, + "step": 1924 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999589019544355e-05, + "loss": 1.2725, + "step": 1925 + }, + { + "epoch": 0.0, + "learning_rate": 4.999958856903265e-05, + "loss": 1.1882, + "step": 1926 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999588118274174e-05, + "loss": 1.03, + "step": 1927 + }, + { + "epoch": 0.0, + "learning_rate": 4.99995876672689e-05, + "loss": 1.3146, + "step": 1928 + }, + { + "epoch": 0.0, + "learning_rate": 4.999958721601684e-05, + "loss": 1.1833, + "step": 1929 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999586764518e-05, + "loss": 1.0596, + "step": 1930 + }, + { + "epoch": 0.0, + "learning_rate": 4.999958631277238e-05, + "loss": 1.2622, + "step": 1931 + }, + { + "epoch": 0.0, + "learning_rate": 4.999958586077996e-05, + "loss": 0.862, + "step": 1932 + }, + { + "epoch": 0.0, + "learning_rate": 4.999958540854077e-05, + "loss": 1.0546, + "step": 1933 + }, + { + "epoch": 0.0, + "learning_rate": 4.999958495605479e-05, + "loss": 0.5355, + "step": 1934 + }, + { + "epoch": 0.0, + "learning_rate": 4.999958450332202e-05, + "loss": 1.4974, + "step": 1935 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999584050342466e-05, + "loss": 1.2706, + "step": 1936 + }, + { + "epoch": 0.0, + "learning_rate": 4.999958359711613e-05, + "loss": 1.0658, + "step": 1937 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999583143643e-05, + "loss": 1.0819, + "step": 1938 + }, + { + "epoch": 0.0, + "learning_rate": 4.99995826899231e-05, + "loss": 1.0302, + "step": 1939 + }, + { + "epoch": 0.0, + "learning_rate": 4.99995822359564e-05, + "loss": 1.0842, + "step": 1940 + }, + { + "epoch": 0.0, + "learning_rate": 4.999958178174292e-05, + "loss": 1.1691, + "step": 1941 + }, + { + "epoch": 0.0, + "learning_rate": 4.999958132728266e-05, + "loss": 1.2302, + "step": 1942 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999580872575605e-05, + "loss": 0.9427, + "step": 1943 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999580417621775e-05, + "loss": 1.3759, + "step": 1944 + }, + { + "epoch": 0.0, + "learning_rate": 4.999957996242115e-05, + "loss": 1.1939, + "step": 1945 + }, + { + "epoch": 0.0, + "learning_rate": 4.999957950697374e-05, + "loss": 0.8473, + "step": 1946 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999579051279555e-05, + "loss": 1.031, + "step": 1947 + }, + { + "epoch": 0.0, + "learning_rate": 4.999957859533857e-05, + "loss": 0.941, + "step": 1948 + }, + { + "epoch": 0.0, + "learning_rate": 4.999957813915082e-05, + "loss": 0.9295, + "step": 1949 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999577682716265e-05, + "loss": 1.6521, + "step": 1950 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999577226034935e-05, + "loss": 1.2613, + "step": 1951 + }, + { + "epoch": 0.0, + "learning_rate": 4.999957676910681e-05, + "loss": 1.4755, + "step": 1952 + }, + { + "epoch": 0.0, + "learning_rate": 4.999957631193191e-05, + "loss": 1.5328, + "step": 1953 + }, + { + "epoch": 0.0, + "learning_rate": 4.999957585451022e-05, + "loss": 1.3601, + "step": 1954 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999575396841746e-05, + "loss": 1.2172, + "step": 1955 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999574938926485e-05, + "loss": 1.0954, + "step": 1956 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999574480764446e-05, + "loss": 1.0489, + "step": 1957 + }, + { + "epoch": 0.0, + "learning_rate": 4.999957402235561e-05, + "loss": 1.1921, + "step": 1958 + }, + { + "epoch": 0.0, + "learning_rate": 4.99995735637e-05, + "loss": 1.1143, + "step": 1959 + }, + { + "epoch": 0.0, + "learning_rate": 4.99995731047976e-05, + "loss": 1.2846, + "step": 1960 + }, + { + "epoch": 0.0, + "learning_rate": 4.999957264564841e-05, + "loss": 1.1944, + "step": 1961 + }, + { + "epoch": 0.0, + "learning_rate": 4.999957218625244e-05, + "loss": 1.2594, + "step": 1962 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999571726609686e-05, + "loss": 0.9978, + "step": 1963 + }, + { + "epoch": 0.0, + "learning_rate": 4.999957126672015e-05, + "loss": 0.971, + "step": 1964 + }, + { + "epoch": 0.0, + "learning_rate": 4.999957080658382e-05, + "loss": 1.1946, + "step": 1965 + }, + { + "epoch": 0.0, + "learning_rate": 4.999957034620071e-05, + "loss": 1.0838, + "step": 1966 + }, + { + "epoch": 0.0, + "learning_rate": 4.999956988557081e-05, + "loss": 1.233, + "step": 1967 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999569424694124e-05, + "loss": 1.2316, + "step": 1968 + }, + { + "epoch": 0.0, + "learning_rate": 4.999956896357066e-05, + "loss": 1.1695, + "step": 1969 + }, + { + "epoch": 0.0, + "learning_rate": 4.999956850220041e-05, + "loss": 1.1943, + "step": 1970 + }, + { + "epoch": 0.0, + "learning_rate": 4.999956804058337e-05, + "loss": 1.1467, + "step": 1971 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999567578719545e-05, + "loss": 1.1771, + "step": 1972 + }, + { + "epoch": 0.0, + "learning_rate": 4.999956711660894e-05, + "loss": 1.071, + "step": 1973 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999566654251544e-05, + "loss": 1.0812, + "step": 1974 + }, + { + "epoch": 0.0, + "learning_rate": 4.999956619164736e-05, + "loss": 0.6495, + "step": 1975 + }, + { + "epoch": 0.0, + "learning_rate": 4.99995657287964e-05, + "loss": 0.8987, + "step": 1976 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999565265698656e-05, + "loss": 1.7146, + "step": 1977 + }, + { + "epoch": 0.0, + "learning_rate": 4.999956480235412e-05, + "loss": 0.9659, + "step": 1978 + }, + { + "epoch": 0.0, + "learning_rate": 4.99995643387628e-05, + "loss": 0.9835, + "step": 1979 + }, + { + "epoch": 0.0, + "learning_rate": 4.999956387492469e-05, + "loss": 1.2594, + "step": 1980 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999563410839806e-05, + "loss": 1.1015, + "step": 1981 + }, + { + "epoch": 0.0, + "learning_rate": 4.999956294650813e-05, + "loss": 1.5359, + "step": 1982 + }, + { + "epoch": 0.0, + "learning_rate": 4.999956248192967e-05, + "loss": 1.1858, + "step": 1983 + }, + { + "epoch": 0.0, + "learning_rate": 4.999956201710443e-05, + "loss": 2.232, + "step": 1984 + }, + { + "epoch": 0.0, + "learning_rate": 4.999956155203239e-05, + "loss": 1.0845, + "step": 1985 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999561086713576e-05, + "loss": 1.1301, + "step": 1986 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999560621147975e-05, + "loss": 3.6122, + "step": 1987 + }, + { + "epoch": 0.0, + "learning_rate": 4.999956015533559e-05, + "loss": 1.1042, + "step": 1988 + }, + { + "epoch": 0.0, + "learning_rate": 4.999955968927642e-05, + "loss": 1.5292, + "step": 1989 + }, + { + "epoch": 0.0, + "learning_rate": 4.999955922297047e-05, + "loss": 1.2125, + "step": 1990 + }, + { + "epoch": 0.0, + "learning_rate": 4.999955875641772e-05, + "loss": 1.2061, + "step": 1991 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999558289618196e-05, + "loss": 1.3218, + "step": 1992 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999557822571887e-05, + "loss": 1.2492, + "step": 1993 + }, + { + "epoch": 0.0, + "learning_rate": 4.999955735527879e-05, + "loss": 1.6787, + "step": 1994 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999556887738906e-05, + "loss": 1.187, + "step": 1995 + }, + { + "epoch": 0.0, + "learning_rate": 4.999955641995224e-05, + "loss": 1.8543, + "step": 1996 + }, + { + "epoch": 0.0, + "learning_rate": 4.999955595191879e-05, + "loss": 0.6718, + "step": 1997 + }, + { + "epoch": 0.0, + "learning_rate": 4.999955548363855e-05, + "loss": 0.9185, + "step": 1998 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999555015111534e-05, + "loss": 1.3068, + "step": 1999 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999554546337724e-05, + "loss": 1.372, + "step": 2000 + }, + { + "epoch": 0.0, + "eval_loss": 1.09769606590271, + "eval_runtime": 85.4918, + "eval_samples_per_second": 16.2, + "eval_steps_per_second": 4.059, + "step": 2000 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999554077317136e-05, + "loss": 1.1012, + "step": 2001 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999553608049757e-05, + "loss": 1.2555, + "step": 2002 + }, + { + "epoch": 0.0, + "learning_rate": 4.999955313853559e-05, + "loss": 1.185, + "step": 2003 + }, + { + "epoch": 0.0, + "learning_rate": 4.999955266877465e-05, + "loss": 1.1243, + "step": 2004 + }, + { + "epoch": 0.0, + "learning_rate": 4.999955219876692e-05, + "loss": 1.296, + "step": 2005 + }, + { + "epoch": 0.0, + "learning_rate": 4.99995517285124e-05, + "loss": 1.1022, + "step": 2006 + }, + { + "epoch": 0.0, + "learning_rate": 4.99995512580111e-05, + "loss": 1.2041, + "step": 2007 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999550787263014e-05, + "loss": 1.0095, + "step": 2008 + }, + { + "epoch": 0.0, + "learning_rate": 4.999955031626814e-05, + "loss": 1.1557, + "step": 2009 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999549845026484e-05, + "loss": 1.3879, + "step": 2010 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999549373538035e-05, + "loss": 0.9205, + "step": 2011 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999548901802815e-05, + "loss": 0.1468, + "step": 2012 + }, + { + "epoch": 0.0, + "learning_rate": 4.99995484298208e-05, + "loss": 0.268, + "step": 2013 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999547957592e-05, + "loss": 0.6679, + "step": 2014 + }, + { + "epoch": 0.0, + "learning_rate": 4.999954748511641e-05, + "loss": 0.8559, + "step": 2015 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999547012394054e-05, + "loss": 1.0634, + "step": 2016 + }, + { + "epoch": 0.0, + "learning_rate": 4.99995465394249e-05, + "loss": 1.2137, + "step": 2017 + }, + { + "epoch": 0.0, + "learning_rate": 4.999954606620896e-05, + "loss": 1.2434, + "step": 2018 + }, + { + "epoch": 0.0, + "learning_rate": 4.999954559274624e-05, + "loss": 1.0491, + "step": 2019 + }, + { + "epoch": 0.0, + "learning_rate": 4.999954511903673e-05, + "loss": 1.0955, + "step": 2020 + }, + { + "epoch": 0.0, + "learning_rate": 4.999954464508044e-05, + "loss": 1.3627, + "step": 2021 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999544170877366e-05, + "loss": 0.8931, + "step": 2022 + }, + { + "epoch": 0.0, + "learning_rate": 4.99995436964275e-05, + "loss": 1.1627, + "step": 2023 + }, + { + "epoch": 0.0, + "learning_rate": 4.999954322173085e-05, + "loss": 1.2295, + "step": 2024 + }, + { + "epoch": 0.0, + "learning_rate": 4.999954274678742e-05, + "loss": 1.1959, + "step": 2025 + }, + { + "epoch": 0.0, + "learning_rate": 4.99995422715972e-05, + "loss": 1.1635, + "step": 2026 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999541796160196e-05, + "loss": 0.9611, + "step": 2027 + }, + { + "epoch": 0.0, + "learning_rate": 4.999954132047642e-05, + "loss": 0.7987, + "step": 2028 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999540844545846e-05, + "loss": 1.4583, + "step": 2029 + }, + { + "epoch": 0.0, + "learning_rate": 4.999954036836849e-05, + "loss": 1.2441, + "step": 2030 + }, + { + "epoch": 0.0, + "learning_rate": 4.999953989194435e-05, + "loss": 1.5323, + "step": 2031 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999539415273415e-05, + "loss": 1.1934, + "step": 2032 + }, + { + "epoch": 0.0, + "learning_rate": 4.999953893835571e-05, + "loss": 1.0853, + "step": 2033 + }, + { + "epoch": 0.0, + "learning_rate": 4.999953846119121e-05, + "loss": 1.1559, + "step": 2034 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999537983779934e-05, + "loss": 1.0736, + "step": 2035 + }, + { + "epoch": 0.0, + "learning_rate": 4.999953750612186e-05, + "loss": 1.0305, + "step": 2036 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999537028217015e-05, + "loss": 1.2131, + "step": 2037 + }, + { + "epoch": 0.0, + "learning_rate": 4.999953655006537e-05, + "loss": 1.1931, + "step": 2038 + }, + { + "epoch": 0.0, + "learning_rate": 4.999953607166695e-05, + "loss": 1.179, + "step": 2039 + }, + { + "epoch": 0.0, + "learning_rate": 4.999953559302174e-05, + "loss": 1.0621, + "step": 2040 + }, + { + "epoch": 0.0, + "learning_rate": 4.999953511412976e-05, + "loss": 1.3458, + "step": 2041 + }, + { + "epoch": 0.0, + "learning_rate": 4.999953463499098e-05, + "loss": 1.1783, + "step": 2042 + }, + { + "epoch": 0.0, + "learning_rate": 4.999953415560542e-05, + "loss": 1.159, + "step": 2043 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999533675973076e-05, + "loss": 1.3375, + "step": 2044 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999533196093946e-05, + "loss": 0.6313, + "step": 2045 + }, + { + "epoch": 0.0, + "learning_rate": 4.999953271596803e-05, + "loss": 0.9884, + "step": 2046 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999532235595325e-05, + "loss": 1.2119, + "step": 2047 + }, + { + "epoch": 0.0, + "learning_rate": 4.999953175497585e-05, + "loss": 1.2758, + "step": 2048 + }, + { + "epoch": 0.0, + "learning_rate": 4.999953127410957e-05, + "loss": 0.9148, + "step": 2049 + }, + { + "epoch": 0.0, + "learning_rate": 4.999953079299652e-05, + "loss": 1.0894, + "step": 2050 + }, + { + "epoch": 0.0, + "learning_rate": 4.999953031163668e-05, + "loss": 0.7798, + "step": 2051 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999529830030056e-05, + "loss": 0.7435, + "step": 2052 + }, + { + "epoch": 0.0, + "learning_rate": 4.999952934817664e-05, + "loss": 0.8528, + "step": 2053 + }, + { + "epoch": 0.0, + "learning_rate": 4.999952886607645e-05, + "loss": 1.2994, + "step": 2054 + }, + { + "epoch": 0.0, + "learning_rate": 4.999952838372948e-05, + "loss": 1.0807, + "step": 2055 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999527901135715e-05, + "loss": 1.2836, + "step": 2056 + }, + { + "epoch": 0.0, + "learning_rate": 4.999952741829517e-05, + "loss": 1.291, + "step": 2057 + }, + { + "epoch": 0.0, + "learning_rate": 4.999952693520783e-05, + "loss": 1.1041, + "step": 2058 + }, + { + "epoch": 0.0, + "learning_rate": 4.999952645187371e-05, + "loss": 1.0205, + "step": 2059 + }, + { + "epoch": 0.0, + "learning_rate": 4.999952596829281e-05, + "loss": 0.9736, + "step": 2060 + }, + { + "epoch": 0.0, + "learning_rate": 4.999952548446512e-05, + "loss": 1.2984, + "step": 2061 + }, + { + "epoch": 0.0, + "learning_rate": 4.999952500039065e-05, + "loss": 1.2229, + "step": 2062 + }, + { + "epoch": 0.0, + "learning_rate": 4.99995245160694e-05, + "loss": 1.6288, + "step": 2063 + }, + { + "epoch": 0.0, + "learning_rate": 4.999952403150135e-05, + "loss": 1.4125, + "step": 2064 + }, + { + "epoch": 0.0, + "learning_rate": 4.999952354668652e-05, + "loss": 1.3746, + "step": 2065 + }, + { + "epoch": 0.0, + "learning_rate": 4.999952306162492e-05, + "loss": 1.1113, + "step": 2066 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999522576316516e-05, + "loss": 1.2301, + "step": 2067 + }, + { + "epoch": 0.0, + "learning_rate": 4.999952209076134e-05, + "loss": 1.1858, + "step": 2068 + }, + { + "epoch": 0.0, + "learning_rate": 4.999952160495937e-05, + "loss": 1.0311, + "step": 2069 + }, + { + "epoch": 0.0, + "learning_rate": 4.999952111891062e-05, + "loss": 1.2018, + "step": 2070 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999520632615084e-05, + "loss": 1.0876, + "step": 2071 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999520146072766e-05, + "loss": 1.0011, + "step": 2072 + }, + { + "epoch": 0.0, + "learning_rate": 4.999951965928366e-05, + "loss": 1.1342, + "step": 2073 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999519172247775e-05, + "loss": 0.8343, + "step": 2074 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999518684965096e-05, + "loss": 1.3023, + "step": 2075 + }, + { + "epoch": 0.0, + "learning_rate": 4.999951819743563e-05, + "loss": 1.0484, + "step": 2076 + }, + { + "epoch": 0.0, + "learning_rate": 4.99995177096594e-05, + "loss": 0.8818, + "step": 2077 + }, + { + "epoch": 0.0, + "learning_rate": 4.999951722163637e-05, + "loss": 0.5781, + "step": 2078 + }, + { + "epoch": 0.0, + "learning_rate": 4.999951673336656e-05, + "loss": 1.1657, + "step": 2079 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999516244849956e-05, + "loss": 1.2215, + "step": 2080 + }, + { + "epoch": 0.0, + "learning_rate": 4.999951575608658e-05, + "loss": 1.0539, + "step": 2081 + }, + { + "epoch": 0.0, + "learning_rate": 4.999951526707641e-05, + "loss": 1.2303, + "step": 2082 + }, + { + "epoch": 0.0, + "learning_rate": 4.999951477781946e-05, + "loss": 1.5738, + "step": 2083 + }, + { + "epoch": 0.0, + "learning_rate": 4.999951428831572e-05, + "loss": 1.1878, + "step": 2084 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999513798565204e-05, + "loss": 1.1655, + "step": 2085 + }, + { + "epoch": 0.0, + "learning_rate": 4.999951330856789e-05, + "loss": 1.0752, + "step": 2086 + }, + { + "epoch": 0.0, + "learning_rate": 4.99995128183238e-05, + "loss": 0.9155, + "step": 2087 + }, + { + "epoch": 0.0, + "learning_rate": 4.999951232783293e-05, + "loss": 1.0812, + "step": 2088 + }, + { + "epoch": 0.0, + "learning_rate": 4.999951183709527e-05, + "loss": 1.0981, + "step": 2089 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999511346110827e-05, + "loss": 1.0767, + "step": 2090 + }, + { + "epoch": 0.0, + "learning_rate": 4.999951085487959e-05, + "loss": 0.9912, + "step": 2091 + }, + { + "epoch": 0.0, + "learning_rate": 4.999951036340158e-05, + "loss": 1.015, + "step": 2092 + }, + { + "epoch": 0.0, + "learning_rate": 4.999950987167679e-05, + "loss": 1.6259, + "step": 2093 + }, + { + "epoch": 0.0, + "learning_rate": 4.99995093797052e-05, + "loss": 1.4019, + "step": 2094 + }, + { + "epoch": 0.0, + "learning_rate": 4.999950888748684e-05, + "loss": 1.5632, + "step": 2095 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999508395021685e-05, + "loss": 1.2167, + "step": 2096 + }, + { + "epoch": 0.0, + "learning_rate": 4.999950790230975e-05, + "loss": 1.2337, + "step": 2097 + }, + { + "epoch": 0.0, + "learning_rate": 4.999950740935103e-05, + "loss": 1.116, + "step": 2098 + }, + { + "epoch": 0.0, + "learning_rate": 4.999950691614552e-05, + "loss": 1.1899, + "step": 2099 + }, + { + "epoch": 0.0, + "learning_rate": 4.999950642269323e-05, + "loss": 1.1664, + "step": 2100 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999505928994154e-05, + "loss": 0.9768, + "step": 2101 + }, + { + "epoch": 0.0, + "learning_rate": 4.99995054350483e-05, + "loss": 1.3383, + "step": 2102 + }, + { + "epoch": 0.0, + "learning_rate": 4.999950494085565e-05, + "loss": 1.09, + "step": 2103 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999504446416225e-05, + "loss": 1.6185, + "step": 2104 + }, + { + "epoch": 0.0, + "learning_rate": 4.999950395173001e-05, + "loss": 1.2097, + "step": 2105 + }, + { + "epoch": 0.0, + "learning_rate": 4.999950345679702e-05, + "loss": 0.9709, + "step": 2106 + }, + { + "epoch": 0.0, + "learning_rate": 4.999950296161723e-05, + "loss": 1.4045, + "step": 2107 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999502466190664e-05, + "loss": 1.187, + "step": 2108 + }, + { + "epoch": 0.0, + "learning_rate": 4.999950197051731e-05, + "loss": 1.2063, + "step": 2109 + }, + { + "epoch": 0.0, + "learning_rate": 4.999950147459718e-05, + "loss": 1.1091, + "step": 2110 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999500978430256e-05, + "loss": 1.2056, + "step": 2111 + }, + { + "epoch": 0.0, + "learning_rate": 4.999950048201655e-05, + "loss": 0.812, + "step": 2112 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999499985356055e-05, + "loss": 0.5464, + "step": 2113 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999499488448785e-05, + "loss": 1.2549, + "step": 2114 + }, + { + "epoch": 0.0, + "learning_rate": 4.999949899129472e-05, + "loss": 1.3707, + "step": 2115 + }, + { + "epoch": 0.0, + "learning_rate": 4.999949849389388e-05, + "loss": 1.2616, + "step": 2116 + }, + { + "epoch": 0.0, + "learning_rate": 4.999949799624626e-05, + "loss": 1.1822, + "step": 2117 + }, + { + "epoch": 0.0, + "learning_rate": 4.999949749835184e-05, + "loss": 1.0703, + "step": 2118 + }, + { + "epoch": 0.0, + "learning_rate": 4.999949700021065e-05, + "loss": 1.0636, + "step": 2119 + }, + { + "epoch": 0.0, + "learning_rate": 4.999949650182266e-05, + "loss": 1.1332, + "step": 2120 + }, + { + "epoch": 0.0, + "learning_rate": 4.99994960031879e-05, + "loss": 1.0944, + "step": 2121 + }, + { + "epoch": 0.0, + "learning_rate": 4.999949550430635e-05, + "loss": 1.0199, + "step": 2122 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999495005178015e-05, + "loss": 0.8863, + "step": 2123 + }, + { + "epoch": 0.0, + "learning_rate": 4.999949450580289e-05, + "loss": 1.181, + "step": 2124 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999494006180994e-05, + "loss": 1.0959, + "step": 2125 + }, + { + "epoch": 0.0, + "learning_rate": 4.99994935063123e-05, + "loss": 1.0288, + "step": 2126 + }, + { + "epoch": 0.0, + "learning_rate": 4.999949300619683e-05, + "loss": 1.1203, + "step": 2127 + }, + { + "epoch": 0.0, + "learning_rate": 4.999949250583457e-05, + "loss": 1.0955, + "step": 2128 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999492005225535e-05, + "loss": 1.0168, + "step": 2129 + }, + { + "epoch": 0.0, + "learning_rate": 4.99994915043697e-05, + "loss": 0.8773, + "step": 2130 + }, + { + "epoch": 0.0, + "learning_rate": 4.99994910032671e-05, + "loss": 0.4928, + "step": 2131 + }, + { + "epoch": 0.0, + "learning_rate": 4.99994905019177e-05, + "loss": 0.8741, + "step": 2132 + }, + { + "epoch": 0.0, + "learning_rate": 4.999949000032153e-05, + "loss": 1.1476, + "step": 2133 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999489498478555e-05, + "loss": 1.126, + "step": 2134 + }, + { + "epoch": 0.0, + "learning_rate": 4.999948899638881e-05, + "loss": 0.8158, + "step": 2135 + }, + { + "epoch": 0.0, + "learning_rate": 4.999948849405228e-05, + "loss": 0.7589, + "step": 2136 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999487991468964e-05, + "loss": 0.7096, + "step": 2137 + }, + { + "epoch": 0.0, + "learning_rate": 4.999948748863886e-05, + "loss": 0.9201, + "step": 2138 + }, + { + "epoch": 0.0, + "learning_rate": 4.999948698556197e-05, + "loss": 0.8196, + "step": 2139 + }, + { + "epoch": 0.0, + "learning_rate": 4.99994864822383e-05, + "loss": 1.1322, + "step": 2140 + }, + { + "epoch": 0.0, + "learning_rate": 4.999948597866785e-05, + "loss": 1.1709, + "step": 2141 + }, + { + "epoch": 0.0, + "learning_rate": 4.999948547485061e-05, + "loss": 1.1259, + "step": 2142 + }, + { + "epoch": 0.0, + "learning_rate": 4.999948497078658e-05, + "loss": 1.9433, + "step": 2143 + }, + { + "epoch": 0.0, + "learning_rate": 4.999948446647578e-05, + "loss": 1.3849, + "step": 2144 + }, + { + "epoch": 0.0, + "learning_rate": 4.999948396191818e-05, + "loss": 1.5158, + "step": 2145 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999483457113807e-05, + "loss": 1.5195, + "step": 2146 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999482952062646e-05, + "loss": 1.4063, + "step": 2147 + }, + { + "epoch": 0.0, + "learning_rate": 4.99994824467647e-05, + "loss": 2.0924, + "step": 2148 + }, + { + "epoch": 0.0, + "learning_rate": 4.999948194121997e-05, + "loss": 1.1986, + "step": 2149 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999481435428456e-05, + "loss": 0.603, + "step": 2150 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999480929390164e-05, + "loss": 0.5798, + "step": 2151 + }, + { + "epoch": 0.0, + "learning_rate": 4.999948042310508e-05, + "loss": 0.2908, + "step": 2152 + }, + { + "epoch": 0.0, + "learning_rate": 4.999947991657321e-05, + "loss": 0.2309, + "step": 2153 + }, + { + "epoch": 0.0, + "learning_rate": 4.999947940979456e-05, + "loss": 1.0838, + "step": 2154 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999478902769126e-05, + "loss": 1.4886, + "step": 2155 + }, + { + "epoch": 0.0, + "learning_rate": 4.99994783954969e-05, + "loss": 1.1981, + "step": 2156 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999477887977895e-05, + "loss": 1.1871, + "step": 2157 + }, + { + "epoch": 0.0, + "learning_rate": 4.999947738021211e-05, + "loss": 1.2981, + "step": 2158 + }, + { + "epoch": 0.0, + "learning_rate": 4.999947687219954e-05, + "loss": 0.7443, + "step": 2159 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999476363940176e-05, + "loss": 1.2174, + "step": 2160 + }, + { + "epoch": 0.0, + "learning_rate": 4.999947585543404e-05, + "loss": 0.9883, + "step": 2161 + }, + { + "epoch": 0.0, + "learning_rate": 4.999947534668111e-05, + "loss": 0.7254, + "step": 2162 + }, + { + "epoch": 0.0, + "learning_rate": 4.99994748376814e-05, + "loss": 1.322, + "step": 2163 + }, + { + "epoch": 0.0, + "learning_rate": 4.999947432843491e-05, + "loss": 1.4698, + "step": 2164 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999473818941636e-05, + "loss": 1.3622, + "step": 2165 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999473309201565e-05, + "loss": 0.9578, + "step": 2166 + }, + { + "epoch": 0.0, + "learning_rate": 4.999947279921472e-05, + "loss": 1.0192, + "step": 2167 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999472288981083e-05, + "loss": 0.8398, + "step": 2168 + }, + { + "epoch": 0.0, + "learning_rate": 4.999947177850067e-05, + "loss": 0.6333, + "step": 2169 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999471267773476e-05, + "loss": 0.5143, + "step": 2170 + }, + { + "epoch": 0.0, + "learning_rate": 4.999947075679949e-05, + "loss": 0.8075, + "step": 2171 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999470245578717e-05, + "loss": 0.9884, + "step": 2172 + }, + { + "epoch": 0.0, + "learning_rate": 4.999946973411117e-05, + "loss": 1.4855, + "step": 2173 + }, + { + "epoch": 0.0, + "learning_rate": 4.999946922239683e-05, + "loss": 1.0709, + "step": 2174 + }, + { + "epoch": 0.0, + "learning_rate": 4.999946871043571e-05, + "loss": 1.4966, + "step": 2175 + }, + { + "epoch": 0.0, + "learning_rate": 4.999946819822781e-05, + "loss": 1.3703, + "step": 2176 + }, + { + "epoch": 0.0, + "learning_rate": 4.999946768577312e-05, + "loss": 0.8544, + "step": 2177 + }, + { + "epoch": 0.0, + "learning_rate": 4.999946717307164e-05, + "loss": 0.8872, + "step": 2178 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999466660123386e-05, + "loss": 1.1566, + "step": 2179 + }, + { + "epoch": 0.0, + "learning_rate": 4.999946614692834e-05, + "loss": 1.0904, + "step": 2180 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999465633486514e-05, + "loss": 0.8584, + "step": 2181 + }, + { + "epoch": 0.0, + "learning_rate": 4.999946511979791e-05, + "loss": 1.3786, + "step": 2182 + }, + { + "epoch": 0.0, + "learning_rate": 4.999946460586251e-05, + "loss": 1.7855, + "step": 2183 + }, + { + "epoch": 0.0, + "learning_rate": 4.999946409168033e-05, + "loss": 1.0695, + "step": 2184 + }, + { + "epoch": 0.0, + "learning_rate": 4.999946357725137e-05, + "loss": 1.0416, + "step": 2185 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999463062575616e-05, + "loss": 0.7917, + "step": 2186 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462547653086e-05, + "loss": 4.7444, + "step": 2187 + }, + { + "epoch": 0.0, + "learning_rate": 4.999946203248377e-05, + "loss": 6.0795, + "step": 2188 + }, + { + "epoch": 0.0, + "learning_rate": 4.999946151706767e-05, + "loss": 5.9444, + "step": 2189 + }, + { + "epoch": 0.0, + "learning_rate": 4.999946100140479e-05, + "loss": 5.8905, + "step": 2190 + }, + { + "epoch": 0.0, + "learning_rate": 4.999946048549512e-05, + "loss": 5.7887, + "step": 2191 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999459969338666e-05, + "loss": 1.2573, + "step": 2192 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999459452935435e-05, + "loss": 0.9757, + "step": 2193 + }, + { + "epoch": 0.0, + "learning_rate": 4.999945893628541e-05, + "loss": 0.6567, + "step": 2194 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999458419388604e-05, + "loss": 0.8967, + "step": 2195 + }, + { + "epoch": 0.0, + "learning_rate": 4.999945790224502e-05, + "loss": 1.2589, + "step": 2196 + }, + { + "epoch": 0.0, + "learning_rate": 4.999945738485464e-05, + "loss": 1.1499, + "step": 2197 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999456867217486e-05, + "loss": 0.9498, + "step": 2198 + }, + { + "epoch": 0.0, + "learning_rate": 4.999945634933355e-05, + "loss": 1.3002, + "step": 2199 + }, + { + "epoch": 0.0, + "learning_rate": 4.999945583120282e-05, + "loss": 1.4875, + "step": 2200 + }, + { + "epoch": 0.0, + "learning_rate": 4.999945531282532e-05, + "loss": 1.1735, + "step": 2201 + }, + { + "epoch": 0.0, + "learning_rate": 4.999945479420102e-05, + "loss": 0.207, + "step": 2202 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999454275329945e-05, + "loss": 0.1521, + "step": 2203 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999453756212075e-05, + "loss": 0.1431, + "step": 2204 + }, + { + "epoch": 0.0, + "learning_rate": 4.999945323684744e-05, + "loss": 0.0981, + "step": 2205 + }, + { + "epoch": 0.0, + "learning_rate": 4.999945271723601e-05, + "loss": 0.082, + "step": 2206 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999452197377796e-05, + "loss": 0.0651, + "step": 2207 + }, + { + "epoch": 0.0, + "learning_rate": 4.999945167727279e-05, + "loss": 0.0447, + "step": 2208 + }, + { + "epoch": 0.0, + "learning_rate": 4.999945115692101e-05, + "loss": 0.0372, + "step": 2209 + }, + { + "epoch": 0.0, + "learning_rate": 4.999945063632245e-05, + "loss": 0.0263, + "step": 2210 + }, + { + "epoch": 0.0, + "learning_rate": 4.99994501154771e-05, + "loss": 0.0216, + "step": 2211 + }, + { + "epoch": 0.0, + "learning_rate": 4.999944959438496e-05, + "loss": 0.0183, + "step": 2212 + }, + { + "epoch": 0.0, + "learning_rate": 4.999944907304604e-05, + "loss": 0.0182, + "step": 2213 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999448551460345e-05, + "loss": 0.0174, + "step": 2214 + }, + { + "epoch": 0.0, + "learning_rate": 4.999944802962786e-05, + "loss": 0.0185, + "step": 2215 + }, + { + "epoch": 0.0, + "learning_rate": 4.999944750754858e-05, + "loss": 0.0171, + "step": 2216 + }, + { + "epoch": 0.0, + "learning_rate": 4.999944698522253e-05, + "loss": 0.0146, + "step": 2217 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999446462649694e-05, + "loss": 0.0241, + "step": 2218 + }, + { + "epoch": 0.0, + "learning_rate": 4.999944593983007e-05, + "loss": 0.0217, + "step": 2219 + }, + { + "epoch": 0.0, + "learning_rate": 4.999944541676367e-05, + "loss": 0.0212, + "step": 2220 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999444893450476e-05, + "loss": 0.02, + "step": 2221 + }, + { + "epoch": 0.0, + "learning_rate": 4.99994443698905e-05, + "loss": 0.019, + "step": 2222 + }, + { + "epoch": 0.0, + "learning_rate": 4.999944384608375e-05, + "loss": 0.019, + "step": 2223 + }, + { + "epoch": 0.0, + "learning_rate": 4.99994433220302e-05, + "loss": 0.017, + "step": 2224 + }, + { + "epoch": 0.0, + "learning_rate": 4.999944279772988e-05, + "loss": 0.0168, + "step": 2225 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999442273182765e-05, + "loss": 0.0184, + "step": 2226 + }, + { + "epoch": 0.0, + "learning_rate": 4.999944174838887e-05, + "loss": 0.0185, + "step": 2227 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999441223348195e-05, + "loss": 0.0151, + "step": 2228 + }, + { + "epoch": 0.0, + "learning_rate": 4.999944069806073e-05, + "loss": 0.0121, + "step": 2229 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999440172526494e-05, + "loss": 0.0186, + "step": 2230 + }, + { + "epoch": 0.0, + "learning_rate": 4.999943964674546e-05, + "loss": 0.0138, + "step": 2231 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999439120717646e-05, + "loss": 0.1738, + "step": 2232 + }, + { + "epoch": 0.0, + "learning_rate": 4.999943859444305e-05, + "loss": 0.8234, + "step": 2233 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999438067921666e-05, + "loss": 0.9426, + "step": 2234 + }, + { + "epoch": 0.0, + "learning_rate": 4.99994375411535e-05, + "loss": 1.2646, + "step": 2235 + }, + { + "epoch": 0.0, + "learning_rate": 4.999943701413855e-05, + "loss": 1.3297, + "step": 2236 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999436486876814e-05, + "loss": 1.3769, + "step": 2237 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999435959368303e-05, + "loss": 1.1826, + "step": 2238 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999435431613e-05, + "loss": 1.0608, + "step": 2239 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999434903610914e-05, + "loss": 1.1587, + "step": 2240 + }, + { + "epoch": 0.0, + "learning_rate": 4.999943437536205e-05, + "loss": 1.1704, + "step": 2241 + }, + { + "epoch": 0.0, + "learning_rate": 4.999943384686639e-05, + "loss": 1.0112, + "step": 2242 + }, + { + "epoch": 0.0, + "learning_rate": 4.999943331812396e-05, + "loss": 1.4902, + "step": 2243 + }, + { + "epoch": 0.0, + "learning_rate": 4.999943278913474e-05, + "loss": 0.7887, + "step": 2244 + }, + { + "epoch": 0.0, + "learning_rate": 4.999943225989873e-05, + "loss": 1.4997, + "step": 2245 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999431730415944e-05, + "loss": 1.2394, + "step": 2246 + }, + { + "epoch": 0.0, + "learning_rate": 4.999943120068638e-05, + "loss": 1.1772, + "step": 2247 + }, + { + "epoch": 0.0, + "learning_rate": 4.999943067071002e-05, + "loss": 1.0988, + "step": 2248 + }, + { + "epoch": 0.0, + "learning_rate": 4.999943014048688e-05, + "loss": 1.0802, + "step": 2249 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999429610016954e-05, + "loss": 1.0132, + "step": 2250 + }, + { + "epoch": 0.0, + "learning_rate": 4.999942907930025e-05, + "loss": 0.8749, + "step": 2251 + }, + { + "epoch": 0.0, + "learning_rate": 4.999942854833676e-05, + "loss": 1.2088, + "step": 2252 + }, + { + "epoch": 0.0, + "learning_rate": 4.999942801712648e-05, + "loss": 1.4583, + "step": 2253 + }, + { + "epoch": 0.0, + "learning_rate": 4.999942748566942e-05, + "loss": 1.1602, + "step": 2254 + }, + { + "epoch": 0.0, + "learning_rate": 4.999942695396558e-05, + "loss": 1.3084, + "step": 2255 + }, + { + "epoch": 0.0, + "learning_rate": 4.999942642201495e-05, + "loss": 1.084, + "step": 2256 + }, + { + "epoch": 0.0, + "learning_rate": 4.999942588981754e-05, + "loss": 1.3865, + "step": 2257 + }, + { + "epoch": 0.0, + "learning_rate": 4.999942535737335e-05, + "loss": 1.3727, + "step": 2258 + }, + { + "epoch": 0.0, + "learning_rate": 4.999942482468237e-05, + "loss": 0.8791, + "step": 2259 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999424291744615e-05, + "loss": 1.2743, + "step": 2260 + }, + { + "epoch": 0.0, + "learning_rate": 4.999942375856006e-05, + "loss": 1.3113, + "step": 2261 + }, + { + "epoch": 0.0, + "learning_rate": 4.999942322512873e-05, + "loss": 1.4118, + "step": 2262 + }, + { + "epoch": 0.0, + "learning_rate": 4.999942269145062e-05, + "loss": 0.969, + "step": 2263 + }, + { + "epoch": 0.0, + "learning_rate": 4.999942215752572e-05, + "loss": 0.9726, + "step": 2264 + }, + { + "epoch": 0.0, + "learning_rate": 4.999942162335404e-05, + "loss": 1.5553, + "step": 2265 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999421088935585e-05, + "loss": 1.2159, + "step": 2266 + }, + { + "epoch": 0.0, + "learning_rate": 4.999942055427033e-05, + "loss": 1.1292, + "step": 2267 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999420019358304e-05, + "loss": 0.9931, + "step": 2268 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999419484199486e-05, + "loss": 1.0924, + "step": 2269 + }, + { + "epoch": 0.0, + "learning_rate": 4.999941894879388e-05, + "loss": 1.0195, + "step": 2270 + }, + { + "epoch": 0.0, + "learning_rate": 4.99994184131415e-05, + "loss": 1.1812, + "step": 2271 + }, + { + "epoch": 0.0, + "learning_rate": 4.999941787724234e-05, + "loss": 1.09, + "step": 2272 + }, + { + "epoch": 0.0, + "learning_rate": 4.999941734109639e-05, + "loss": 1.1697, + "step": 2273 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999416804703646e-05, + "loss": 0.9411, + "step": 2274 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999416268064133e-05, + "loss": 1.2827, + "step": 2275 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999415731177836e-05, + "loss": 1.2085, + "step": 2276 + }, + { + "epoch": 0.0, + "learning_rate": 4.999941519404475e-05, + "loss": 1.2183, + "step": 2277 + }, + { + "epoch": 0.0, + "learning_rate": 4.999941465666488e-05, + "loss": 0.8818, + "step": 2278 + }, + { + "epoch": 0.0, + "learning_rate": 4.999941411903822e-05, + "loss": 0.7588, + "step": 2279 + }, + { + "epoch": 0.0, + "learning_rate": 4.999941358116479e-05, + "loss": 1.3433, + "step": 2280 + }, + { + "epoch": 0.0, + "learning_rate": 4.999941304304457e-05, + "loss": 1.3804, + "step": 2281 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999412504677565e-05, + "loss": 1.2872, + "step": 2282 + }, + { + "epoch": 0.0, + "learning_rate": 4.999941196606378e-05, + "loss": 1.1368, + "step": 2283 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999411427203206e-05, + "loss": 1.0265, + "step": 2284 + }, + { + "epoch": 0.0, + "learning_rate": 4.999941088809585e-05, + "loss": 1.0054, + "step": 2285 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999410348741715e-05, + "loss": 1.2381, + "step": 2286 + }, + { + "epoch": 0.0, + "learning_rate": 4.99994098091408e-05, + "loss": 0.956, + "step": 2287 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999409269293084e-05, + "loss": 1.1399, + "step": 2288 + }, + { + "epoch": 0.0, + "learning_rate": 4.99994087291986e-05, + "loss": 1.4057, + "step": 2289 + }, + { + "epoch": 0.0, + "learning_rate": 4.999940818885732e-05, + "loss": 1.1755, + "step": 2290 + }, + { + "epoch": 0.0, + "learning_rate": 4.999940764826927e-05, + "loss": 1.1603, + "step": 2291 + }, + { + "epoch": 0.0, + "learning_rate": 4.999940710743443e-05, + "loss": 1.2242, + "step": 2292 + }, + { + "epoch": 0.0, + "learning_rate": 4.99994065663528e-05, + "loss": 0.7225, + "step": 2293 + }, + { + "epoch": 0.0, + "learning_rate": 4.99994060250244e-05, + "loss": 0.2577, + "step": 2294 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999405483449206e-05, + "loss": 0.1954, + "step": 2295 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999404941627234e-05, + "loss": 0.1391, + "step": 2296 + }, + { + "epoch": 0.0, + "learning_rate": 4.999940439955848e-05, + "loss": 0.1212, + "step": 2297 + }, + { + "epoch": 0.0, + "learning_rate": 4.999940385724293e-05, + "loss": 0.0721, + "step": 2298 + }, + { + "epoch": 0.0, + "learning_rate": 4.99994033146806e-05, + "loss": 0.0758, + "step": 2299 + }, + { + "epoch": 0.0, + "learning_rate": 4.999940277187149e-05, + "loss": 0.0615, + "step": 2300 + }, + { + "epoch": 0.0, + "learning_rate": 4.99994022288156e-05, + "loss": 0.0825, + "step": 2301 + }, + { + "epoch": 0.0, + "learning_rate": 4.999940168551293e-05, + "loss": 0.0567, + "step": 2302 + }, + { + "epoch": 0.0, + "learning_rate": 4.999940114196347e-05, + "loss": 0.0523, + "step": 2303 + }, + { + "epoch": 0.0, + "learning_rate": 4.999940059816723e-05, + "loss": 0.0547, + "step": 2304 + }, + { + "epoch": 0.0, + "learning_rate": 4.99994000541242e-05, + "loss": 0.4968, + "step": 2305 + }, + { + "epoch": 0.0, + "learning_rate": 4.999939950983439e-05, + "loss": 1.3247, + "step": 2306 + }, + { + "epoch": 0.0, + "learning_rate": 4.99993989652978e-05, + "loss": 1.3734, + "step": 2307 + }, + { + "epoch": 0.0, + "learning_rate": 4.999939842051442e-05, + "loss": 1.1558, + "step": 2308 + }, + { + "epoch": 0.0, + "learning_rate": 4.999939787548426e-05, + "loss": 1.2981, + "step": 2309 + }, + { + "epoch": 0.0, + "learning_rate": 4.999939733020732e-05, + "loss": 1.5171, + "step": 2310 + }, + { + "epoch": 0.0, + "learning_rate": 4.999939678468359e-05, + "loss": 1.6943, + "step": 2311 + }, + { + "epoch": 0.0, + "learning_rate": 4.999939623891308e-05, + "loss": 1.5977, + "step": 2312 + }, + { + "epoch": 0.0, + "learning_rate": 4.999939569289578e-05, + "loss": 1.1567, + "step": 2313 + }, + { + "epoch": 0.0, + "learning_rate": 4.99993951466317e-05, + "loss": 1.3617, + "step": 2314 + }, + { + "epoch": 0.0, + "learning_rate": 4.999939460012084e-05, + "loss": 1.2513, + "step": 2315 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999394053363196e-05, + "loss": 1.3635, + "step": 2316 + }, + { + "epoch": 0.0, + "learning_rate": 4.999939350635877e-05, + "loss": 1.0933, + "step": 2317 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999392959107555e-05, + "loss": 1.3159, + "step": 2318 + }, + { + "epoch": 0.0, + "learning_rate": 4.999939241160956e-05, + "loss": 1.1969, + "step": 2319 + }, + { + "epoch": 0.0, + "learning_rate": 4.999939186386478e-05, + "loss": 1.2352, + "step": 2320 + }, + { + "epoch": 0.0, + "learning_rate": 4.999939131587322e-05, + "loss": 1.3023, + "step": 2321 + }, + { + "epoch": 0.0, + "learning_rate": 4.999939076763487e-05, + "loss": 1.5029, + "step": 2322 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999390219149745e-05, + "loss": 1.002, + "step": 2323 + }, + { + "epoch": 0.0, + "learning_rate": 4.999938967041783e-05, + "loss": 0.6181, + "step": 2324 + }, + { + "epoch": 0.0, + "learning_rate": 4.999938912143913e-05, + "loss": 0.8718, + "step": 2325 + }, + { + "epoch": 0.0, + "learning_rate": 4.999938857221366e-05, + "loss": 0.7674, + "step": 2326 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999388022741386e-05, + "loss": 1.4088, + "step": 2327 + }, + { + "epoch": 0.0, + "learning_rate": 4.999938747302234e-05, + "loss": 1.239, + "step": 2328 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999386923056515e-05, + "loss": 0.9995, + "step": 2329 + }, + { + "epoch": 0.0, + "learning_rate": 4.99993863728439e-05, + "loss": 1.0201, + "step": 2330 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999385822384505e-05, + "loss": 1.0667, + "step": 2331 + }, + { + "epoch": 0.0, + "learning_rate": 4.999938527167832e-05, + "loss": 1.2822, + "step": 2332 + }, + { + "epoch": 0.0, + "learning_rate": 4.999938472072536e-05, + "loss": 0.7923, + "step": 2333 + }, + { + "epoch": 0.0, + "learning_rate": 4.999938416952561e-05, + "loss": 1.1033, + "step": 2334 + }, + { + "epoch": 0.0, + "learning_rate": 4.999938361807908e-05, + "loss": 1.1808, + "step": 2335 + }, + { + "epoch": 0.0, + "learning_rate": 4.999938306638576e-05, + "loss": 1.1299, + "step": 2336 + }, + { + "epoch": 0.0, + "learning_rate": 4.999938251444567e-05, + "loss": 1.1362, + "step": 2337 + }, + { + "epoch": 0.0, + "learning_rate": 4.999938196225879e-05, + "loss": 1.1539, + "step": 2338 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999381409825124e-05, + "loss": 0.6446, + "step": 2339 + }, + { + "epoch": 0.0, + "learning_rate": 4.999938085714468e-05, + "loss": 1.214, + "step": 2340 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999380304217446e-05, + "loss": 1.4017, + "step": 2341 + }, + { + "epoch": 0.0, + "learning_rate": 4.999937975104343e-05, + "loss": 1.9942, + "step": 2342 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999379197622636e-05, + "loss": 1.5023, + "step": 2343 + }, + { + "epoch": 0.0, + "learning_rate": 4.999937864395505e-05, + "loss": 1.3173, + "step": 2344 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999378090040694e-05, + "loss": 1.2286, + "step": 2345 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999377535879546e-05, + "loss": 1.4841, + "step": 2346 + }, + { + "epoch": 0.0, + "learning_rate": 4.999937698147161e-05, + "loss": 0.9893, + "step": 2347 + }, + { + "epoch": 0.0, + "learning_rate": 4.99993764268169e-05, + "loss": 1.1911, + "step": 2348 + }, + { + "epoch": 0.0, + "learning_rate": 4.99993758719154e-05, + "loss": 1.119, + "step": 2349 + }, + { + "epoch": 0.0, + "learning_rate": 4.999937531676712e-05, + "loss": 1.0328, + "step": 2350 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999374761372055e-05, + "loss": 1.1877, + "step": 2351 + }, + { + "epoch": 0.0, + "learning_rate": 4.999937420573021e-05, + "loss": 1.46, + "step": 2352 + }, + { + "epoch": 0.0, + "learning_rate": 4.999937364984158e-05, + "loss": 0.8874, + "step": 2353 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999373093706165e-05, + "loss": 1.2046, + "step": 2354 + }, + { + "epoch": 0.0, + "learning_rate": 4.999937253732397e-05, + "loss": 0.9197, + "step": 2355 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999371980694985e-05, + "loss": 1.5218, + "step": 2356 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999371423819225e-05, + "loss": 0.7522, + "step": 2357 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999370866696674e-05, + "loss": 1.3164, + "step": 2358 + }, + { + "epoch": 0.0, + "learning_rate": 4.999937030932735e-05, + "loss": 1.1041, + "step": 2359 + }, + { + "epoch": 0.0, + "learning_rate": 4.999936975171124e-05, + "loss": 1.2685, + "step": 2360 + }, + { + "epoch": 0.0, + "learning_rate": 4.999936919384833e-05, + "loss": 1.3134, + "step": 2361 + }, + { + "epoch": 0.0, + "learning_rate": 4.999936863573866e-05, + "loss": 1.016, + "step": 2362 + }, + { + "epoch": 0.0, + "learning_rate": 4.999936807738219e-05, + "loss": 0.9835, + "step": 2363 + }, + { + "epoch": 0.0, + "learning_rate": 4.999936751877895e-05, + "loss": 0.7403, + "step": 2364 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999366959928915e-05, + "loss": 0.5867, + "step": 2365 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999366400832106e-05, + "loss": 0.8516, + "step": 2366 + }, + { + "epoch": 0.0, + "learning_rate": 4.999936584148851e-05, + "loss": 1.3296, + "step": 2367 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999365281898134e-05, + "loss": 1.0289, + "step": 2368 + }, + { + "epoch": 0.0, + "learning_rate": 4.999936472206097e-05, + "loss": 0.4003, + "step": 2369 + }, + { + "epoch": 0.0, + "learning_rate": 4.999936416197702e-05, + "loss": 1.0497, + "step": 2370 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999363601646296e-05, + "loss": 1.4006, + "step": 2371 + }, + { + "epoch": 0.0, + "learning_rate": 4.999936304106878e-05, + "loss": 0.778, + "step": 2372 + }, + { + "epoch": 0.0, + "learning_rate": 4.999936248024448e-05, + "loss": 0.7985, + "step": 2373 + }, + { + "epoch": 0.0, + "learning_rate": 4.99993619191734e-05, + "loss": 1.0902, + "step": 2374 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999361357855544e-05, + "loss": 1.1798, + "step": 2375 + }, + { + "epoch": 0.0, + "learning_rate": 4.999936079629091e-05, + "loss": 1.3368, + "step": 2376 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999360234479474e-05, + "loss": 1.3867, + "step": 2377 + }, + { + "epoch": 0.0, + "learning_rate": 4.999935967242126e-05, + "loss": 1.1832, + "step": 2378 + }, + { + "epoch": 0.0, + "learning_rate": 4.999935911011627e-05, + "loss": 1.3546, + "step": 2379 + }, + { + "epoch": 0.0, + "learning_rate": 4.999935854756449e-05, + "loss": 1.3045, + "step": 2380 + }, + { + "epoch": 0.0, + "learning_rate": 4.999935798476593e-05, + "loss": 1.2272, + "step": 2381 + }, + { + "epoch": 0.0, + "learning_rate": 4.999935742172059e-05, + "loss": 1.219, + "step": 2382 + }, + { + "epoch": 0.0, + "learning_rate": 4.999935685842846e-05, + "loss": 1.1794, + "step": 2383 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999356294889554e-05, + "loss": 0.8872, + "step": 2384 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999355731103855e-05, + "loss": 1.0011, + "step": 2385 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999355167071385e-05, + "loss": 1.196, + "step": 2386 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999354602792124e-05, + "loss": 1.1468, + "step": 2387 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999354038266084e-05, + "loss": 0.9644, + "step": 2388 + }, + { + "epoch": 0.0, + "learning_rate": 4.999935347349326e-05, + "loss": 0.9631, + "step": 2389 + }, + { + "epoch": 0.0, + "learning_rate": 4.999935290847365e-05, + "loss": 1.2356, + "step": 2390 + }, + { + "epoch": 0.0, + "learning_rate": 4.999935234320726e-05, + "loss": 1.1133, + "step": 2391 + }, + { + "epoch": 0.0, + "learning_rate": 4.999935177769408e-05, + "loss": 1.3155, + "step": 2392 + }, + { + "epoch": 0.0, + "learning_rate": 4.999935121193413e-05, + "loss": 1.4621, + "step": 2393 + }, + { + "epoch": 0.0, + "learning_rate": 4.999935064592739e-05, + "loss": 0.9106, + "step": 2394 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999350079673855e-05, + "loss": 1.099, + "step": 2395 + }, + { + "epoch": 0.0, + "learning_rate": 4.999934951317356e-05, + "loss": 1.2296, + "step": 2396 + }, + { + "epoch": 0.0, + "learning_rate": 4.999934894642646e-05, + "loss": 0.8653, + "step": 2397 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999348379432595e-05, + "loss": 1.285, + "step": 2398 + }, + { + "epoch": 0.0, + "learning_rate": 4.999934781219193e-05, + "loss": 1.5817, + "step": 2399 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999347244704494e-05, + "loss": 1.3223, + "step": 2400 + }, + { + "epoch": 0.0, + "learning_rate": 4.999934667697027e-05, + "loss": 1.2201, + "step": 2401 + }, + { + "epoch": 0.0, + "learning_rate": 4.999934610898927e-05, + "loss": 1.035, + "step": 2402 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999345540761484e-05, + "loss": 1.4577, + "step": 2403 + }, + { + "epoch": 0.0, + "learning_rate": 4.999934497228691e-05, + "loss": 0.9728, + "step": 2404 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999344403565556e-05, + "loss": 0.9041, + "step": 2405 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999343834597425e-05, + "loss": 0.8214, + "step": 2406 + }, + { + "epoch": 0.0, + "learning_rate": 4.99993432653825e-05, + "loss": 0.936, + "step": 2407 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999342695920795e-05, + "loss": 0.9635, + "step": 2408 + }, + { + "epoch": 0.0, + "learning_rate": 4.999934212621231e-05, + "loss": 1.1556, + "step": 2409 + }, + { + "epoch": 0.0, + "learning_rate": 4.999934155625704e-05, + "loss": 1.254, + "step": 2410 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999340986054985e-05, + "loss": 1.209, + "step": 2411 + }, + { + "epoch": 0.0, + "learning_rate": 4.999934041560615e-05, + "loss": 1.0671, + "step": 2412 + }, + { + "epoch": 0.0, + "learning_rate": 4.999933984491054e-05, + "loss": 0.8076, + "step": 2413 + }, + { + "epoch": 0.0, + "learning_rate": 4.999933927396814e-05, + "loss": 1.0531, + "step": 2414 + }, + { + "epoch": 0.0, + "learning_rate": 4.999933870277895e-05, + "loss": 0.9312, + "step": 2415 + }, + { + "epoch": 0.0, + "learning_rate": 4.999933813134299e-05, + "loss": 1.3744, + "step": 2416 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999337559660244e-05, + "loss": 1.0359, + "step": 2417 + }, + { + "epoch": 0.0, + "learning_rate": 4.99993369877307e-05, + "loss": 0.8227, + "step": 2418 + }, + { + "epoch": 0.0, + "learning_rate": 4.999933641555439e-05, + "loss": 1.1006, + "step": 2419 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999335843131286e-05, + "loss": 0.8073, + "step": 2420 + }, + { + "epoch": 0.0, + "learning_rate": 4.999933527046141e-05, + "loss": 1.2168, + "step": 2421 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999334697544745e-05, + "loss": 0.9359, + "step": 2422 + }, + { + "epoch": 0.0, + "learning_rate": 4.99993341243813e-05, + "loss": 1.2773, + "step": 2423 + }, + { + "epoch": 0.0, + "learning_rate": 4.999933355097107e-05, + "loss": 1.1156, + "step": 2424 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999332977314063e-05, + "loss": 1.1494, + "step": 2425 + }, + { + "epoch": 0.0, + "learning_rate": 4.999933240341026e-05, + "loss": 0.8976, + "step": 2426 + }, + { + "epoch": 0.0, + "learning_rate": 4.999933182925969e-05, + "loss": 1.162, + "step": 2427 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999331254862326e-05, + "loss": 0.8506, + "step": 2428 + }, + { + "epoch": 0.0, + "learning_rate": 4.999933068021818e-05, + "loss": 0.6927, + "step": 2429 + }, + { + "epoch": 0.0, + "learning_rate": 4.999933010532725e-05, + "loss": 1.1664, + "step": 2430 + }, + { + "epoch": 0.0, + "learning_rate": 4.999932953018954e-05, + "loss": 0.9944, + "step": 2431 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999328954805046e-05, + "loss": 1.021, + "step": 2432 + }, + { + "epoch": 0.0, + "learning_rate": 4.999932837917377e-05, + "loss": 0.7567, + "step": 2433 + }, + { + "epoch": 0.0, + "learning_rate": 4.999932780329572e-05, + "loss": 1.3012, + "step": 2434 + }, + { + "epoch": 0.0, + "learning_rate": 4.999932722717088e-05, + "loss": 1.0829, + "step": 2435 + }, + { + "epoch": 0.0, + "learning_rate": 4.999932665079925e-05, + "loss": 1.6098, + "step": 2436 + }, + { + "epoch": 0.0, + "learning_rate": 4.999932607418084e-05, + "loss": 1.2366, + "step": 2437 + }, + { + "epoch": 0.0, + "learning_rate": 4.999932549731565e-05, + "loss": 1.191, + "step": 2438 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999324920203685e-05, + "loss": 1.2385, + "step": 2439 + }, + { + "epoch": 0.0, + "learning_rate": 4.999932434284492e-05, + "loss": 0.7776, + "step": 2440 + }, + { + "epoch": 0.0, + "learning_rate": 4.999932376523939e-05, + "loss": 0.5032, + "step": 2441 + }, + { + "epoch": 0.0, + "learning_rate": 4.999932318738707e-05, + "loss": 0.4156, + "step": 2442 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999322609287964e-05, + "loss": 0.4252, + "step": 2443 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999322030942074e-05, + "loss": 0.3237, + "step": 2444 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999321452349406e-05, + "loss": 0.452, + "step": 2445 + }, + { + "epoch": 0.0, + "learning_rate": 4.999932087350996e-05, + "loss": 0.4207, + "step": 2446 + }, + { + "epoch": 0.0, + "learning_rate": 4.999932029442372e-05, + "loss": 0.2601, + "step": 2447 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999319715090706e-05, + "loss": 0.4093, + "step": 2448 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999319135510905e-05, + "loss": 1.6132, + "step": 2449 + }, + { + "epoch": 0.0, + "learning_rate": 4.999931855568432e-05, + "loss": 1.3601, + "step": 2450 + }, + { + "epoch": 0.0, + "learning_rate": 4.999931797561096e-05, + "loss": 1.1905, + "step": 2451 + }, + { + "epoch": 0.0, + "learning_rate": 4.999931739529081e-05, + "loss": 1.187, + "step": 2452 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999316814723876e-05, + "loss": 1.2807, + "step": 2453 + }, + { + "epoch": 0.0, + "learning_rate": 4.999931623391016e-05, + "loss": 1.5622, + "step": 2454 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999315652849663e-05, + "loss": 1.0686, + "step": 2455 + }, + { + "epoch": 0.0, + "learning_rate": 4.999931507154238e-05, + "loss": 1.002, + "step": 2456 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999314489988325e-05, + "loss": 1.3571, + "step": 2457 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999313908187476e-05, + "loss": 1.0784, + "step": 2458 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999313326139855e-05, + "loss": 1.0493, + "step": 2459 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999312743845436e-05, + "loss": 1.3248, + "step": 2460 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999312161304245e-05, + "loss": 1.2138, + "step": 2461 + }, + { + "epoch": 0.0, + "learning_rate": 4.999931157851627e-05, + "loss": 1.3235, + "step": 2462 + }, + { + "epoch": 0.0, + "learning_rate": 4.999931099548151e-05, + "loss": 1.1581, + "step": 2463 + }, + { + "epoch": 0.0, + "learning_rate": 4.999931041219997e-05, + "loss": 1.0672, + "step": 2464 + }, + { + "epoch": 0.0, + "learning_rate": 4.999930982867165e-05, + "loss": 1.2655, + "step": 2465 + }, + { + "epoch": 0.0, + "learning_rate": 4.999930924489654e-05, + "loss": 1.191, + "step": 2466 + }, + { + "epoch": 0.0, + "learning_rate": 4.999930866087466e-05, + "loss": 1.1727, + "step": 2467 + }, + { + "epoch": 0.0, + "learning_rate": 4.999930807660598e-05, + "loss": 1.3226, + "step": 2468 + }, + { + "epoch": 0.0, + "learning_rate": 4.999930749209053e-05, + "loss": 1.5273, + "step": 2469 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999306907328294e-05, + "loss": 1.1624, + "step": 2470 + }, + { + "epoch": 0.0, + "learning_rate": 4.999930632231927e-05, + "loss": 0.857, + "step": 2471 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999305737063474e-05, + "loss": 1.1461, + "step": 2472 + }, + { + "epoch": 0.0, + "learning_rate": 4.999930515156089e-05, + "loss": 1.1581, + "step": 2473 + }, + { + "epoch": 0.0, + "learning_rate": 4.999930456581152e-05, + "loss": 1.0932, + "step": 2474 + }, + { + "epoch": 0.0, + "learning_rate": 4.999930397981537e-05, + "loss": 1.0128, + "step": 2475 + }, + { + "epoch": 0.0, + "learning_rate": 4.999930339357244e-05, + "loss": 1.2064, + "step": 2476 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999302807082726e-05, + "loss": 1.0275, + "step": 2477 + }, + { + "epoch": 0.0, + "learning_rate": 4.999930222034622e-05, + "loss": 0.8178, + "step": 2478 + }, + { + "epoch": 0.0, + "learning_rate": 4.999930163336295e-05, + "loss": 1.0451, + "step": 2479 + }, + { + "epoch": 0.0, + "learning_rate": 4.999930104613289e-05, + "loss": 1.4416, + "step": 2480 + }, + { + "epoch": 0.0, + "learning_rate": 4.999930045865604e-05, + "loss": 0.8916, + "step": 2481 + }, + { + "epoch": 0.0, + "learning_rate": 4.999929987093241e-05, + "loss": 1.195, + "step": 2482 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999299282962e-05, + "loss": 1.3613, + "step": 2483 + }, + { + "epoch": 0.0, + "learning_rate": 4.999929869474481e-05, + "loss": 1.3498, + "step": 2484 + }, + { + "epoch": 0.0, + "learning_rate": 4.999929810628083e-05, + "loss": 1.0009, + "step": 2485 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999297517570076e-05, + "loss": 1.3047, + "step": 2486 + }, + { + "epoch": 0.0, + "learning_rate": 4.999929692861253e-05, + "loss": 0.9789, + "step": 2487 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999296339408206e-05, + "loss": 1.2014, + "step": 2488 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999295749957104e-05, + "loss": 1.2294, + "step": 2489 + }, + { + "epoch": 0.0, + "learning_rate": 4.999929516025922e-05, + "loss": 1.0622, + "step": 2490 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999294570314545e-05, + "loss": 1.1546, + "step": 2491 + }, + { + "epoch": 0.0, + "learning_rate": 4.999929398012309e-05, + "loss": 1.2043, + "step": 2492 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999293389684855e-05, + "loss": 1.0917, + "step": 2493 + }, + { + "epoch": 0.0, + "learning_rate": 4.999929279899984e-05, + "loss": 1.0515, + "step": 2494 + }, + { + "epoch": 0.0, + "learning_rate": 4.999929220806804e-05, + "loss": 1.1921, + "step": 2495 + }, + { + "epoch": 0.0, + "learning_rate": 4.999929161688946e-05, + "loss": 1.1185, + "step": 2496 + }, + { + "epoch": 0.0, + "learning_rate": 4.999929102546409e-05, + "loss": 1.4151, + "step": 2497 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999290433791946e-05, + "loss": 1.1235, + "step": 2498 + }, + { + "epoch": 0.0, + "learning_rate": 4.999928984187301e-05, + "loss": 0.8122, + "step": 2499 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999289249707296e-05, + "loss": 1.1508, + "step": 2500 + }, + { + "epoch": 0.0, + "eval_loss": 1.0854716300964355, + "eval_runtime": 83.847, + "eval_samples_per_second": 16.518, + "eval_steps_per_second": 4.138, + "step": 2500 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999288657294804e-05, + "loss": 1.0851, + "step": 2501 + }, + { + "epoch": 0.0, + "learning_rate": 4.999928806463553e-05, + "loss": 0.8544, + "step": 2502 + }, + { + "epoch": 0.0, + "learning_rate": 4.999928747172946e-05, + "loss": 0.7983, + "step": 2503 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999286878576626e-05, + "loss": 1.1961, + "step": 2504 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999286285176995e-05, + "loss": 1.0891, + "step": 2505 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999285691530586e-05, + "loss": 1.0302, + "step": 2506 + }, + { + "epoch": 0.0, + "learning_rate": 4.99992850976374e-05, + "loss": 1.1088, + "step": 2507 + }, + { + "epoch": 0.0, + "learning_rate": 4.999928450349743e-05, + "loss": 1.0297, + "step": 2508 + }, + { + "epoch": 0.0, + "learning_rate": 4.999928390911067e-05, + "loss": 1.2901, + "step": 2509 + }, + { + "epoch": 0.0, + "learning_rate": 4.999928331447714e-05, + "loss": 1.0585, + "step": 2510 + }, + { + "epoch": 0.0, + "learning_rate": 4.999928271959682e-05, + "loss": 1.1662, + "step": 2511 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999282124469714e-05, + "loss": 0.6689, + "step": 2512 + }, + { + "epoch": 0.0, + "learning_rate": 4.999928152909583e-05, + "loss": 0.6319, + "step": 2513 + }, + { + "epoch": 0.0, + "learning_rate": 4.999928093347516e-05, + "loss": 0.9722, + "step": 2514 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999280337607715e-05, + "loss": 0.8079, + "step": 2515 + }, + { + "epoch": 0.0, + "learning_rate": 4.999927974149348e-05, + "loss": 1.1531, + "step": 2516 + }, + { + "epoch": 0.0, + "learning_rate": 4.999927914513247e-05, + "loss": 0.886, + "step": 2517 + }, + { + "epoch": 0.0, + "learning_rate": 4.999927854852467e-05, + "loss": 0.9439, + "step": 2518 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999277951670096e-05, + "loss": 1.2437, + "step": 2519 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999277354568735e-05, + "loss": 1.2504, + "step": 2520 + }, + { + "epoch": 0.0, + "learning_rate": 4.999927675722059e-05, + "loss": 1.299, + "step": 2521 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999276159625665e-05, + "loss": 0.8641, + "step": 2522 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999275561783956e-05, + "loss": 0.6387, + "step": 2523 + }, + { + "epoch": 0.0, + "learning_rate": 4.999927496369547e-05, + "loss": 1.1639, + "step": 2524 + }, + { + "epoch": 0.0, + "learning_rate": 4.999927436536019e-05, + "loss": 1.0768, + "step": 2525 + }, + { + "epoch": 0.0, + "learning_rate": 4.999927376677814e-05, + "loss": 1.2307, + "step": 2526 + }, + { + "epoch": 0.0, + "learning_rate": 4.99992731679493e-05, + "loss": 1.1291, + "step": 2527 + }, + { + "epoch": 0.0, + "learning_rate": 4.999927256887368e-05, + "loss": 1.1474, + "step": 2528 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999271969551284e-05, + "loss": 1.1732, + "step": 2529 + }, + { + "epoch": 0.0, + "learning_rate": 4.99992713699821e-05, + "loss": 1.2119, + "step": 2530 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999270770166136e-05, + "loss": 1.0065, + "step": 2531 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999270170103385e-05, + "loss": 1.0581, + "step": 2532 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999269569793855e-05, + "loss": 1.0729, + "step": 2533 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926896923755e-05, + "loss": 1.1211, + "step": 2534 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926836843445e-05, + "loss": 1.17, + "step": 2535 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926776738457e-05, + "loss": 1.1372, + "step": 2536 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926716608791e-05, + "loss": 1.7751, + "step": 2537 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999266564544465e-05, + "loss": 1.0604, + "step": 2538 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926596275424e-05, + "loss": 1.3684, + "step": 2539 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926536071724e-05, + "loss": 1.2431, + "step": 2540 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926475843345e-05, + "loss": 1.0332, + "step": 2541 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926415590288e-05, + "loss": 1.3016, + "step": 2542 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999263553125525e-05, + "loss": 1.0958, + "step": 2543 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926295010139e-05, + "loss": 1.142, + "step": 2544 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926234683047e-05, + "loss": 1.189, + "step": 2545 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999261743312774e-05, + "loss": 1.0052, + "step": 2546 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926113954829e-05, + "loss": 1.0415, + "step": 2547 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999260535537026e-05, + "loss": 1.2109, + "step": 2548 + }, + { + "epoch": 0.0, + "learning_rate": 4.999925993127899e-05, + "loss": 1.2191, + "step": 2549 + }, + { + "epoch": 0.0, + "learning_rate": 4.999925932677415e-05, + "loss": 1.1117, + "step": 2550 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999258722022545e-05, + "loss": 1.0124, + "step": 2551 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999258117024154e-05, + "loss": 1.0574, + "step": 2552 + }, + { + "epoch": 0.0, + "learning_rate": 4.999925751177898e-05, + "loss": 1.0316, + "step": 2553 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999256906287016e-05, + "loss": 0.5798, + "step": 2554 + }, + { + "epoch": 0.0, + "learning_rate": 4.999925630054828e-05, + "loss": 0.5607, + "step": 2555 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999255694562766e-05, + "loss": 0.5936, + "step": 2556 + }, + { + "epoch": 0.0, + "learning_rate": 4.999925508833046e-05, + "loss": 1.1024, + "step": 2557 + }, + { + "epoch": 0.0, + "learning_rate": 4.999925448185138e-05, + "loss": 1.2206, + "step": 2558 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999253875125505e-05, + "loss": 0.9331, + "step": 2559 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999253268152856e-05, + "loss": 0.9955, + "step": 2560 + }, + { + "epoch": 0.0, + "learning_rate": 4.999925266093343e-05, + "loss": 0.8683, + "step": 2561 + }, + { + "epoch": 0.0, + "learning_rate": 4.999925205346721e-05, + "loss": 0.6031, + "step": 2562 + }, + { + "epoch": 0.0, + "learning_rate": 4.999925144575421e-05, + "loss": 0.9559, + "step": 2563 + }, + { + "epoch": 0.0, + "learning_rate": 4.999925083779444e-05, + "loss": 0.6049, + "step": 2564 + }, + { + "epoch": 0.0, + "learning_rate": 4.999925022958788e-05, + "loss": 1.2506, + "step": 2565 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999249621134534e-05, + "loss": 1.506, + "step": 2566 + }, + { + "epoch": 0.0, + "learning_rate": 4.999924901243441e-05, + "loss": 1.3293, + "step": 2567 + }, + { + "epoch": 0.0, + "learning_rate": 4.999924840348751e-05, + "loss": 1.9336, + "step": 2568 + }, + { + "epoch": 0.0, + "learning_rate": 4.999924779429382e-05, + "loss": 0.9746, + "step": 2569 + }, + { + "epoch": 0.0, + "learning_rate": 4.999924718485335e-05, + "loss": 0.761, + "step": 2570 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999246575166095e-05, + "loss": 0.308, + "step": 2571 + }, + { + "epoch": 0.0, + "learning_rate": 4.999924596523206e-05, + "loss": 1.4471, + "step": 2572 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999245355051245e-05, + "loss": 1.4313, + "step": 2573 + }, + { + "epoch": 0.0, + "learning_rate": 4.999924474462365e-05, + "loss": 0.8333, + "step": 2574 + }, + { + "epoch": 0.0, + "learning_rate": 4.999924413394926e-05, + "loss": 0.7514, + "step": 2575 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999243523028105e-05, + "loss": 1.001, + "step": 2576 + }, + { + "epoch": 0.0, + "learning_rate": 4.999924291186016e-05, + "loss": 1.1125, + "step": 2577 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999242300445435e-05, + "loss": 1.2343, + "step": 2578 + }, + { + "epoch": 0.0, + "learning_rate": 4.999924168878393e-05, + "loss": 1.1152, + "step": 2579 + }, + { + "epoch": 0.0, + "learning_rate": 4.999924107687564e-05, + "loss": 0.926, + "step": 2580 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999240464720564e-05, + "loss": 1.0037, + "step": 2581 + }, + { + "epoch": 0.0, + "learning_rate": 4.999923985231871e-05, + "loss": 1.2988, + "step": 2582 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999239239670074e-05, + "loss": 0.8706, + "step": 2583 + }, + { + "epoch": 0.0, + "learning_rate": 4.999923862677466e-05, + "loss": 0.9814, + "step": 2584 + }, + { + "epoch": 0.0, + "learning_rate": 4.999923801363246e-05, + "loss": 0.9953, + "step": 2585 + }, + { + "epoch": 0.0, + "learning_rate": 4.999923740024347e-05, + "loss": 0.7982, + "step": 2586 + }, + { + "epoch": 0.0, + "learning_rate": 4.999923678660771e-05, + "loss": 0.2924, + "step": 2587 + }, + { + "epoch": 0.0, + "learning_rate": 4.999923617272516e-05, + "loss": 0.2321, + "step": 2588 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999235558595836e-05, + "loss": 0.2196, + "step": 2589 + }, + { + "epoch": 0.0, + "learning_rate": 4.999923494421973e-05, + "loss": 0.6108, + "step": 2590 + }, + { + "epoch": 0.0, + "learning_rate": 4.999923432959683e-05, + "loss": 1.3284, + "step": 2591 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999233714727164e-05, + "loss": 0.884, + "step": 2592 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999233099610706e-05, + "loss": 0.7834, + "step": 2593 + }, + { + "epoch": 0.0, + "learning_rate": 4.999923248424747e-05, + "loss": 0.7138, + "step": 2594 + }, + { + "epoch": 0.0, + "learning_rate": 4.999923186863745e-05, + "loss": 1.0278, + "step": 2595 + }, + { + "epoch": 0.0, + "learning_rate": 4.999923125278065e-05, + "loss": 1.1555, + "step": 2596 + }, + { + "epoch": 0.0, + "learning_rate": 4.999923063667706e-05, + "loss": 1.1282, + "step": 2597 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999230020326706e-05, + "loss": 1.1194, + "step": 2598 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999229403729554e-05, + "loss": 1.0122, + "step": 2599 + }, + { + "epoch": 0.0, + "learning_rate": 4.999922878688563e-05, + "loss": 1.1518, + "step": 2600 + }, + { + "epoch": 0.0, + "learning_rate": 4.999922816979492e-05, + "loss": 1.2748, + "step": 2601 + }, + { + "epoch": 0.0, + "learning_rate": 4.999922755245743e-05, + "loss": 0.9259, + "step": 2602 + }, + { + "epoch": 0.0, + "learning_rate": 4.999922693487315e-05, + "loss": 0.8864, + "step": 2603 + }, + { + "epoch": 0.0, + "learning_rate": 4.999922631704209e-05, + "loss": 1.0629, + "step": 2604 + }, + { + "epoch": 0.0, + "learning_rate": 4.999922569896426e-05, + "loss": 1.0549, + "step": 2605 + }, + { + "epoch": 0.0, + "learning_rate": 4.999922508063964e-05, + "loss": 1.0743, + "step": 2606 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999224462068236e-05, + "loss": 1.0465, + "step": 2607 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999223843250054e-05, + "loss": 1.3165, + "step": 2608 + }, + { + "epoch": 0.0, + "learning_rate": 4.999922322418509e-05, + "loss": 1.1523, + "step": 2609 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999222604873344e-05, + "loss": 1.0081, + "step": 2610 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999221985314814e-05, + "loss": 0.9723, + "step": 2611 + }, + { + "epoch": 0.0, + "learning_rate": 4.99992213655095e-05, + "loss": 1.3478, + "step": 2612 + }, + { + "epoch": 0.0, + "learning_rate": 4.999922074545741e-05, + "loss": 1.2355, + "step": 2613 + }, + { + "epoch": 0.0, + "learning_rate": 4.999922012515854e-05, + "loss": 1.1177, + "step": 2614 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999219504612884e-05, + "loss": 0.9973, + "step": 2615 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999218883820445e-05, + "loss": 1.0677, + "step": 2616 + }, + { + "epoch": 0.0, + "learning_rate": 4.999921826278122e-05, + "loss": 0.9187, + "step": 2617 + }, + { + "epoch": 0.0, + "learning_rate": 4.999921764149522e-05, + "loss": 1.0991, + "step": 2618 + }, + { + "epoch": 0.0, + "learning_rate": 4.999921701996244e-05, + "loss": 1.2343, + "step": 2619 + }, + { + "epoch": 0.0, + "learning_rate": 4.999921639818288e-05, + "loss": 1.2575, + "step": 2620 + }, + { + "epoch": 0.0, + "learning_rate": 4.999921577615653e-05, + "loss": 1.0961, + "step": 2621 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999215153883405e-05, + "loss": 1.0279, + "step": 2622 + }, + { + "epoch": 0.0, + "learning_rate": 4.999921453136349e-05, + "loss": 1.2034, + "step": 2623 + }, + { + "epoch": 0.0, + "learning_rate": 4.99992139085968e-05, + "loss": 4.8598, + "step": 2624 + }, + { + "epoch": 0.0, + "learning_rate": 4.999921328558333e-05, + "loss": 3.16, + "step": 2625 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999212662323075e-05, + "loss": 1.0854, + "step": 2626 + }, + { + "epoch": 0.0, + "learning_rate": 4.999921203881604e-05, + "loss": 1.1011, + "step": 2627 + }, + { + "epoch": 0.0, + "learning_rate": 4.999921141506222e-05, + "loss": 1.0632, + "step": 2628 + }, + { + "epoch": 0.0, + "learning_rate": 4.999921079106162e-05, + "loss": 1.1298, + "step": 2629 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999210166814236e-05, + "loss": 1.1682, + "step": 2630 + }, + { + "epoch": 0.0, + "learning_rate": 4.999920954232008e-05, + "loss": 1.1885, + "step": 2631 + }, + { + "epoch": 0.0, + "learning_rate": 4.999920891757913e-05, + "loss": 1.7747, + "step": 2632 + }, + { + "epoch": 0.0, + "learning_rate": 4.999920829259141e-05, + "loss": 1.326, + "step": 2633 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999207667356896e-05, + "loss": 1.2759, + "step": 2634 + }, + { + "epoch": 0.0, + "learning_rate": 4.999920704187561e-05, + "loss": 0.9534, + "step": 2635 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999206416147535e-05, + "loss": 1.0857, + "step": 2636 + }, + { + "epoch": 0.0, + "learning_rate": 4.999920579017269e-05, + "loss": 0.8371, + "step": 2637 + }, + { + "epoch": 0.0, + "learning_rate": 4.999920516395105e-05, + "loss": 1.1331, + "step": 2638 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999204537482634e-05, + "loss": 0.9473, + "step": 2639 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999203910767435e-05, + "loss": 1.1575, + "step": 2640 + }, + { + "epoch": 0.0, + "learning_rate": 4.999920328380546e-05, + "loss": 1.06, + "step": 2641 + }, + { + "epoch": 0.0, + "learning_rate": 4.99992026565967e-05, + "loss": 0.4564, + "step": 2642 + }, + { + "epoch": 0.0, + "learning_rate": 4.999920202914116e-05, + "loss": 1.0863, + "step": 2643 + }, + { + "epoch": 0.0, + "learning_rate": 4.999920140143883e-05, + "loss": 1.8009, + "step": 2644 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999200773489726e-05, + "loss": 1.1431, + "step": 2645 + }, + { + "epoch": 0.0, + "learning_rate": 4.999920014529383e-05, + "loss": 0.7587, + "step": 2646 + }, + { + "epoch": 0.0, + "learning_rate": 4.999919951685117e-05, + "loss": 0.8565, + "step": 2647 + }, + { + "epoch": 0.0, + "learning_rate": 4.999919888816171e-05, + "loss": 1.1586, + "step": 2648 + }, + { + "epoch": 0.0, + "learning_rate": 4.999919825922549e-05, + "loss": 0.9109, + "step": 2649 + }, + { + "epoch": 0.0, + "learning_rate": 4.999919763004247e-05, + "loss": 1.2484, + "step": 2650 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999197000612675e-05, + "loss": 1.5153, + "step": 2651 + }, + { + "epoch": 0.0, + "learning_rate": 4.999919637093609e-05, + "loss": 1.4671, + "step": 2652 + }, + { + "epoch": 0.0, + "learning_rate": 4.999919574101274e-05, + "loss": 1.179, + "step": 2653 + }, + { + "epoch": 0.0, + "learning_rate": 4.99991951108426e-05, + "loss": 1.284, + "step": 2654 + }, + { + "epoch": 0.0, + "learning_rate": 4.999919448042568e-05, + "loss": 0.8949, + "step": 2655 + }, + { + "epoch": 0.0, + "learning_rate": 4.999919384976198e-05, + "loss": 1.2341, + "step": 2656 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999193218851496e-05, + "loss": 1.2149, + "step": 2657 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999192587694226e-05, + "loss": 0.8877, + "step": 2658 + }, + { + "epoch": 0.0, + "learning_rate": 4.999919195629018e-05, + "loss": 0.9975, + "step": 2659 + }, + { + "epoch": 0.0, + "learning_rate": 4.999919132463935e-05, + "loss": 1.095, + "step": 2660 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999190692741735e-05, + "loss": 0.7553, + "step": 2661 + }, + { + "epoch": 0.0, + "learning_rate": 4.999919006059735e-05, + "loss": 0.8629, + "step": 2662 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999189428206174e-05, + "loss": 0.904, + "step": 2663 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999188795568216e-05, + "loss": 1.3531, + "step": 2664 + }, + { + "epoch": 0.0, + "learning_rate": 4.999918816268349e-05, + "loss": 1.2525, + "step": 2665 + }, + { + "epoch": 0.0, + "learning_rate": 4.999918752955197e-05, + "loss": 0.8583, + "step": 2666 + }, + { + "epoch": 0.0, + "learning_rate": 4.999918689617367e-05, + "loss": 1.2082, + "step": 2667 + }, + { + "epoch": 0.0, + "learning_rate": 4.999918626254859e-05, + "loss": 1.3743, + "step": 2668 + }, + { + "epoch": 0.0, + "learning_rate": 4.999918562867672e-05, + "loss": 1.4376, + "step": 2669 + }, + { + "epoch": 0.0, + "learning_rate": 4.999918499455808e-05, + "loss": 1.577, + "step": 2670 + }, + { + "epoch": 0.0, + "learning_rate": 4.999918436019266e-05, + "loss": 1.177, + "step": 2671 + }, + { + "epoch": 0.0, + "learning_rate": 4.999918372558046e-05, + "loss": 1.1328, + "step": 2672 + }, + { + "epoch": 0.0, + "learning_rate": 4.999918309072147e-05, + "loss": 1.1072, + "step": 2673 + }, + { + "epoch": 0.0, + "learning_rate": 4.999918245561569e-05, + "loss": 1.086, + "step": 2674 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999181820263144e-05, + "loss": 1.1498, + "step": 2675 + }, + { + "epoch": 0.0, + "learning_rate": 4.999918118466381e-05, + "loss": 1.0412, + "step": 2676 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999180548817704e-05, + "loss": 1.4574, + "step": 2677 + }, + { + "epoch": 0.0, + "learning_rate": 4.99991799127248e-05, + "loss": 0.992, + "step": 2678 + }, + { + "epoch": 0.0, + "learning_rate": 4.999917927638513e-05, + "loss": 0.3997, + "step": 2679 + }, + { + "epoch": 0.0, + "learning_rate": 4.999917863979867e-05, + "loss": 0.8342, + "step": 2680 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999178002965436e-05, + "loss": 1.1407, + "step": 2681 + }, + { + "epoch": 0.0, + "learning_rate": 4.999917736588542e-05, + "loss": 1.3863, + "step": 2682 + }, + { + "epoch": 0.0, + "learning_rate": 4.999917672855861e-05, + "loss": 1.0188, + "step": 2683 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999176090985035e-05, + "loss": 1.1638, + "step": 2684 + }, + { + "epoch": 0.0, + "learning_rate": 4.999917545316467e-05, + "loss": 1.0254, + "step": 2685 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999174815097524e-05, + "loss": 1.1862, + "step": 2686 + }, + { + "epoch": 0.0, + "learning_rate": 4.999917417678359e-05, + "loss": 1.2064, + "step": 2687 + }, + { + "epoch": 0.0, + "learning_rate": 4.999917353822289e-05, + "loss": 0.6489, + "step": 2688 + }, + { + "epoch": 0.0, + "learning_rate": 4.99991728994154e-05, + "loss": 0.8799, + "step": 2689 + }, + { + "epoch": 0.0, + "learning_rate": 4.999917226036113e-05, + "loss": 1.1395, + "step": 2690 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999171621060076e-05, + "loss": 1.3707, + "step": 2691 + }, + { + "epoch": 0.0, + "learning_rate": 4.999917098151225e-05, + "loss": 1.0209, + "step": 2692 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999170341717626e-05, + "loss": 1.2067, + "step": 2693 + }, + { + "epoch": 0.0, + "learning_rate": 4.999916970167623e-05, + "loss": 1.3174, + "step": 2694 + }, + { + "epoch": 0.0, + "learning_rate": 4.999916906138806e-05, + "loss": 1.1166, + "step": 2695 + }, + { + "epoch": 0.0, + "learning_rate": 4.99991684208531e-05, + "loss": 1.2999, + "step": 2696 + }, + { + "epoch": 0.0, + "learning_rate": 4.999916778007136e-05, + "loss": 1.196, + "step": 2697 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999167139042834e-05, + "loss": 1.3003, + "step": 2698 + }, + { + "epoch": 0.0, + "learning_rate": 4.999916649776754e-05, + "loss": 1.1829, + "step": 2699 + }, + { + "epoch": 0.0, + "learning_rate": 4.999916585624546e-05, + "loss": 1.0895, + "step": 2700 + }, + { + "epoch": 0.0, + "learning_rate": 4.999916521447659e-05, + "loss": 0.8053, + "step": 2701 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999164572460945e-05, + "loss": 1.4009, + "step": 2702 + }, + { + "epoch": 0.0, + "learning_rate": 4.999916393019852e-05, + "loss": 1.1801, + "step": 2703 + }, + { + "epoch": 0.0, + "learning_rate": 4.999916328768931e-05, + "loss": 1.2571, + "step": 2704 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999162644933315e-05, + "loss": 1.1818, + "step": 2705 + }, + { + "epoch": 0.0, + "learning_rate": 4.999916200193055e-05, + "loss": 1.4611, + "step": 2706 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999161358680995e-05, + "loss": 1.211, + "step": 2707 + }, + { + "epoch": 0.0, + "learning_rate": 4.999916071518467e-05, + "loss": 1.0912, + "step": 2708 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999160071441555e-05, + "loss": 1.0762, + "step": 2709 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999159427451655e-05, + "loss": 1.2525, + "step": 2710 + }, + { + "epoch": 0.0, + "learning_rate": 4.999915878321498e-05, + "loss": 1.1884, + "step": 2711 + }, + { + "epoch": 0.0, + "learning_rate": 4.999915813873152e-05, + "loss": 1.2574, + "step": 2712 + }, + { + "epoch": 0.0, + "learning_rate": 4.999915749400128e-05, + "loss": 1.4895, + "step": 2713 + }, + { + "epoch": 0.0, + "learning_rate": 4.999915684902427e-05, + "loss": 0.8249, + "step": 2714 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999156203800464e-05, + "loss": 0.9618, + "step": 2715 + }, + { + "epoch": 0.0, + "learning_rate": 4.999915555832988e-05, + "loss": 1.293, + "step": 2716 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999154912612516e-05, + "loss": 1.3095, + "step": 2717 + }, + { + "epoch": 0.0, + "learning_rate": 4.999915426664837e-05, + "loss": 1.0918, + "step": 2718 + }, + { + "epoch": 0.0, + "learning_rate": 4.999915362043744e-05, + "loss": 1.1069, + "step": 2719 + }, + { + "epoch": 0.0, + "learning_rate": 4.999915297397974e-05, + "loss": 1.0256, + "step": 2720 + }, + { + "epoch": 0.0, + "learning_rate": 4.999915232727526e-05, + "loss": 1.2754, + "step": 2721 + }, + { + "epoch": 0.0, + "learning_rate": 4.999915168032399e-05, + "loss": 1.1649, + "step": 2722 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999151033125934e-05, + "loss": 1.2403, + "step": 2723 + }, + { + "epoch": 0.0, + "learning_rate": 4.99991503856811e-05, + "loss": 0.715, + "step": 2724 + }, + { + "epoch": 0.0, + "learning_rate": 4.999914973798949e-05, + "loss": 1.0529, + "step": 2725 + }, + { + "epoch": 0.0, + "learning_rate": 4.99991490900511e-05, + "loss": 1.1878, + "step": 2726 + }, + { + "epoch": 0.0, + "learning_rate": 4.999914844186592e-05, + "loss": 0.9524, + "step": 2727 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999147793433965e-05, + "loss": 0.9358, + "step": 2728 + }, + { + "epoch": 0.0, + "learning_rate": 4.999914714475523e-05, + "loss": 0.5873, + "step": 2729 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999146495829716e-05, + "loss": 0.9812, + "step": 2730 + }, + { + "epoch": 0.0, + "learning_rate": 4.999914584665741e-05, + "loss": 1.1639, + "step": 2731 + }, + { + "epoch": 0.0, + "learning_rate": 4.999914519723834e-05, + "loss": 1.1829, + "step": 2732 + }, + { + "epoch": 0.0, + "learning_rate": 4.999914454757247e-05, + "loss": 1.0936, + "step": 2733 + }, + { + "epoch": 0.0, + "learning_rate": 4.999914389765984e-05, + "loss": 1.0867, + "step": 2734 + }, + { + "epoch": 0.0, + "learning_rate": 4.999914324750041e-05, + "loss": 0.9176, + "step": 2735 + }, + { + "epoch": 0.0, + "learning_rate": 4.999914259709421e-05, + "loss": 1.2482, + "step": 2736 + }, + { + "epoch": 0.0, + "learning_rate": 4.999914194644123e-05, + "loss": 1.1445, + "step": 2737 + }, + { + "epoch": 0.0, + "learning_rate": 4.999914129554146e-05, + "loss": 1.02, + "step": 2738 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999140644394914e-05, + "loss": 1.0444, + "step": 2739 + }, + { + "epoch": 0.0, + "learning_rate": 4.999913999300159e-05, + "loss": 2.032, + "step": 2740 + }, + { + "epoch": 0.0, + "learning_rate": 4.999913934136148e-05, + "loss": 1.6777, + "step": 2741 + }, + { + "epoch": 0.0, + "learning_rate": 4.999913868947459e-05, + "loss": 1.1249, + "step": 2742 + }, + { + "epoch": 0.0, + "learning_rate": 4.999913803734092e-05, + "loss": 1.2135, + "step": 2743 + }, + { + "epoch": 0.0, + "learning_rate": 4.999913738496047e-05, + "loss": 1.1091, + "step": 2744 + }, + { + "epoch": 0.0, + "learning_rate": 4.999913673233323e-05, + "loss": 0.9304, + "step": 2745 + }, + { + "epoch": 0.0, + "learning_rate": 4.999913607945922e-05, + "loss": 1.0457, + "step": 2746 + }, + { + "epoch": 0.0, + "learning_rate": 4.999913542633843e-05, + "loss": 1.1601, + "step": 2747 + }, + { + "epoch": 0.0, + "learning_rate": 4.999913477297085e-05, + "loss": 1.213, + "step": 2748 + }, + { + "epoch": 0.0, + "learning_rate": 4.99991341193565e-05, + "loss": 1.1778, + "step": 2749 + }, + { + "epoch": 0.0, + "learning_rate": 4.999913346549536e-05, + "loss": 1.1547, + "step": 2750 + }, + { + "epoch": 0.0, + "learning_rate": 4.999913281138744e-05, + "loss": 1.2153, + "step": 2751 + }, + { + "epoch": 0.0, + "learning_rate": 4.999913215703275e-05, + "loss": 1.0718, + "step": 2752 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999131502431266e-05, + "loss": 1.0972, + "step": 2753 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999130847583006e-05, + "loss": 1.3415, + "step": 2754 + }, + { + "epoch": 0.0, + "learning_rate": 4.999913019248796e-05, + "loss": 1.0715, + "step": 2755 + }, + { + "epoch": 0.0, + "learning_rate": 4.999912953714614e-05, + "loss": 0.9399, + "step": 2756 + }, + { + "epoch": 0.0, + "learning_rate": 4.999912888155753e-05, + "loss": 1.0309, + "step": 2757 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999128225722155e-05, + "loss": 1.2394, + "step": 2758 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999127569639985e-05, + "loss": 1.0969, + "step": 2759 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999126913311044e-05, + "loss": 1.1535, + "step": 2760 + }, + { + "epoch": 0.0, + "learning_rate": 4.999912625673532e-05, + "loss": 1.1519, + "step": 2761 + }, + { + "epoch": 0.0, + "learning_rate": 4.999912559991281e-05, + "loss": 1.7157, + "step": 2762 + }, + { + "epoch": 0.0, + "learning_rate": 4.999912494284352e-05, + "loss": 1.1947, + "step": 2763 + }, + { + "epoch": 0.0, + "learning_rate": 4.999912428552745e-05, + "loss": 0.9904, + "step": 2764 + }, + { + "epoch": 0.0, + "learning_rate": 4.99991236279646e-05, + "loss": 0.857, + "step": 2765 + }, + { + "epoch": 0.0, + "learning_rate": 4.999912297015497e-05, + "loss": 1.1261, + "step": 2766 + }, + { + "epoch": 0.0, + "learning_rate": 4.999912231209855e-05, + "loss": 1.2156, + "step": 2767 + }, + { + "epoch": 0.0, + "learning_rate": 4.999912165379537e-05, + "loss": 1.2385, + "step": 2768 + }, + { + "epoch": 0.0, + "learning_rate": 4.99991209952454e-05, + "loss": 1.002, + "step": 2769 + }, + { + "epoch": 0.0, + "learning_rate": 4.999912033644864e-05, + "loss": 1.1895, + "step": 2770 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999119677405105e-05, + "loss": 1.2197, + "step": 2771 + }, + { + "epoch": 0.0, + "learning_rate": 4.999911901811479e-05, + "loss": 1.1403, + "step": 2772 + }, + { + "epoch": 0.0, + "learning_rate": 4.999911835857769e-05, + "loss": 1.2452, + "step": 2773 + }, + { + "epoch": 0.0, + "learning_rate": 4.999911769879382e-05, + "loss": 0.8898, + "step": 2774 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999117038763155e-05, + "loss": 0.8207, + "step": 2775 + }, + { + "epoch": 0.0, + "learning_rate": 4.999911637848572e-05, + "loss": 0.6392, + "step": 2776 + }, + { + "epoch": 0.0, + "learning_rate": 4.99991157179615e-05, + "loss": 0.7881, + "step": 2777 + }, + { + "epoch": 0.0, + "learning_rate": 4.99991150571905e-05, + "loss": 1.3896, + "step": 2778 + }, + { + "epoch": 0.0, + "learning_rate": 4.999911439617272e-05, + "loss": 1.2835, + "step": 2779 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999113734908156e-05, + "loss": 1.1221, + "step": 2780 + }, + { + "epoch": 0.0, + "learning_rate": 4.999911307339682e-05, + "loss": 0.4653, + "step": 2781 + }, + { + "epoch": 0.0, + "learning_rate": 4.999911241163869e-05, + "loss": 0.9905, + "step": 2782 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999111749633785e-05, + "loss": 1.1892, + "step": 2783 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999111087382105e-05, + "loss": 1.2269, + "step": 2784 + }, + { + "epoch": 0.0, + "learning_rate": 4.999911042488365e-05, + "loss": 1.7996, + "step": 2785 + }, + { + "epoch": 0.0, + "learning_rate": 4.99991097621384e-05, + "loss": 1.5628, + "step": 2786 + }, + { + "epoch": 0.0, + "learning_rate": 4.999910909914637e-05, + "loss": 0.7233, + "step": 2787 + }, + { + "epoch": 0.0, + "learning_rate": 4.999910843590756e-05, + "loss": 0.6555, + "step": 2788 + }, + { + "epoch": 0.0, + "learning_rate": 4.999910777242197e-05, + "loss": 1.121, + "step": 2789 + }, + { + "epoch": 0.0, + "learning_rate": 4.99991071086896e-05, + "loss": 1.4148, + "step": 2790 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999106444710454e-05, + "loss": 1.2223, + "step": 2791 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999105780484524e-05, + "loss": 1.1463, + "step": 2792 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999105116011815e-05, + "loss": 1.0044, + "step": 2793 + }, + { + "epoch": 0.0, + "learning_rate": 4.999910445129232e-05, + "loss": 1.2547, + "step": 2794 + }, + { + "epoch": 0.0, + "learning_rate": 4.999910378632605e-05, + "loss": 1.0682, + "step": 2795 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999103121113e-05, + "loss": 1.0591, + "step": 2796 + }, + { + "epoch": 0.0, + "learning_rate": 4.999910245565317e-05, + "loss": 1.3869, + "step": 2797 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999101789946555e-05, + "loss": 1.2375, + "step": 2798 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999101123993165e-05, + "loss": 1.1, + "step": 2799 + }, + { + "epoch": 0.0, + "learning_rate": 4.999910045779299e-05, + "loss": 0.9864, + "step": 2800 + }, + { + "epoch": 0.0, + "learning_rate": 4.999909979134603e-05, + "loss": 1.2769, + "step": 2801 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999099124652294e-05, + "loss": 1.2261, + "step": 2802 + }, + { + "epoch": 0.0, + "learning_rate": 4.999909845771178e-05, + "loss": 1.41, + "step": 2803 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999097790524485e-05, + "loss": 1.2601, + "step": 2804 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999097123090413e-05, + "loss": 1.0879, + "step": 2805 + }, + { + "epoch": 0.0, + "learning_rate": 4.999909645540955e-05, + "loss": 1.2381, + "step": 2806 + }, + { + "epoch": 0.0, + "learning_rate": 4.999909578748191e-05, + "loss": 1.0534, + "step": 2807 + }, + { + "epoch": 0.0, + "learning_rate": 4.999909511930749e-05, + "loss": 1.2444, + "step": 2808 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999094450886294e-05, + "loss": 1.0326, + "step": 2809 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999093782218306e-05, + "loss": 1.0756, + "step": 2810 + }, + { + "epoch": 0.0, + "learning_rate": 4.999909311330355e-05, + "loss": 0.9788, + "step": 2811 + }, + { + "epoch": 0.0, + "learning_rate": 4.999909244414201e-05, + "loss": 1.0639, + "step": 2812 + }, + { + "epoch": 0.0, + "learning_rate": 4.999909177473369e-05, + "loss": 1.063, + "step": 2813 + }, + { + "epoch": 0.0, + "learning_rate": 4.999909110507859e-05, + "loss": 1.05, + "step": 2814 + }, + { + "epoch": 0.0, + "learning_rate": 4.99990904351767e-05, + "loss": 0.9695, + "step": 2815 + }, + { + "epoch": 0.0, + "learning_rate": 4.999908976502804e-05, + "loss": 0.9567, + "step": 2816 + }, + { + "epoch": 0.0, + "learning_rate": 4.99990890946326e-05, + "loss": 1.1835, + "step": 2817 + }, + { + "epoch": 0.0, + "learning_rate": 4.999908842399037e-05, + "loss": 1.1377, + "step": 2818 + }, + { + "epoch": 0.0, + "learning_rate": 4.999908775310137e-05, + "loss": 1.3342, + "step": 2819 + }, + { + "epoch": 0.0, + "learning_rate": 4.999908708196559e-05, + "loss": 0.7403, + "step": 2820 + }, + { + "epoch": 0.0, + "learning_rate": 4.999908641058302e-05, + "loss": 1.149, + "step": 2821 + }, + { + "epoch": 0.0, + "learning_rate": 4.999908573895367e-05, + "loss": 1.3792, + "step": 2822 + }, + { + "epoch": 0.0, + "learning_rate": 4.999908506707754e-05, + "loss": 1.1136, + "step": 2823 + }, + { + "epoch": 0.0, + "learning_rate": 4.999908439495464e-05, + "loss": 1.1038, + "step": 2824 + }, + { + "epoch": 0.0, + "learning_rate": 4.999908372258495e-05, + "loss": 1.0534, + "step": 2825 + }, + { + "epoch": 0.0, + "learning_rate": 4.999908304996849e-05, + "loss": 1.4179, + "step": 2826 + }, + { + "epoch": 0.0, + "learning_rate": 4.999908237710523e-05, + "loss": 0.6761, + "step": 2827 + }, + { + "epoch": 0.0, + "learning_rate": 4.999908170399521e-05, + "loss": 1.2538, + "step": 2828 + }, + { + "epoch": 0.0, + "learning_rate": 4.99990810306384e-05, + "loss": 0.8459, + "step": 2829 + }, + { + "epoch": 0.0, + "learning_rate": 4.999908035703481e-05, + "loss": 1.1891, + "step": 2830 + }, + { + "epoch": 0.0, + "learning_rate": 4.999907968318444e-05, + "loss": 1.0343, + "step": 2831 + }, + { + "epoch": 0.0, + "learning_rate": 4.999907900908729e-05, + "loss": 0.8952, + "step": 2832 + }, + { + "epoch": 0.0, + "learning_rate": 4.999907833474336e-05, + "loss": 1.2053, + "step": 2833 + }, + { + "epoch": 0.0, + "learning_rate": 4.999907766015265e-05, + "loss": 0.9051, + "step": 2834 + }, + { + "epoch": 0.0, + "learning_rate": 4.999907698531516e-05, + "loss": 1.2077, + "step": 2835 + }, + { + "epoch": 0.0, + "learning_rate": 4.999907631023089e-05, + "loss": 1.2348, + "step": 2836 + }, + { + "epoch": 0.0, + "learning_rate": 4.999907563489984e-05, + "loss": 0.9451, + "step": 2837 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999074959322e-05, + "loss": 1.019, + "step": 2838 + }, + { + "epoch": 0.0, + "learning_rate": 4.99990742834974e-05, + "loss": 1.1164, + "step": 2839 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999073607425995e-05, + "loss": 0.9338, + "step": 2840 + }, + { + "epoch": 0.0, + "learning_rate": 4.999907293110783e-05, + "loss": 1.0339, + "step": 2841 + }, + { + "epoch": 0.0, + "learning_rate": 4.999907225454287e-05, + "loss": 0.8994, + "step": 2842 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999071577731135e-05, + "loss": 0.8476, + "step": 2843 + }, + { + "epoch": 0.0, + "learning_rate": 4.999907090067262e-05, + "loss": 0.8179, + "step": 2844 + }, + { + "epoch": 0.0, + "learning_rate": 4.999907022336733e-05, + "loss": 0.8631, + "step": 2845 + }, + { + "epoch": 0.0, + "learning_rate": 4.999906954581526e-05, + "loss": 1.1398, + "step": 2846 + }, + { + "epoch": 0.0, + "learning_rate": 4.99990688680164e-05, + "loss": 0.8859, + "step": 2847 + }, + { + "epoch": 0.0, + "learning_rate": 4.999906818997077e-05, + "loss": 1.5999, + "step": 2848 + }, + { + "epoch": 0.0, + "learning_rate": 4.999906751167835e-05, + "loss": 1.3751, + "step": 2849 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999066833139155e-05, + "loss": 1.2111, + "step": 2850 + }, + { + "epoch": 0.0, + "learning_rate": 4.999906615435318e-05, + "loss": 1.1603, + "step": 2851 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999065475320424e-05, + "loss": 1.0992, + "step": 2852 + }, + { + "epoch": 0.0, + "learning_rate": 4.999906479604088e-05, + "loss": 1.5599, + "step": 2853 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999064116514574e-05, + "loss": 1.2482, + "step": 2854 + }, + { + "epoch": 0.0, + "learning_rate": 4.999906343674147e-05, + "loss": 0.7927, + "step": 2855 + }, + { + "epoch": 0.0, + "learning_rate": 4.999906275672159e-05, + "loss": 1.3003, + "step": 2856 + }, + { + "epoch": 0.0, + "learning_rate": 4.999906207645494e-05, + "loss": 0.9895, + "step": 2857 + }, + { + "epoch": 0.0, + "learning_rate": 4.99990613959415e-05, + "loss": 1.076, + "step": 2858 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999060715181276e-05, + "loss": 1.2395, + "step": 2859 + }, + { + "epoch": 0.0, + "learning_rate": 4.999906003417428e-05, + "loss": 1.2853, + "step": 2860 + }, + { + "epoch": 0.0, + "learning_rate": 4.999905935292051e-05, + "loss": 0.9141, + "step": 2861 + }, + { + "epoch": 0.0, + "learning_rate": 4.999905867141995e-05, + "loss": 1.4543, + "step": 2862 + }, + { + "epoch": 0.0, + "learning_rate": 4.999905798967261e-05, + "loss": 0.7374, + "step": 2863 + }, + { + "epoch": 0.0, + "learning_rate": 4.999905730767849e-05, + "loss": 1.238, + "step": 2864 + }, + { + "epoch": 0.0, + "learning_rate": 4.999905662543759e-05, + "loss": 1.0451, + "step": 2865 + }, + { + "epoch": 0.0, + "learning_rate": 4.999905594294991e-05, + "loss": 1.1267, + "step": 2866 + }, + { + "epoch": 0.0, + "learning_rate": 4.999905526021546e-05, + "loss": 1.1472, + "step": 2867 + }, + { + "epoch": 0.0, + "learning_rate": 4.999905457723422e-05, + "loss": 1.1766, + "step": 2868 + }, + { + "epoch": 0.0, + "learning_rate": 4.99990538940062e-05, + "loss": 1.3368, + "step": 2869 + }, + { + "epoch": 0.0, + "learning_rate": 4.99990532105314e-05, + "loss": 1.1428, + "step": 2870 + }, + { + "epoch": 0.0, + "learning_rate": 4.999905252680982e-05, + "loss": 1.3337, + "step": 2871 + }, + { + "epoch": 0.0, + "learning_rate": 4.999905184284146e-05, + "loss": 1.0391, + "step": 2872 + }, + { + "epoch": 0.0, + "learning_rate": 4.999905115862632e-05, + "loss": 1.5314, + "step": 2873 + }, + { + "epoch": 0.0, + "learning_rate": 4.99990504741644e-05, + "loss": 1.2044, + "step": 2874 + }, + { + "epoch": 0.0, + "learning_rate": 4.99990497894557e-05, + "loss": 1.0157, + "step": 2875 + }, + { + "epoch": 0.0, + "learning_rate": 4.999904910450022e-05, + "loss": 1.0111, + "step": 2876 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999048419297956e-05, + "loss": 0.7298, + "step": 2877 + }, + { + "epoch": 0.0, + "learning_rate": 4.999904773384892e-05, + "loss": 0.9956, + "step": 2878 + }, + { + "epoch": 0.0, + "learning_rate": 4.99990470481531e-05, + "loss": 0.5304, + "step": 2879 + }, + { + "epoch": 0.0, + "learning_rate": 4.99990463622105e-05, + "loss": 0.5684, + "step": 2880 + }, + { + "epoch": 0.0, + "learning_rate": 4.999904567602112e-05, + "loss": 1.1679, + "step": 2881 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999044989584954e-05, + "loss": 0.807, + "step": 2882 + }, + { + "epoch": 0.0, + "learning_rate": 4.999904430290202e-05, + "loss": 0.5727, + "step": 2883 + }, + { + "epoch": 0.0, + "learning_rate": 4.99990436159723e-05, + "loss": 0.6624, + "step": 2884 + }, + { + "epoch": 0.0, + "learning_rate": 4.999904292879579e-05, + "loss": 1.0362, + "step": 2885 + }, + { + "epoch": 0.0, + "learning_rate": 4.999904224137251e-05, + "loss": 1.2741, + "step": 2886 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999041553702455e-05, + "loss": 1.2492, + "step": 2887 + }, + { + "epoch": 0.0, + "learning_rate": 4.999904086578561e-05, + "loss": 1.405, + "step": 2888 + }, + { + "epoch": 0.0, + "learning_rate": 4.999904017762199e-05, + "loss": 0.9898, + "step": 2889 + }, + { + "epoch": 0.0, + "learning_rate": 4.999903948921159e-05, + "loss": 1.2159, + "step": 2890 + }, + { + "epoch": 0.0, + "learning_rate": 4.999903880055441e-05, + "loss": 0.9097, + "step": 2891 + }, + { + "epoch": 0.0, + "learning_rate": 4.999903811165045e-05, + "loss": 1.3631, + "step": 2892 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999037422499703e-05, + "loss": 6.0654, + "step": 2893 + }, + { + "epoch": 0.0, + "learning_rate": 4.999903673310219e-05, + "loss": 5.9507, + "step": 2894 + }, + { + "epoch": 0.0, + "learning_rate": 4.999903604345789e-05, + "loss": 5.8381, + "step": 2895 + }, + { + "epoch": 0.0, + "learning_rate": 4.999903535356681e-05, + "loss": 5.6796, + "step": 2896 + }, + { + "epoch": 0.0, + "learning_rate": 4.999903466342895e-05, + "loss": 5.9238, + "step": 2897 + }, + { + "epoch": 0.0, + "learning_rate": 4.999903397304431e-05, + "loss": 4.1682, + "step": 2898 + }, + { + "epoch": 0.0, + "learning_rate": 4.999903328241289e-05, + "loss": 1.2678, + "step": 2899 + }, + { + "epoch": 0.0, + "learning_rate": 4.999903259153469e-05, + "loss": 1.1465, + "step": 2900 + }, + { + "epoch": 0.0, + "learning_rate": 4.999903190040971e-05, + "loss": 1.0097, + "step": 2901 + }, + { + "epoch": 0.0, + "learning_rate": 4.999903120903795e-05, + "loss": 1.0424, + "step": 2902 + }, + { + "epoch": 0.0, + "learning_rate": 4.999903051741941e-05, + "loss": 1.1693, + "step": 2903 + }, + { + "epoch": 0.0, + "learning_rate": 4.999902982555409e-05, + "loss": 0.7356, + "step": 2904 + }, + { + "epoch": 0.0, + "learning_rate": 4.999902913344199e-05, + "loss": 1.0111, + "step": 2905 + }, + { + "epoch": 0.0, + "learning_rate": 4.999902844108311e-05, + "loss": 1.2294, + "step": 2906 + }, + { + "epoch": 0.0, + "learning_rate": 4.999902774847745e-05, + "loss": 0.8252, + "step": 2907 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999027055625014e-05, + "loss": 0.9026, + "step": 2908 + }, + { + "epoch": 0.0, + "learning_rate": 4.999902636252579e-05, + "loss": 1.0901, + "step": 2909 + }, + { + "epoch": 0.0, + "learning_rate": 4.999902566917979e-05, + "loss": 1.0077, + "step": 2910 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999024975587015e-05, + "loss": 1.1404, + "step": 2911 + }, + { + "epoch": 0.0, + "learning_rate": 4.999902428174745e-05, + "loss": 0.9697, + "step": 2912 + }, + { + "epoch": 0.0, + "learning_rate": 4.999902358766112e-05, + "loss": 1.0877, + "step": 2913 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999022893327994e-05, + "loss": 1.2615, + "step": 2914 + }, + { + "epoch": 0.0, + "learning_rate": 4.99990221987481e-05, + "loss": 1.2332, + "step": 2915 + }, + { + "epoch": 0.0, + "learning_rate": 4.999902150392142e-05, + "loss": 1.2203, + "step": 2916 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999020808847965e-05, + "loss": 1.2718, + "step": 2917 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999020113527726e-05, + "loss": 1.2264, + "step": 2918 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999019417960716e-05, + "loss": 1.016, + "step": 2919 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999018722146914e-05, + "loss": 0.9891, + "step": 2920 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999018026086334e-05, + "loss": 1.0894, + "step": 2921 + }, + { + "epoch": 0.0, + "learning_rate": 4.999901732977898e-05, + "loss": 0.9991, + "step": 2922 + }, + { + "epoch": 0.0, + "learning_rate": 4.999901663322484e-05, + "loss": 1.234, + "step": 2923 + }, + { + "epoch": 0.0, + "learning_rate": 4.999901593642393e-05, + "loss": 1.3263, + "step": 2924 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999015239376235e-05, + "loss": 0.9724, + "step": 2925 + }, + { + "epoch": 0.0, + "learning_rate": 4.999901454208176e-05, + "loss": 1.0791, + "step": 2926 + }, + { + "epoch": 0.0, + "learning_rate": 4.99990138445405e-05, + "loss": 1.2334, + "step": 2927 + }, + { + "epoch": 0.0, + "learning_rate": 4.999901314675246e-05, + "loss": 0.8321, + "step": 2928 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999012448717655e-05, + "loss": 1.2932, + "step": 2929 + }, + { + "epoch": 0.0, + "learning_rate": 4.999901175043605e-05, + "loss": 1.3547, + "step": 2930 + }, + { + "epoch": 0.0, + "learning_rate": 4.999901105190768e-05, + "loss": 0.7176, + "step": 2931 + }, + { + "epoch": 0.0, + "learning_rate": 4.999901035313252e-05, + "loss": 0.7461, + "step": 2932 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999009654110594e-05, + "loss": 1.4159, + "step": 2933 + }, + { + "epoch": 0.0, + "learning_rate": 4.999900895484188e-05, + "loss": 1.0933, + "step": 2934 + }, + { + "epoch": 0.0, + "learning_rate": 4.999900825532639e-05, + "loss": 1.0124, + "step": 2935 + }, + { + "epoch": 0.0, + "learning_rate": 4.999900755556411e-05, + "loss": 1.2458, + "step": 2936 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999006855555064e-05, + "loss": 1.2437, + "step": 2937 + }, + { + "epoch": 0.0, + "learning_rate": 4.999900615529923e-05, + "loss": 0.7774, + "step": 2938 + }, + { + "epoch": 0.0, + "learning_rate": 4.999900545479662e-05, + "loss": 1.2332, + "step": 2939 + }, + { + "epoch": 0.0, + "learning_rate": 4.999900475404723e-05, + "loss": 1.1314, + "step": 2940 + }, + { + "epoch": 0.0, + "learning_rate": 4.999900405305106e-05, + "loss": 0.9918, + "step": 2941 + }, + { + "epoch": 0.0, + "learning_rate": 4.99990033518081e-05, + "loss": 0.5156, + "step": 2942 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999002650318374e-05, + "loss": 0.386, + "step": 2943 + }, + { + "epoch": 0.0, + "learning_rate": 4.999900194858187e-05, + "loss": 0.3806, + "step": 2944 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999001246598575e-05, + "loss": 0.2731, + "step": 2945 + }, + { + "epoch": 0.0, + "learning_rate": 4.99990005443685e-05, + "loss": 0.311, + "step": 2946 + }, + { + "epoch": 0.0, + "learning_rate": 4.999899984189166e-05, + "loss": 0.1946, + "step": 2947 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998999139168026e-05, + "loss": 0.1996, + "step": 2948 + }, + { + "epoch": 0.0, + "learning_rate": 4.999899843619762e-05, + "loss": 0.1449, + "step": 2949 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998997732980434e-05, + "loss": 0.1897, + "step": 2950 + }, + { + "epoch": 0.0, + "learning_rate": 4.999899702951647e-05, + "loss": 0.0818, + "step": 2951 + }, + { + "epoch": 0.0, + "learning_rate": 4.999899632580572e-05, + "loss": 0.153, + "step": 2952 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998995621848195e-05, + "loss": 0.1676, + "step": 2953 + }, + { + "epoch": 0.0, + "learning_rate": 4.999899491764389e-05, + "loss": 0.0627, + "step": 2954 + }, + { + "epoch": 0.0, + "learning_rate": 4.99989942131928e-05, + "loss": 0.1163, + "step": 2955 + }, + { + "epoch": 0.0, + "learning_rate": 4.999899350849494e-05, + "loss": 0.1337, + "step": 2956 + }, + { + "epoch": 0.0, + "learning_rate": 4.999899280355029e-05, + "loss": 0.139, + "step": 2957 + }, + { + "epoch": 0.0, + "learning_rate": 4.999899209835887e-05, + "loss": 0.0755, + "step": 2958 + }, + { + "epoch": 0.0, + "learning_rate": 4.999899139292067e-05, + "loss": 0.0569, + "step": 2959 + }, + { + "epoch": 0.0, + "learning_rate": 4.999899068723568e-05, + "loss": 0.0567, + "step": 2960 + }, + { + "epoch": 0.0, + "learning_rate": 4.999898998130392e-05, + "loss": 0.2035, + "step": 2961 + }, + { + "epoch": 0.0, + "learning_rate": 4.999898927512538e-05, + "loss": 0.2997, + "step": 2962 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998988568700056e-05, + "loss": 0.3259, + "step": 2963 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998987862027956e-05, + "loss": 0.1894, + "step": 2964 + }, + { + "epoch": 0.0, + "learning_rate": 4.999898715510908e-05, + "loss": 0.0832, + "step": 2965 + }, + { + "epoch": 0.0, + "learning_rate": 4.999898644794342e-05, + "loss": 1.3086, + "step": 2966 + }, + { + "epoch": 0.0, + "learning_rate": 4.999898574053098e-05, + "loss": 1.2232, + "step": 2967 + }, + { + "epoch": 0.0, + "learning_rate": 4.999898503287176e-05, + "loss": 0.9243, + "step": 2968 + }, + { + "epoch": 0.0, + "learning_rate": 4.999898432496576e-05, + "loss": 1.2614, + "step": 2969 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998983616812984e-05, + "loss": 1.134, + "step": 2970 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998982908413425e-05, + "loss": 1.37, + "step": 2971 + }, + { + "epoch": 0.0, + "learning_rate": 4.999898219976709e-05, + "loss": 1.1254, + "step": 2972 + }, + { + "epoch": 0.0, + "learning_rate": 4.999898149087397e-05, + "loss": 1.1534, + "step": 2973 + }, + { + "epoch": 0.0, + "learning_rate": 4.999898078173408e-05, + "loss": 1.2074, + "step": 2974 + }, + { + "epoch": 0.0, + "learning_rate": 4.99989800723474e-05, + "loss": 0.8483, + "step": 2975 + }, + { + "epoch": 0.0, + "learning_rate": 4.999897936271395e-05, + "loss": 1.3476, + "step": 2976 + }, + { + "epoch": 0.0, + "learning_rate": 4.999897865283372e-05, + "loss": 1.29, + "step": 2977 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998977942706705e-05, + "loss": 1.2638, + "step": 2978 + }, + { + "epoch": 0.0, + "learning_rate": 4.999897723233291e-05, + "loss": 0.9091, + "step": 2979 + }, + { + "epoch": 0.0, + "learning_rate": 4.999897652171234e-05, + "loss": 1.1177, + "step": 2980 + }, + { + "epoch": 0.0, + "learning_rate": 4.999897581084499e-05, + "loss": 1.0874, + "step": 2981 + }, + { + "epoch": 0.0, + "learning_rate": 4.999897509973086e-05, + "loss": 1.0801, + "step": 2982 + }, + { + "epoch": 0.0, + "learning_rate": 4.999897438836995e-05, + "loss": 1.0494, + "step": 2983 + }, + { + "epoch": 0.0, + "learning_rate": 4.999897367676226e-05, + "loss": 1.1651, + "step": 2984 + }, + { + "epoch": 0.0, + "learning_rate": 4.999897296490779e-05, + "loss": 0.8595, + "step": 2985 + }, + { + "epoch": 0.0, + "learning_rate": 4.999897225280654e-05, + "loss": 0.8869, + "step": 2986 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998971540458514e-05, + "loss": 1.2285, + "step": 2987 + }, + { + "epoch": 0.0, + "learning_rate": 4.999897082786371e-05, + "loss": 1.128, + "step": 2988 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998970115022126e-05, + "loss": 1.3749, + "step": 2989 + }, + { + "epoch": 0.0, + "learning_rate": 4.999896940193376e-05, + "loss": 1.3221, + "step": 2990 + }, + { + "epoch": 0.0, + "learning_rate": 4.999896868859861e-05, + "loss": 1.254, + "step": 2991 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998967975016695e-05, + "loss": 1.1455, + "step": 2992 + }, + { + "epoch": 0.0, + "learning_rate": 4.999896726118799e-05, + "loss": 1.0821, + "step": 2993 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998966547112506e-05, + "loss": 1.1394, + "step": 2994 + }, + { + "epoch": 0.0, + "learning_rate": 4.999896583279025e-05, + "loss": 0.8994, + "step": 2995 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998965118221206e-05, + "loss": 1.2445, + "step": 2996 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998964403405386e-05, + "loss": 1.1261, + "step": 2997 + }, + { + "epoch": 0.0, + "learning_rate": 4.999896368834279e-05, + "loss": 1.1851, + "step": 2998 + }, + { + "epoch": 0.0, + "learning_rate": 4.999896297303341e-05, + "loss": 1.5682, + "step": 2999 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998962257477256e-05, + "loss": 0.9643, + "step": 3000 + }, + { + "epoch": 0.0, + "eval_loss": 1.0737282037734985, + "eval_runtime": 85.3847, + "eval_samples_per_second": 16.221, + "eval_steps_per_second": 4.064, + "step": 3000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999896154167432e-05, + "loss": 1.2074, + "step": 3001 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998960825624593e-05, + "loss": 1.3158, + "step": 3002 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998960109328105e-05, + "loss": 1.0965, + "step": 3003 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998959392784825e-05, + "loss": 1.0788, + "step": 3004 + }, + { + "epoch": 0.0, + "learning_rate": 4.999895867599478e-05, + "loss": 0.8845, + "step": 3005 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998957958957946e-05, + "loss": 0.793, + "step": 3006 + }, + { + "epoch": 0.0, + "learning_rate": 4.999895724167433e-05, + "loss": 0.9951, + "step": 3007 + }, + { + "epoch": 0.0, + "learning_rate": 4.999895652414393e-05, + "loss": 1.0859, + "step": 3008 + }, + { + "epoch": 0.0, + "learning_rate": 4.999895580636677e-05, + "loss": 0.8108, + "step": 3009 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998955088342816e-05, + "loss": 0.5787, + "step": 3010 + }, + { + "epoch": 0.0, + "learning_rate": 4.999895437007209e-05, + "loss": 0.5608, + "step": 3011 + }, + { + "epoch": 0.0, + "learning_rate": 4.999895365155458e-05, + "loss": 0.5967, + "step": 3012 + }, + { + "epoch": 0.0, + "learning_rate": 4.99989529327903e-05, + "loss": 0.4456, + "step": 3013 + }, + { + "epoch": 0.0, + "learning_rate": 4.999895221377923e-05, + "loss": 0.5504, + "step": 3014 + }, + { + "epoch": 0.0, + "learning_rate": 4.999895149452138e-05, + "loss": 0.8343, + "step": 3015 + }, + { + "epoch": 0.0, + "learning_rate": 4.999895077501676e-05, + "loss": 1.6915, + "step": 3016 + }, + { + "epoch": 0.0, + "learning_rate": 4.999895005526536e-05, + "loss": 1.2349, + "step": 3017 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998949335267174e-05, + "loss": 1.139, + "step": 3018 + }, + { + "epoch": 0.0, + "learning_rate": 4.999894861502221e-05, + "loss": 1.1852, + "step": 3019 + }, + { + "epoch": 0.0, + "learning_rate": 4.999894789453047e-05, + "loss": 1.2533, + "step": 3020 + }, + { + "epoch": 0.0, + "learning_rate": 4.999894717379195e-05, + "loss": 1.2747, + "step": 3021 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998946452806655e-05, + "loss": 1.1085, + "step": 3022 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998945731574574e-05, + "loss": 1.2659, + "step": 3023 + }, + { + "epoch": 0.0, + "learning_rate": 4.999894501009572e-05, + "loss": 0.7887, + "step": 3024 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998944288370084e-05, + "loss": 1.1709, + "step": 3025 + }, + { + "epoch": 0.0, + "learning_rate": 4.999894356639767e-05, + "loss": 1.1869, + "step": 3026 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998942844178475e-05, + "loss": 1.1321, + "step": 3027 + }, + { + "epoch": 0.0, + "learning_rate": 4.99989421217125e-05, + "loss": 1.2509, + "step": 3028 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998941398999754e-05, + "loss": 1.3264, + "step": 3029 + }, + { + "epoch": 0.0, + "learning_rate": 4.999894067604022e-05, + "loss": 1.0859, + "step": 3030 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998939952833915e-05, + "loss": 0.9069, + "step": 3031 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998939229380825e-05, + "loss": 1.1984, + "step": 3032 + }, + { + "epoch": 0.0, + "learning_rate": 4.999893850568095e-05, + "loss": 0.9934, + "step": 3033 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998937781734304e-05, + "loss": 0.997, + "step": 3034 + }, + { + "epoch": 0.0, + "learning_rate": 4.999893705754089e-05, + "loss": 1.177, + "step": 3035 + }, + { + "epoch": 0.0, + "learning_rate": 4.999893633310068e-05, + "loss": 1.0475, + "step": 3036 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998935608413685e-05, + "loss": 0.9387, + "step": 3037 + }, + { + "epoch": 0.0, + "learning_rate": 4.999893488347993e-05, + "loss": 1.0551, + "step": 3038 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998934158299385e-05, + "loss": 1.1771, + "step": 3039 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998933432872065e-05, + "loss": 1.2724, + "step": 3040 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998932707197966e-05, + "loss": 1.2089, + "step": 3041 + }, + { + "epoch": 0.0, + "learning_rate": 4.999893198127708e-05, + "loss": 1.0284, + "step": 3042 + }, + { + "epoch": 0.0, + "learning_rate": 4.999893125510943e-05, + "loss": 0.9842, + "step": 3043 + }, + { + "epoch": 0.0, + "learning_rate": 4.999893052869499e-05, + "loss": 1.0936, + "step": 3044 + }, + { + "epoch": 0.0, + "learning_rate": 4.999892980203378e-05, + "loss": 1.0716, + "step": 3045 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998929075125784e-05, + "loss": 1.3936, + "step": 3046 + }, + { + "epoch": 0.0, + "learning_rate": 4.999892834797101e-05, + "loss": 1.2452, + "step": 3047 + }, + { + "epoch": 0.0, + "learning_rate": 4.999892762056946e-05, + "loss": 1.0022, + "step": 3048 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998926892921124e-05, + "loss": 1.0688, + "step": 3049 + }, + { + "epoch": 0.0, + "learning_rate": 4.999892616502602e-05, + "loss": 1.0423, + "step": 3050 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998925436884126e-05, + "loss": 1.1952, + "step": 3051 + }, + { + "epoch": 0.0, + "learning_rate": 4.999892470849546e-05, + "loss": 1.157, + "step": 3052 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998923979860015e-05, + "loss": 1.149, + "step": 3053 + }, + { + "epoch": 0.0, + "learning_rate": 4.999892325097779e-05, + "loss": 0.8852, + "step": 3054 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998922521848786e-05, + "loss": 1.442, + "step": 3055 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998921792473005e-05, + "loss": 1.1477, + "step": 3056 + }, + { + "epoch": 0.0, + "learning_rate": 4.999892106285044e-05, + "loss": 1.0485, + "step": 3057 + }, + { + "epoch": 0.0, + "learning_rate": 4.99989203329811e-05, + "loss": 0.9521, + "step": 3058 + }, + { + "epoch": 0.0, + "learning_rate": 4.999891960286498e-05, + "loss": 1.3072, + "step": 3059 + }, + { + "epoch": 0.0, + "learning_rate": 4.999891887250208e-05, + "loss": 1.0286, + "step": 3060 + }, + { + "epoch": 0.0, + "learning_rate": 4.99989181418924e-05, + "loss": 1.2539, + "step": 3061 + }, + { + "epoch": 0.0, + "learning_rate": 4.999891741103595e-05, + "loss": 1.1245, + "step": 3062 + }, + { + "epoch": 0.0, + "learning_rate": 4.999891667993272e-05, + "loss": 0.932, + "step": 3063 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998915948582705e-05, + "loss": 1.4209, + "step": 3064 + }, + { + "epoch": 0.0, + "learning_rate": 4.999891521698591e-05, + "loss": 1.5105, + "step": 3065 + }, + { + "epoch": 0.0, + "learning_rate": 4.999891448514234e-05, + "loss": 1.2014, + "step": 3066 + }, + { + "epoch": 0.0, + "learning_rate": 4.999891375305199e-05, + "loss": 1.3503, + "step": 3067 + }, + { + "epoch": 0.0, + "learning_rate": 4.999891302071486e-05, + "loss": 0.9712, + "step": 3068 + }, + { + "epoch": 0.0, + "learning_rate": 4.999891228813095e-05, + "loss": 1.1086, + "step": 3069 + }, + { + "epoch": 0.0, + "learning_rate": 4.999891155530026e-05, + "loss": 1.3851, + "step": 3070 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998910822222804e-05, + "loss": 1.1959, + "step": 3071 + }, + { + "epoch": 0.0, + "learning_rate": 4.999891008889856e-05, + "loss": 0.9626, + "step": 3072 + }, + { + "epoch": 0.0, + "learning_rate": 4.999890935532754e-05, + "loss": 1.0227, + "step": 3073 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998908621509736e-05, + "loss": 1.2477, + "step": 3074 + }, + { + "epoch": 0.0, + "learning_rate": 4.999890788744516e-05, + "loss": 1.1234, + "step": 3075 + }, + { + "epoch": 0.0, + "learning_rate": 4.99989071531338e-05, + "loss": 1.2972, + "step": 3076 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998906418575666e-05, + "loss": 1.0932, + "step": 3077 + }, + { + "epoch": 0.0, + "learning_rate": 4.999890568377075e-05, + "loss": 1.2099, + "step": 3078 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998904948719054e-05, + "loss": 1.0578, + "step": 3079 + }, + { + "epoch": 0.0, + "learning_rate": 4.999890421342058e-05, + "loss": 1.0771, + "step": 3080 + }, + { + "epoch": 0.0, + "learning_rate": 4.999890347787533e-05, + "loss": 1.2784, + "step": 3081 + }, + { + "epoch": 0.0, + "learning_rate": 4.99989027420833e-05, + "loss": 1.0203, + "step": 3082 + }, + { + "epoch": 0.0, + "learning_rate": 4.999890200604449e-05, + "loss": 1.3236, + "step": 3083 + }, + { + "epoch": 0.0, + "learning_rate": 4.99989012697589e-05, + "loss": 0.9522, + "step": 3084 + }, + { + "epoch": 0.0, + "learning_rate": 4.999890053322654e-05, + "loss": 0.9891, + "step": 3085 + }, + { + "epoch": 0.0, + "learning_rate": 4.99988997964474e-05, + "loss": 1.0306, + "step": 3086 + }, + { + "epoch": 0.0, + "learning_rate": 4.999889905942147e-05, + "loss": 1.2939, + "step": 3087 + }, + { + "epoch": 0.0, + "learning_rate": 4.999889832214877e-05, + "loss": 1.1802, + "step": 3088 + }, + { + "epoch": 0.0, + "learning_rate": 4.999889758462929e-05, + "loss": 1.0999, + "step": 3089 + }, + { + "epoch": 0.0, + "learning_rate": 4.999889684686303e-05, + "loss": 1.2702, + "step": 3090 + }, + { + "epoch": 0.0, + "learning_rate": 4.999889610884999e-05, + "loss": 1.2539, + "step": 3091 + }, + { + "epoch": 0.0, + "learning_rate": 4.999889537059018e-05, + "loss": 0.94, + "step": 3092 + }, + { + "epoch": 0.0, + "learning_rate": 4.999889463208358e-05, + "loss": 1.1902, + "step": 3093 + }, + { + "epoch": 0.0, + "learning_rate": 4.999889389333021e-05, + "loss": 0.987, + "step": 3094 + }, + { + "epoch": 0.0, + "learning_rate": 4.999889315433006e-05, + "loss": 1.141, + "step": 3095 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998892415083134e-05, + "loss": 1.2143, + "step": 3096 + }, + { + "epoch": 0.0, + "learning_rate": 4.999889167558942e-05, + "loss": 0.8637, + "step": 3097 + }, + { + "epoch": 0.0, + "learning_rate": 4.999889093584893e-05, + "loss": 1.0951, + "step": 3098 + }, + { + "epoch": 0.0, + "learning_rate": 4.999889019586167e-05, + "loss": 1.2597, + "step": 3099 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998889455627625e-05, + "loss": 0.9368, + "step": 3100 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998888715146794e-05, + "loss": 1.5598, + "step": 3101 + }, + { + "epoch": 0.0, + "learning_rate": 4.999888797441919e-05, + "loss": 1.1202, + "step": 3102 + }, + { + "epoch": 0.0, + "learning_rate": 4.999888723344482e-05, + "loss": 1.0475, + "step": 3103 + }, + { + "epoch": 0.0, + "learning_rate": 4.999888649222366e-05, + "loss": 1.0778, + "step": 3104 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998885750755726e-05, + "loss": 1.1118, + "step": 3105 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998885009041005e-05, + "loss": 0.9362, + "step": 3106 + }, + { + "epoch": 0.0, + "learning_rate": 4.999888426707951e-05, + "loss": 1.0847, + "step": 3107 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998883524871244e-05, + "loss": 1.0924, + "step": 3108 + }, + { + "epoch": 0.0, + "learning_rate": 4.999888278241619e-05, + "loss": 0.9675, + "step": 3109 + }, + { + "epoch": 0.0, + "learning_rate": 4.999888203971437e-05, + "loss": 1.0317, + "step": 3110 + }, + { + "epoch": 0.0, + "learning_rate": 4.999888129676575e-05, + "loss": 1.1201, + "step": 3111 + }, + { + "epoch": 0.0, + "learning_rate": 4.999888055357037e-05, + "loss": 1.1086, + "step": 3112 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998879810128205e-05, + "loss": 1.2691, + "step": 3113 + }, + { + "epoch": 0.0, + "learning_rate": 4.999887906643926e-05, + "loss": 1.0789, + "step": 3114 + }, + { + "epoch": 0.0, + "learning_rate": 4.999887832250354e-05, + "loss": 0.8669, + "step": 3115 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998877578321046e-05, + "loss": 0.9003, + "step": 3116 + }, + { + "epoch": 0.0, + "learning_rate": 4.999887683389177e-05, + "loss": 0.9656, + "step": 3117 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998876089215705e-05, + "loss": 0.7094, + "step": 3118 + }, + { + "epoch": 0.0, + "learning_rate": 4.999887534429287e-05, + "loss": 0.9274, + "step": 3119 + }, + { + "epoch": 0.0, + "learning_rate": 4.999887459912326e-05, + "loss": 1.2837, + "step": 3120 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998873853706875e-05, + "loss": 0.894, + "step": 3121 + }, + { + "epoch": 0.0, + "learning_rate": 4.99988731080437e-05, + "loss": 0.8587, + "step": 3122 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998872362133754e-05, + "loss": 1.2183, + "step": 3123 + }, + { + "epoch": 0.0, + "learning_rate": 4.999887161597703e-05, + "loss": 1.1431, + "step": 3124 + }, + { + "epoch": 0.0, + "learning_rate": 4.999887086957352e-05, + "loss": 1.3707, + "step": 3125 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998870122923235e-05, + "loss": 1.0031, + "step": 3126 + }, + { + "epoch": 0.0, + "learning_rate": 4.999886937602618e-05, + "loss": 0.9755, + "step": 3127 + }, + { + "epoch": 0.0, + "learning_rate": 4.999886862888234e-05, + "loss": 1.0886, + "step": 3128 + }, + { + "epoch": 0.0, + "learning_rate": 4.999886788149172e-05, + "loss": 0.8562, + "step": 3129 + }, + { + "epoch": 0.0, + "learning_rate": 4.999886713385432e-05, + "loss": 0.464, + "step": 3130 + }, + { + "epoch": 0.0, + "learning_rate": 4.999886638597015e-05, + "loss": 0.6532, + "step": 3131 + }, + { + "epoch": 0.0, + "learning_rate": 4.99988656378392e-05, + "loss": 1.1652, + "step": 3132 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998864889461466e-05, + "loss": 1.2203, + "step": 3133 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998864140836956e-05, + "loss": 1.025, + "step": 3134 + }, + { + "epoch": 0.0, + "learning_rate": 4.999886339196567e-05, + "loss": 1.2002, + "step": 3135 + }, + { + "epoch": 0.0, + "learning_rate": 4.999886264284761e-05, + "loss": 1.3056, + "step": 3136 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998861893482764e-05, + "loss": 1.0095, + "step": 3137 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998861143871135e-05, + "loss": 1.3763, + "step": 3138 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998860394012734e-05, + "loss": 1.0908, + "step": 3139 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998859643907556e-05, + "loss": 1.0629, + "step": 3140 + }, + { + "epoch": 0.0, + "learning_rate": 4.99988588935556e-05, + "loss": 1.3534, + "step": 3141 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998858142956866e-05, + "loss": 0.9142, + "step": 3142 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998857392111354e-05, + "loss": 0.9733, + "step": 3143 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998856641019064e-05, + "loss": 1.0786, + "step": 3144 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998855889679996e-05, + "loss": 0.9734, + "step": 3145 + }, + { + "epoch": 0.0, + "learning_rate": 4.999885513809415e-05, + "loss": 1.4096, + "step": 3146 + }, + { + "epoch": 0.0, + "learning_rate": 4.999885438626152e-05, + "loss": 1.1417, + "step": 3147 + }, + { + "epoch": 0.0, + "learning_rate": 4.999885363418212e-05, + "loss": 1.0368, + "step": 3148 + }, + { + "epoch": 0.0, + "learning_rate": 4.999885288185593e-05, + "loss": 1.1928, + "step": 3149 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998852129282966e-05, + "loss": 0.9801, + "step": 3150 + }, + { + "epoch": 0.0, + "learning_rate": 4.999885137646324e-05, + "loss": 0.9239, + "step": 3151 + }, + { + "epoch": 0.0, + "learning_rate": 4.999885062339672e-05, + "loss": 0.866, + "step": 3152 + }, + { + "epoch": 0.0, + "learning_rate": 4.999884987008342e-05, + "loss": 0.8846, + "step": 3153 + }, + { + "epoch": 0.0, + "learning_rate": 4.999884911652335e-05, + "loss": 0.8873, + "step": 3154 + }, + { + "epoch": 0.0, + "learning_rate": 4.99988483627165e-05, + "loss": 1.2745, + "step": 3155 + }, + { + "epoch": 0.0, + "learning_rate": 4.999884760866287e-05, + "loss": 1.1213, + "step": 3156 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998846854362465e-05, + "loss": 0.9286, + "step": 3157 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998846099815276e-05, + "loss": 0.4891, + "step": 3158 + }, + { + "epoch": 0.0, + "learning_rate": 4.999884534502132e-05, + "loss": 1.1473, + "step": 3159 + }, + { + "epoch": 0.0, + "learning_rate": 4.999884458998057e-05, + "loss": 0.6707, + "step": 3160 + }, + { + "epoch": 0.0, + "learning_rate": 4.999884383469306e-05, + "loss": 1.0229, + "step": 3161 + }, + { + "epoch": 0.0, + "learning_rate": 4.999884307915875e-05, + "loss": 1.3011, + "step": 3162 + }, + { + "epoch": 0.0, + "learning_rate": 4.999884232337768e-05, + "loss": 0.8993, + "step": 3163 + }, + { + "epoch": 0.0, + "learning_rate": 4.999884156734983e-05, + "loss": 1.1166, + "step": 3164 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998840811075195e-05, + "loss": 1.1444, + "step": 3165 + }, + { + "epoch": 0.0, + "learning_rate": 4.999884005455378e-05, + "loss": 1.1135, + "step": 3166 + }, + { + "epoch": 0.0, + "learning_rate": 4.999883929778559e-05, + "loss": 0.9868, + "step": 3167 + }, + { + "epoch": 0.0, + "learning_rate": 4.999883854077063e-05, + "loss": 1.1363, + "step": 3168 + }, + { + "epoch": 0.0, + "learning_rate": 4.999883778350889e-05, + "loss": 1.2838, + "step": 3169 + }, + { + "epoch": 0.0, + "learning_rate": 4.999883702600037e-05, + "loss": 1.428, + "step": 3170 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998836268245067e-05, + "loss": 1.1863, + "step": 3171 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998835510242993e-05, + "loss": 1.1418, + "step": 3172 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998834751994136e-05, + "loss": 0.9341, + "step": 3173 + }, + { + "epoch": 0.0, + "learning_rate": 4.99988339934985e-05, + "loss": 1.0903, + "step": 3174 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998833234756086e-05, + "loss": 1.192, + "step": 3175 + }, + { + "epoch": 0.0, + "learning_rate": 4.99988324757669e-05, + "loss": 1.1706, + "step": 3176 + }, + { + "epoch": 0.0, + "learning_rate": 4.999883171653093e-05, + "loss": 1.1696, + "step": 3177 + }, + { + "epoch": 0.0, + "learning_rate": 4.999883095704819e-05, + "loss": 1.2754, + "step": 3178 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998830197318665e-05, + "loss": 0.8601, + "step": 3179 + }, + { + "epoch": 0.0, + "learning_rate": 4.999882943734237e-05, + "loss": 1.2475, + "step": 3180 + }, + { + "epoch": 0.0, + "learning_rate": 4.999882867711928e-05, + "loss": 1.006, + "step": 3181 + }, + { + "epoch": 0.0, + "learning_rate": 4.999882791664943e-05, + "loss": 1.1577, + "step": 3182 + }, + { + "epoch": 0.0, + "learning_rate": 4.999882715593279e-05, + "loss": 0.969, + "step": 3183 + }, + { + "epoch": 0.0, + "learning_rate": 4.999882639496938e-05, + "loss": 1.0363, + "step": 3184 + }, + { + "epoch": 0.0, + "learning_rate": 4.999882563375919e-05, + "loss": 1.0267, + "step": 3185 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998824872302216e-05, + "loss": 1.2042, + "step": 3186 + }, + { + "epoch": 0.0, + "learning_rate": 4.999882411059847e-05, + "loss": 1.5013, + "step": 3187 + }, + { + "epoch": 0.0, + "learning_rate": 4.999882334864795e-05, + "loss": 1.2098, + "step": 3188 + }, + { + "epoch": 0.0, + "learning_rate": 4.999882258645065e-05, + "loss": 1.028, + "step": 3189 + }, + { + "epoch": 0.0, + "learning_rate": 4.999882182400657e-05, + "loss": 0.9882, + "step": 3190 + }, + { + "epoch": 0.0, + "learning_rate": 4.999882106131571e-05, + "loss": 1.0897, + "step": 3191 + }, + { + "epoch": 0.0, + "learning_rate": 4.999882029837808e-05, + "loss": 1.2585, + "step": 3192 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998819535193666e-05, + "loss": 1.1727, + "step": 3193 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998818771762465e-05, + "loss": 1.0733, + "step": 3194 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998818008084505e-05, + "loss": 1.1633, + "step": 3195 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998817244159754e-05, + "loss": 1.2635, + "step": 3196 + }, + { + "epoch": 0.0, + "learning_rate": 4.999881647998823e-05, + "loss": 0.9241, + "step": 3197 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998815715569925e-05, + "loss": 1.1062, + "step": 3198 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998814950904846e-05, + "loss": 1.1206, + "step": 3199 + }, + { + "epoch": 0.0, + "learning_rate": 4.999881418599299e-05, + "loss": 1.0057, + "step": 3200 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998813420834356e-05, + "loss": 1.1484, + "step": 3201 + }, + { + "epoch": 0.0, + "learning_rate": 4.999881265542894e-05, + "loss": 1.0997, + "step": 3202 + }, + { + "epoch": 0.0, + "learning_rate": 4.999881188977675e-05, + "loss": 1.0839, + "step": 3203 + }, + { + "epoch": 0.0, + "learning_rate": 4.999881112387778e-05, + "loss": 1.5233, + "step": 3204 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998810357732026e-05, + "loss": 1.1323, + "step": 3205 + }, + { + "epoch": 0.0, + "learning_rate": 4.999880959133951e-05, + "loss": 1.2861, + "step": 3206 + }, + { + "epoch": 0.0, + "learning_rate": 4.999880882470021e-05, + "loss": 0.8339, + "step": 3207 + }, + { + "epoch": 0.0, + "learning_rate": 4.999880805781413e-05, + "loss": 1.2662, + "step": 3208 + }, + { + "epoch": 0.0, + "learning_rate": 4.999880729068127e-05, + "loss": 1.0926, + "step": 3209 + }, + { + "epoch": 0.0, + "learning_rate": 4.999880652330163e-05, + "loss": 0.9723, + "step": 3210 + }, + { + "epoch": 0.0, + "learning_rate": 4.999880575567522e-05, + "loss": 0.9267, + "step": 3211 + }, + { + "epoch": 0.0, + "learning_rate": 4.999880498780203e-05, + "loss": 1.014, + "step": 3212 + }, + { + "epoch": 0.0, + "learning_rate": 4.999880421968206e-05, + "loss": 1.7897, + "step": 3213 + }, + { + "epoch": 0.0, + "learning_rate": 4.999880345131531e-05, + "loss": 0.9724, + "step": 3214 + }, + { + "epoch": 0.0, + "learning_rate": 4.99988026827018e-05, + "loss": 1.209, + "step": 3215 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998801913841494e-05, + "loss": 0.9055, + "step": 3216 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998801144734406e-05, + "loss": 1.1476, + "step": 3217 + }, + { + "epoch": 0.0, + "learning_rate": 4.999880037538055e-05, + "loss": 1.0604, + "step": 3218 + }, + { + "epoch": 0.0, + "learning_rate": 4.999879960577992e-05, + "loss": 1.1971, + "step": 3219 + }, + { + "epoch": 0.0, + "learning_rate": 4.999879883593251e-05, + "loss": 2.9813, + "step": 3220 + }, + { + "epoch": 0.0, + "learning_rate": 4.999879806583832e-05, + "loss": 6.0157, + "step": 3221 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998797295497356e-05, + "loss": 5.9594, + "step": 3222 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998796524909606e-05, + "loss": 2.482, + "step": 3223 + }, + { + "epoch": 0.0, + "learning_rate": 4.999879575407509e-05, + "loss": 1.1952, + "step": 3224 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998794982993794e-05, + "loss": 1.3123, + "step": 3225 + }, + { + "epoch": 0.0, + "learning_rate": 4.999879421166571e-05, + "loss": 1.3332, + "step": 3226 + }, + { + "epoch": 0.0, + "learning_rate": 4.999879344009085e-05, + "loss": 1.144, + "step": 3227 + }, + { + "epoch": 0.0, + "learning_rate": 4.999879266826922e-05, + "loss": 1.2806, + "step": 3228 + }, + { + "epoch": 0.0, + "learning_rate": 4.999879189620081e-05, + "loss": 1.1283, + "step": 3229 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998791123885624e-05, + "loss": 1.2873, + "step": 3230 + }, + { + "epoch": 0.0, + "learning_rate": 4.999879035132366e-05, + "loss": 1.1551, + "step": 3231 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998789578514914e-05, + "loss": 1.0223, + "step": 3232 + }, + { + "epoch": 0.0, + "learning_rate": 4.99987888054594e-05, + "loss": 1.0714, + "step": 3233 + }, + { + "epoch": 0.0, + "learning_rate": 4.99987880321571e-05, + "loss": 1.1004, + "step": 3234 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998787258608026e-05, + "loss": 1.0357, + "step": 3235 + }, + { + "epoch": 0.0, + "learning_rate": 4.999878648481218e-05, + "loss": 0.8907, + "step": 3236 + }, + { + "epoch": 0.0, + "learning_rate": 4.999878571076955e-05, + "loss": 0.9581, + "step": 3237 + }, + { + "epoch": 0.0, + "learning_rate": 4.999878493648014e-05, + "loss": 1.0673, + "step": 3238 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998784161943954e-05, + "loss": 1.2363, + "step": 3239 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998783387160986e-05, + "loss": 1.3043, + "step": 3240 + }, + { + "epoch": 0.0, + "learning_rate": 4.999878261213125e-05, + "loss": 0.9974, + "step": 3241 + }, + { + "epoch": 0.0, + "learning_rate": 4.999878183685473e-05, + "loss": 0.7341, + "step": 3242 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998781061331434e-05, + "loss": 0.7755, + "step": 3243 + }, + { + "epoch": 0.0, + "learning_rate": 4.999878028556136e-05, + "loss": 0.951, + "step": 3244 + }, + { + "epoch": 0.0, + "learning_rate": 4.999877950954451e-05, + "loss": 0.7519, + "step": 3245 + }, + { + "epoch": 0.0, + "learning_rate": 4.999877873328089e-05, + "loss": 1.0954, + "step": 3246 + }, + { + "epoch": 0.0, + "learning_rate": 4.999877795677048e-05, + "loss": 1.0842, + "step": 3247 + }, + { + "epoch": 0.0, + "learning_rate": 4.99987771800133e-05, + "loss": 1.1799, + "step": 3248 + }, + { + "epoch": 0.0, + "learning_rate": 4.999877640300934e-05, + "loss": 1.0409, + "step": 3249 + }, + { + "epoch": 0.0, + "learning_rate": 4.999877562575861e-05, + "loss": 1.2298, + "step": 3250 + }, + { + "epoch": 0.0, + "learning_rate": 4.999877484826109e-05, + "loss": 1.1191, + "step": 3251 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998774070516796e-05, + "loss": 0.9785, + "step": 3252 + }, + { + "epoch": 0.0, + "learning_rate": 4.999877329252573e-05, + "loss": 1.2481, + "step": 3253 + }, + { + "epoch": 0.0, + "learning_rate": 4.999877251428788e-05, + "loss": 1.4826, + "step": 3254 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998771735803264e-05, + "loss": 1.1167, + "step": 3255 + }, + { + "epoch": 0.0, + "learning_rate": 4.999877095707186e-05, + "loss": 1.1116, + "step": 3256 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998770178093676e-05, + "loss": 0.9439, + "step": 3257 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998769398868725e-05, + "loss": 1.2351, + "step": 3258 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998768619396995e-05, + "loss": 1.0686, + "step": 3259 + }, + { + "epoch": 0.0, + "learning_rate": 4.999876783967848e-05, + "loss": 1.5349, + "step": 3260 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998767059713196e-05, + "loss": 1.1874, + "step": 3261 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998766279501133e-05, + "loss": 1.3539, + "step": 3262 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998765499042286e-05, + "loss": 1.2347, + "step": 3263 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998764718336673e-05, + "loss": 1.2608, + "step": 3264 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998763937384277e-05, + "loss": 1.4032, + "step": 3265 + }, + { + "epoch": 0.0, + "learning_rate": 4.99987631561851e-05, + "loss": 1.3431, + "step": 3266 + }, + { + "epoch": 0.0, + "learning_rate": 4.999876237473915e-05, + "loss": 1.027, + "step": 3267 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998761593046425e-05, + "loss": 1.5965, + "step": 3268 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998760811106916e-05, + "loss": 1.0208, + "step": 3269 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998760028920636e-05, + "loss": 1.1091, + "step": 3270 + }, + { + "epoch": 0.0, + "learning_rate": 4.999875924648758e-05, + "loss": 1.2286, + "step": 3271 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998758463807735e-05, + "loss": 0.8837, + "step": 3272 + }, + { + "epoch": 0.0, + "learning_rate": 4.999875768088113e-05, + "loss": 0.7764, + "step": 3273 + }, + { + "epoch": 0.0, + "learning_rate": 4.999875689770773e-05, + "loss": 0.7657, + "step": 3274 + }, + { + "epoch": 0.0, + "learning_rate": 4.999875611428756e-05, + "loss": 0.9198, + "step": 3275 + }, + { + "epoch": 0.0, + "learning_rate": 4.999875533062062e-05, + "loss": 1.1006, + "step": 3276 + }, + { + "epoch": 0.0, + "learning_rate": 4.999875454670689e-05, + "loss": 1.0076, + "step": 3277 + }, + { + "epoch": 0.0, + "learning_rate": 4.999875376254639e-05, + "loss": 1.3636, + "step": 3278 + }, + { + "epoch": 0.0, + "learning_rate": 4.999875297813912e-05, + "loss": 1.1701, + "step": 3279 + }, + { + "epoch": 0.0, + "learning_rate": 4.999875219348506e-05, + "loss": 1.1995, + "step": 3280 + }, + { + "epoch": 0.0, + "learning_rate": 4.999875140858423e-05, + "loss": 0.9329, + "step": 3281 + }, + { + "epoch": 0.0, + "learning_rate": 4.999875062343662e-05, + "loss": 1.2552, + "step": 3282 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998749838042235e-05, + "loss": 1.1106, + "step": 3283 + }, + { + "epoch": 0.0, + "learning_rate": 4.999874905240107e-05, + "loss": 1.3376, + "step": 3284 + }, + { + "epoch": 0.0, + "learning_rate": 4.999874826651313e-05, + "loss": 1.1603, + "step": 3285 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998747480378413e-05, + "loss": 1.4663, + "step": 3286 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998746693996915e-05, + "loss": 1.3069, + "step": 3287 + }, + { + "epoch": 0.0, + "learning_rate": 4.999874590736865e-05, + "loss": 1.1898, + "step": 3288 + }, + { + "epoch": 0.0, + "learning_rate": 4.99987451204936e-05, + "loss": 1.2227, + "step": 3289 + }, + { + "epoch": 0.0, + "learning_rate": 4.999874433337177e-05, + "loss": 1.0331, + "step": 3290 + }, + { + "epoch": 0.0, + "learning_rate": 4.999874354600317e-05, + "loss": 1.115, + "step": 3291 + }, + { + "epoch": 0.0, + "learning_rate": 4.999874275838779e-05, + "loss": 1.0944, + "step": 3292 + }, + { + "epoch": 0.0, + "learning_rate": 4.999874197052563e-05, + "loss": 1.2408, + "step": 3293 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998741182416695e-05, + "loss": 1.0281, + "step": 3294 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998740394060987e-05, + "loss": 1.1958, + "step": 3295 + }, + { + "epoch": 0.0, + "learning_rate": 4.99987396054585e-05, + "loss": 0.6619, + "step": 3296 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998738816609235e-05, + "loss": 1.2319, + "step": 3297 + }, + { + "epoch": 0.0, + "learning_rate": 4.999873802751319e-05, + "loss": 1.0441, + "step": 3298 + }, + { + "epoch": 0.0, + "learning_rate": 4.999873723817037e-05, + "loss": 1.2682, + "step": 3299 + }, + { + "epoch": 0.0, + "learning_rate": 4.999873644858077e-05, + "loss": 1.1649, + "step": 3300 + }, + { + "epoch": 0.0, + "learning_rate": 4.99987356587444e-05, + "loss": 0.9821, + "step": 3301 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998734868661255e-05, + "loss": 0.9071, + "step": 3302 + }, + { + "epoch": 0.0, + "learning_rate": 4.999873407833133e-05, + "loss": 0.9467, + "step": 3303 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998733287754626e-05, + "loss": 1.3175, + "step": 3304 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998732496931144e-05, + "loss": 1.2642, + "step": 3305 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998731705860884e-05, + "loss": 1.2276, + "step": 3306 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998730914543846e-05, + "loss": 1.0944, + "step": 3307 + }, + { + "epoch": 0.0, + "learning_rate": 4.999873012298004e-05, + "loss": 0.7492, + "step": 3308 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998729331169444e-05, + "loss": 1.5178, + "step": 3309 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998728539112086e-05, + "loss": 1.1938, + "step": 3310 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998727746807936e-05, + "loss": 1.2197, + "step": 3311 + }, + { + "epoch": 0.0, + "learning_rate": 4.999872695425702e-05, + "loss": 1.0732, + "step": 3312 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998726161459324e-05, + "loss": 0.9876, + "step": 3313 + }, + { + "epoch": 0.0, + "learning_rate": 4.999872536841485e-05, + "loss": 0.8108, + "step": 3314 + }, + { + "epoch": 0.0, + "learning_rate": 4.99987245751236e-05, + "loss": 1.0706, + "step": 3315 + }, + { + "epoch": 0.0, + "learning_rate": 4.999872378158558e-05, + "loss": 1.2819, + "step": 3316 + }, + { + "epoch": 0.0, + "learning_rate": 4.999872298780077e-05, + "loss": 1.1775, + "step": 3317 + }, + { + "epoch": 0.0, + "learning_rate": 4.999872219376919e-05, + "loss": 1.0579, + "step": 3318 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998721399490835e-05, + "loss": 1.0045, + "step": 3319 + }, + { + "epoch": 0.0, + "learning_rate": 4.99987206049657e-05, + "loss": 1.0541, + "step": 3320 + }, + { + "epoch": 0.0, + "learning_rate": 4.999871981019378e-05, + "loss": 1.5234, + "step": 3321 + }, + { + "epoch": 0.0, + "learning_rate": 4.99987190151751e-05, + "loss": 1.2823, + "step": 3322 + }, + { + "epoch": 0.0, + "learning_rate": 4.999871821990964e-05, + "loss": 1.5049, + "step": 3323 + }, + { + "epoch": 0.0, + "learning_rate": 4.999871742439739e-05, + "loss": 1.2312, + "step": 3324 + }, + { + "epoch": 0.0, + "learning_rate": 4.999871662863837e-05, + "loss": 1.2007, + "step": 3325 + }, + { + "epoch": 0.0, + "learning_rate": 4.999871583263258e-05, + "loss": 1.0416, + "step": 3326 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998715036380005e-05, + "loss": 1.4341, + "step": 3327 + }, + { + "epoch": 0.0, + "learning_rate": 4.999871423988066e-05, + "loss": 0.9972, + "step": 3328 + }, + { + "epoch": 0.0, + "learning_rate": 4.999871344313453e-05, + "loss": 1.1302, + "step": 3329 + }, + { + "epoch": 0.0, + "learning_rate": 4.999871264614163e-05, + "loss": 1.2175, + "step": 3330 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998711848901945e-05, + "loss": 0.804, + "step": 3331 + }, + { + "epoch": 0.0, + "learning_rate": 4.999871105141549e-05, + "loss": 0.8429, + "step": 3332 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998710253682255e-05, + "loss": 0.966, + "step": 3333 + }, + { + "epoch": 0.0, + "learning_rate": 4.999870945570225e-05, + "loss": 1.0244, + "step": 3334 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998708657475466e-05, + "loss": 1.2352, + "step": 3335 + }, + { + "epoch": 0.0, + "learning_rate": 4.99987078590019e-05, + "loss": 1.2682, + "step": 3336 + }, + { + "epoch": 0.0, + "learning_rate": 4.999870706028156e-05, + "loss": 1.2687, + "step": 3337 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998706261314444e-05, + "loss": 1.1056, + "step": 3338 + }, + { + "epoch": 0.0, + "learning_rate": 4.999870546210055e-05, + "loss": 0.7855, + "step": 3339 + }, + { + "epoch": 0.0, + "learning_rate": 4.999870466263988e-05, + "loss": 1.506, + "step": 3340 + }, + { + "epoch": 0.0, + "learning_rate": 4.999870386293244e-05, + "loss": 1.3185, + "step": 3341 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998703062978215e-05, + "loss": 1.0603, + "step": 3342 + }, + { + "epoch": 0.0, + "learning_rate": 4.999870226277721e-05, + "loss": 0.8978, + "step": 3343 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998701462329437e-05, + "loss": 0.7992, + "step": 3344 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998700661634884e-05, + "loss": 1.1016, + "step": 3345 + }, + { + "epoch": 0.0, + "learning_rate": 4.999869986069355e-05, + "loss": 1.0676, + "step": 3346 + }, + { + "epoch": 0.0, + "learning_rate": 4.999869905950545e-05, + "loss": 1.3322, + "step": 3347 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998698258070564e-05, + "loss": 0.8448, + "step": 3348 + }, + { + "epoch": 0.0, + "learning_rate": 4.999869745638891e-05, + "loss": 1.2603, + "step": 3349 + }, + { + "epoch": 0.0, + "learning_rate": 4.999869665446047e-05, + "loss": 2.0377, + "step": 3350 + }, + { + "epoch": 0.0, + "learning_rate": 4.999869585228526e-05, + "loss": 1.1663, + "step": 3351 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998695049863263e-05, + "loss": 1.3606, + "step": 3352 + }, + { + "epoch": 0.0, + "learning_rate": 4.99986942471945e-05, + "loss": 1.2747, + "step": 3353 + }, + { + "epoch": 0.0, + "learning_rate": 4.999869344427896e-05, + "loss": 1.1279, + "step": 3354 + }, + { + "epoch": 0.0, + "learning_rate": 4.999869264111664e-05, + "loss": 1.0564, + "step": 3355 + }, + { + "epoch": 0.0, + "learning_rate": 4.999869183770754e-05, + "loss": 0.5569, + "step": 3356 + }, + { + "epoch": 0.0, + "learning_rate": 4.999869103405167e-05, + "loss": 0.9706, + "step": 3357 + }, + { + "epoch": 0.0, + "learning_rate": 4.999869023014903e-05, + "loss": 0.992, + "step": 3358 + }, + { + "epoch": 0.0, + "learning_rate": 4.99986894259996e-05, + "loss": 0.9301, + "step": 3359 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998688621603395e-05, + "loss": 1.0552, + "step": 3360 + }, + { + "epoch": 0.0, + "learning_rate": 4.999868781696042e-05, + "loss": 1.2832, + "step": 3361 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998687012070664e-05, + "loss": 1.0131, + "step": 3362 + }, + { + "epoch": 0.0, + "learning_rate": 4.999868620693413e-05, + "loss": 1.0057, + "step": 3363 + }, + { + "epoch": 0.0, + "learning_rate": 4.999868540155083e-05, + "loss": 0.7254, + "step": 3364 + }, + { + "epoch": 0.0, + "learning_rate": 4.999868459592074e-05, + "loss": 0.9847, + "step": 3365 + }, + { + "epoch": 0.0, + "learning_rate": 4.999868379004388e-05, + "loss": 1.0289, + "step": 3366 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998682983920245e-05, + "loss": 1.1107, + "step": 3367 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998682177549826e-05, + "loss": 0.4372, + "step": 3368 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998681370932635e-05, + "loss": 0.9341, + "step": 3369 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998680564068674e-05, + "loss": 1.035, + "step": 3370 + }, + { + "epoch": 0.0, + "learning_rate": 4.999867975695793e-05, + "loss": 1.0862, + "step": 3371 + }, + { + "epoch": 0.0, + "learning_rate": 4.999867894960041e-05, + "loss": 1.0916, + "step": 3372 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998678141996115e-05, + "loss": 1.4372, + "step": 3373 + }, + { + "epoch": 0.0, + "learning_rate": 4.999867733414504e-05, + "loss": 1.4361, + "step": 3374 + }, + { + "epoch": 0.0, + "learning_rate": 4.99986765260472e-05, + "loss": 1.2444, + "step": 3375 + }, + { + "epoch": 0.0, + "learning_rate": 4.999867571770257e-05, + "loss": 1.3493, + "step": 3376 + }, + { + "epoch": 0.0, + "learning_rate": 4.999867490911117e-05, + "loss": 1.1362, + "step": 3377 + }, + { + "epoch": 0.0, + "learning_rate": 4.999867410027299e-05, + "loss": 1.1176, + "step": 3378 + }, + { + "epoch": 0.0, + "learning_rate": 4.999867329118804e-05, + "loss": 1.4117, + "step": 3379 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998672481856305e-05, + "loss": 1.6114, + "step": 3380 + }, + { + "epoch": 0.0, + "learning_rate": 4.99986716722778e-05, + "loss": 1.5723, + "step": 3381 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998670862452515e-05, + "loss": 1.3304, + "step": 3382 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998670052380454e-05, + "loss": 1.0998, + "step": 3383 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998669242061614e-05, + "loss": 1.1452, + "step": 3384 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998668431496004e-05, + "loss": 1.1085, + "step": 3385 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998667620683615e-05, + "loss": 1.2093, + "step": 3386 + }, + { + "epoch": 0.0, + "learning_rate": 4.999866680962445e-05, + "loss": 1.1872, + "step": 3387 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998665998318504e-05, + "loss": 1.1485, + "step": 3388 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998665186765795e-05, + "loss": 0.9688, + "step": 3389 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998664374966294e-05, + "loss": 1.3017, + "step": 3390 + }, + { + "epoch": 0.0, + "learning_rate": 4.999866356292003e-05, + "loss": 1.1592, + "step": 3391 + }, + { + "epoch": 0.0, + "learning_rate": 4.999866275062698e-05, + "loss": 1.1275, + "step": 3392 + }, + { + "epoch": 0.0, + "learning_rate": 4.999866193808716e-05, + "loss": 0.9936, + "step": 3393 + }, + { + "epoch": 0.0, + "learning_rate": 4.999866112530056e-05, + "loss": 1.0442, + "step": 3394 + }, + { + "epoch": 0.0, + "learning_rate": 4.999866031226719e-05, + "loss": 0.9005, + "step": 3395 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998659498987035e-05, + "loss": 0.7956, + "step": 3396 + }, + { + "epoch": 0.0, + "learning_rate": 4.999865868546011e-05, + "loss": 0.771, + "step": 3397 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998657871686406e-05, + "loss": 0.7461, + "step": 3398 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998657057665924e-05, + "loss": 0.6217, + "step": 3399 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998656243398664e-05, + "loss": 0.5203, + "step": 3400 + }, + { + "epoch": 0.0, + "learning_rate": 4.999865542888463e-05, + "loss": 0.596, + "step": 3401 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998654614123824e-05, + "loss": 0.6091, + "step": 3402 + }, + { + "epoch": 0.0, + "learning_rate": 4.999865379911624e-05, + "loss": 0.6239, + "step": 3403 + }, + { + "epoch": 0.0, + "learning_rate": 4.999865298386188e-05, + "loss": 0.5065, + "step": 3404 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998652168360736e-05, + "loss": 0.6384, + "step": 3405 + }, + { + "epoch": 0.0, + "learning_rate": 4.999865135261282e-05, + "loss": 0.645, + "step": 3406 + }, + { + "epoch": 0.0, + "learning_rate": 4.999865053661814e-05, + "loss": 1.1136, + "step": 3407 + }, + { + "epoch": 0.0, + "learning_rate": 4.999864972037667e-05, + "loss": 0.9981, + "step": 3408 + }, + { + "epoch": 0.0, + "learning_rate": 4.999864890388843e-05, + "loss": 1.2167, + "step": 3409 + }, + { + "epoch": 0.0, + "learning_rate": 4.999864808715341e-05, + "loss": 1.2866, + "step": 3410 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998647270171616e-05, + "loss": 1.0734, + "step": 3411 + }, + { + "epoch": 0.0, + "learning_rate": 4.999864645294305e-05, + "loss": 1.1872, + "step": 3412 + }, + { + "epoch": 0.0, + "learning_rate": 4.99986456354677e-05, + "loss": 1.9395, + "step": 3413 + }, + { + "epoch": 0.0, + "learning_rate": 4.999864481774558e-05, + "loss": 1.3458, + "step": 3414 + }, + { + "epoch": 0.0, + "learning_rate": 4.999864399977668e-05, + "loss": 1.2487, + "step": 3415 + }, + { + "epoch": 0.0, + "learning_rate": 4.999864318156101e-05, + "loss": 1.4208, + "step": 3416 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998642363098556e-05, + "loss": 1.1311, + "step": 3417 + }, + { + "epoch": 0.0, + "learning_rate": 4.999864154438933e-05, + "loss": 1.2627, + "step": 3418 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998640725433326e-05, + "loss": 1.3189, + "step": 3419 + }, + { + "epoch": 0.0, + "learning_rate": 4.999863990623055e-05, + "loss": 1.2433, + "step": 3420 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998639086781e-05, + "loss": 1.1867, + "step": 3421 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998638267084664e-05, + "loss": 0.8289, + "step": 3422 + }, + { + "epoch": 0.0, + "learning_rate": 4.999863744714156e-05, + "loss": 1.4112, + "step": 3423 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998636626951674e-05, + "loss": 1.2278, + "step": 3424 + }, + { + "epoch": 0.0, + "learning_rate": 4.999863580651501e-05, + "loss": 0.9466, + "step": 3425 + }, + { + "epoch": 0.0, + "learning_rate": 4.999863498583158e-05, + "loss": 1.1715, + "step": 3426 + }, + { + "epoch": 0.0, + "learning_rate": 4.999863416490137e-05, + "loss": 1.4013, + "step": 3427 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998633343724374e-05, + "loss": 1.0322, + "step": 3428 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998632522300614e-05, + "loss": 1.1111, + "step": 3429 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998631700630076e-05, + "loss": 0.8467, + "step": 3430 + }, + { + "epoch": 0.0, + "learning_rate": 4.999863087871276e-05, + "loss": 1.0402, + "step": 3431 + }, + { + "epoch": 0.0, + "learning_rate": 4.999863005654867e-05, + "loss": 0.9011, + "step": 3432 + }, + { + "epoch": 0.0, + "learning_rate": 4.999862923413781e-05, + "loss": 1.1107, + "step": 3433 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998628411480165e-05, + "loss": 0.7305, + "step": 3434 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998627588575744e-05, + "loss": 0.5344, + "step": 3435 + }, + { + "epoch": 0.0, + "learning_rate": 4.999862676542455e-05, + "loss": 0.5887, + "step": 3436 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998625942026575e-05, + "loss": 0.53, + "step": 3437 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998625118381834e-05, + "loss": 0.6023, + "step": 3438 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998624294490315e-05, + "loss": 0.5898, + "step": 3439 + }, + { + "epoch": 0.0, + "learning_rate": 4.999862347035201e-05, + "loss": 0.5584, + "step": 3440 + }, + { + "epoch": 0.0, + "learning_rate": 4.999862264596694e-05, + "loss": 0.5397, + "step": 3441 + }, + { + "epoch": 0.0, + "learning_rate": 4.999862182133509e-05, + "loss": 0.732, + "step": 3442 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998620996456465e-05, + "loss": 0.588, + "step": 3443 + }, + { + "epoch": 0.0, + "learning_rate": 4.999862017133106e-05, + "loss": 0.5927, + "step": 3444 + }, + { + "epoch": 0.0, + "learning_rate": 4.999861934595889e-05, + "loss": 0.5751, + "step": 3445 + }, + { + "epoch": 0.0, + "learning_rate": 4.999861852033994e-05, + "loss": 0.5968, + "step": 3446 + }, + { + "epoch": 0.0, + "learning_rate": 4.99986176944742e-05, + "loss": 0.6897, + "step": 3447 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998616868361694e-05, + "loss": 0.4709, + "step": 3448 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998616042002415e-05, + "loss": 0.5125, + "step": 3449 + }, + { + "epoch": 0.0, + "learning_rate": 4.999861521539636e-05, + "loss": 0.4163, + "step": 3450 + }, + { + "epoch": 0.0, + "learning_rate": 4.999861438854353e-05, + "loss": 0.4144, + "step": 3451 + }, + { + "epoch": 0.0, + "learning_rate": 4.999861356144392e-05, + "loss": 0.4661, + "step": 3452 + }, + { + "epoch": 0.0, + "learning_rate": 4.999861273409754e-05, + "loss": 0.3836, + "step": 3453 + }, + { + "epoch": 0.0, + "learning_rate": 4.999861190650438e-05, + "loss": 0.3678, + "step": 3454 + }, + { + "epoch": 0.0, + "learning_rate": 4.999861107866444e-05, + "loss": 0.4695, + "step": 3455 + }, + { + "epoch": 0.0, + "learning_rate": 4.999861025057773e-05, + "loss": 0.3993, + "step": 3456 + }, + { + "epoch": 0.0, + "learning_rate": 4.999860942224425e-05, + "loss": 0.3604, + "step": 3457 + }, + { + "epoch": 0.0, + "learning_rate": 4.999860859366398e-05, + "loss": 0.4166, + "step": 3458 + }, + { + "epoch": 0.0, + "learning_rate": 4.999860776483695e-05, + "loss": 0.4779, + "step": 3459 + }, + { + "epoch": 0.0, + "learning_rate": 4.999860693576313e-05, + "loss": 0.4345, + "step": 3460 + }, + { + "epoch": 0.0, + "learning_rate": 4.999860610644253e-05, + "loss": 0.4439, + "step": 3461 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998605276875175e-05, + "loss": 0.3738, + "step": 3462 + }, + { + "epoch": 0.0, + "learning_rate": 4.999860444706103e-05, + "loss": 0.5384, + "step": 3463 + }, + { + "epoch": 0.0, + "learning_rate": 4.999860361700012e-05, + "loss": 1.3531, + "step": 3464 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998602786692426e-05, + "loss": 1.1958, + "step": 3465 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998601956137955e-05, + "loss": 1.3425, + "step": 3466 + }, + { + "epoch": 0.0, + "learning_rate": 4.999860112533671e-05, + "loss": 1.3543, + "step": 3467 + }, + { + "epoch": 0.0, + "learning_rate": 4.999860029428869e-05, + "loss": 0.9571, + "step": 3468 + }, + { + "epoch": 0.0, + "learning_rate": 4.99985994629939e-05, + "loss": 1.2027, + "step": 3469 + }, + { + "epoch": 0.0, + "learning_rate": 4.999859863145233e-05, + "loss": 1.2444, + "step": 3470 + }, + { + "epoch": 0.0, + "learning_rate": 4.999859779966398e-05, + "loss": 1.1483, + "step": 3471 + }, + { + "epoch": 0.0, + "learning_rate": 4.999859696762886e-05, + "loss": 1.2692, + "step": 3472 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998596135346956e-05, + "loss": 1.1072, + "step": 3473 + }, + { + "epoch": 0.0, + "learning_rate": 4.999859530281828e-05, + "loss": 1.0419, + "step": 3474 + }, + { + "epoch": 0.0, + "learning_rate": 4.999859447004284e-05, + "loss": 1.103, + "step": 3475 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998593637020615e-05, + "loss": 1.3038, + "step": 3476 + }, + { + "epoch": 0.0, + "learning_rate": 4.999859280375161e-05, + "loss": 1.1934, + "step": 3477 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998591970235836e-05, + "loss": 1.0241, + "step": 3478 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998591136473286e-05, + "loss": 1.088, + "step": 3479 + }, + { + "epoch": 0.0, + "learning_rate": 4.999859030246395e-05, + "loss": 1.1071, + "step": 3480 + }, + { + "epoch": 0.0, + "learning_rate": 4.999858946820786e-05, + "loss": 1.1374, + "step": 3481 + }, + { + "epoch": 0.0, + "learning_rate": 4.999858863370498e-05, + "loss": 0.9614, + "step": 3482 + }, + { + "epoch": 0.0, + "learning_rate": 4.999858779895532e-05, + "loss": 1.1881, + "step": 3483 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998586963958894e-05, + "loss": 1.2886, + "step": 3484 + }, + { + "epoch": 0.0, + "learning_rate": 4.999858612871569e-05, + "loss": 0.7944, + "step": 3485 + }, + { + "epoch": 0.0, + "learning_rate": 4.999858529322571e-05, + "loss": 1.3246, + "step": 3486 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998584457488955e-05, + "loss": 1.3053, + "step": 3487 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998583621505424e-05, + "loss": 1.3665, + "step": 3488 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998582785275114e-05, + "loss": 1.2066, + "step": 3489 + }, + { + "epoch": 0.0, + "learning_rate": 4.999858194879804e-05, + "loss": 1.1096, + "step": 3490 + }, + { + "epoch": 0.0, + "learning_rate": 4.999858111207418e-05, + "loss": 1.3171, + "step": 3491 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998580275103546e-05, + "loss": 0.9422, + "step": 3492 + }, + { + "epoch": 0.0, + "learning_rate": 4.999857943788614e-05, + "loss": 1.0387, + "step": 3493 + }, + { + "epoch": 0.0, + "learning_rate": 4.999857860042195e-05, + "loss": 1.2342, + "step": 3494 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998577762711e-05, + "loss": 1.0393, + "step": 3495 + }, + { + "epoch": 0.0, + "learning_rate": 4.999857692475326e-05, + "loss": 0.9846, + "step": 3496 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998576086548756e-05, + "loss": 1.7033, + "step": 3497 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998575248097466e-05, + "loss": 1.2261, + "step": 3498 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998574409399404e-05, + "loss": 0.8879, + "step": 3499 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998573570454564e-05, + "loss": 0.8896, + "step": 3500 + }, + { + "epoch": 0.0, + "eval_loss": 1.0793774127960205, + "eval_runtime": 85.2881, + "eval_samples_per_second": 16.239, + "eval_steps_per_second": 4.069, + "step": 3500 + }, + { + "epoch": 0.0, + "learning_rate": 4.999857273126296e-05, + "loss": 0.9776, + "step": 3501 + }, + { + "epoch": 0.0, + "learning_rate": 4.999857189182457e-05, + "loss": 1.1962, + "step": 3502 + }, + { + "epoch": 0.0, + "learning_rate": 4.999857105213941e-05, + "loss": 0.9708, + "step": 3503 + }, + { + "epoch": 0.0, + "learning_rate": 4.999857021220747e-05, + "loss": 0.9686, + "step": 3504 + }, + { + "epoch": 0.0, + "learning_rate": 4.999856937202876e-05, + "loss": 1.0573, + "step": 3505 + }, + { + "epoch": 0.0, + "learning_rate": 4.999856853160327e-05, + "loss": 1.1712, + "step": 3506 + }, + { + "epoch": 0.0, + "learning_rate": 4.999856769093101e-05, + "loss": 1.1592, + "step": 3507 + }, + { + "epoch": 0.0, + "learning_rate": 4.999856685001196e-05, + "loss": 0.8742, + "step": 3508 + }, + { + "epoch": 0.0, + "learning_rate": 4.999856600884615e-05, + "loss": 0.9963, + "step": 3509 + }, + { + "epoch": 0.0, + "learning_rate": 4.999856516743356e-05, + "loss": 1.009, + "step": 3510 + }, + { + "epoch": 0.0, + "learning_rate": 4.99985643257742e-05, + "loss": 1.2888, + "step": 3511 + }, + { + "epoch": 0.0, + "learning_rate": 4.999856348386806e-05, + "loss": 1.0399, + "step": 3512 + }, + { + "epoch": 0.0, + "learning_rate": 4.999856264171514e-05, + "loss": 1.138, + "step": 3513 + }, + { + "epoch": 0.0, + "learning_rate": 4.999856179931545e-05, + "loss": 0.9903, + "step": 3514 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998560956668985e-05, + "loss": 0.8786, + "step": 3515 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998560113775746e-05, + "loss": 1.2258, + "step": 3516 + }, + { + "epoch": 0.0, + "learning_rate": 4.999855927063573e-05, + "loss": 1.2219, + "step": 3517 + }, + { + "epoch": 0.0, + "learning_rate": 4.999855842724893e-05, + "loss": 1.0808, + "step": 3518 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998557583615365e-05, + "loss": 1.2123, + "step": 3519 + }, + { + "epoch": 0.0, + "learning_rate": 4.999855673973503e-05, + "loss": 1.3638, + "step": 3520 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998555895607904e-05, + "loss": 1.071, + "step": 3521 + }, + { + "epoch": 0.0, + "learning_rate": 4.999855505123402e-05, + "loss": 0.5962, + "step": 3522 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998554206613345e-05, + "loss": 0.9059, + "step": 3523 + }, + { + "epoch": 0.0, + "learning_rate": 4.99985533617459e-05, + "loss": 0.9626, + "step": 3524 + }, + { + "epoch": 0.0, + "learning_rate": 4.999855251663169e-05, + "loss": 1.3906, + "step": 3525 + }, + { + "epoch": 0.0, + "learning_rate": 4.999855167127069e-05, + "loss": 1.3815, + "step": 3526 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998550825662925e-05, + "loss": 1.3475, + "step": 3527 + }, + { + "epoch": 0.0, + "learning_rate": 4.999854997980838e-05, + "loss": 1.2545, + "step": 3528 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998549133707064e-05, + "loss": 1.2959, + "step": 3529 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998548287358974e-05, + "loss": 0.9743, + "step": 3530 + }, + { + "epoch": 0.0, + "learning_rate": 4.99985474407641e-05, + "loss": 1.2155, + "step": 3531 + }, + { + "epoch": 0.0, + "learning_rate": 4.999854659392245e-05, + "loss": 1.107, + "step": 3532 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998545746834034e-05, + "loss": 1.0236, + "step": 3533 + }, + { + "epoch": 0.0, + "learning_rate": 4.999854489949884e-05, + "loss": 1.0437, + "step": 3534 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998544051916865e-05, + "loss": 1.1194, + "step": 3535 + }, + { + "epoch": 0.0, + "learning_rate": 4.999854320408812e-05, + "loss": 1.0564, + "step": 3536 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998542356012604e-05, + "loss": 1.0349, + "step": 3537 + }, + { + "epoch": 0.0, + "learning_rate": 4.999854150769031e-05, + "loss": 1.2829, + "step": 3538 + }, + { + "epoch": 0.0, + "learning_rate": 4.999854065912124e-05, + "loss": 0.9777, + "step": 3539 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998539810305394e-05, + "loss": 1.2097, + "step": 3540 + }, + { + "epoch": 0.0, + "learning_rate": 4.999853896124277e-05, + "loss": 0.8308, + "step": 3541 + }, + { + "epoch": 0.0, + "learning_rate": 4.999853811193338e-05, + "loss": 0.91, + "step": 3542 + }, + { + "epoch": 0.0, + "learning_rate": 4.999853726237721e-05, + "loss": 0.9906, + "step": 3543 + }, + { + "epoch": 0.0, + "learning_rate": 4.999853641257426e-05, + "loss": 1.122, + "step": 3544 + }, + { + "epoch": 0.0, + "learning_rate": 4.999853556252454e-05, + "loss": 0.9985, + "step": 3545 + }, + { + "epoch": 0.0, + "learning_rate": 4.999853471222805e-05, + "loss": 0.9434, + "step": 3546 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998533861684784e-05, + "loss": 1.1515, + "step": 3547 + }, + { + "epoch": 0.0, + "learning_rate": 4.999853301089473e-05, + "loss": 1.1774, + "step": 3548 + }, + { + "epoch": 0.0, + "learning_rate": 4.999853215985791e-05, + "loss": 0.9875, + "step": 3549 + }, + { + "epoch": 0.0, + "learning_rate": 4.999853130857432e-05, + "loss": 1.1445, + "step": 3550 + }, + { + "epoch": 0.0, + "learning_rate": 4.999853045704394e-05, + "loss": 1.3954, + "step": 3551 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998529605266805e-05, + "loss": 1.2126, + "step": 3552 + }, + { + "epoch": 0.0, + "learning_rate": 4.999852875324288e-05, + "loss": 0.8698, + "step": 3553 + }, + { + "epoch": 0.0, + "learning_rate": 4.999852790097219e-05, + "loss": 1.1348, + "step": 3554 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998527048454716e-05, + "loss": 0.7995, + "step": 3555 + }, + { + "epoch": 0.0, + "learning_rate": 4.999852619569047e-05, + "loss": 1.3086, + "step": 3556 + }, + { + "epoch": 0.0, + "learning_rate": 4.999852534267945e-05, + "loss": 1.1718, + "step": 3557 + }, + { + "epoch": 0.0, + "learning_rate": 4.999852448942166e-05, + "loss": 1.0632, + "step": 3558 + }, + { + "epoch": 0.0, + "learning_rate": 4.999852363591709e-05, + "loss": 1.0728, + "step": 3559 + }, + { + "epoch": 0.0, + "learning_rate": 4.999852278216575e-05, + "loss": 1.2243, + "step": 3560 + }, + { + "epoch": 0.0, + "learning_rate": 4.999852192816762e-05, + "loss": 1.1648, + "step": 3561 + }, + { + "epoch": 0.0, + "learning_rate": 4.999852107392273e-05, + "loss": 0.9032, + "step": 3562 + }, + { + "epoch": 0.0, + "learning_rate": 4.999852021943106e-05, + "loss": 1.2605, + "step": 3563 + }, + { + "epoch": 0.0, + "learning_rate": 4.999851936469262e-05, + "loss": 0.9793, + "step": 3564 + }, + { + "epoch": 0.0, + "learning_rate": 4.99985185097074e-05, + "loss": 1.275, + "step": 3565 + }, + { + "epoch": 0.0, + "learning_rate": 4.999851765447541e-05, + "loss": 1.0971, + "step": 3566 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998516798996634e-05, + "loss": 1.2443, + "step": 3567 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998515943271096e-05, + "loss": 1.1871, + "step": 3568 + }, + { + "epoch": 0.0, + "learning_rate": 4.999851508729877e-05, + "loss": 1.0477, + "step": 3569 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998514231079686e-05, + "loss": 1.0575, + "step": 3570 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998513374613814e-05, + "loss": 0.9334, + "step": 3571 + }, + { + "epoch": 0.0, + "learning_rate": 4.999851251790118e-05, + "loss": 1.1136, + "step": 3572 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998511660941756e-05, + "loss": 1.2688, + "step": 3573 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998510803735564e-05, + "loss": 1.1204, + "step": 3574 + }, + { + "epoch": 0.0, + "learning_rate": 4.999850994628259e-05, + "loss": 1.1439, + "step": 3575 + }, + { + "epoch": 0.0, + "learning_rate": 4.999850908858286e-05, + "loss": 1.1018, + "step": 3576 + }, + { + "epoch": 0.0, + "learning_rate": 4.999850823063634e-05, + "loss": 1.3292, + "step": 3577 + }, + { + "epoch": 0.0, + "learning_rate": 4.999850737244305e-05, + "loss": 0.9875, + "step": 3578 + }, + { + "epoch": 0.0, + "learning_rate": 4.999850651400298e-05, + "loss": 1.0082, + "step": 3579 + }, + { + "epoch": 0.0, + "learning_rate": 4.999850565531614e-05, + "loss": 1.189, + "step": 3580 + }, + { + "epoch": 0.0, + "learning_rate": 4.999850479638252e-05, + "loss": 2.1198, + "step": 3581 + }, + { + "epoch": 0.0, + "learning_rate": 4.999850393720213e-05, + "loss": 1.5383, + "step": 3582 + }, + { + "epoch": 0.0, + "learning_rate": 4.999850307777497e-05, + "loss": 1.1514, + "step": 3583 + }, + { + "epoch": 0.0, + "learning_rate": 4.999850221810103e-05, + "loss": 0.9158, + "step": 3584 + }, + { + "epoch": 0.0, + "learning_rate": 4.999850135818032e-05, + "loss": 1.1748, + "step": 3585 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998500498012824e-05, + "loss": 1.2605, + "step": 3586 + }, + { + "epoch": 0.0, + "learning_rate": 4.999849963759856e-05, + "loss": 1.286, + "step": 3587 + }, + { + "epoch": 0.0, + "learning_rate": 4.999849877693752e-05, + "loss": 1.0966, + "step": 3588 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998497916029714e-05, + "loss": 1.1221, + "step": 3589 + }, + { + "epoch": 0.0, + "learning_rate": 4.999849705487513e-05, + "loss": 1.2224, + "step": 3590 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998496193473765e-05, + "loss": 0.6984, + "step": 3591 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998495331825623e-05, + "loss": 1.261, + "step": 3592 + }, + { + "epoch": 0.0, + "learning_rate": 4.999849446993071e-05, + "loss": 1.4161, + "step": 3593 + }, + { + "epoch": 0.0, + "learning_rate": 4.999849360778903e-05, + "loss": 1.2171, + "step": 3594 + }, + { + "epoch": 0.0, + "learning_rate": 4.999849274540057e-05, + "loss": 0.8938, + "step": 3595 + }, + { + "epoch": 0.0, + "learning_rate": 4.999849188276534e-05, + "loss": 1.21, + "step": 3596 + }, + { + "epoch": 0.0, + "learning_rate": 4.999849101988333e-05, + "loss": 1.4424, + "step": 3597 + }, + { + "epoch": 0.0, + "learning_rate": 4.999849015675454e-05, + "loss": 1.1366, + "step": 3598 + }, + { + "epoch": 0.0, + "learning_rate": 4.999848929337898e-05, + "loss": 0.9923, + "step": 3599 + }, + { + "epoch": 0.0, + "learning_rate": 4.999848842975665e-05, + "loss": 1.1938, + "step": 3600 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998487565887544e-05, + "loss": 1.095, + "step": 3601 + }, + { + "epoch": 0.0, + "learning_rate": 4.999848670177166e-05, + "loss": 1.1377, + "step": 3602 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998485837409004e-05, + "loss": 1.1227, + "step": 3603 + }, + { + "epoch": 0.0, + "learning_rate": 4.999848497279957e-05, + "loss": 1.1024, + "step": 3604 + }, + { + "epoch": 0.0, + "learning_rate": 4.999848410794337e-05, + "loss": 0.7742, + "step": 3605 + }, + { + "epoch": 0.0, + "learning_rate": 4.999848324284039e-05, + "loss": 1.2553, + "step": 3606 + }, + { + "epoch": 0.0, + "learning_rate": 4.999848237749063e-05, + "loss": 0.9677, + "step": 3607 + }, + { + "epoch": 0.0, + "learning_rate": 4.99984815118941e-05, + "loss": 1.0944, + "step": 3608 + }, + { + "epoch": 0.0, + "learning_rate": 4.99984806460508e-05, + "loss": 0.93, + "step": 3609 + }, + { + "epoch": 0.0, + "learning_rate": 4.999847977996072e-05, + "loss": 1.1312, + "step": 3610 + }, + { + "epoch": 0.0, + "learning_rate": 4.999847891362387e-05, + "loss": 1.0116, + "step": 3611 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998478047040245e-05, + "loss": 0.9964, + "step": 3612 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998477180209834e-05, + "loss": 0.875, + "step": 3613 + }, + { + "epoch": 0.0, + "learning_rate": 4.999847631313266e-05, + "loss": 0.8657, + "step": 3614 + }, + { + "epoch": 0.0, + "learning_rate": 4.999847544580871e-05, + "loss": 1.2225, + "step": 3615 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998474578237986e-05, + "loss": 0.9426, + "step": 3616 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998473710420484e-05, + "loss": 1.1022, + "step": 3617 + }, + { + "epoch": 0.0, + "learning_rate": 4.999847284235622e-05, + "loss": 1.1475, + "step": 3618 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998471974045165e-05, + "loss": 1.1483, + "step": 3619 + }, + { + "epoch": 0.0, + "learning_rate": 4.999847110548734e-05, + "loss": 0.9323, + "step": 3620 + }, + { + "epoch": 0.0, + "learning_rate": 4.999847023668275e-05, + "loss": 0.9708, + "step": 3621 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998469367631375e-05, + "loss": 0.8721, + "step": 3622 + }, + { + "epoch": 0.0, + "learning_rate": 4.999846849833323e-05, + "loss": 0.7354, + "step": 3623 + }, + { + "epoch": 0.0, + "learning_rate": 4.999846762878831e-05, + "loss": 0.7569, + "step": 3624 + }, + { + "epoch": 0.0, + "learning_rate": 4.999846675899662e-05, + "loss": 0.6744, + "step": 3625 + }, + { + "epoch": 0.0, + "learning_rate": 4.999846588895815e-05, + "loss": 0.6684, + "step": 3626 + }, + { + "epoch": 0.0, + "learning_rate": 4.99984650186729e-05, + "loss": 0.7758, + "step": 3627 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998464148140886e-05, + "loss": 0.566, + "step": 3628 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998463277362095e-05, + "loss": 0.6658, + "step": 3629 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998462406336526e-05, + "loss": 0.5957, + "step": 3630 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998461535064186e-05, + "loss": 0.5347, + "step": 3631 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998460663545075e-05, + "loss": 0.5478, + "step": 3632 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998459791779186e-05, + "loss": 0.4344, + "step": 3633 + }, + { + "epoch": 0.0, + "learning_rate": 4.999845891976652e-05, + "loss": 0.8633, + "step": 3634 + }, + { + "epoch": 0.0, + "learning_rate": 4.999845804750709e-05, + "loss": 1.2405, + "step": 3635 + }, + { + "epoch": 0.0, + "learning_rate": 4.999845717500087e-05, + "loss": 1.6678, + "step": 3636 + }, + { + "epoch": 0.0, + "learning_rate": 4.999845630224789e-05, + "loss": 1.3302, + "step": 3637 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998455429248125e-05, + "loss": 1.0957, + "step": 3638 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998454556001595e-05, + "loss": 0.8661, + "step": 3639 + }, + { + "epoch": 0.0, + "learning_rate": 4.999845368250829e-05, + "loss": 0.9106, + "step": 3640 + }, + { + "epoch": 0.0, + "learning_rate": 4.99984528087682e-05, + "loss": 0.9173, + "step": 3641 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998451934781344e-05, + "loss": 1.0613, + "step": 3642 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998451060547716e-05, + "loss": 0.9675, + "step": 3643 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998450186067317e-05, + "loss": 1.0819, + "step": 3644 + }, + { + "epoch": 0.0, + "learning_rate": 4.999844931134013e-05, + "loss": 1.2227, + "step": 3645 + }, + { + "epoch": 0.0, + "learning_rate": 4.999844843636618e-05, + "loss": 1.172, + "step": 3646 + }, + { + "epoch": 0.0, + "learning_rate": 4.999844756114545e-05, + "loss": 1.0692, + "step": 3647 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998446685677946e-05, + "loss": 0.88, + "step": 3648 + }, + { + "epoch": 0.0, + "learning_rate": 4.999844580996367e-05, + "loss": 1.0156, + "step": 3649 + }, + { + "epoch": 0.0, + "learning_rate": 4.999844493400262e-05, + "loss": 1.1947, + "step": 3650 + }, + { + "epoch": 0.0, + "learning_rate": 4.99984440577948e-05, + "loss": 1.1163, + "step": 3651 + }, + { + "epoch": 0.0, + "learning_rate": 4.99984431813402e-05, + "loss": 1.0156, + "step": 3652 + }, + { + "epoch": 0.0, + "learning_rate": 4.999844230463883e-05, + "loss": 1.1083, + "step": 3653 + }, + { + "epoch": 0.0, + "learning_rate": 4.999844142769068e-05, + "loss": 1.0469, + "step": 3654 + }, + { + "epoch": 0.0, + "learning_rate": 4.999844055049576e-05, + "loss": 0.8485, + "step": 3655 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998439673054066e-05, + "loss": 0.9361, + "step": 3656 + }, + { + "epoch": 0.0, + "learning_rate": 4.99984387953656e-05, + "loss": 1.2245, + "step": 3657 + }, + { + "epoch": 0.0, + "learning_rate": 4.999843791743035e-05, + "loss": 1.2246, + "step": 3658 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998437039248336e-05, + "loss": 1.329, + "step": 3659 + }, + { + "epoch": 0.0, + "learning_rate": 4.999843616081955e-05, + "loss": 1.7445, + "step": 3660 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998435282143986e-05, + "loss": 1.8381, + "step": 3661 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998434403221644e-05, + "loss": 1.6972, + "step": 3662 + }, + { + "epoch": 0.0, + "learning_rate": 4.999843352405254e-05, + "loss": 1.7102, + "step": 3663 + }, + { + "epoch": 0.0, + "learning_rate": 4.999843264463665e-05, + "loss": 1.7059, + "step": 3664 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998431764973985e-05, + "loss": 1.6628, + "step": 3665 + }, + { + "epoch": 0.0, + "learning_rate": 4.999843088506455e-05, + "loss": 1.7004, + "step": 3666 + }, + { + "epoch": 0.0, + "learning_rate": 4.999843000490834e-05, + "loss": 1.675, + "step": 3667 + }, + { + "epoch": 0.0, + "learning_rate": 4.999842912450536e-05, + "loss": 1.6609, + "step": 3668 + }, + { + "epoch": 0.0, + "learning_rate": 4.99984282438556e-05, + "loss": 1.6648, + "step": 3669 + }, + { + "epoch": 0.0, + "learning_rate": 4.999842736295907e-05, + "loss": 1.6662, + "step": 3670 + }, + { + "epoch": 0.0, + "learning_rate": 4.999842648181576e-05, + "loss": 1.6543, + "step": 3671 + }, + { + "epoch": 0.0, + "learning_rate": 4.999842560042569e-05, + "loss": 1.6691, + "step": 3672 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998424718788837e-05, + "loss": 1.6507, + "step": 3673 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998423836905207e-05, + "loss": 1.6278, + "step": 3674 + }, + { + "epoch": 0.0, + "learning_rate": 4.999842295477481e-05, + "loss": 1.6509, + "step": 3675 + }, + { + "epoch": 0.0, + "learning_rate": 4.999842207239763e-05, + "loss": 1.6284, + "step": 3676 + }, + { + "epoch": 0.0, + "learning_rate": 4.999842118977369e-05, + "loss": 1.6695, + "step": 3677 + }, + { + "epoch": 0.0, + "learning_rate": 4.999842030690297e-05, + "loss": 1.6526, + "step": 3678 + }, + { + "epoch": 0.0, + "learning_rate": 4.999841942378547e-05, + "loss": 1.6508, + "step": 3679 + }, + { + "epoch": 0.0, + "learning_rate": 4.99984185404212e-05, + "loss": 1.6496, + "step": 3680 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998417656810156e-05, + "loss": 1.6646, + "step": 3681 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998416772952336e-05, + "loss": 1.6578, + "step": 3682 + }, + { + "epoch": 0.0, + "learning_rate": 4.999841588884775e-05, + "loss": 1.6691, + "step": 3683 + }, + { + "epoch": 0.0, + "learning_rate": 4.999841500449638e-05, + "loss": 1.6716, + "step": 3684 + }, + { + "epoch": 0.0, + "learning_rate": 4.999841411989824e-05, + "loss": 1.658, + "step": 3685 + }, + { + "epoch": 0.0, + "learning_rate": 4.999841323505333e-05, + "loss": 1.6474, + "step": 3686 + }, + { + "epoch": 0.0, + "learning_rate": 4.999841234996164e-05, + "loss": 1.6518, + "step": 3687 + }, + { + "epoch": 0.0, + "learning_rate": 4.999841146462319e-05, + "loss": 1.6826, + "step": 3688 + }, + { + "epoch": 0.0, + "learning_rate": 4.999841057903795e-05, + "loss": 1.6797, + "step": 3689 + }, + { + "epoch": 0.0, + "learning_rate": 4.999840969320595e-05, + "loss": 1.6301, + "step": 3690 + }, + { + "epoch": 0.0, + "learning_rate": 4.999840880712716e-05, + "loss": 1.6173, + "step": 3691 + }, + { + "epoch": 0.0, + "learning_rate": 4.999840792080161e-05, + "loss": 1.6426, + "step": 3692 + }, + { + "epoch": 0.0, + "learning_rate": 4.999840703422928e-05, + "loss": 1.6242, + "step": 3693 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998406147410176e-05, + "loss": 1.64, + "step": 3694 + }, + { + "epoch": 0.0, + "learning_rate": 4.99984052603443e-05, + "loss": 1.6469, + "step": 3695 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998404373031646e-05, + "loss": 1.6164, + "step": 3696 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998403485472225e-05, + "loss": 1.6305, + "step": 3697 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998402597666025e-05, + "loss": 1.6101, + "step": 3698 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998401709613054e-05, + "loss": 1.617, + "step": 3699 + }, + { + "epoch": 0.0, + "learning_rate": 4.999840082131331e-05, + "loss": 1.6174, + "step": 3700 + }, + { + "epoch": 0.0, + "learning_rate": 4.999839993276679e-05, + "loss": 1.6436, + "step": 3701 + }, + { + "epoch": 0.0, + "learning_rate": 4.99983990439735e-05, + "loss": 1.6182, + "step": 3702 + }, + { + "epoch": 0.0, + "learning_rate": 4.999839815493343e-05, + "loss": 1.5683, + "step": 3703 + }, + { + "epoch": 0.0, + "learning_rate": 4.999839726564659e-05, + "loss": 1.559, + "step": 3704 + }, + { + "epoch": 0.0, + "learning_rate": 4.999839637611298e-05, + "loss": 1.5921, + "step": 3705 + }, + { + "epoch": 0.0, + "learning_rate": 4.999839548633259e-05, + "loss": 1.5961, + "step": 3706 + }, + { + "epoch": 0.0, + "learning_rate": 4.999839459630543e-05, + "loss": 1.5753, + "step": 3707 + }, + { + "epoch": 0.0, + "learning_rate": 4.99983937060315e-05, + "loss": 1.568, + "step": 3708 + }, + { + "epoch": 0.0, + "learning_rate": 4.999839281551079e-05, + "loss": 1.5881, + "step": 3709 + }, + { + "epoch": 0.0, + "learning_rate": 4.999839192474331e-05, + "loss": 1.5924, + "step": 3710 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998391033729056e-05, + "loss": 1.5891, + "step": 3711 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998390142468026e-05, + "loss": 1.5866, + "step": 3712 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998389250960225e-05, + "loss": 1.5519, + "step": 3713 + }, + { + "epoch": 0.0, + "learning_rate": 4.999838835920565e-05, + "loss": 1.5676, + "step": 3714 + }, + { + "epoch": 0.0, + "learning_rate": 4.99983874672043e-05, + "loss": 1.5594, + "step": 3715 + }, + { + "epoch": 0.0, + "learning_rate": 4.999838657495618e-05, + "loss": 1.5622, + "step": 3716 + }, + { + "epoch": 0.0, + "learning_rate": 4.999838568246128e-05, + "loss": 1.5621, + "step": 3717 + }, + { + "epoch": 0.0, + "learning_rate": 4.999838478971961e-05, + "loss": 1.5302, + "step": 3718 + }, + { + "epoch": 0.0, + "learning_rate": 4.999838389673117e-05, + "loss": 1.5501, + "step": 3719 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998383003495955e-05, + "loss": 1.5491, + "step": 3720 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998382110013964e-05, + "loss": 1.5433, + "step": 3721 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998381216285195e-05, + "loss": 1.5156, + "step": 3722 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998380322309654e-05, + "loss": 1.5524, + "step": 3723 + }, + { + "epoch": 0.0, + "learning_rate": 4.999837942808735e-05, + "loss": 1.5503, + "step": 3724 + }, + { + "epoch": 0.0, + "learning_rate": 4.999837853361826e-05, + "loss": 1.5909, + "step": 3725 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998377638902406e-05, + "loss": 1.5548, + "step": 3726 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998376743939774e-05, + "loss": 1.5458, + "step": 3727 + }, + { + "epoch": 0.0, + "learning_rate": 4.999837584873037e-05, + "loss": 1.5581, + "step": 3728 + }, + { + "epoch": 0.0, + "learning_rate": 4.999837495327419e-05, + "loss": 1.5439, + "step": 3729 + }, + { + "epoch": 0.0, + "learning_rate": 4.999837405757124e-05, + "loss": 1.5403, + "step": 3730 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998373161621515e-05, + "loss": 1.5434, + "step": 3731 + }, + { + "epoch": 0.0, + "learning_rate": 4.999837226542502e-05, + "loss": 1.5112, + "step": 3732 + }, + { + "epoch": 0.0, + "learning_rate": 4.999837136898175e-05, + "loss": 1.5424, + "step": 3733 + }, + { + "epoch": 0.0, + "learning_rate": 4.99983704722917e-05, + "loss": 1.5404, + "step": 3734 + }, + { + "epoch": 0.0, + "learning_rate": 4.999836957535488e-05, + "loss": 1.5589, + "step": 3735 + }, + { + "epoch": 0.0, + "learning_rate": 4.999836867817129e-05, + "loss": 1.5588, + "step": 3736 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998367780740925e-05, + "loss": 1.5285, + "step": 3737 + }, + { + "epoch": 0.0, + "learning_rate": 4.999836688306378e-05, + "loss": 1.5774, + "step": 3738 + }, + { + "epoch": 0.0, + "learning_rate": 4.999836598513987e-05, + "loss": 1.5473, + "step": 3739 + }, + { + "epoch": 0.0, + "learning_rate": 4.999836508696919e-05, + "loss": 1.5688, + "step": 3740 + }, + { + "epoch": 0.0, + "learning_rate": 4.999836418855173e-05, + "loss": 1.5648, + "step": 3741 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998363289887496e-05, + "loss": 1.4973, + "step": 3742 + }, + { + "epoch": 0.0, + "learning_rate": 4.999836239097649e-05, + "loss": 1.5512, + "step": 3743 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998361491818716e-05, + "loss": 1.5802, + "step": 3744 + }, + { + "epoch": 0.0, + "learning_rate": 4.999836059241416e-05, + "loss": 1.5288, + "step": 3745 + }, + { + "epoch": 0.0, + "learning_rate": 4.999835969276284e-05, + "loss": 1.5351, + "step": 3746 + }, + { + "epoch": 0.0, + "learning_rate": 4.999835879286474e-05, + "loss": 1.5493, + "step": 3747 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998357892719866e-05, + "loss": 1.5846, + "step": 3748 + }, + { + "epoch": 0.0, + "learning_rate": 4.999835699232822e-05, + "loss": 1.6383, + "step": 3749 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998356091689804e-05, + "loss": 1.2697, + "step": 3750 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998355190804616e-05, + "loss": 0.8748, + "step": 3751 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998354289672644e-05, + "loss": 0.9201, + "step": 3752 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998353388293907e-05, + "loss": 0.8053, + "step": 3753 + }, + { + "epoch": 0.0, + "learning_rate": 4.99983524866684e-05, + "loss": 0.4354, + "step": 3754 + }, + { + "epoch": 0.0, + "learning_rate": 4.999835158479611e-05, + "loss": 0.7464, + "step": 3755 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998350682677055e-05, + "loss": 1.2396, + "step": 3756 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998349780311226e-05, + "loss": 1.0182, + "step": 3757 + }, + { + "epoch": 0.0, + "learning_rate": 4.999834887769862e-05, + "loss": 0.9524, + "step": 3758 + }, + { + "epoch": 0.0, + "learning_rate": 4.999834797483924e-05, + "loss": 0.3241, + "step": 3759 + }, + { + "epoch": 0.0, + "learning_rate": 4.99983470717331e-05, + "loss": 0.3452, + "step": 3760 + }, + { + "epoch": 0.0, + "learning_rate": 4.999834616838017e-05, + "loss": 1.3963, + "step": 3761 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998345264780475e-05, + "loss": 1.3835, + "step": 3762 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998344360934e-05, + "loss": 3.2567, + "step": 3763 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998343456840765e-05, + "loss": 2.4853, + "step": 3764 + }, + { + "epoch": 0.0, + "learning_rate": 4.999834255250075e-05, + "loss": 2.1915, + "step": 3765 + }, + { + "epoch": 0.0, + "learning_rate": 4.999834164791396e-05, + "loss": 2.4211, + "step": 3766 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998340743080397e-05, + "loss": 2.2489, + "step": 3767 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998339838000065e-05, + "loss": 2.1991, + "step": 3768 + }, + { + "epoch": 0.0, + "learning_rate": 4.999833893267295e-05, + "loss": 2.1998, + "step": 3769 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998338027099074e-05, + "loss": 2.2579, + "step": 3770 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998337121278414e-05, + "loss": 2.0534, + "step": 3771 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998336215211e-05, + "loss": 2.1723, + "step": 3772 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998335308896796e-05, + "loss": 1.879, + "step": 3773 + }, + { + "epoch": 0.0, + "learning_rate": 4.999833440233582e-05, + "loss": 1.6274, + "step": 3774 + }, + { + "epoch": 0.0, + "learning_rate": 4.999833349552807e-05, + "loss": 1.8824, + "step": 3775 + }, + { + "epoch": 0.0, + "learning_rate": 4.999833258847355e-05, + "loss": 1.5347, + "step": 3776 + }, + { + "epoch": 0.0, + "learning_rate": 4.999833168117226e-05, + "loss": 1.5394, + "step": 3777 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998330773624194e-05, + "loss": 1.6869, + "step": 3778 + }, + { + "epoch": 0.0, + "learning_rate": 4.999832986582935e-05, + "loss": 1.8133, + "step": 3779 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998328957787745e-05, + "loss": 1.4076, + "step": 3780 + }, + { + "epoch": 0.0, + "learning_rate": 4.999832804949936e-05, + "loss": 1.6906, + "step": 3781 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998327140964205e-05, + "loss": 1.7414, + "step": 3782 + }, + { + "epoch": 0.0, + "learning_rate": 4.999832623218227e-05, + "loss": 1.3289, + "step": 3783 + }, + { + "epoch": 0.0, + "learning_rate": 4.999832532315357e-05, + "loss": 1.5075, + "step": 3784 + }, + { + "epoch": 0.0, + "learning_rate": 4.99983244138781e-05, + "loss": 1.2666, + "step": 3785 + }, + { + "epoch": 0.0, + "learning_rate": 4.999832350435584e-05, + "loss": 1.4103, + "step": 3786 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998322594586824e-05, + "loss": 1.4504, + "step": 3787 + }, + { + "epoch": 0.0, + "learning_rate": 4.999832168457103e-05, + "loss": 1.4091, + "step": 3788 + }, + { + "epoch": 0.0, + "learning_rate": 4.999832077430846e-05, + "loss": 1.3284, + "step": 3789 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998319863799115e-05, + "loss": 1.4356, + "step": 3790 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998318953043e-05, + "loss": 1.2981, + "step": 3791 + }, + { + "epoch": 0.0, + "learning_rate": 4.999831804204012e-05, + "loss": 1.5094, + "step": 3792 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998317130790464e-05, + "loss": 1.5671, + "step": 3793 + }, + { + "epoch": 0.0, + "learning_rate": 4.999831621929403e-05, + "loss": 1.4988, + "step": 3794 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998315307550826e-05, + "loss": 1.4861, + "step": 3795 + }, + { + "epoch": 0.0, + "learning_rate": 4.999831439556084e-05, + "loss": 1.5498, + "step": 3796 + }, + { + "epoch": 0.0, + "learning_rate": 4.999831348332409e-05, + "loss": 1.2615, + "step": 3797 + }, + { + "epoch": 0.0, + "learning_rate": 4.999831257084057e-05, + "loss": 1.5546, + "step": 3798 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998311658110274e-05, + "loss": 1.5435, + "step": 3799 + }, + { + "epoch": 0.0, + "learning_rate": 4.99983107451332e-05, + "loss": 1.6826, + "step": 3800 + }, + { + "epoch": 0.0, + "learning_rate": 4.999830983190936e-05, + "loss": 1.4174, + "step": 3801 + }, + { + "epoch": 0.0, + "learning_rate": 4.999830891843875e-05, + "loss": 1.6398, + "step": 3802 + }, + { + "epoch": 0.0, + "learning_rate": 4.999830800472136e-05, + "loss": 1.4567, + "step": 3803 + }, + { + "epoch": 0.0, + "learning_rate": 4.99983070907572e-05, + "loss": 1.6013, + "step": 3804 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998306176546266e-05, + "loss": 1.7008, + "step": 3805 + }, + { + "epoch": 0.0, + "learning_rate": 4.999830526208856e-05, + "loss": 1.6242, + "step": 3806 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998304347384085e-05, + "loss": 1.5235, + "step": 3807 + }, + { + "epoch": 0.0, + "learning_rate": 4.999830343243283e-05, + "loss": 1.3879, + "step": 3808 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998302517234805e-05, + "loss": 1.4885, + "step": 3809 + }, + { + "epoch": 0.0, + "learning_rate": 4.999830160179001e-05, + "loss": 1.5374, + "step": 3810 + }, + { + "epoch": 0.0, + "learning_rate": 4.999830068609844e-05, + "loss": 1.377, + "step": 3811 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998299770160095e-05, + "loss": 1.2827, + "step": 3812 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998298853974985e-05, + "loss": 1.3945, + "step": 3813 + }, + { + "epoch": 0.0, + "learning_rate": 4.99982979375431e-05, + "loss": 1.3208, + "step": 3814 + }, + { + "epoch": 0.0, + "learning_rate": 4.999829702086444e-05, + "loss": 1.276, + "step": 3815 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998296103939e-05, + "loss": 1.1584, + "step": 3816 + }, + { + "epoch": 0.0, + "learning_rate": 4.99982951867668e-05, + "loss": 1.002, + "step": 3817 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998294269347826e-05, + "loss": 0.9674, + "step": 3818 + }, + { + "epoch": 0.0, + "learning_rate": 4.999829335168207e-05, + "loss": 0.9041, + "step": 3819 + }, + { + "epoch": 0.0, + "learning_rate": 4.999829243376955e-05, + "loss": 0.6312, + "step": 3820 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998291515610254e-05, + "loss": 0.6914, + "step": 3821 + }, + { + "epoch": 0.0, + "learning_rate": 4.999829059720418e-05, + "loss": 0.5184, + "step": 3822 + }, + { + "epoch": 0.0, + "learning_rate": 4.999828967855134e-05, + "loss": 0.3013, + "step": 3823 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998288759651727e-05, + "loss": 0.2505, + "step": 3824 + }, + { + "epoch": 0.0, + "learning_rate": 4.999828784050534e-05, + "loss": 0.2985, + "step": 3825 + }, + { + "epoch": 0.0, + "learning_rate": 4.999828692111218e-05, + "loss": 0.899, + "step": 3826 + }, + { + "epoch": 0.0, + "learning_rate": 4.999828600147225e-05, + "loss": 1.2663, + "step": 3827 + }, + { + "epoch": 0.0, + "learning_rate": 4.999828508158555e-05, + "loss": 1.2636, + "step": 3828 + }, + { + "epoch": 0.0, + "learning_rate": 4.999828416145207e-05, + "loss": 1.131, + "step": 3829 + }, + { + "epoch": 0.0, + "learning_rate": 4.999828324107182e-05, + "loss": 1.1377, + "step": 3830 + }, + { + "epoch": 0.0, + "learning_rate": 4.99982823204448e-05, + "loss": 1.1853, + "step": 3831 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998281399571015e-05, + "loss": 1.2119, + "step": 3832 + }, + { + "epoch": 0.0, + "learning_rate": 4.999828047845044e-05, + "loss": 1.1701, + "step": 3833 + }, + { + "epoch": 0.0, + "learning_rate": 4.999827955708311e-05, + "loss": 1.2334, + "step": 3834 + }, + { + "epoch": 0.0, + "learning_rate": 4.999827863546899e-05, + "loss": 0.8946, + "step": 3835 + }, + { + "epoch": 0.0, + "learning_rate": 4.999827771360811e-05, + "loss": 1.0699, + "step": 3836 + }, + { + "epoch": 0.0, + "learning_rate": 4.999827679150045e-05, + "loss": 1.108, + "step": 3837 + }, + { + "epoch": 0.0, + "learning_rate": 4.999827586914602e-05, + "loss": 1.0536, + "step": 3838 + }, + { + "epoch": 0.0, + "learning_rate": 4.999827494654482e-05, + "loss": 1.1607, + "step": 3839 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998274023696845e-05, + "loss": 1.0009, + "step": 3840 + }, + { + "epoch": 0.0, + "learning_rate": 4.99982731006021e-05, + "loss": 1.0169, + "step": 3841 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998272177260586e-05, + "loss": 1.2903, + "step": 3842 + }, + { + "epoch": 0.0, + "learning_rate": 4.999827125367229e-05, + "loss": 1.0584, + "step": 3843 + }, + { + "epoch": 0.0, + "learning_rate": 4.999827032983723e-05, + "loss": 1.0524, + "step": 3844 + }, + { + "epoch": 0.0, + "learning_rate": 4.999826940575539e-05, + "loss": 0.9983, + "step": 3845 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998268481426786e-05, + "loss": 0.9049, + "step": 3846 + }, + { + "epoch": 0.0, + "learning_rate": 4.999826755685141e-05, + "loss": 1.0455, + "step": 3847 + }, + { + "epoch": 0.0, + "learning_rate": 4.999826663202926e-05, + "loss": 1.2578, + "step": 3848 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998265706960325e-05, + "loss": 1.1371, + "step": 3849 + }, + { + "epoch": 0.0, + "learning_rate": 4.999826478164463e-05, + "loss": 1.0095, + "step": 3850 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998263856082164e-05, + "loss": 1.1335, + "step": 3851 + }, + { + "epoch": 0.0, + "learning_rate": 4.999826293027292e-05, + "loss": 1.1164, + "step": 3852 + }, + { + "epoch": 0.0, + "learning_rate": 4.99982620042169e-05, + "loss": 0.9897, + "step": 3853 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998261077914114e-05, + "loss": 1.1324, + "step": 3854 + }, + { + "epoch": 0.0, + "learning_rate": 4.999826015136456e-05, + "loss": 0.9199, + "step": 3855 + }, + { + "epoch": 0.0, + "learning_rate": 4.999825922456822e-05, + "loss": 0.6164, + "step": 3856 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998258297525124e-05, + "loss": 1.0912, + "step": 3857 + }, + { + "epoch": 0.0, + "learning_rate": 4.999825737023525e-05, + "loss": 1.1302, + "step": 3858 + }, + { + "epoch": 0.0, + "learning_rate": 4.99982564426986e-05, + "loss": 0.8905, + "step": 3859 + }, + { + "epoch": 0.0, + "learning_rate": 4.999825551491518e-05, + "loss": 0.9626, + "step": 3860 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998254586884985e-05, + "loss": 0.9578, + "step": 3861 + }, + { + "epoch": 0.0, + "learning_rate": 4.999825365860802e-05, + "loss": 0.9679, + "step": 3862 + }, + { + "epoch": 0.0, + "learning_rate": 4.999825273008428e-05, + "loss": 1.0489, + "step": 3863 + }, + { + "epoch": 0.0, + "learning_rate": 4.999825180131377e-05, + "loss": 0.7817, + "step": 3864 + }, + { + "epoch": 0.0, + "learning_rate": 4.999825087229649e-05, + "loss": 0.5133, + "step": 3865 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998249943032435e-05, + "loss": 0.5381, + "step": 3866 + }, + { + "epoch": 0.0, + "learning_rate": 4.999824901352161e-05, + "loss": 0.5507, + "step": 3867 + }, + { + "epoch": 0.0, + "learning_rate": 4.999824808376401e-05, + "loss": 0.8889, + "step": 3868 + }, + { + "epoch": 0.0, + "learning_rate": 4.999824715375965e-05, + "loss": 0.5353, + "step": 3869 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998246223508506e-05, + "loss": 0.484, + "step": 3870 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998245293010585e-05, + "loss": 0.3894, + "step": 3871 + }, + { + "epoch": 0.0, + "learning_rate": 4.99982443622659e-05, + "loss": 0.3145, + "step": 3872 + }, + { + "epoch": 0.0, + "learning_rate": 4.999824343127444e-05, + "loss": 1.4916, + "step": 3873 + }, + { + "epoch": 0.0, + "learning_rate": 4.999824250003621e-05, + "loss": 0.9999, + "step": 3874 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998241568551205e-05, + "loss": 0.8343, + "step": 3875 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998240636819436e-05, + "loss": 1.1243, + "step": 3876 + }, + { + "epoch": 0.0, + "learning_rate": 4.999823970484089e-05, + "loss": 1.0399, + "step": 3877 + }, + { + "epoch": 0.0, + "learning_rate": 4.999823877261557e-05, + "loss": 0.8224, + "step": 3878 + }, + { + "epoch": 0.0, + "learning_rate": 4.999823784014347e-05, + "loss": 1.0171, + "step": 3879 + }, + { + "epoch": 0.0, + "learning_rate": 4.999823690742461e-05, + "loss": 1.9519, + "step": 3880 + }, + { + "epoch": 0.0, + "learning_rate": 4.999823597445897e-05, + "loss": 1.0492, + "step": 3881 + }, + { + "epoch": 0.0, + "learning_rate": 4.999823504124657e-05, + "loss": 1.2882, + "step": 3882 + }, + { + "epoch": 0.0, + "learning_rate": 4.999823410778739e-05, + "loss": 1.1063, + "step": 3883 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998233174081434e-05, + "loss": 0.9714, + "step": 3884 + }, + { + "epoch": 0.0, + "learning_rate": 4.999823224012871e-05, + "loss": 0.9269, + "step": 3885 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998231305929215e-05, + "loss": 1.3882, + "step": 3886 + }, + { + "epoch": 0.0, + "learning_rate": 4.999823037148295e-05, + "loss": 1.2153, + "step": 3887 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998229436789905e-05, + "loss": 0.9109, + "step": 3888 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998228501850096e-05, + "loss": 0.9884, + "step": 3889 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998227566663516e-05, + "loss": 1.0662, + "step": 3890 + }, + { + "epoch": 0.0, + "learning_rate": 4.999822663123016e-05, + "loss": 0.9719, + "step": 3891 + }, + { + "epoch": 0.0, + "learning_rate": 4.999822569555003e-05, + "loss": 1.3314, + "step": 3892 + }, + { + "epoch": 0.0, + "learning_rate": 4.999822475962313e-05, + "loss": 1.1694, + "step": 3893 + }, + { + "epoch": 0.0, + "learning_rate": 4.999822382344946e-05, + "loss": 1.3414, + "step": 3894 + }, + { + "epoch": 0.0, + "learning_rate": 4.999822288702901e-05, + "loss": 1.2034, + "step": 3895 + }, + { + "epoch": 0.0, + "learning_rate": 4.99982219503618e-05, + "loss": 1.2633, + "step": 3896 + }, + { + "epoch": 0.0, + "learning_rate": 4.999822101344781e-05, + "loss": 1.0826, + "step": 3897 + }, + { + "epoch": 0.0, + "learning_rate": 4.999822007628705e-05, + "loss": 0.9299, + "step": 3898 + }, + { + "epoch": 0.0, + "learning_rate": 4.999821913887952e-05, + "loss": 1.712, + "step": 3899 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998218201225214e-05, + "loss": 1.2466, + "step": 3900 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998217263324144e-05, + "loss": 1.0934, + "step": 3901 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998216325176296e-05, + "loss": 0.7847, + "step": 3902 + }, + { + "epoch": 0.0, + "learning_rate": 4.999821538678168e-05, + "loss": 1.2325, + "step": 3903 + }, + { + "epoch": 0.0, + "learning_rate": 4.999821444814029e-05, + "loss": 1.2659, + "step": 3904 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998213509252126e-05, + "loss": 0.9979, + "step": 3905 + }, + { + "epoch": 0.0, + "learning_rate": 4.999821257011719e-05, + "loss": 1.2732, + "step": 3906 + }, + { + "epoch": 0.0, + "learning_rate": 4.999821163073548e-05, + "loss": 0.6001, + "step": 3907 + }, + { + "epoch": 0.0, + "learning_rate": 4.999821069110701e-05, + "loss": 1.0395, + "step": 3908 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998209751231755e-05, + "loss": 0.9985, + "step": 3909 + }, + { + "epoch": 0.0, + "learning_rate": 4.999820881110974e-05, + "loss": 0.8775, + "step": 3910 + }, + { + "epoch": 0.0, + "learning_rate": 4.999820787074094e-05, + "loss": 1.2523, + "step": 3911 + }, + { + "epoch": 0.0, + "learning_rate": 4.999820693012538e-05, + "loss": 1.4449, + "step": 3912 + }, + { + "epoch": 0.0, + "learning_rate": 4.999820598926304e-05, + "loss": 1.343, + "step": 3913 + }, + { + "epoch": 0.0, + "learning_rate": 4.999820504815394e-05, + "loss": 1.4019, + "step": 3914 + }, + { + "epoch": 0.0, + "learning_rate": 4.999820410679805e-05, + "loss": 1.1844, + "step": 3915 + }, + { + "epoch": 0.0, + "learning_rate": 4.99982031651954e-05, + "loss": 1.3271, + "step": 3916 + }, + { + "epoch": 0.0, + "learning_rate": 4.999820222334598e-05, + "loss": 1.2497, + "step": 3917 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998201281249776e-05, + "loss": 0.9437, + "step": 3918 + }, + { + "epoch": 0.0, + "learning_rate": 4.999820033890681e-05, + "loss": 0.9859, + "step": 3919 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998199396317076e-05, + "loss": 1.2831, + "step": 3920 + }, + { + "epoch": 0.0, + "learning_rate": 4.999819845348056e-05, + "loss": 1.0626, + "step": 3921 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998197510397284e-05, + "loss": 1.1052, + "step": 3922 + }, + { + "epoch": 0.0, + "learning_rate": 4.999819656706722e-05, + "loss": 0.8837, + "step": 3923 + }, + { + "epoch": 0.0, + "learning_rate": 4.99981956234904e-05, + "loss": 1.1141, + "step": 3924 + }, + { + "epoch": 0.0, + "learning_rate": 4.99981946796668e-05, + "loss": 0.9994, + "step": 3925 + }, + { + "epoch": 0.0, + "learning_rate": 4.999819373559643e-05, + "loss": 1.1416, + "step": 3926 + }, + { + "epoch": 0.0, + "learning_rate": 4.99981927912793e-05, + "loss": 1.0897, + "step": 3927 + }, + { + "epoch": 0.0, + "learning_rate": 4.999819184671538e-05, + "loss": 1.2305, + "step": 3928 + }, + { + "epoch": 0.0, + "learning_rate": 4.99981909019047e-05, + "loss": 1.2005, + "step": 3929 + }, + { + "epoch": 0.0, + "learning_rate": 4.999818995684724e-05, + "loss": 0.9295, + "step": 3930 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998189011543014e-05, + "loss": 1.2663, + "step": 3931 + }, + { + "epoch": 0.0, + "learning_rate": 4.999818806599202e-05, + "loss": 1.0269, + "step": 3932 + }, + { + "epoch": 0.0, + "learning_rate": 4.999818712019425e-05, + "loss": 1.0723, + "step": 3933 + }, + { + "epoch": 0.0, + "learning_rate": 4.999818617414971e-05, + "loss": 1.3211, + "step": 3934 + }, + { + "epoch": 0.0, + "learning_rate": 4.99981852278584e-05, + "loss": 1.2296, + "step": 3935 + }, + { + "epoch": 0.0, + "learning_rate": 4.999818428132031e-05, + "loss": 1.2031, + "step": 3936 + }, + { + "epoch": 0.0, + "learning_rate": 4.999818333453545e-05, + "loss": 0.8096, + "step": 3937 + }, + { + "epoch": 0.0, + "learning_rate": 4.999818238750382e-05, + "loss": 1.1207, + "step": 3938 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998181440225425e-05, + "loss": 1.106, + "step": 3939 + }, + { + "epoch": 0.0, + "learning_rate": 4.999818049270025e-05, + "loss": 1.1441, + "step": 3940 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998179544928314e-05, + "loss": 1.1043, + "step": 3941 + }, + { + "epoch": 0.0, + "learning_rate": 4.99981785969096e-05, + "loss": 0.7397, + "step": 3942 + }, + { + "epoch": 0.0, + "learning_rate": 4.999817764864411e-05, + "loss": 0.4566, + "step": 3943 + }, + { + "epoch": 0.0, + "learning_rate": 4.999817670013186e-05, + "loss": 0.3368, + "step": 3944 + }, + { + "epoch": 0.0, + "learning_rate": 4.999817575137283e-05, + "loss": 0.2477, + "step": 3945 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998174802367024e-05, + "loss": 0.1339, + "step": 3946 + }, + { + "epoch": 0.0, + "learning_rate": 4.999817385311446e-05, + "loss": 0.1126, + "step": 3947 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998172903615116e-05, + "loss": 1.0654, + "step": 3948 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998171953869e-05, + "loss": 1.0436, + "step": 3949 + }, + { + "epoch": 0.0, + "learning_rate": 4.999817100387612e-05, + "loss": 1.3441, + "step": 3950 + }, + { + "epoch": 0.0, + "learning_rate": 4.999817005363646e-05, + "loss": 1.4435, + "step": 3951 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998169103150026e-05, + "loss": 1.1704, + "step": 3952 + }, + { + "epoch": 0.0, + "learning_rate": 4.999816815241683e-05, + "loss": 0.2951, + "step": 3953 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998167201436864e-05, + "loss": 1.0192, + "step": 3954 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998166250210116e-05, + "loss": 1.0236, + "step": 3955 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998165298736604e-05, + "loss": 1.1952, + "step": 3956 + }, + { + "epoch": 0.0, + "learning_rate": 4.999816434701632e-05, + "loss": 1.1598, + "step": 3957 + }, + { + "epoch": 0.0, + "learning_rate": 4.999816339504926e-05, + "loss": 1.2215, + "step": 3958 + }, + { + "epoch": 0.0, + "learning_rate": 4.999816244283543e-05, + "loss": 1.4891, + "step": 3959 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998161490374836e-05, + "loss": 1.1688, + "step": 3960 + }, + { + "epoch": 0.0, + "learning_rate": 4.999816053766746e-05, + "loss": 1.1998, + "step": 3961 + }, + { + "epoch": 0.0, + "learning_rate": 4.999815958471332e-05, + "loss": 1.111, + "step": 3962 + }, + { + "epoch": 0.0, + "learning_rate": 4.999815863151241e-05, + "loss": 1.1179, + "step": 3963 + }, + { + "epoch": 0.0, + "learning_rate": 4.999815767806473e-05, + "loss": 1.0286, + "step": 3964 + }, + { + "epoch": 0.0, + "learning_rate": 4.999815672437027e-05, + "loss": 1.1219, + "step": 3965 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998155770429045e-05, + "loss": 1.2234, + "step": 3966 + }, + { + "epoch": 0.0, + "learning_rate": 4.999815481624104e-05, + "loss": 1.1453, + "step": 3967 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998153861806276e-05, + "loss": 1.1588, + "step": 3968 + }, + { + "epoch": 0.0, + "learning_rate": 4.999815290712473e-05, + "loss": 0.6494, + "step": 3969 + }, + { + "epoch": 0.0, + "learning_rate": 4.999815195219643e-05, + "loss": 0.6897, + "step": 3970 + }, + { + "epoch": 0.0, + "learning_rate": 4.999815099702134e-05, + "loss": 1.1793, + "step": 3971 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998150041599484e-05, + "loss": 1.0342, + "step": 3972 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998149085930854e-05, + "loss": 1.1482, + "step": 3973 + }, + { + "epoch": 0.0, + "learning_rate": 4.999814813001547e-05, + "loss": 1.2897, + "step": 3974 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998147173853296e-05, + "loss": 0.7381, + "step": 3975 + }, + { + "epoch": 0.0, + "learning_rate": 4.999814621744435e-05, + "loss": 1.1291, + "step": 3976 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998145260788645e-05, + "loss": 1.1105, + "step": 3977 + }, + { + "epoch": 0.0, + "learning_rate": 4.999814430388616e-05, + "loss": 1.2495, + "step": 3978 + }, + { + "epoch": 0.0, + "learning_rate": 4.999814334673692e-05, + "loss": 1.0232, + "step": 3979 + }, + { + "epoch": 0.0, + "learning_rate": 4.999814238934088e-05, + "loss": 1.4272, + "step": 3980 + }, + { + "epoch": 0.0, + "learning_rate": 4.999814143169809e-05, + "loss": 1.3486, + "step": 3981 + }, + { + "epoch": 0.0, + "learning_rate": 4.999814047380853e-05, + "loss": 1.285, + "step": 3982 + }, + { + "epoch": 0.0, + "learning_rate": 4.999813951567219e-05, + "loss": 1.105, + "step": 3983 + }, + { + "epoch": 0.0, + "learning_rate": 4.999813855728908e-05, + "loss": 0.9492, + "step": 3984 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998137598659204e-05, + "loss": 1.1968, + "step": 3985 + }, + { + "epoch": 0.0, + "learning_rate": 4.999813663978255e-05, + "loss": 0.9647, + "step": 3986 + }, + { + "epoch": 0.0, + "learning_rate": 4.999813568065913e-05, + "loss": 1.1586, + "step": 3987 + }, + { + "epoch": 0.0, + "learning_rate": 4.999813472128894e-05, + "loss": 1.2225, + "step": 3988 + }, + { + "epoch": 0.0, + "learning_rate": 4.999813376167197e-05, + "loss": 1.072, + "step": 3989 + }, + { + "epoch": 0.0, + "learning_rate": 4.999813280180824e-05, + "loss": 1.1902, + "step": 3990 + }, + { + "epoch": 0.0, + "learning_rate": 4.999813184169774e-05, + "loss": 1.1988, + "step": 3991 + }, + { + "epoch": 0.0, + "learning_rate": 4.999813088134045e-05, + "loss": 1.206, + "step": 3992 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998129920736405e-05, + "loss": 1.1791, + "step": 3993 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998128959885595e-05, + "loss": 1.5142, + "step": 3994 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998127998788006e-05, + "loss": 1.4691, + "step": 3995 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998127037443646e-05, + "loss": 1.4362, + "step": 3996 + }, + { + "epoch": 0.0, + "learning_rate": 4.999812607585251e-05, + "loss": 1.2838, + "step": 3997 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998125114014606e-05, + "loss": 0.9268, + "step": 3998 + }, + { + "epoch": 0.0, + "learning_rate": 4.999812415192994e-05, + "loss": 0.9915, + "step": 3999 + }, + { + "epoch": 0.0, + "learning_rate": 4.999812318959849e-05, + "loss": 1.1468, + "step": 4000 + }, + { + "epoch": 0.0, + "eval_loss": 1.09718918800354, + "eval_runtime": 106.6196, + "eval_samples_per_second": 12.99, + "eval_steps_per_second": 3.255, + "step": 4000 + }, + { + "epoch": 0.0, + "learning_rate": 4.999812222702027e-05, + "loss": 0.8135, + "step": 4001 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998121264195284e-05, + "loss": 1.2887, + "step": 4002 + }, + { + "epoch": 0.0, + "learning_rate": 4.999812030112353e-05, + "loss": 0.9391, + "step": 4003 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998119337805e-05, + "loss": 1.1028, + "step": 4004 + }, + { + "epoch": 0.0, + "learning_rate": 4.99981183742397e-05, + "loss": 1.8483, + "step": 4005 + }, + { + "epoch": 0.0, + "learning_rate": 4.999811741042764e-05, + "loss": 0.5112, + "step": 4006 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998116446368794e-05, + "loss": 0.9238, + "step": 4007 + }, + { + "epoch": 0.0, + "learning_rate": 4.999811548206318e-05, + "loss": 1.0704, + "step": 4008 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998114517510795e-05, + "loss": 0.8911, + "step": 4009 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998113552711645e-05, + "loss": 0.7326, + "step": 4010 + }, + { + "epoch": 0.0, + "learning_rate": 4.999811258766572e-05, + "loss": 0.5943, + "step": 4011 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998111622373025e-05, + "loss": 0.9056, + "step": 4012 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998110656833555e-05, + "loss": 1.0, + "step": 4013 + }, + { + "epoch": 0.0, + "learning_rate": 4.999810969104733e-05, + "loss": 1.0892, + "step": 4014 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998108725014314e-05, + "loss": 0.989, + "step": 4015 + }, + { + "epoch": 0.0, + "learning_rate": 4.999810775873454e-05, + "loss": 1.1427, + "step": 4016 + }, + { + "epoch": 0.0, + "learning_rate": 4.999810679220799e-05, + "loss": 1.3094, + "step": 4017 + }, + { + "epoch": 0.0, + "learning_rate": 4.999810582543466e-05, + "loss": 1.5354, + "step": 4018 + }, + { + "epoch": 0.0, + "learning_rate": 4.999810485841457e-05, + "loss": 1.2071, + "step": 4019 + }, + { + "epoch": 0.0, + "learning_rate": 4.999810389114772e-05, + "loss": 1.0141, + "step": 4020 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998102923634084e-05, + "loss": 1.1229, + "step": 4021 + }, + { + "epoch": 0.0, + "learning_rate": 4.999810195587368e-05, + "loss": 1.4388, + "step": 4022 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998100987866505e-05, + "loss": 1.651, + "step": 4023 + }, + { + "epoch": 0.0, + "learning_rate": 4.999810001961256e-05, + "loss": 1.1134, + "step": 4024 + }, + { + "epoch": 0.0, + "learning_rate": 4.999809905111185e-05, + "loss": 1.1017, + "step": 4025 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998098082364365e-05, + "loss": 1.0174, + "step": 4026 + }, + { + "epoch": 0.0, + "learning_rate": 4.99980971133701e-05, + "loss": 0.3147, + "step": 4027 + }, + { + "epoch": 0.0, + "learning_rate": 4.999809614412908e-05, + "loss": 1.336, + "step": 4028 + }, + { + "epoch": 0.0, + "learning_rate": 4.999809517464128e-05, + "loss": 0.9984, + "step": 4029 + }, + { + "epoch": 0.0, + "learning_rate": 4.999809420490671e-05, + "loss": 0.9577, + "step": 4030 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998093234925366e-05, + "loss": 1.3655, + "step": 4031 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998092264697264e-05, + "loss": 0.8947, + "step": 4032 + }, + { + "epoch": 0.0, + "learning_rate": 4.999809129422238e-05, + "loss": 1.1589, + "step": 4033 + }, + { + "epoch": 0.0, + "learning_rate": 4.999809032350073e-05, + "loss": 1.2562, + "step": 4034 + }, + { + "epoch": 0.0, + "learning_rate": 4.999808935253231e-05, + "loss": 0.9964, + "step": 4035 + }, + { + "epoch": 0.0, + "learning_rate": 4.999808838131712e-05, + "loss": 1.249, + "step": 4036 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998087409855156e-05, + "loss": 1.2319, + "step": 4037 + }, + { + "epoch": 0.0, + "learning_rate": 4.999808643814642e-05, + "loss": 1.2463, + "step": 4038 + }, + { + "epoch": 0.0, + "learning_rate": 4.999808546619091e-05, + "loss": 1.2636, + "step": 4039 + }, + { + "epoch": 0.0, + "learning_rate": 4.999808449398864e-05, + "loss": 1.3586, + "step": 4040 + }, + { + "epoch": 0.0, + "learning_rate": 4.99980835215396e-05, + "loss": 1.1718, + "step": 4041 + }, + { + "epoch": 0.0, + "learning_rate": 4.999808254884378e-05, + "loss": 1.187, + "step": 4042 + }, + { + "epoch": 0.0, + "learning_rate": 4.99980815759012e-05, + "loss": 1.0817, + "step": 4043 + }, + { + "epoch": 0.0, + "learning_rate": 4.999808060271184e-05, + "loss": 1.162, + "step": 4044 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998079629275705e-05, + "loss": 1.1909, + "step": 4045 + }, + { + "epoch": 0.0, + "learning_rate": 4.999807865559282e-05, + "loss": 1.1345, + "step": 4046 + }, + { + "epoch": 0.0, + "learning_rate": 4.999807768166315e-05, + "loss": 0.9697, + "step": 4047 + }, + { + "epoch": 0.0, + "learning_rate": 4.999807670748671e-05, + "loss": 0.9829, + "step": 4048 + }, + { + "epoch": 0.0, + "learning_rate": 4.99980757330635e-05, + "loss": 1.131, + "step": 4049 + }, + { + "epoch": 0.0, + "learning_rate": 4.999807475839352e-05, + "loss": 1.06, + "step": 4050 + }, + { + "epoch": 0.0, + "learning_rate": 4.999807378347677e-05, + "loss": 1.0493, + "step": 4051 + }, + { + "epoch": 0.0, + "learning_rate": 4.999807280831324e-05, + "loss": 0.7613, + "step": 4052 + }, + { + "epoch": 0.0, + "learning_rate": 4.999807183290296e-05, + "loss": 3.0747, + "step": 4053 + }, + { + "epoch": 0.0, + "learning_rate": 4.999807085724589e-05, + "loss": 4.4983, + "step": 4054 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998069881342064e-05, + "loss": 4.1956, + "step": 4055 + }, + { + "epoch": 0.0, + "learning_rate": 4.999806890519146e-05, + "loss": 4.0042, + "step": 4056 + }, + { + "epoch": 0.0, + "learning_rate": 4.999806792879409e-05, + "loss": 3.661, + "step": 4057 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998066952149945e-05, + "loss": 3.8345, + "step": 4058 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998065975259034e-05, + "loss": 3.6504, + "step": 4059 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998064998121344e-05, + "loss": 1.5604, + "step": 4060 + }, + { + "epoch": 0.0, + "learning_rate": 4.999806402073689e-05, + "loss": 0.8872, + "step": 4061 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998063043105666e-05, + "loss": 0.9651, + "step": 4062 + }, + { + "epoch": 0.0, + "learning_rate": 4.999806206522767e-05, + "loss": 1.0188, + "step": 4063 + }, + { + "epoch": 0.0, + "learning_rate": 4.999806108710291e-05, + "loss": 1.4743, + "step": 4064 + }, + { + "epoch": 0.0, + "learning_rate": 4.999806010873137e-05, + "loss": 1.2101, + "step": 4065 + }, + { + "epoch": 0.0, + "learning_rate": 4.999805913011307e-05, + "loss": 1.4814, + "step": 4066 + }, + { + "epoch": 0.0, + "learning_rate": 4.999805815124799e-05, + "loss": 0.996, + "step": 4067 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998057172136144e-05, + "loss": 0.9714, + "step": 4068 + }, + { + "epoch": 0.0, + "learning_rate": 4.999805619277753e-05, + "loss": 0.9029, + "step": 4069 + }, + { + "epoch": 0.0, + "learning_rate": 4.999805521317214e-05, + "loss": 1.2617, + "step": 4070 + }, + { + "epoch": 0.0, + "learning_rate": 4.999805423331998e-05, + "loss": 0.703, + "step": 4071 + }, + { + "epoch": 0.0, + "learning_rate": 4.999805325322106e-05, + "loss": 1.2638, + "step": 4072 + }, + { + "epoch": 0.0, + "learning_rate": 4.999805227287536e-05, + "loss": 1.2354, + "step": 4073 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998051292282885e-05, + "loss": 1.0022, + "step": 4074 + }, + { + "epoch": 0.0, + "learning_rate": 4.999805031144365e-05, + "loss": 1.1542, + "step": 4075 + }, + { + "epoch": 0.0, + "learning_rate": 4.999804933035764e-05, + "loss": 0.9708, + "step": 4076 + }, + { + "epoch": 0.0, + "learning_rate": 4.999804834902486e-05, + "loss": 1.0911, + "step": 4077 + }, + { + "epoch": 0.0, + "learning_rate": 4.999804736744531e-05, + "loss": 1.4731, + "step": 4078 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998046385619e-05, + "loss": 1.1589, + "step": 4079 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998045403545904e-05, + "loss": 1.025, + "step": 4080 + }, + { + "epoch": 0.0, + "learning_rate": 4.999804442122604e-05, + "loss": 0.9713, + "step": 4081 + }, + { + "epoch": 0.0, + "learning_rate": 4.999804343865941e-05, + "loss": 1.3273, + "step": 4082 + }, + { + "epoch": 0.0, + "learning_rate": 4.999804245584602e-05, + "loss": 1.2429, + "step": 4083 + }, + { + "epoch": 0.0, + "learning_rate": 4.999804147278585e-05, + "loss": 0.9776, + "step": 4084 + }, + { + "epoch": 0.0, + "learning_rate": 4.99980404894789e-05, + "loss": 0.8755, + "step": 4085 + }, + { + "epoch": 0.0, + "learning_rate": 4.99980395059252e-05, + "loss": 1.3534, + "step": 4086 + }, + { + "epoch": 0.0, + "learning_rate": 4.999803852212471e-05, + "loss": 1.581, + "step": 4087 + }, + { + "epoch": 0.0, + "learning_rate": 4.999803753807747e-05, + "loss": 2.4373, + "step": 4088 + }, + { + "epoch": 0.0, + "learning_rate": 4.999803655378344e-05, + "loss": 3.114, + "step": 4089 + }, + { + "epoch": 0.0, + "learning_rate": 4.999803556924265e-05, + "loss": 1.136, + "step": 4090 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998034584455096e-05, + "loss": 1.281, + "step": 4091 + }, + { + "epoch": 0.0, + "learning_rate": 4.999803359942076e-05, + "loss": 1.1828, + "step": 4092 + }, + { + "epoch": 0.0, + "learning_rate": 4.999803261413966e-05, + "loss": 1.0111, + "step": 4093 + }, + { + "epoch": 0.0, + "learning_rate": 4.999803162861179e-05, + "loss": 1.7322, + "step": 4094 + }, + { + "epoch": 0.0, + "learning_rate": 4.999803064283715e-05, + "loss": 1.4425, + "step": 4095 + }, + { + "epoch": 0.0, + "learning_rate": 4.999802965681574e-05, + "loss": 1.3707, + "step": 4096 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998028670547556e-05, + "loss": 1.4249, + "step": 4097 + }, + { + "epoch": 0.0, + "learning_rate": 4.999802768403261e-05, + "loss": 1.0745, + "step": 4098 + }, + { + "epoch": 0.0, + "learning_rate": 4.999802669727089e-05, + "loss": 0.8969, + "step": 4099 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998025710262395e-05, + "loss": 1.0658, + "step": 4100 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998024723007134e-05, + "loss": 0.9743, + "step": 4101 + }, + { + "epoch": 0.0, + "learning_rate": 4.999802373550511e-05, + "loss": 1.1153, + "step": 4102 + }, + { + "epoch": 0.0, + "learning_rate": 4.99980227477563e-05, + "loss": 1.1346, + "step": 4103 + }, + { + "epoch": 0.0, + "learning_rate": 4.999802175976073e-05, + "loss": 0.8344, + "step": 4104 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998020771518394e-05, + "loss": 0.1926, + "step": 4105 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998019783029284e-05, + "loss": 0.2064, + "step": 4106 + }, + { + "epoch": 0.0, + "learning_rate": 4.99980187942934e-05, + "loss": 0.3738, + "step": 4107 + }, + { + "epoch": 0.0, + "learning_rate": 4.999801780531075e-05, + "loss": 0.3192, + "step": 4108 + }, + { + "epoch": 0.0, + "learning_rate": 4.999801681608133e-05, + "loss": 0.3141, + "step": 4109 + }, + { + "epoch": 0.0, + "learning_rate": 4.999801582660514e-05, + "loss": 0.302, + "step": 4110 + }, + { + "epoch": 0.0, + "learning_rate": 4.999801483688218e-05, + "loss": 0.3193, + "step": 4111 + }, + { + "epoch": 0.0, + "learning_rate": 4.999801384691245e-05, + "loss": 0.2444, + "step": 4112 + }, + { + "epoch": 0.0, + "learning_rate": 4.999801285669595e-05, + "loss": 0.2959, + "step": 4113 + }, + { + "epoch": 0.0, + "learning_rate": 4.999801186623268e-05, + "loss": 0.1291, + "step": 4114 + }, + { + "epoch": 0.0, + "learning_rate": 4.999801087552264e-05, + "loss": 0.3326, + "step": 4115 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998009884565834e-05, + "loss": 1.5303, + "step": 4116 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998008893362254e-05, + "loss": 0.9613, + "step": 4117 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998007901911904e-05, + "loss": 1.2479, + "step": 4118 + }, + { + "epoch": 0.0, + "learning_rate": 4.999800691021479e-05, + "loss": 1.2053, + "step": 4119 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998005918270897e-05, + "loss": 0.9582, + "step": 4120 + }, + { + "epoch": 0.0, + "learning_rate": 4.999800492608024e-05, + "loss": 1.4681, + "step": 4121 + }, + { + "epoch": 0.0, + "learning_rate": 4.999800393364281e-05, + "loss": 1.3266, + "step": 4122 + }, + { + "epoch": 0.0, + "learning_rate": 4.999800294095861e-05, + "loss": 1.107, + "step": 4123 + }, + { + "epoch": 0.0, + "learning_rate": 4.999800194802765e-05, + "loss": 1.2981, + "step": 4124 + }, + { + "epoch": 0.0, + "learning_rate": 4.999800095484991e-05, + "loss": 1.0812, + "step": 4125 + }, + { + "epoch": 0.0, + "learning_rate": 4.99979999614254e-05, + "loss": 0.9215, + "step": 4126 + }, + { + "epoch": 0.0, + "learning_rate": 4.999799896775412e-05, + "loss": 1.056, + "step": 4127 + }, + { + "epoch": 0.0, + "learning_rate": 4.999799797383607e-05, + "loss": 0.9254, + "step": 4128 + }, + { + "epoch": 0.0, + "learning_rate": 4.999799697967126e-05, + "loss": 1.2472, + "step": 4129 + }, + { + "epoch": 0.0, + "learning_rate": 4.999799598525967e-05, + "loss": 0.6347, + "step": 4130 + }, + { + "epoch": 0.0, + "learning_rate": 4.999799499060132e-05, + "loss": 0.6053, + "step": 4131 + }, + { + "epoch": 0.0, + "learning_rate": 4.999799399569619e-05, + "loss": 0.8091, + "step": 4132 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997993000544295e-05, + "loss": 1.1355, + "step": 4133 + }, + { + "epoch": 0.0, + "learning_rate": 4.999799200514563e-05, + "loss": 1.5105, + "step": 4134 + }, + { + "epoch": 0.0, + "learning_rate": 4.99979910095002e-05, + "loss": 1.204, + "step": 4135 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997990013607995e-05, + "loss": 1.1201, + "step": 4136 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997989017469026e-05, + "loss": 1.3261, + "step": 4137 + }, + { + "epoch": 0.0, + "learning_rate": 4.999798802108328e-05, + "loss": 1.1437, + "step": 4138 + }, + { + "epoch": 0.0, + "learning_rate": 4.999798702445077e-05, + "loss": 1.2163, + "step": 4139 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997986027571485e-05, + "loss": 1.0872, + "step": 4140 + }, + { + "epoch": 0.0, + "learning_rate": 4.999798503044543e-05, + "loss": 1.0482, + "step": 4141 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997984033072606e-05, + "loss": 1.1133, + "step": 4142 + }, + { + "epoch": 0.0, + "learning_rate": 4.999798303545302e-05, + "loss": 2.1336, + "step": 4143 + }, + { + "epoch": 0.0, + "learning_rate": 4.999798203758666e-05, + "loss": 1.3565, + "step": 4144 + }, + { + "epoch": 0.0, + "learning_rate": 4.999798103947353e-05, + "loss": 0.8247, + "step": 4145 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997980041113634e-05, + "loss": 1.0643, + "step": 4146 + }, + { + "epoch": 0.0, + "learning_rate": 4.999797904250696e-05, + "loss": 1.3419, + "step": 4147 + }, + { + "epoch": 0.0, + "learning_rate": 4.999797804365353e-05, + "loss": 1.2586, + "step": 4148 + }, + { + "epoch": 0.0, + "learning_rate": 4.999797704455331e-05, + "loss": 2.8076, + "step": 4149 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997976045206344e-05, + "loss": 1.7105, + "step": 4150 + }, + { + "epoch": 0.0, + "learning_rate": 4.999797504561259e-05, + "loss": 1.0705, + "step": 4151 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997974045772075e-05, + "loss": 1.1643, + "step": 4152 + }, + { + "epoch": 0.0, + "learning_rate": 4.999797304568479e-05, + "loss": 1.1731, + "step": 4153 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997972045350735e-05, + "loss": 1.028, + "step": 4154 + }, + { + "epoch": 0.0, + "learning_rate": 4.999797104476991e-05, + "loss": 1.0386, + "step": 4155 + }, + { + "epoch": 0.0, + "learning_rate": 4.999797004394231e-05, + "loss": 1.1987, + "step": 4156 + }, + { + "epoch": 0.0, + "learning_rate": 4.999796904286795e-05, + "loss": 1.2542, + "step": 4157 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997968041546814e-05, + "loss": 1.1764, + "step": 4158 + }, + { + "epoch": 0.0, + "learning_rate": 4.999796703997891e-05, + "loss": 1.6205, + "step": 4159 + }, + { + "epoch": 0.0, + "learning_rate": 4.999796603816424e-05, + "loss": 1.5423, + "step": 4160 + }, + { + "epoch": 0.0, + "learning_rate": 4.99979650361028e-05, + "loss": 1.539, + "step": 4161 + }, + { + "epoch": 0.0, + "learning_rate": 4.999796403379459e-05, + "loss": 1.297, + "step": 4162 + }, + { + "epoch": 0.0, + "learning_rate": 4.999796303123961e-05, + "loss": 1.1458, + "step": 4163 + }, + { + "epoch": 0.0, + "learning_rate": 4.999796202843786e-05, + "loss": 1.2349, + "step": 4164 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997961025389336e-05, + "loss": 1.3741, + "step": 4165 + }, + { + "epoch": 0.0, + "learning_rate": 4.999796002209405e-05, + "loss": 1.2188, + "step": 4166 + }, + { + "epoch": 0.0, + "learning_rate": 4.999795901855199e-05, + "loss": 1.3562, + "step": 4167 + }, + { + "epoch": 0.0, + "learning_rate": 4.999795801476317e-05, + "loss": 0.3673, + "step": 4168 + }, + { + "epoch": 0.0, + "learning_rate": 4.999795701072757e-05, + "loss": 0.2597, + "step": 4169 + }, + { + "epoch": 0.0, + "learning_rate": 4.99979560064452e-05, + "loss": 0.6811, + "step": 4170 + }, + { + "epoch": 0.0, + "learning_rate": 4.999795500191607e-05, + "loss": 1.1625, + "step": 4171 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997953997140165e-05, + "loss": 1.1949, + "step": 4172 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997952992117494e-05, + "loss": 1.0916, + "step": 4173 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997951986848045e-05, + "loss": 0.7343, + "step": 4174 + }, + { + "epoch": 0.0, + "learning_rate": 4.999795098133184e-05, + "loss": 0.7105, + "step": 4175 + }, + { + "epoch": 0.0, + "learning_rate": 4.999794997556886e-05, + "loss": 0.6363, + "step": 4176 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997948969559105e-05, + "loss": 0.6748, + "step": 4177 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997947963302585e-05, + "loss": 0.8273, + "step": 4178 + }, + { + "epoch": 0.0, + "learning_rate": 4.999794695679929e-05, + "loss": 1.3885, + "step": 4179 + }, + { + "epoch": 0.0, + "learning_rate": 4.999794595004924e-05, + "loss": 1.1066, + "step": 4180 + }, + { + "epoch": 0.0, + "learning_rate": 4.999794494305241e-05, + "loss": 1.0197, + "step": 4181 + }, + { + "epoch": 0.0, + "learning_rate": 4.999794393580882e-05, + "loss": 1.0707, + "step": 4182 + }, + { + "epoch": 0.0, + "learning_rate": 4.999794292831845e-05, + "loss": 1.0189, + "step": 4183 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997941920581316e-05, + "loss": 1.2503, + "step": 4184 + }, + { + "epoch": 0.0, + "learning_rate": 4.999794091259741e-05, + "loss": 1.1458, + "step": 4185 + }, + { + "epoch": 0.0, + "learning_rate": 4.999793990436674e-05, + "loss": 1.1863, + "step": 4186 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997938895889294e-05, + "loss": 1.2241, + "step": 4187 + }, + { + "epoch": 0.0, + "learning_rate": 4.999793788716508e-05, + "loss": 0.8661, + "step": 4188 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997936878194107e-05, + "loss": 0.5257, + "step": 4189 + }, + { + "epoch": 0.0, + "learning_rate": 4.999793586897635e-05, + "loss": 0.9842, + "step": 4190 + }, + { + "epoch": 0.0, + "learning_rate": 4.999793485951184e-05, + "loss": 1.1282, + "step": 4191 + }, + { + "epoch": 0.0, + "learning_rate": 4.999793384980055e-05, + "loss": 1.2302, + "step": 4192 + }, + { + "epoch": 0.0, + "learning_rate": 4.999793283984249e-05, + "loss": 1.2022, + "step": 4193 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997931829637665e-05, + "loss": 1.1716, + "step": 4194 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997930819186075e-05, + "loss": 1.0513, + "step": 4195 + }, + { + "epoch": 0.0, + "learning_rate": 4.999792980848771e-05, + "loss": 1.0939, + "step": 4196 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997928797542575e-05, + "loss": 1.15, + "step": 4197 + }, + { + "epoch": 0.0, + "learning_rate": 4.999792778635067e-05, + "loss": 1.2474, + "step": 4198 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997926774912004e-05, + "loss": 1.2844, + "step": 4199 + }, + { + "epoch": 0.0, + "learning_rate": 4.999792576322656e-05, + "loss": 1.1551, + "step": 4200 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997924751294355e-05, + "loss": 1.2846, + "step": 4201 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997923739115374e-05, + "loss": 0.9903, + "step": 4202 + }, + { + "epoch": 0.0, + "learning_rate": 4.999792272668963e-05, + "loss": 1.1966, + "step": 4203 + }, + { + "epoch": 0.0, + "learning_rate": 4.999792171401712e-05, + "loss": 1.275, + "step": 4204 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997920701097836e-05, + "loss": 1.0189, + "step": 4205 + }, + { + "epoch": 0.0, + "learning_rate": 4.999791968793178e-05, + "loss": 1.1909, + "step": 4206 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997918674518953e-05, + "loss": 1.2318, + "step": 4207 + }, + { + "epoch": 0.0, + "learning_rate": 4.999791766085936e-05, + "loss": 1.2186, + "step": 4208 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997916646953e-05, + "loss": 0.8141, + "step": 4209 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997915632799876e-05, + "loss": 1.029, + "step": 4210 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997914618399974e-05, + "loss": 1.1777, + "step": 4211 + }, + { + "epoch": 0.0, + "learning_rate": 4.999791360375331e-05, + "loss": 1.1809, + "step": 4212 + }, + { + "epoch": 0.0, + "learning_rate": 4.999791258885988e-05, + "loss": 1.2198, + "step": 4213 + }, + { + "epoch": 0.0, + "learning_rate": 4.999791157371967e-05, + "loss": 1.2193, + "step": 4214 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997910558332696e-05, + "loss": 0.9905, + "step": 4215 + }, + { + "epoch": 0.0, + "learning_rate": 4.999790954269895e-05, + "loss": 1.2994, + "step": 4216 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997908526818436e-05, + "loss": 1.0537, + "step": 4217 + }, + { + "epoch": 0.0, + "learning_rate": 4.999790751069116e-05, + "loss": 1.5454, + "step": 4218 + }, + { + "epoch": 0.0, + "learning_rate": 4.999790649431711e-05, + "loss": 1.1544, + "step": 4219 + }, + { + "epoch": 0.0, + "learning_rate": 4.99979054776963e-05, + "loss": 0.9599, + "step": 4220 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997904460828704e-05, + "loss": 1.0451, + "step": 4221 + }, + { + "epoch": 0.0, + "learning_rate": 4.999790344371435e-05, + "loss": 1.054, + "step": 4222 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997902426353224e-05, + "loss": 1.161, + "step": 4223 + }, + { + "epoch": 0.0, + "learning_rate": 4.999790140874534e-05, + "loss": 0.8822, + "step": 4224 + }, + { + "epoch": 0.0, + "learning_rate": 4.999790039089067e-05, + "loss": 1.0428, + "step": 4225 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997899372789244e-05, + "loss": 1.132, + "step": 4226 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997898354441044e-05, + "loss": 1.0825, + "step": 4227 + }, + { + "epoch": 0.0, + "learning_rate": 4.999789733584608e-05, + "loss": 1.1674, + "step": 4228 + }, + { + "epoch": 0.0, + "learning_rate": 4.999789631700434e-05, + "loss": 1.1649, + "step": 4229 + }, + { + "epoch": 0.0, + "learning_rate": 4.999789529791583e-05, + "loss": 0.691, + "step": 4230 + }, + { + "epoch": 0.0, + "learning_rate": 4.999789427858056e-05, + "loss": 1.1995, + "step": 4231 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997893258998517e-05, + "loss": 0.9215, + "step": 4232 + }, + { + "epoch": 0.0, + "learning_rate": 4.999789223916971e-05, + "loss": 1.2847, + "step": 4233 + }, + { + "epoch": 0.0, + "learning_rate": 4.999789121909413e-05, + "loss": 1.1242, + "step": 4234 + }, + { + "epoch": 0.0, + "learning_rate": 4.999789019877178e-05, + "loss": 0.6223, + "step": 4235 + }, + { + "epoch": 0.0, + "learning_rate": 4.999788917820266e-05, + "loss": 0.8613, + "step": 4236 + }, + { + "epoch": 0.0, + "learning_rate": 4.999788815738678e-05, + "loss": 0.9802, + "step": 4237 + }, + { + "epoch": 0.0, + "learning_rate": 4.999788713632412e-05, + "loss": 1.3161, + "step": 4238 + }, + { + "epoch": 0.0, + "learning_rate": 4.99978861150147e-05, + "loss": 1.0592, + "step": 4239 + }, + { + "epoch": 0.0, + "learning_rate": 4.999788509345851e-05, + "loss": 1.2151, + "step": 4240 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997884071655546e-05, + "loss": 1.0687, + "step": 4241 + }, + { + "epoch": 0.0, + "learning_rate": 4.999788304960582e-05, + "loss": 1.2194, + "step": 4242 + }, + { + "epoch": 0.0, + "learning_rate": 4.999788202730932e-05, + "loss": 1.1696, + "step": 4243 + }, + { + "epoch": 0.0, + "learning_rate": 4.999788100476606e-05, + "loss": 0.9879, + "step": 4244 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997879981976016e-05, + "loss": 0.6206, + "step": 4245 + }, + { + "epoch": 0.0, + "learning_rate": 4.999787895893922e-05, + "loss": 1.3536, + "step": 4246 + }, + { + "epoch": 0.0, + "learning_rate": 4.999787793565565e-05, + "loss": 1.2582, + "step": 4247 + }, + { + "epoch": 0.0, + "learning_rate": 4.999787691212531e-05, + "loss": 1.3572, + "step": 4248 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997875888348195e-05, + "loss": 1.0565, + "step": 4249 + }, + { + "epoch": 0.0, + "learning_rate": 4.999787486432432e-05, + "loss": 1.3061, + "step": 4250 + }, + { + "epoch": 0.0, + "learning_rate": 4.999787384005367e-05, + "loss": 1.1021, + "step": 4251 + }, + { + "epoch": 0.0, + "learning_rate": 4.999787281553626e-05, + "loss": 0.7918, + "step": 4252 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997871790772075e-05, + "loss": 1.0945, + "step": 4253 + }, + { + "epoch": 0.0, + "learning_rate": 4.999787076576112e-05, + "loss": 1.3962, + "step": 4254 + }, + { + "epoch": 0.0, + "learning_rate": 4.999786974050341e-05, + "loss": 1.0041, + "step": 4255 + }, + { + "epoch": 0.0, + "learning_rate": 4.999786871499892e-05, + "loss": 0.9295, + "step": 4256 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997867689247664e-05, + "loss": 1.2329, + "step": 4257 + }, + { + "epoch": 0.0, + "learning_rate": 4.999786666324964e-05, + "loss": 1.1603, + "step": 4258 + }, + { + "epoch": 0.0, + "learning_rate": 4.999786563700485e-05, + "loss": 1.1955, + "step": 4259 + }, + { + "epoch": 0.0, + "learning_rate": 4.999786461051329e-05, + "loss": 1.642, + "step": 4260 + }, + { + "epoch": 0.0, + "learning_rate": 4.999786358377495e-05, + "loss": 1.9116, + "step": 4261 + }, + { + "epoch": 0.0, + "learning_rate": 4.999786255678985e-05, + "loss": 1.9218, + "step": 4262 + }, + { + "epoch": 0.0, + "learning_rate": 4.999786152955799e-05, + "loss": 1.4183, + "step": 4263 + }, + { + "epoch": 0.0, + "learning_rate": 4.999786050207935e-05, + "loss": 1.0229, + "step": 4264 + }, + { + "epoch": 0.0, + "learning_rate": 4.999785947435395e-05, + "loss": 0.7571, + "step": 4265 + }, + { + "epoch": 0.0, + "learning_rate": 4.999785844638178e-05, + "loss": 0.6904, + "step": 4266 + }, + { + "epoch": 0.0, + "learning_rate": 4.999785741816284e-05, + "loss": 1.4207, + "step": 4267 + }, + { + "epoch": 0.0, + "learning_rate": 4.999785638969713e-05, + "loss": 0.8324, + "step": 4268 + }, + { + "epoch": 0.0, + "learning_rate": 4.999785536098466e-05, + "loss": 1.1555, + "step": 4269 + }, + { + "epoch": 0.0, + "learning_rate": 4.999785433202541e-05, + "loss": 1.3096, + "step": 4270 + }, + { + "epoch": 0.0, + "learning_rate": 4.99978533028194e-05, + "loss": 0.7022, + "step": 4271 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997852273366616e-05, + "loss": 0.5015, + "step": 4272 + }, + { + "epoch": 0.0, + "learning_rate": 4.999785124366706e-05, + "loss": 0.6592, + "step": 4273 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997850213720745e-05, + "loss": 0.8913, + "step": 4274 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997849183527664e-05, + "loss": 1.3936, + "step": 4275 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997848153087804e-05, + "loss": 1.2355, + "step": 4276 + }, + { + "epoch": 0.0, + "learning_rate": 4.999784712240119e-05, + "loss": 0.8601, + "step": 4277 + }, + { + "epoch": 0.0, + "learning_rate": 4.999784609146779e-05, + "loss": 1.0973, + "step": 4278 + }, + { + "epoch": 0.0, + "learning_rate": 4.999784506028763e-05, + "loss": 0.974, + "step": 4279 + }, + { + "epoch": 0.0, + "learning_rate": 4.999784402886071e-05, + "loss": 1.2342, + "step": 4280 + }, + { + "epoch": 0.0, + "learning_rate": 4.999784299718701e-05, + "loss": 1.1617, + "step": 4281 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997841965266546e-05, + "loss": 1.1919, + "step": 4282 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997840933099315e-05, + "loss": 1.281, + "step": 4283 + }, + { + "epoch": 0.0, + "learning_rate": 4.999783990068532e-05, + "loss": 1.119, + "step": 4284 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997838868024546e-05, + "loss": 1.3177, + "step": 4285 + }, + { + "epoch": 0.0, + "learning_rate": 4.999783783511701e-05, + "loss": 0.8571, + "step": 4286 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997836801962706e-05, + "loss": 0.7544, + "step": 4287 + }, + { + "epoch": 0.0, + "learning_rate": 4.999783576856163e-05, + "loss": 0.5614, + "step": 4288 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997834734913795e-05, + "loss": 1.0438, + "step": 4289 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997833701019186e-05, + "loss": 1.2406, + "step": 4290 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997832666877806e-05, + "loss": 1.1987, + "step": 4291 + }, + { + "epoch": 0.0, + "learning_rate": 4.999783163248967e-05, + "loss": 1.4006, + "step": 4292 + }, + { + "epoch": 0.0, + "learning_rate": 4.999783059785475e-05, + "loss": 0.9797, + "step": 4293 + }, + { + "epoch": 0.0, + "learning_rate": 4.999782956297307e-05, + "loss": 1.0625, + "step": 4294 + }, + { + "epoch": 0.0, + "learning_rate": 4.999782852784463e-05, + "loss": 1.2627, + "step": 4295 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997827492469406e-05, + "loss": 1.0369, + "step": 4296 + }, + { + "epoch": 0.0, + "learning_rate": 4.999782645684742e-05, + "loss": 1.4083, + "step": 4297 + }, + { + "epoch": 0.0, + "learning_rate": 4.999782542097867e-05, + "loss": 1.428, + "step": 4298 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997824384863145e-05, + "loss": 1.4295, + "step": 4299 + }, + { + "epoch": 0.0, + "learning_rate": 4.999782334850086e-05, + "loss": 1.0226, + "step": 4300 + }, + { + "epoch": 0.0, + "learning_rate": 4.99978223118918e-05, + "loss": 1.0865, + "step": 4301 + }, + { + "epoch": 0.0, + "learning_rate": 4.999782127503597e-05, + "loss": 1.0492, + "step": 4302 + }, + { + "epoch": 0.0, + "learning_rate": 4.999782023793338e-05, + "loss": 1.5096, + "step": 4303 + }, + { + "epoch": 0.0, + "learning_rate": 4.999781920058402e-05, + "loss": 1.2298, + "step": 4304 + }, + { + "epoch": 0.0, + "learning_rate": 4.999781816298789e-05, + "loss": 1.4012, + "step": 4305 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997817125144995e-05, + "loss": 1.0163, + "step": 4306 + }, + { + "epoch": 0.0, + "learning_rate": 4.999781608705533e-05, + "loss": 0.8868, + "step": 4307 + }, + { + "epoch": 0.0, + "learning_rate": 4.99978150487189e-05, + "loss": 1.0451, + "step": 4308 + }, + { + "epoch": 0.0, + "learning_rate": 4.99978140101357e-05, + "loss": 1.1224, + "step": 4309 + }, + { + "epoch": 0.0, + "learning_rate": 4.999781297130573e-05, + "loss": 0.59, + "step": 4310 + }, + { + "epoch": 0.0, + "learning_rate": 4.999781193222899e-05, + "loss": 0.5647, + "step": 4311 + }, + { + "epoch": 0.0, + "learning_rate": 4.999781089290549e-05, + "loss": 2.8877, + "step": 4312 + }, + { + "epoch": 0.0, + "learning_rate": 4.999780985333522e-05, + "loss": 2.3249, + "step": 4313 + }, + { + "epoch": 0.0, + "learning_rate": 4.999780881351818e-05, + "loss": 0.5768, + "step": 4314 + }, + { + "epoch": 0.0, + "learning_rate": 4.999780777345438e-05, + "loss": 0.9765, + "step": 4315 + }, + { + "epoch": 0.0, + "learning_rate": 4.999780673314379e-05, + "loss": 1.2209, + "step": 4316 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997805692586455e-05, + "loss": 1.1851, + "step": 4317 + }, + { + "epoch": 0.0, + "learning_rate": 4.999780465178234e-05, + "loss": 1.0895, + "step": 4318 + }, + { + "epoch": 0.0, + "learning_rate": 4.999780361073146e-05, + "loss": 1.0949, + "step": 4319 + }, + { + "epoch": 0.0, + "learning_rate": 4.999780256943381e-05, + "loss": 1.3861, + "step": 4320 + }, + { + "epoch": 0.0, + "learning_rate": 4.99978015278894e-05, + "loss": 1.0276, + "step": 4321 + }, + { + "epoch": 0.0, + "learning_rate": 4.999780048609822e-05, + "loss": 1.2675, + "step": 4322 + }, + { + "epoch": 0.0, + "learning_rate": 4.999779944406027e-05, + "loss": 1.2956, + "step": 4323 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997798401775556e-05, + "loss": 1.1, + "step": 4324 + }, + { + "epoch": 0.0, + "learning_rate": 4.999779735924407e-05, + "loss": 1.1209, + "step": 4325 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997796316465814e-05, + "loss": 0.9514, + "step": 4326 + }, + { + "epoch": 0.0, + "learning_rate": 4.999779527344079e-05, + "loss": 1.148, + "step": 4327 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997794230169e-05, + "loss": 0.991, + "step": 4328 + }, + { + "epoch": 0.0, + "learning_rate": 4.999779318665045e-05, + "loss": 1.1714, + "step": 4329 + }, + { + "epoch": 0.0, + "learning_rate": 4.999779214288512e-05, + "loss": 1.024, + "step": 4330 + }, + { + "epoch": 0.0, + "learning_rate": 4.999779109887304e-05, + "loss": 1.0824, + "step": 4331 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997790054614174e-05, + "loss": 1.2253, + "step": 4332 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997789010108546e-05, + "loss": 0.9916, + "step": 4333 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997787965356154e-05, + "loss": 1.2658, + "step": 4334 + }, + { + "epoch": 0.0, + "learning_rate": 4.999778692035699e-05, + "loss": 1.0458, + "step": 4335 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997785875111056e-05, + "loss": 1.299, + "step": 4336 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997784829618364e-05, + "loss": 0.933, + "step": 4337 + }, + { + "epoch": 0.0, + "learning_rate": 4.99977837838789e-05, + "loss": 1.2263, + "step": 4338 + }, + { + "epoch": 0.0, + "learning_rate": 4.999778273789266e-05, + "loss": 1.5378, + "step": 4339 + }, + { + "epoch": 0.0, + "learning_rate": 4.999778169165966e-05, + "loss": 1.2621, + "step": 4340 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997780645179896e-05, + "loss": 1.4907, + "step": 4341 + }, + { + "epoch": 0.0, + "learning_rate": 4.999777959845336e-05, + "loss": 0.6645, + "step": 4342 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997778551480056e-05, + "loss": 0.8306, + "step": 4343 + }, + { + "epoch": 0.0, + "learning_rate": 4.999777750425999e-05, + "loss": 0.6357, + "step": 4344 + }, + { + "epoch": 0.0, + "learning_rate": 4.999777645679316e-05, + "loss": 0.7529, + "step": 4345 + }, + { + "epoch": 0.0, + "learning_rate": 4.999777540907955e-05, + "loss": 0.9319, + "step": 4346 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997774361119175e-05, + "loss": 0.7715, + "step": 4347 + }, + { + "epoch": 0.0, + "learning_rate": 4.999777331291203e-05, + "loss": 0.8799, + "step": 4348 + }, + { + "epoch": 0.0, + "learning_rate": 4.999777226445813e-05, + "loss": 0.8824, + "step": 4349 + }, + { + "epoch": 0.0, + "learning_rate": 4.999777121575745e-05, + "loss": 1.3207, + "step": 4350 + }, + { + "epoch": 0.0, + "learning_rate": 4.999777016681001e-05, + "loss": 1.0959, + "step": 4351 + }, + { + "epoch": 0.0, + "learning_rate": 4.99977691176158e-05, + "loss": 0.6392, + "step": 4352 + }, + { + "epoch": 0.0, + "learning_rate": 4.999776806817482e-05, + "loss": 1.5281, + "step": 4353 + }, + { + "epoch": 0.0, + "learning_rate": 4.999776701848707e-05, + "loss": 1.1428, + "step": 4354 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997765968552565e-05, + "loss": 1.2032, + "step": 4355 + }, + { + "epoch": 0.0, + "learning_rate": 4.999776491837128e-05, + "loss": 1.1605, + "step": 4356 + }, + { + "epoch": 0.0, + "learning_rate": 4.999776386794324e-05, + "loss": 1.1478, + "step": 4357 + }, + { + "epoch": 0.0, + "learning_rate": 4.999776281726842e-05, + "loss": 1.1389, + "step": 4358 + }, + { + "epoch": 0.0, + "learning_rate": 4.999776176634684e-05, + "loss": 1.0784, + "step": 4359 + }, + { + "epoch": 0.0, + "learning_rate": 4.999776071517849e-05, + "loss": 1.0957, + "step": 4360 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997759663763375e-05, + "loss": 1.1626, + "step": 4361 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997758612101484e-05, + "loss": 1.1153, + "step": 4362 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997757560192836e-05, + "loss": 1.3685, + "step": 4363 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997756508037416e-05, + "loss": 1.077, + "step": 4364 + }, + { + "epoch": 0.0, + "learning_rate": 4.999775545563523e-05, + "loss": 1.1683, + "step": 4365 + }, + { + "epoch": 0.0, + "learning_rate": 4.999775440298628e-05, + "loss": 1.2724, + "step": 4366 + }, + { + "epoch": 0.0, + "learning_rate": 4.999775335009056e-05, + "loss": 1.0369, + "step": 4367 + }, + { + "epoch": 0.0, + "learning_rate": 4.999775229694807e-05, + "loss": 0.9634, + "step": 4368 + }, + { + "epoch": 0.0, + "learning_rate": 4.999775124355881e-05, + "loss": 1.1885, + "step": 4369 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997750189922785e-05, + "loss": 1.1612, + "step": 4370 + }, + { + "epoch": 0.0, + "learning_rate": 4.999774913604e-05, + "loss": 1.0395, + "step": 4371 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997748081910446e-05, + "loss": 0.9069, + "step": 4372 + }, + { + "epoch": 0.0, + "learning_rate": 4.999774702753412e-05, + "loss": 1.0056, + "step": 4373 + }, + { + "epoch": 0.0, + "learning_rate": 4.999774597291102e-05, + "loss": 1.2466, + "step": 4374 + }, + { + "epoch": 0.0, + "learning_rate": 4.999774491804117e-05, + "loss": 0.7215, + "step": 4375 + }, + { + "epoch": 0.0, + "learning_rate": 4.999774386292454e-05, + "loss": 1.1564, + "step": 4376 + }, + { + "epoch": 0.0, + "learning_rate": 4.999774280756116e-05, + "loss": 1.313, + "step": 4377 + }, + { + "epoch": 0.0, + "learning_rate": 4.999774175195099e-05, + "loss": 1.1211, + "step": 4378 + }, + { + "epoch": 0.0, + "learning_rate": 4.999774069609406e-05, + "loss": 0.9678, + "step": 4379 + }, + { + "epoch": 0.0, + "learning_rate": 4.999773963999037e-05, + "loss": 1.1849, + "step": 4380 + }, + { + "epoch": 0.0, + "learning_rate": 4.999773858363991e-05, + "loss": 0.7452, + "step": 4381 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997737527042675e-05, + "loss": 1.5299, + "step": 4382 + }, + { + "epoch": 0.0, + "learning_rate": 4.999773647019868e-05, + "loss": 0.9208, + "step": 4383 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997735413107916e-05, + "loss": 0.8891, + "step": 4384 + }, + { + "epoch": 0.0, + "learning_rate": 4.999773435577039e-05, + "loss": 1.143, + "step": 4385 + }, + { + "epoch": 0.0, + "learning_rate": 4.999773329818609e-05, + "loss": 1.2671, + "step": 4386 + }, + { + "epoch": 0.0, + "learning_rate": 4.999773224035502e-05, + "loss": 1.1009, + "step": 4387 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997731182277196e-05, + "loss": 1.114, + "step": 4388 + }, + { + "epoch": 0.0, + "learning_rate": 4.999773012395259e-05, + "loss": 1.0124, + "step": 4389 + }, + { + "epoch": 0.0, + "learning_rate": 4.999772906538123e-05, + "loss": 1.0004, + "step": 4390 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997728006563097e-05, + "loss": 1.1445, + "step": 4391 + }, + { + "epoch": 0.0, + "learning_rate": 4.99977269474982e-05, + "loss": 1.0378, + "step": 4392 + }, + { + "epoch": 0.0, + "learning_rate": 4.999772588818653e-05, + "loss": 1.1042, + "step": 4393 + }, + { + "epoch": 0.0, + "learning_rate": 4.99977248286281e-05, + "loss": 1.2522, + "step": 4394 + }, + { + "epoch": 0.0, + "learning_rate": 4.99977237688229e-05, + "loss": 1.1237, + "step": 4395 + }, + { + "epoch": 0.0, + "learning_rate": 4.999772270877093e-05, + "loss": 1.1444, + "step": 4396 + }, + { + "epoch": 0.0, + "learning_rate": 4.99977216484722e-05, + "loss": 1.9499, + "step": 4397 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997720587926686e-05, + "loss": 3.5417, + "step": 4398 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997719527134423e-05, + "loss": 1.2548, + "step": 4399 + }, + { + "epoch": 0.0, + "learning_rate": 4.999771846609539e-05, + "loss": 1.3411, + "step": 4400 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997717404809586e-05, + "loss": 1.3086, + "step": 4401 + }, + { + "epoch": 0.0, + "learning_rate": 4.999771634327701e-05, + "loss": 1.2577, + "step": 4402 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997715281497676e-05, + "loss": 1.2029, + "step": 4403 + }, + { + "epoch": 0.0, + "learning_rate": 4.999771421947157e-05, + "loss": 1.2334, + "step": 4404 + }, + { + "epoch": 0.0, + "learning_rate": 4.99977131571987e-05, + "loss": 1.2065, + "step": 4405 + }, + { + "epoch": 0.0, + "learning_rate": 4.999771209467907e-05, + "loss": 1.1416, + "step": 4406 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997711031912665e-05, + "loss": 1.1523, + "step": 4407 + }, + { + "epoch": 0.0, + "learning_rate": 4.999770996889949e-05, + "loss": 1.1319, + "step": 4408 + }, + { + "epoch": 0.0, + "learning_rate": 4.999770890563955e-05, + "loss": 1.4375, + "step": 4409 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997707842132845e-05, + "loss": 1.2708, + "step": 4410 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997706778379376e-05, + "loss": 1.4632, + "step": 4411 + }, + { + "epoch": 0.0, + "learning_rate": 4.999770571437914e-05, + "loss": 1.3789, + "step": 4412 + }, + { + "epoch": 0.0, + "learning_rate": 4.999770465013213e-05, + "loss": 1.0032, + "step": 4413 + }, + { + "epoch": 0.0, + "learning_rate": 4.999770358563836e-05, + "loss": 0.7754, + "step": 4414 + }, + { + "epoch": 0.0, + "learning_rate": 4.999770252089782e-05, + "loss": 1.0801, + "step": 4415 + }, + { + "epoch": 0.0, + "learning_rate": 4.999770145591052e-05, + "loss": 1.0647, + "step": 4416 + }, + { + "epoch": 0.0, + "learning_rate": 4.999770039067644e-05, + "loss": 1.2998, + "step": 4417 + }, + { + "epoch": 0.0, + "learning_rate": 4.99976993251956e-05, + "loss": 1.0556, + "step": 4418 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997698259467996e-05, + "loss": 1.1464, + "step": 4419 + }, + { + "epoch": 0.0, + "learning_rate": 4.999769719349363e-05, + "loss": 1.1842, + "step": 4420 + }, + { + "epoch": 0.0, + "learning_rate": 4.999769612727249e-05, + "loss": 1.059, + "step": 4421 + }, + { + "epoch": 0.0, + "learning_rate": 4.999769506080458e-05, + "loss": 1.0721, + "step": 4422 + }, + { + "epoch": 0.0, + "learning_rate": 4.999769399408991e-05, + "loss": 1.0131, + "step": 4423 + }, + { + "epoch": 0.0, + "learning_rate": 4.999769292712847e-05, + "loss": 1.7383, + "step": 4424 + }, + { + "epoch": 0.0, + "learning_rate": 4.999769185992026e-05, + "loss": 1.2293, + "step": 4425 + }, + { + "epoch": 0.0, + "learning_rate": 4.999769079246529e-05, + "loss": 0.9657, + "step": 4426 + }, + { + "epoch": 0.0, + "learning_rate": 4.999768972476355e-05, + "loss": 1.0584, + "step": 4427 + }, + { + "epoch": 0.0, + "learning_rate": 4.999768865681505e-05, + "loss": 1.2185, + "step": 4428 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997687588619765e-05, + "loss": 1.156, + "step": 4429 + }, + { + "epoch": 0.0, + "learning_rate": 4.999768652017773e-05, + "loss": 1.4052, + "step": 4430 + }, + { + "epoch": 0.0, + "learning_rate": 4.999768545148893e-05, + "loss": 1.2739, + "step": 4431 + }, + { + "epoch": 0.0, + "learning_rate": 4.999768438255336e-05, + "loss": 1.1099, + "step": 4432 + }, + { + "epoch": 0.0, + "learning_rate": 4.999768331337101e-05, + "loss": 1.3071, + "step": 4433 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997682243941906e-05, + "loss": 0.9026, + "step": 4434 + }, + { + "epoch": 0.0, + "learning_rate": 4.999768117426603e-05, + "loss": 1.1763, + "step": 4435 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997680104343396e-05, + "loss": 0.6924, + "step": 4436 + }, + { + "epoch": 0.0, + "learning_rate": 4.999767903417399e-05, + "loss": 1.1063, + "step": 4437 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997677963757815e-05, + "loss": 6.2614, + "step": 4438 + }, + { + "epoch": 0.0, + "learning_rate": 4.999767689309488e-05, + "loss": 6.0042, + "step": 4439 + }, + { + "epoch": 0.0, + "learning_rate": 4.999767582218518e-05, + "loss": 5.9976, + "step": 4440 + }, + { + "epoch": 0.0, + "learning_rate": 4.99976747510287e-05, + "loss": 5.9647, + "step": 4441 + }, + { + "epoch": 0.0, + "learning_rate": 4.999767367962547e-05, + "loss": 5.9497, + "step": 4442 + }, + { + "epoch": 0.0, + "learning_rate": 4.999767260797546e-05, + "loss": 5.9563, + "step": 4443 + }, + { + "epoch": 0.0, + "learning_rate": 4.999767153607869e-05, + "loss": 5.8703, + "step": 4444 + }, + { + "epoch": 0.0, + "learning_rate": 4.999767046393515e-05, + "loss": 5.8784, + "step": 4445 + }, + { + "epoch": 0.0, + "learning_rate": 4.999766939154484e-05, + "loss": 5.8673, + "step": 4446 + }, + { + "epoch": 0.0, + "learning_rate": 4.999766831890777e-05, + "loss": 5.9005, + "step": 4447 + }, + { + "epoch": 0.0, + "learning_rate": 4.999766724602394e-05, + "loss": 4.7112, + "step": 4448 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997666172893334e-05, + "loss": 1.3249, + "step": 4449 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997665099515965e-05, + "loss": 1.5641, + "step": 4450 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997664025891824e-05, + "loss": 1.8465, + "step": 4451 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997662952020926e-05, + "loss": 1.9022, + "step": 4452 + }, + { + "epoch": 0.0, + "learning_rate": 4.999766187790326e-05, + "loss": 1.2422, + "step": 4453 + }, + { + "epoch": 0.0, + "learning_rate": 4.999766080353882e-05, + "loss": 1.1355, + "step": 4454 + }, + { + "epoch": 0.0, + "learning_rate": 4.999765972892762e-05, + "loss": 1.0728, + "step": 4455 + }, + { + "epoch": 0.0, + "learning_rate": 4.999765865406965e-05, + "loss": 0.7308, + "step": 4456 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997657578964915e-05, + "loss": 1.221, + "step": 4457 + }, + { + "epoch": 0.0, + "learning_rate": 4.999765650361341e-05, + "loss": 1.0683, + "step": 4458 + }, + { + "epoch": 0.0, + "learning_rate": 4.999765542801514e-05, + "loss": 1.0572, + "step": 4459 + }, + { + "epoch": 0.0, + "learning_rate": 4.999765435217011e-05, + "loss": 1.3279, + "step": 4460 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997653276078316e-05, + "loss": 1.1065, + "step": 4461 + }, + { + "epoch": 0.0, + "learning_rate": 4.999765219973975e-05, + "loss": 1.2609, + "step": 4462 + }, + { + "epoch": 0.0, + "learning_rate": 4.999765112315441e-05, + "loss": 1.0018, + "step": 4463 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997650046322315e-05, + "loss": 1.2095, + "step": 4464 + }, + { + "epoch": 0.0, + "learning_rate": 4.999764896924345e-05, + "loss": 1.0187, + "step": 4465 + }, + { + "epoch": 0.0, + "learning_rate": 4.999764789191782e-05, + "loss": 1.28, + "step": 4466 + }, + { + "epoch": 0.0, + "learning_rate": 4.999764681434542e-05, + "loss": 1.14, + "step": 4467 + }, + { + "epoch": 0.0, + "learning_rate": 4.999764573652626e-05, + "loss": 0.8774, + "step": 4468 + }, + { + "epoch": 0.0, + "learning_rate": 4.999764465846033e-05, + "loss": 1.1061, + "step": 4469 + }, + { + "epoch": 0.0, + "learning_rate": 4.999764358014763e-05, + "loss": 1.3555, + "step": 4470 + }, + { + "epoch": 0.0, + "learning_rate": 4.999764250158817e-05, + "loss": 1.042, + "step": 4471 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997641422781936e-05, + "loss": 1.2689, + "step": 4472 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997640343728945e-05, + "loss": 1.1008, + "step": 4473 + }, + { + "epoch": 0.0, + "learning_rate": 4.999763926442919e-05, + "loss": 1.4017, + "step": 4474 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997638184882655e-05, + "loss": 0.9575, + "step": 4475 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997637105089364e-05, + "loss": 1.3155, + "step": 4476 + }, + { + "epoch": 0.0, + "learning_rate": 4.99976360250493e-05, + "loss": 1.0595, + "step": 4477 + }, + { + "epoch": 0.0, + "learning_rate": 4.999763494476248e-05, + "loss": 1.228, + "step": 4478 + }, + { + "epoch": 0.0, + "learning_rate": 4.999763386422889e-05, + "loss": 0.9576, + "step": 4479 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997632783448527e-05, + "loss": 1.1748, + "step": 4480 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997631702421406e-05, + "loss": 1.2856, + "step": 4481 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997630621147515e-05, + "loss": 1.1432, + "step": 4482 + }, + { + "epoch": 0.0, + "learning_rate": 4.999762953962686e-05, + "loss": 1.0681, + "step": 4483 + }, + { + "epoch": 0.0, + "learning_rate": 4.999762845785944e-05, + "loss": 1.021, + "step": 4484 + }, + { + "epoch": 0.0, + "learning_rate": 4.999762737584525e-05, + "loss": 1.1034, + "step": 4485 + }, + { + "epoch": 0.0, + "learning_rate": 4.999762629358429e-05, + "loss": 0.9276, + "step": 4486 + }, + { + "epoch": 0.0, + "learning_rate": 4.999762521107657e-05, + "loss": 0.9006, + "step": 4487 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997624128322086e-05, + "loss": 0.8601, + "step": 4488 + }, + { + "epoch": 0.0, + "learning_rate": 4.999762304532083e-05, + "loss": 1.2345, + "step": 4489 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997621962072816e-05, + "loss": 1.0603, + "step": 4490 + }, + { + "epoch": 0.0, + "learning_rate": 4.999762087857803e-05, + "loss": 1.2632, + "step": 4491 + }, + { + "epoch": 0.0, + "learning_rate": 4.999761979483648e-05, + "loss": 1.0961, + "step": 4492 + }, + { + "epoch": 0.0, + "learning_rate": 4.999761871084816e-05, + "loss": 1.4085, + "step": 4493 + }, + { + "epoch": 0.0, + "learning_rate": 4.999761762661308e-05, + "loss": 0.9822, + "step": 4494 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997616542131235e-05, + "loss": 0.8745, + "step": 4495 + }, + { + "epoch": 0.0, + "learning_rate": 4.999761545740262e-05, + "loss": 1.2333, + "step": 4496 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997614372427236e-05, + "loss": 1.2191, + "step": 4497 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997613287205094e-05, + "loss": 1.3622, + "step": 4498 + }, + { + "epoch": 0.0, + "learning_rate": 4.999761220173618e-05, + "loss": 1.6796, + "step": 4499 + }, + { + "epoch": 0.0, + "learning_rate": 4.99976111160205e-05, + "loss": 1.0056, + "step": 4500 + }, + { + "epoch": 0.0, + "eval_loss": 1.0784711837768555, + "eval_runtime": 85.0826, + "eval_samples_per_second": 16.278, + "eval_steps_per_second": 4.078, + "step": 4500 + }, + { + "epoch": 0.0, + "learning_rate": 4.999761003005806e-05, + "loss": 1.3483, + "step": 4501 + }, + { + "epoch": 0.0, + "learning_rate": 4.999760894384885e-05, + "loss": 1.1076, + "step": 4502 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997607857392875e-05, + "loss": 0.3563, + "step": 4503 + }, + { + "epoch": 0.0, + "learning_rate": 4.999760677069013e-05, + "loss": 0.8917, + "step": 4504 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997605683740625e-05, + "loss": 1.2865, + "step": 4505 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997604596544354e-05, + "loss": 1.0649, + "step": 4506 + }, + { + "epoch": 0.0, + "learning_rate": 4.999760350910132e-05, + "loss": 0.8918, + "step": 4507 + }, + { + "epoch": 0.0, + "learning_rate": 4.999760242141151e-05, + "loss": 1.1132, + "step": 4508 + }, + { + "epoch": 0.0, + "learning_rate": 4.999760133347494e-05, + "loss": 1.0425, + "step": 4509 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997600245291604e-05, + "loss": 1.1745, + "step": 4510 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997599156861504e-05, + "loss": 0.932, + "step": 4511 + }, + { + "epoch": 0.0, + "learning_rate": 4.999759806818464e-05, + "loss": 1.0716, + "step": 4512 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997596979261e-05, + "loss": 0.7812, + "step": 4513 + }, + { + "epoch": 0.0, + "learning_rate": 4.99975958900906e-05, + "loss": 0.8982, + "step": 4514 + }, + { + "epoch": 0.0, + "learning_rate": 4.999759480067344e-05, + "loss": 1.2918, + "step": 4515 + }, + { + "epoch": 0.0, + "learning_rate": 4.99975937110095e-05, + "loss": 0.9576, + "step": 4516 + }, + { + "epoch": 0.0, + "learning_rate": 4.999759262109881e-05, + "loss": 1.1978, + "step": 4517 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997591530941345e-05, + "loss": 1.0162, + "step": 4518 + }, + { + "epoch": 0.0, + "learning_rate": 4.999759044053712e-05, + "loss": 1.1299, + "step": 4519 + }, + { + "epoch": 0.0, + "learning_rate": 4.999758934988612e-05, + "loss": 1.3023, + "step": 4520 + }, + { + "epoch": 0.0, + "learning_rate": 4.999758825898837e-05, + "loss": 1.0354, + "step": 4521 + }, + { + "epoch": 0.0, + "learning_rate": 4.999758716784384e-05, + "loss": 1.2752, + "step": 4522 + }, + { + "epoch": 0.0, + "learning_rate": 4.999758607645255e-05, + "loss": 0.9105, + "step": 4523 + }, + { + "epoch": 0.0, + "learning_rate": 4.999758498481449e-05, + "loss": 1.1427, + "step": 4524 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997583892929676e-05, + "loss": 1.123, + "step": 4525 + }, + { + "epoch": 0.0, + "learning_rate": 4.999758280079809e-05, + "loss": 1.0269, + "step": 4526 + }, + { + "epoch": 0.0, + "learning_rate": 4.999758170841973e-05, + "loss": 1.4752, + "step": 4527 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997580615794615e-05, + "loss": 1.9687, + "step": 4528 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997579522922735e-05, + "loss": 1.4191, + "step": 4529 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997578429804084e-05, + "loss": 1.8628, + "step": 4530 + }, + { + "epoch": 0.0, + "learning_rate": 4.999757733643867e-05, + "loss": 2.0473, + "step": 4531 + }, + { + "epoch": 0.0, + "learning_rate": 4.999757624282648e-05, + "loss": 1.7376, + "step": 4532 + }, + { + "epoch": 0.0, + "learning_rate": 4.999757514896754e-05, + "loss": 1.7128, + "step": 4533 + }, + { + "epoch": 0.0, + "learning_rate": 4.999757405486183e-05, + "loss": 1.2395, + "step": 4534 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997572960509355e-05, + "loss": 0.7994, + "step": 4535 + }, + { + "epoch": 0.0, + "learning_rate": 4.999757186591011e-05, + "loss": 1.5496, + "step": 4536 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997570771064095e-05, + "loss": 1.3805, + "step": 4537 + }, + { + "epoch": 0.0, + "learning_rate": 4.999756967597132e-05, + "loss": 1.1317, + "step": 4538 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997568580631785e-05, + "loss": 1.0239, + "step": 4539 + }, + { + "epoch": 0.0, + "learning_rate": 4.999756748504548e-05, + "loss": 0.9977, + "step": 4540 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997566389212416e-05, + "loss": 0.7617, + "step": 4541 + }, + { + "epoch": 0.0, + "learning_rate": 4.999756529313258e-05, + "loss": 0.7574, + "step": 4542 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997564196805976e-05, + "loss": 0.4952, + "step": 4543 + }, + { + "epoch": 0.0, + "learning_rate": 4.999756310023261e-05, + "loss": 0.4071, + "step": 4544 + }, + { + "epoch": 0.0, + "learning_rate": 4.999756200341248e-05, + "loss": 0.3267, + "step": 4545 + }, + { + "epoch": 0.0, + "learning_rate": 4.999756090634558e-05, + "loss": 0.2456, + "step": 4546 + }, + { + "epoch": 0.0, + "learning_rate": 4.999755980903191e-05, + "loss": 0.1915, + "step": 4547 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997558711471486e-05, + "loss": 0.2422, + "step": 4548 + }, + { + "epoch": 0.0, + "learning_rate": 4.999755761366429e-05, + "loss": 0.1811, + "step": 4549 + }, + { + "epoch": 0.0, + "learning_rate": 4.999755651561033e-05, + "loss": 0.1229, + "step": 4550 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997555417309613e-05, + "loss": 0.1014, + "step": 4551 + }, + { + "epoch": 0.0, + "learning_rate": 4.999755431876212e-05, + "loss": 0.5357, + "step": 4552 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997553219967865e-05, + "loss": 1.2375, + "step": 4553 + }, + { + "epoch": 0.0, + "learning_rate": 4.999755212092685e-05, + "loss": 1.3645, + "step": 4554 + }, + { + "epoch": 0.0, + "learning_rate": 4.999755102163906e-05, + "loss": 1.3012, + "step": 4555 + }, + { + "epoch": 0.0, + "learning_rate": 4.999754992210451e-05, + "loss": 1.2112, + "step": 4556 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997548822323195e-05, + "loss": 1.3208, + "step": 4557 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997547722295113e-05, + "loss": 1.0551, + "step": 4558 + }, + { + "epoch": 0.0, + "learning_rate": 4.999754662202026e-05, + "loss": 1.1901, + "step": 4559 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997545521498656e-05, + "loss": 0.8277, + "step": 4560 + }, + { + "epoch": 0.0, + "learning_rate": 4.999754442073028e-05, + "loss": 0.3503, + "step": 4561 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997543319715135e-05, + "loss": 1.1548, + "step": 4562 + }, + { + "epoch": 0.0, + "learning_rate": 4.999754221845323e-05, + "loss": 1.1193, + "step": 4563 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997541116944556e-05, + "loss": 1.0134, + "step": 4564 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997540015189116e-05, + "loss": 1.3742, + "step": 4565 + }, + { + "epoch": 0.0, + "learning_rate": 4.999753891318691e-05, + "loss": 1.2755, + "step": 4566 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997537810937944e-05, + "loss": 1.4311, + "step": 4567 + }, + { + "epoch": 0.0, + "learning_rate": 4.999753670844222e-05, + "loss": 1.3954, + "step": 4568 + }, + { + "epoch": 0.0, + "learning_rate": 4.999753560569972e-05, + "loss": 1.3045, + "step": 4569 + }, + { + "epoch": 0.0, + "learning_rate": 4.999753450271045e-05, + "loss": 2.1417, + "step": 4570 + }, + { + "epoch": 0.0, + "learning_rate": 4.999753339947442e-05, + "loss": 1.2134, + "step": 4571 + }, + { + "epoch": 0.0, + "learning_rate": 4.999753229599163e-05, + "loss": 1.1868, + "step": 4572 + }, + { + "epoch": 0.0, + "learning_rate": 4.999753119226207e-05, + "loss": 1.072, + "step": 4573 + }, + { + "epoch": 0.0, + "learning_rate": 4.999753008828575e-05, + "loss": 0.9794, + "step": 4574 + }, + { + "epoch": 0.0, + "learning_rate": 4.999752898406266e-05, + "loss": 1.0879, + "step": 4575 + }, + { + "epoch": 0.0, + "learning_rate": 4.99975278795928e-05, + "loss": 1.0484, + "step": 4576 + }, + { + "epoch": 0.0, + "learning_rate": 4.999752677487618e-05, + "loss": 0.909, + "step": 4577 + }, + { + "epoch": 0.0, + "learning_rate": 4.99975256699128e-05, + "loss": 0.8647, + "step": 4578 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997524564702645e-05, + "loss": 1.1287, + "step": 4579 + }, + { + "epoch": 0.0, + "learning_rate": 4.999752345924573e-05, + "loss": 1.1584, + "step": 4580 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997522353542056e-05, + "loss": 1.4009, + "step": 4581 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997521247591614e-05, + "loss": 0.9556, + "step": 4582 + }, + { + "epoch": 0.0, + "learning_rate": 4.99975201413944e-05, + "loss": 0.6594, + "step": 4583 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997519034950425e-05, + "loss": 1.0026, + "step": 4584 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997517928259683e-05, + "loss": 1.068, + "step": 4585 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997516821322184e-05, + "loss": 1.2799, + "step": 4586 + }, + { + "epoch": 0.0, + "learning_rate": 4.999751571413791e-05, + "loss": 1.2804, + "step": 4587 + }, + { + "epoch": 0.0, + "learning_rate": 4.999751460670688e-05, + "loss": 1.1973, + "step": 4588 + }, + { + "epoch": 0.0, + "learning_rate": 4.999751349902908e-05, + "loss": 1.1968, + "step": 4589 + }, + { + "epoch": 0.0, + "learning_rate": 4.999751239110452e-05, + "loss": 1.2127, + "step": 4590 + }, + { + "epoch": 0.0, + "learning_rate": 4.999751128293318e-05, + "loss": 1.3084, + "step": 4591 + }, + { + "epoch": 0.0, + "learning_rate": 4.99975101745151e-05, + "loss": 1.0746, + "step": 4592 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997509065850234e-05, + "loss": 1.1344, + "step": 4593 + }, + { + "epoch": 0.0, + "learning_rate": 4.999750795693861e-05, + "loss": 1.1177, + "step": 4594 + }, + { + "epoch": 0.0, + "learning_rate": 4.999750684778022e-05, + "loss": 1.4068, + "step": 4595 + }, + { + "epoch": 0.0, + "learning_rate": 4.999750573837507e-05, + "loss": 1.1157, + "step": 4596 + }, + { + "epoch": 0.0, + "learning_rate": 4.999750462872315e-05, + "loss": 1.0543, + "step": 4597 + }, + { + "epoch": 0.0, + "learning_rate": 4.999750351882447e-05, + "loss": 0.8104, + "step": 4598 + }, + { + "epoch": 0.0, + "learning_rate": 4.999750240867902e-05, + "loss": 1.3103, + "step": 4599 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997501298286806e-05, + "loss": 1.2957, + "step": 4600 + }, + { + "epoch": 0.0, + "learning_rate": 4.999750018764783e-05, + "loss": 1.1328, + "step": 4601 + }, + { + "epoch": 0.0, + "learning_rate": 4.999749907676209e-05, + "loss": 0.9242, + "step": 4602 + }, + { + "epoch": 0.0, + "learning_rate": 4.999749796562958e-05, + "loss": 1.1808, + "step": 4603 + }, + { + "epoch": 0.0, + "learning_rate": 4.999749685425031e-05, + "loss": 1.2292, + "step": 4604 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997495742624275e-05, + "loss": 0.935, + "step": 4605 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997494630751474e-05, + "loss": 1.0585, + "step": 4606 + }, + { + "epoch": 0.0, + "learning_rate": 4.99974935186319e-05, + "loss": 0.9551, + "step": 4607 + }, + { + "epoch": 0.0, + "learning_rate": 4.999749240626558e-05, + "loss": 0.9226, + "step": 4608 + }, + { + "epoch": 0.0, + "learning_rate": 4.999749129365248e-05, + "loss": 1.1205, + "step": 4609 + }, + { + "epoch": 0.0, + "learning_rate": 4.999749018079262e-05, + "loss": 1.2519, + "step": 4610 + }, + { + "epoch": 0.0, + "learning_rate": 4.999748906768599e-05, + "loss": 1.063, + "step": 4611 + }, + { + "epoch": 0.0, + "learning_rate": 4.99974879543326e-05, + "loss": 1.1115, + "step": 4612 + }, + { + "epoch": 0.0, + "learning_rate": 4.999748684073245e-05, + "loss": 1.1365, + "step": 4613 + }, + { + "epoch": 0.0, + "learning_rate": 4.999748572688553e-05, + "loss": 1.1795, + "step": 4614 + }, + { + "epoch": 0.0, + "learning_rate": 4.999748461279185e-05, + "loss": 0.8403, + "step": 4615 + }, + { + "epoch": 0.0, + "learning_rate": 4.99974834984514e-05, + "loss": 0.9619, + "step": 4616 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997482383864186e-05, + "loss": 1.5095, + "step": 4617 + }, + { + "epoch": 0.0, + "learning_rate": 4.999748126903021e-05, + "loss": 1.1162, + "step": 4618 + }, + { + "epoch": 0.0, + "learning_rate": 4.999748015394947e-05, + "loss": 0.8096, + "step": 4619 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997479038621964e-05, + "loss": 1.1393, + "step": 4620 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997477923047684e-05, + "loss": 1.0588, + "step": 4621 + }, + { + "epoch": 0.0, + "learning_rate": 4.999747680722665e-05, + "loss": 1.0214, + "step": 4622 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997475691158856e-05, + "loss": 1.0753, + "step": 4623 + }, + { + "epoch": 0.0, + "learning_rate": 4.999747457484429e-05, + "loss": 1.0879, + "step": 4624 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997473458282964e-05, + "loss": 1.1705, + "step": 4625 + }, + { + "epoch": 0.0, + "learning_rate": 4.999747234147487e-05, + "loss": 1.4716, + "step": 4626 + }, + { + "epoch": 0.0, + "learning_rate": 4.999747122442001e-05, + "loss": 0.9731, + "step": 4627 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997470107118383e-05, + "loss": 0.9341, + "step": 4628 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997468989569994e-05, + "loss": 1.1177, + "step": 4629 + }, + { + "epoch": 0.0, + "learning_rate": 4.999746787177485e-05, + "loss": 1.4303, + "step": 4630 + }, + { + "epoch": 0.0, + "learning_rate": 4.999746675373292e-05, + "loss": 1.0908, + "step": 4631 + }, + { + "epoch": 0.0, + "learning_rate": 4.999746563544425e-05, + "loss": 1.4172, + "step": 4632 + }, + { + "epoch": 0.0, + "learning_rate": 4.99974645169088e-05, + "loss": 0.9805, + "step": 4633 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997463398126596e-05, + "loss": 1.1843, + "step": 4634 + }, + { + "epoch": 0.0, + "learning_rate": 4.999746227909762e-05, + "loss": 1.1419, + "step": 4635 + }, + { + "epoch": 0.0, + "learning_rate": 4.999746115982188e-05, + "loss": 0.9037, + "step": 4636 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997460040299376e-05, + "loss": 0.9535, + "step": 4637 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997458920530113e-05, + "loss": 1.1616, + "step": 4638 + }, + { + "epoch": 0.0, + "learning_rate": 4.999745780051408e-05, + "loss": 0.9401, + "step": 4639 + }, + { + "epoch": 0.0, + "learning_rate": 4.999745668025128e-05, + "loss": 0.9385, + "step": 4640 + }, + { + "epoch": 0.0, + "learning_rate": 4.999745555974173e-05, + "loss": 1.5485, + "step": 4641 + }, + { + "epoch": 0.0, + "learning_rate": 4.99974544389854e-05, + "loss": 1.2252, + "step": 4642 + }, + { + "epoch": 0.0, + "learning_rate": 4.999745331798231e-05, + "loss": 1.1477, + "step": 4643 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997452196732454e-05, + "loss": 1.497, + "step": 4644 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997451075235834e-05, + "loss": 0.9156, + "step": 4645 + }, + { + "epoch": 0.0, + "learning_rate": 4.999744995349245e-05, + "loss": 0.6902, + "step": 4646 + }, + { + "epoch": 0.0, + "learning_rate": 4.999744883150231e-05, + "loss": 0.6409, + "step": 4647 + }, + { + "epoch": 0.0, + "learning_rate": 4.99974477092654e-05, + "loss": 0.8875, + "step": 4648 + }, + { + "epoch": 0.0, + "learning_rate": 4.999744658678172e-05, + "loss": 1.017, + "step": 4649 + }, + { + "epoch": 0.0, + "learning_rate": 4.999744546405128e-05, + "loss": 1.4372, + "step": 4650 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997444341074076e-05, + "loss": 1.0174, + "step": 4651 + }, + { + "epoch": 0.0, + "learning_rate": 4.999744321785011e-05, + "loss": 1.1617, + "step": 4652 + }, + { + "epoch": 0.0, + "learning_rate": 4.999744209437938e-05, + "loss": 1.3952, + "step": 4653 + }, + { + "epoch": 0.0, + "learning_rate": 4.999744097066188e-05, + "loss": 1.1098, + "step": 4654 + }, + { + "epoch": 0.0, + "learning_rate": 4.999743984669763e-05, + "loss": 1.1052, + "step": 4655 + }, + { + "epoch": 0.0, + "learning_rate": 4.99974387224866e-05, + "loss": 0.9504, + "step": 4656 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997437598028806e-05, + "loss": 0.9464, + "step": 4657 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997436473324256e-05, + "loss": 1.1921, + "step": 4658 + }, + { + "epoch": 0.0, + "learning_rate": 4.999743534837294e-05, + "loss": 1.0369, + "step": 4659 + }, + { + "epoch": 0.0, + "learning_rate": 4.999743422317486e-05, + "loss": 1.0909, + "step": 4660 + }, + { + "epoch": 0.0, + "learning_rate": 4.999743309773002e-05, + "loss": 0.9977, + "step": 4661 + }, + { + "epoch": 0.0, + "learning_rate": 4.99974319720384e-05, + "loss": 1.1727, + "step": 4662 + }, + { + "epoch": 0.0, + "learning_rate": 4.999743084610003e-05, + "loss": 0.9632, + "step": 4663 + }, + { + "epoch": 0.0, + "learning_rate": 4.999742971991489e-05, + "loss": 1.3036, + "step": 4664 + }, + { + "epoch": 0.0, + "learning_rate": 4.999742859348299e-05, + "loss": 1.1178, + "step": 4665 + }, + { + "epoch": 0.0, + "learning_rate": 4.999742746680433e-05, + "loss": 1.66, + "step": 4666 + }, + { + "epoch": 0.0, + "learning_rate": 4.99974263398789e-05, + "loss": 0.2656, + "step": 4667 + }, + { + "epoch": 0.0, + "learning_rate": 4.999742521270671e-05, + "loss": 0.4481, + "step": 4668 + }, + { + "epoch": 0.0, + "learning_rate": 4.999742408528775e-05, + "loss": 0.273, + "step": 4669 + }, + { + "epoch": 0.0, + "learning_rate": 4.999742295762202e-05, + "loss": 1.0934, + "step": 4670 + }, + { + "epoch": 0.0, + "learning_rate": 4.999742182970953e-05, + "loss": 0.6311, + "step": 4671 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997420701550286e-05, + "loss": 0.6401, + "step": 4672 + }, + { + "epoch": 0.0, + "learning_rate": 4.999741957314427e-05, + "loss": 1.102, + "step": 4673 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997418444491496e-05, + "loss": 0.8887, + "step": 4674 + }, + { + "epoch": 0.0, + "learning_rate": 4.999741731559196e-05, + "loss": 1.336, + "step": 4675 + }, + { + "epoch": 0.0, + "learning_rate": 4.999741618644565e-05, + "loss": 1.7969, + "step": 4676 + }, + { + "epoch": 0.0, + "learning_rate": 4.999741505705258e-05, + "loss": 1.2247, + "step": 4677 + }, + { + "epoch": 0.0, + "learning_rate": 4.999741392741274e-05, + "loss": 1.0536, + "step": 4678 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997412797526144e-05, + "loss": 1.0859, + "step": 4679 + }, + { + "epoch": 0.0, + "learning_rate": 4.999741166739278e-05, + "loss": 1.4554, + "step": 4680 + }, + { + "epoch": 0.0, + "learning_rate": 4.999741053701266e-05, + "loss": 1.3994, + "step": 4681 + }, + { + "epoch": 0.0, + "learning_rate": 4.999740940638576e-05, + "loss": 1.5389, + "step": 4682 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997408275512115e-05, + "loss": 1.4944, + "step": 4683 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997407144391697e-05, + "loss": 1.5548, + "step": 4684 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997406013024514e-05, + "loss": 1.2535, + "step": 4685 + }, + { + "epoch": 0.0, + "learning_rate": 4.999740488141057e-05, + "loss": 1.2727, + "step": 4686 + }, + { + "epoch": 0.0, + "learning_rate": 4.999740374954986e-05, + "loss": 0.9823, + "step": 4687 + }, + { + "epoch": 0.0, + "learning_rate": 4.999740261744239e-05, + "loss": 1.006, + "step": 4688 + }, + { + "epoch": 0.0, + "learning_rate": 4.999740148508815e-05, + "loss": 1.1687, + "step": 4689 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997400352487154e-05, + "loss": 0.9116, + "step": 4690 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997399219639384e-05, + "loss": 1.3216, + "step": 4691 + }, + { + "epoch": 0.0, + "learning_rate": 4.999739808654486e-05, + "loss": 1.1733, + "step": 4692 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997396953203566e-05, + "loss": 1.2683, + "step": 4693 + }, + { + "epoch": 0.0, + "learning_rate": 4.999739581961551e-05, + "loss": 1.9192, + "step": 4694 + }, + { + "epoch": 0.0, + "learning_rate": 4.999739468578069e-05, + "loss": 1.497, + "step": 4695 + }, + { + "epoch": 0.0, + "learning_rate": 4.999739355169911e-05, + "loss": 1.1082, + "step": 4696 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997392417370756e-05, + "loss": 1.5696, + "step": 4697 + }, + { + "epoch": 0.0, + "learning_rate": 4.999739128279565e-05, + "loss": 0.9002, + "step": 4698 + }, + { + "epoch": 0.0, + "learning_rate": 4.999739014797378e-05, + "loss": 0.6441, + "step": 4699 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997389012905136e-05, + "loss": 1.2714, + "step": 4700 + }, + { + "epoch": 0.0, + "learning_rate": 4.999738787758974e-05, + "loss": 1.1863, + "step": 4701 + }, + { + "epoch": 0.0, + "learning_rate": 4.999738674202757e-05, + "loss": 1.1173, + "step": 4702 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997385606218644e-05, + "loss": 1.4007, + "step": 4703 + }, + { + "epoch": 0.0, + "learning_rate": 4.999738447016295e-05, + "loss": 1.0387, + "step": 4704 + }, + { + "epoch": 0.0, + "learning_rate": 4.999738333386049e-05, + "loss": 1.1106, + "step": 4705 + }, + { + "epoch": 0.0, + "learning_rate": 4.999738219731127e-05, + "loss": 1.189, + "step": 4706 + }, + { + "epoch": 0.0, + "learning_rate": 4.999738106051529e-05, + "loss": 1.2896, + "step": 4707 + }, + { + "epoch": 0.0, + "learning_rate": 4.999737992347254e-05, + "loss": 1.2141, + "step": 4708 + }, + { + "epoch": 0.0, + "learning_rate": 4.999737878618304e-05, + "loss": 1.0269, + "step": 4709 + }, + { + "epoch": 0.0, + "learning_rate": 4.999737764864676e-05, + "loss": 1.1955, + "step": 4710 + }, + { + "epoch": 0.0, + "learning_rate": 4.999737651086372e-05, + "loss": 1.3306, + "step": 4711 + }, + { + "epoch": 0.0, + "learning_rate": 4.999737537283392e-05, + "loss": 0.8874, + "step": 4712 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997374234557354e-05, + "loss": 1.1114, + "step": 4713 + }, + { + "epoch": 0.0, + "learning_rate": 4.999737309603403e-05, + "loss": 1.0591, + "step": 4714 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997371957263935e-05, + "loss": 1.1689, + "step": 4715 + }, + { + "epoch": 0.0, + "learning_rate": 4.999737081824708e-05, + "loss": 1.0724, + "step": 4716 + }, + { + "epoch": 0.0, + "learning_rate": 4.999736967898346e-05, + "loss": 1.063, + "step": 4717 + }, + { + "epoch": 0.0, + "learning_rate": 4.999736853947308e-05, + "loss": 1.1839, + "step": 4718 + }, + { + "epoch": 0.0, + "learning_rate": 4.999736739971593e-05, + "loss": 1.0832, + "step": 4719 + }, + { + "epoch": 0.0, + "learning_rate": 4.999736625971202e-05, + "loss": 1.253, + "step": 4720 + }, + { + "epoch": 0.0, + "learning_rate": 4.999736511946135e-05, + "loss": 0.9533, + "step": 4721 + }, + { + "epoch": 0.0, + "learning_rate": 4.999736397896391e-05, + "loss": 0.6984, + "step": 4722 + }, + { + "epoch": 0.0, + "learning_rate": 4.999736283821971e-05, + "loss": 1.1763, + "step": 4723 + }, + { + "epoch": 0.0, + "learning_rate": 4.999736169722874e-05, + "loss": 1.289, + "step": 4724 + }, + { + "epoch": 0.0, + "learning_rate": 4.999736055599102e-05, + "loss": 1.1467, + "step": 4725 + }, + { + "epoch": 0.0, + "learning_rate": 4.999735941450653e-05, + "loss": 0.9113, + "step": 4726 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997358272775273e-05, + "loss": 1.0437, + "step": 4727 + }, + { + "epoch": 0.0, + "learning_rate": 4.999735713079726e-05, + "loss": 1.0288, + "step": 4728 + }, + { + "epoch": 0.0, + "learning_rate": 4.999735598857248e-05, + "loss": 1.0476, + "step": 4729 + }, + { + "epoch": 0.0, + "learning_rate": 4.999735484610094e-05, + "loss": 0.4842, + "step": 4730 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997353703382625e-05, + "loss": 1.1656, + "step": 4731 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997352560417556e-05, + "loss": 1.1774, + "step": 4732 + }, + { + "epoch": 0.0, + "learning_rate": 4.999735141720572e-05, + "loss": 1.4356, + "step": 4733 + }, + { + "epoch": 0.0, + "learning_rate": 4.999735027374712e-05, + "loss": 0.9103, + "step": 4734 + }, + { + "epoch": 0.0, + "learning_rate": 4.999734913004177e-05, + "loss": 1.6251, + "step": 4735 + }, + { + "epoch": 0.0, + "learning_rate": 4.999734798608964e-05, + "loss": 1.2213, + "step": 4736 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997346841890755e-05, + "loss": 1.0874, + "step": 4737 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997345697445106e-05, + "loss": 1.3342, + "step": 4738 + }, + { + "epoch": 0.0, + "learning_rate": 4.999734455275269e-05, + "loss": 1.1486, + "step": 4739 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997343407813514e-05, + "loss": 1.2113, + "step": 4740 + }, + { + "epoch": 0.0, + "learning_rate": 4.999734226262757e-05, + "loss": 1.1141, + "step": 4741 + }, + { + "epoch": 0.0, + "learning_rate": 4.999734111719487e-05, + "loss": 1.1153, + "step": 4742 + }, + { + "epoch": 0.0, + "learning_rate": 4.99973399715154e-05, + "loss": 1.1899, + "step": 4743 + }, + { + "epoch": 0.0, + "learning_rate": 4.999733882558918e-05, + "loss": 1.1211, + "step": 4744 + }, + { + "epoch": 0.0, + "learning_rate": 4.999733767941618e-05, + "loss": 1.0558, + "step": 4745 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997336532996426e-05, + "loss": 1.1537, + "step": 4746 + }, + { + "epoch": 0.0, + "learning_rate": 4.99973353863299e-05, + "loss": 1.1311, + "step": 4747 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997334239416623e-05, + "loss": 0.7318, + "step": 4748 + }, + { + "epoch": 0.0, + "learning_rate": 4.999733309225658e-05, + "loss": 0.8849, + "step": 4749 + }, + { + "epoch": 0.0, + "learning_rate": 4.999733194484977e-05, + "loss": 1.0484, + "step": 4750 + }, + { + "epoch": 0.0, + "learning_rate": 4.99973307971962e-05, + "loss": 1.1667, + "step": 4751 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997329649295866e-05, + "loss": 1.5542, + "step": 4752 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997328501148764e-05, + "loss": 1.0831, + "step": 4753 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997327352754905e-05, + "loss": 1.4354, + "step": 4754 + }, + { + "epoch": 0.0, + "learning_rate": 4.999732620411428e-05, + "loss": 1.2352, + "step": 4755 + }, + { + "epoch": 0.0, + "learning_rate": 4.999732505522689e-05, + "loss": 1.4719, + "step": 4756 + }, + { + "epoch": 0.0, + "learning_rate": 4.999732390609274e-05, + "loss": 1.057, + "step": 4757 + }, + { + "epoch": 0.0, + "learning_rate": 4.999732275671183e-05, + "loss": 1.0866, + "step": 4758 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997321607084154e-05, + "loss": 1.104, + "step": 4759 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997320457209714e-05, + "loss": 1.0629, + "step": 4760 + }, + { + "epoch": 0.0, + "learning_rate": 4.999731930708851e-05, + "loss": 1.6223, + "step": 4761 + }, + { + "epoch": 0.0, + "learning_rate": 4.999731815672054e-05, + "loss": 1.3682, + "step": 4762 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997317006105816e-05, + "loss": 1.2582, + "step": 4763 + }, + { + "epoch": 0.0, + "learning_rate": 4.999731585524432e-05, + "loss": 0.9874, + "step": 4764 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997314704136065e-05, + "loss": 0.8081, + "step": 4765 + }, + { + "epoch": 0.0, + "learning_rate": 4.999731355278105e-05, + "loss": 1.0638, + "step": 4766 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997312401179276e-05, + "loss": 1.5015, + "step": 4767 + }, + { + "epoch": 0.0, + "learning_rate": 4.999731124933073e-05, + "loss": 1.5185, + "step": 4768 + }, + { + "epoch": 0.0, + "learning_rate": 4.999731009723542e-05, + "loss": 1.2712, + "step": 4769 + }, + { + "epoch": 0.0, + "learning_rate": 4.999730894489335e-05, + "loss": 1.0055, + "step": 4770 + }, + { + "epoch": 0.0, + "learning_rate": 4.999730779230452e-05, + "loss": 1.085, + "step": 4771 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997306639468926e-05, + "loss": 1.2079, + "step": 4772 + }, + { + "epoch": 0.0, + "learning_rate": 4.999730548638657e-05, + "loss": 1.1686, + "step": 4773 + }, + { + "epoch": 0.0, + "learning_rate": 4.999730433305745e-05, + "loss": 1.0924, + "step": 4774 + }, + { + "epoch": 0.0, + "learning_rate": 4.999730317948156e-05, + "loss": 1.3168, + "step": 4775 + }, + { + "epoch": 0.0, + "learning_rate": 4.999730202565892e-05, + "loss": 1.1525, + "step": 4776 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997300871589506e-05, + "loss": 1.1351, + "step": 4777 + }, + { + "epoch": 0.0, + "learning_rate": 4.999729971727334e-05, + "loss": 1.0702, + "step": 4778 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997298562710405e-05, + "loss": 1.0449, + "step": 4779 + }, + { + "epoch": 0.0, + "learning_rate": 4.99972974079007e-05, + "loss": 1.6182, + "step": 4780 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997296252844246e-05, + "loss": 1.2316, + "step": 4781 + }, + { + "epoch": 0.0, + "learning_rate": 4.999729509754102e-05, + "loss": 1.1996, + "step": 4782 + }, + { + "epoch": 0.0, + "learning_rate": 4.999729394199103e-05, + "loss": 1.1948, + "step": 4783 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997292786194285e-05, + "loss": 0.9327, + "step": 4784 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997291630150776e-05, + "loss": 1.0551, + "step": 4785 + }, + { + "epoch": 0.0, + "learning_rate": 4.99972904738605e-05, + "loss": 1.0891, + "step": 4786 + }, + { + "epoch": 0.0, + "learning_rate": 4.999728931732346e-05, + "loss": 1.0666, + "step": 4787 + }, + { + "epoch": 0.0, + "learning_rate": 4.999728816053966e-05, + "loss": 1.1281, + "step": 4788 + }, + { + "epoch": 0.0, + "learning_rate": 4.99972870035091e-05, + "loss": 1.5918, + "step": 4789 + }, + { + "epoch": 0.0, + "learning_rate": 4.999728584623178e-05, + "loss": 1.0612, + "step": 4790 + }, + { + "epoch": 0.0, + "learning_rate": 4.999728468870769e-05, + "loss": 1.0924, + "step": 4791 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997283530936836e-05, + "loss": 1.1297, + "step": 4792 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997282372919225e-05, + "loss": 1.176, + "step": 4793 + }, + { + "epoch": 0.0, + "learning_rate": 4.999728121465485e-05, + "loss": 1.1923, + "step": 4794 + }, + { + "epoch": 0.0, + "learning_rate": 4.999728005614371e-05, + "loss": 1.0567, + "step": 4795 + }, + { + "epoch": 0.0, + "learning_rate": 4.999727889738581e-05, + "loss": 1.1293, + "step": 4796 + }, + { + "epoch": 0.0, + "learning_rate": 4.999727773838115e-05, + "loss": 0.6926, + "step": 4797 + }, + { + "epoch": 0.0, + "learning_rate": 4.999727657912972e-05, + "loss": 1.2504, + "step": 4798 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997275419631535e-05, + "loss": 0.7057, + "step": 4799 + }, + { + "epoch": 0.0, + "learning_rate": 4.999727425988659e-05, + "loss": 0.9938, + "step": 4800 + }, + { + "epoch": 0.0, + "learning_rate": 4.999727309989487e-05, + "loss": 1.1662, + "step": 4801 + }, + { + "epoch": 0.0, + "learning_rate": 4.999727193965639e-05, + "loss": 1.3126, + "step": 4802 + }, + { + "epoch": 0.0, + "learning_rate": 4.999727077917115e-05, + "loss": 1.1409, + "step": 4803 + }, + { + "epoch": 0.0, + "learning_rate": 4.999726961843915e-05, + "loss": 1.003, + "step": 4804 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997268457460386e-05, + "loss": 1.1619, + "step": 4805 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997267296234865e-05, + "loss": 1.1168, + "step": 4806 + }, + { + "epoch": 0.0, + "learning_rate": 4.999726613476257e-05, + "loss": 1.2055, + "step": 4807 + }, + { + "epoch": 0.0, + "learning_rate": 4.999726497304352e-05, + "loss": 1.1727, + "step": 4808 + }, + { + "epoch": 0.0, + "learning_rate": 4.999726381107771e-05, + "loss": 1.1429, + "step": 4809 + }, + { + "epoch": 0.0, + "learning_rate": 4.999726264886513e-05, + "loss": 1.2134, + "step": 4810 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997261486405787e-05, + "loss": 0.9725, + "step": 4811 + }, + { + "epoch": 0.0, + "learning_rate": 4.999726032369969e-05, + "loss": 1.3631, + "step": 4812 + }, + { + "epoch": 0.0, + "learning_rate": 4.999725916074683e-05, + "loss": 1.3103, + "step": 4813 + }, + { + "epoch": 0.0, + "learning_rate": 4.99972579975472e-05, + "loss": 1.0344, + "step": 4814 + }, + { + "epoch": 0.0, + "learning_rate": 4.999725683410082e-05, + "loss": 1.7591, + "step": 4815 + }, + { + "epoch": 0.0, + "learning_rate": 4.999725567040766e-05, + "loss": 1.6107, + "step": 4816 + }, + { + "epoch": 0.0, + "learning_rate": 4.999725450646775e-05, + "loss": 1.2663, + "step": 4817 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997253342281075e-05, + "loss": 1.1365, + "step": 4818 + }, + { + "epoch": 0.0, + "learning_rate": 4.999725217784764e-05, + "loss": 0.8303, + "step": 4819 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997251013167435e-05, + "loss": 0.4453, + "step": 4820 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997249848240474e-05, + "loss": 1.1691, + "step": 4821 + }, + { + "epoch": 0.0, + "learning_rate": 4.999724868306675e-05, + "loss": 1.5815, + "step": 4822 + }, + { + "epoch": 0.0, + "learning_rate": 4.999724751764626e-05, + "loss": 1.109, + "step": 4823 + }, + { + "epoch": 0.0, + "learning_rate": 4.999724635197901e-05, + "loss": 0.8534, + "step": 4824 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997245186065e-05, + "loss": 1.1309, + "step": 4825 + }, + { + "epoch": 0.0, + "learning_rate": 4.999724401990423e-05, + "loss": 1.1172, + "step": 4826 + }, + { + "epoch": 0.0, + "learning_rate": 4.999724285349669e-05, + "loss": 1.7788, + "step": 4827 + }, + { + "epoch": 0.0, + "learning_rate": 4.999724168684239e-05, + "loss": 1.3092, + "step": 4828 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997240519941335e-05, + "loss": 1.0972, + "step": 4829 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997239352793515e-05, + "loss": 0.6636, + "step": 4830 + }, + { + "epoch": 0.0, + "learning_rate": 4.999723818539892e-05, + "loss": 0.448, + "step": 4831 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997237017757576e-05, + "loss": 0.4685, + "step": 4832 + }, + { + "epoch": 0.0, + "learning_rate": 4.999723584986947e-05, + "loss": 0.3573, + "step": 4833 + }, + { + "epoch": 0.0, + "learning_rate": 4.999723468173459e-05, + "loss": 0.8482, + "step": 4834 + }, + { + "epoch": 0.0, + "learning_rate": 4.999723351335296e-05, + "loss": 1.3927, + "step": 4835 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997232344724565e-05, + "loss": 1.117, + "step": 4836 + }, + { + "epoch": 0.0, + "learning_rate": 4.999723117584941e-05, + "loss": 1.0969, + "step": 4837 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997230006727486e-05, + "loss": 1.0169, + "step": 4838 + }, + { + "epoch": 0.0, + "learning_rate": 4.99972288373588e-05, + "loss": 0.8773, + "step": 4839 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997227667743364e-05, + "loss": 1.0116, + "step": 4840 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997226497881156e-05, + "loss": 1.153, + "step": 4841 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997225327772184e-05, + "loss": 0.7223, + "step": 4842 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997224157416454e-05, + "loss": 0.5854, + "step": 4843 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997222986813966e-05, + "loss": 1.1327, + "step": 4844 + }, + { + "epoch": 0.0, + "learning_rate": 4.99972218159647e-05, + "loss": 1.3386, + "step": 4845 + }, + { + "epoch": 0.0, + "learning_rate": 4.999722064486869e-05, + "loss": 0.6762, + "step": 4846 + }, + { + "epoch": 0.0, + "learning_rate": 4.999721947352591e-05, + "loss": 1.5002, + "step": 4847 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997218301936365e-05, + "loss": 1.6148, + "step": 4848 + }, + { + "epoch": 0.0, + "learning_rate": 4.999721713010007e-05, + "loss": 1.7661, + "step": 4849 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997215958016995e-05, + "loss": 1.9748, + "step": 4850 + }, + { + "epoch": 0.0, + "learning_rate": 4.999721478568718e-05, + "loss": 1.3449, + "step": 4851 + }, + { + "epoch": 0.0, + "learning_rate": 4.999721361311058e-05, + "loss": 1.1963, + "step": 4852 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997212440287234e-05, + "loss": 1.0218, + "step": 4853 + }, + { + "epoch": 0.0, + "learning_rate": 4.999721126721712e-05, + "loss": 1.3188, + "step": 4854 + }, + { + "epoch": 0.0, + "learning_rate": 4.999721009390025e-05, + "loss": 1.0898, + "step": 4855 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997208920336606e-05, + "loss": 1.1505, + "step": 4856 + }, + { + "epoch": 0.0, + "learning_rate": 4.999720774652621e-05, + "loss": 1.1612, + "step": 4857 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997206572469046e-05, + "loss": 1.2213, + "step": 4858 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997205398165126e-05, + "loss": 1.2131, + "step": 4859 + }, + { + "epoch": 0.0, + "learning_rate": 4.999720422361445e-05, + "loss": 1.0219, + "step": 4860 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997203048817e-05, + "loss": 1.2768, + "step": 4861 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997201873772794e-05, + "loss": 1.0464, + "step": 4862 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997200698481816e-05, + "loss": 1.0345, + "step": 4863 + }, + { + "epoch": 0.0, + "learning_rate": 4.999719952294409e-05, + "loss": 1.2539, + "step": 4864 + }, + { + "epoch": 0.0, + "learning_rate": 4.999719834715959e-05, + "loss": 1.0661, + "step": 4865 + }, + { + "epoch": 0.0, + "learning_rate": 4.999719717112834e-05, + "loss": 1.0236, + "step": 4866 + }, + { + "epoch": 0.0, + "learning_rate": 4.999719599485032e-05, + "loss": 1.1617, + "step": 4867 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997194818325544e-05, + "loss": 0.8693, + "step": 4868 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997193641554e-05, + "loss": 1.0996, + "step": 4869 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997192464535705e-05, + "loss": 1.2102, + "step": 4870 + }, + { + "epoch": 0.0, + "learning_rate": 4.999719128727063e-05, + "loss": 0.6804, + "step": 4871 + }, + { + "epoch": 0.0, + "learning_rate": 4.999719010975881e-05, + "loss": 0.9678, + "step": 4872 + }, + { + "epoch": 0.0, + "learning_rate": 4.999718893200022e-05, + "loss": 0.9744, + "step": 4873 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997187753994876e-05, + "loss": 1.253, + "step": 4874 + }, + { + "epoch": 0.0, + "learning_rate": 4.999718657574276e-05, + "loss": 1.1902, + "step": 4875 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997185397243885e-05, + "loss": 0.7328, + "step": 4876 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997184218498247e-05, + "loss": 0.7258, + "step": 4877 + }, + { + "epoch": 0.0, + "learning_rate": 4.999718303950586e-05, + "loss": 0.5045, + "step": 4878 + }, + { + "epoch": 0.0, + "learning_rate": 4.99971818602667e-05, + "loss": 0.601, + "step": 4879 + }, + { + "epoch": 0.0, + "learning_rate": 4.999718068078078e-05, + "loss": 0.4128, + "step": 4880 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997179501048095e-05, + "loss": 0.7225, + "step": 4881 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997178321068655e-05, + "loss": 1.0695, + "step": 4882 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997177140842457e-05, + "loss": 1.2155, + "step": 4883 + }, + { + "epoch": 0.0, + "learning_rate": 4.999717596036949e-05, + "loss": 1.5828, + "step": 4884 + }, + { + "epoch": 0.0, + "learning_rate": 4.999717477964976e-05, + "loss": 0.9714, + "step": 4885 + }, + { + "epoch": 0.0, + "learning_rate": 4.999717359868327e-05, + "loss": 1.0561, + "step": 4886 + }, + { + "epoch": 0.0, + "learning_rate": 4.999717241747002e-05, + "loss": 1.0278, + "step": 4887 + }, + { + "epoch": 0.0, + "learning_rate": 4.999717123601001e-05, + "loss": 0.9739, + "step": 4888 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997170054303236e-05, + "loss": 1.0747, + "step": 4889 + }, + { + "epoch": 0.0, + "learning_rate": 4.99971688723497e-05, + "loss": 1.3003, + "step": 4890 + }, + { + "epoch": 0.0, + "learning_rate": 4.999716769014941e-05, + "loss": 1.0654, + "step": 4891 + }, + { + "epoch": 0.0, + "learning_rate": 4.999716650770234e-05, + "loss": 1.1268, + "step": 4892 + }, + { + "epoch": 0.0, + "learning_rate": 4.999716532500852e-05, + "loss": 1.0623, + "step": 4893 + }, + { + "epoch": 0.0, + "learning_rate": 4.999716414206794e-05, + "loss": 1.1484, + "step": 4894 + }, + { + "epoch": 0.0, + "learning_rate": 4.99971629588806e-05, + "loss": 1.3828, + "step": 4895 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997161775446496e-05, + "loss": 1.2322, + "step": 4896 + }, + { + "epoch": 0.0, + "learning_rate": 4.999716059176563e-05, + "loss": 1.1974, + "step": 4897 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997159407838e-05, + "loss": 1.1084, + "step": 4898 + }, + { + "epoch": 0.0, + "learning_rate": 4.999715822366361e-05, + "loss": 1.2828, + "step": 4899 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997157039242466e-05, + "loss": 1.0407, + "step": 4900 + }, + { + "epoch": 0.0, + "learning_rate": 4.999715585457455e-05, + "loss": 1.1489, + "step": 4901 + }, + { + "epoch": 0.0, + "learning_rate": 4.999715466965988e-05, + "loss": 1.4901, + "step": 4902 + }, + { + "epoch": 0.0, + "learning_rate": 4.999715348449845e-05, + "loss": 1.0606, + "step": 4903 + }, + { + "epoch": 0.0, + "learning_rate": 4.999715229909025e-05, + "loss": 1.0927, + "step": 4904 + }, + { + "epoch": 0.0, + "learning_rate": 4.999715111343529e-05, + "loss": 1.3185, + "step": 4905 + }, + { + "epoch": 0.0, + "learning_rate": 4.999714992753358e-05, + "loss": 1.3352, + "step": 4906 + }, + { + "epoch": 0.0, + "learning_rate": 4.99971487413851e-05, + "loss": 1.4179, + "step": 4907 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997147554989856e-05, + "loss": 1.1385, + "step": 4908 + }, + { + "epoch": 0.0, + "learning_rate": 4.999714636834785e-05, + "loss": 1.1543, + "step": 4909 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997145181459095e-05, + "loss": 1.3222, + "step": 4910 + }, + { + "epoch": 0.0, + "learning_rate": 4.999714399432356e-05, + "loss": 1.5961, + "step": 4911 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997142806941276e-05, + "loss": 1.1299, + "step": 4912 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997141619312234e-05, + "loss": 1.1121, + "step": 4913 + }, + { + "epoch": 0.0, + "learning_rate": 4.999714043143642e-05, + "loss": 1.0932, + "step": 4914 + }, + { + "epoch": 0.0, + "learning_rate": 4.999713924331385e-05, + "loss": 1.041, + "step": 4915 + }, + { + "epoch": 0.0, + "learning_rate": 4.999713805494453e-05, + "loss": 1.1886, + "step": 4916 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997136866328434e-05, + "loss": 0.8678, + "step": 4917 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997135677465576e-05, + "loss": 0.9387, + "step": 4918 + }, + { + "epoch": 0.0, + "learning_rate": 4.999713448835596e-05, + "loss": 1.246, + "step": 4919 + }, + { + "epoch": 0.0, + "learning_rate": 4.999713329899959e-05, + "loss": 1.2206, + "step": 4920 + }, + { + "epoch": 0.0, + "learning_rate": 4.999713210939645e-05, + "loss": 0.9171, + "step": 4921 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997130919546555e-05, + "loss": 1.0763, + "step": 4922 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997129729449896e-05, + "loss": 1.3132, + "step": 4923 + }, + { + "epoch": 0.0, + "learning_rate": 4.999712853910647e-05, + "loss": 0.8297, + "step": 4924 + }, + { + "epoch": 0.0, + "learning_rate": 4.999712734851629e-05, + "loss": 1.1485, + "step": 4925 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997126157679344e-05, + "loss": 0.9347, + "step": 4926 + }, + { + "epoch": 0.0, + "learning_rate": 4.999712496659564e-05, + "loss": 1.1448, + "step": 4927 + }, + { + "epoch": 0.0, + "learning_rate": 4.999712377526518e-05, + "loss": 1.4814, + "step": 4928 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997122583687954e-05, + "loss": 0.9763, + "step": 4929 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997121391863964e-05, + "loss": 1.3074, + "step": 4930 + }, + { + "epoch": 0.0, + "learning_rate": 4.999712019979322e-05, + "loss": 1.1433, + "step": 4931 + }, + { + "epoch": 0.0, + "learning_rate": 4.999711900747571e-05, + "loss": 1.2763, + "step": 4932 + }, + { + "epoch": 0.0, + "learning_rate": 4.999711781491144e-05, + "loss": 0.9959, + "step": 4933 + }, + { + "epoch": 0.0, + "learning_rate": 4.999711662210041e-05, + "loss": 0.9883, + "step": 4934 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997115429042615e-05, + "loss": 1.2426, + "step": 4935 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997114235738066e-05, + "loss": 1.2025, + "step": 4936 + }, + { + "epoch": 0.0, + "learning_rate": 4.999711304218675e-05, + "loss": 1.0928, + "step": 4937 + }, + { + "epoch": 0.0, + "learning_rate": 4.999711184838868e-05, + "loss": 0.9726, + "step": 4938 + }, + { + "epoch": 0.0, + "learning_rate": 4.999711065434384e-05, + "loss": 1.0069, + "step": 4939 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997109460052244e-05, + "loss": 1.2129, + "step": 4940 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997108265513886e-05, + "loss": 0.8876, + "step": 4941 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997107070728764e-05, + "loss": 1.1348, + "step": 4942 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997105875696884e-05, + "loss": 1.8116, + "step": 4943 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997104680418247e-05, + "loss": 1.475, + "step": 4944 + }, + { + "epoch": 0.0, + "learning_rate": 4.999710348489285e-05, + "loss": 0.8913, + "step": 4945 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997102289120685e-05, + "loss": 1.3088, + "step": 4946 + }, + { + "epoch": 0.0, + "learning_rate": 4.999710109310176e-05, + "loss": 1.3785, + "step": 4947 + }, + { + "epoch": 0.0, + "learning_rate": 4.999709989683607e-05, + "loss": 0.842, + "step": 4948 + }, + { + "epoch": 0.0, + "learning_rate": 4.999709870032363e-05, + "loss": 1.0386, + "step": 4949 + }, + { + "epoch": 0.0, + "learning_rate": 4.999709750356443e-05, + "loss": 1.2069, + "step": 4950 + }, + { + "epoch": 0.0, + "learning_rate": 4.999709630655846e-05, + "loss": 1.0357, + "step": 4951 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997095109305735e-05, + "loss": 0.5687, + "step": 4952 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997093911806245e-05, + "loss": 0.929, + "step": 4953 + }, + { + "epoch": 0.0, + "learning_rate": 4.999709271406e-05, + "loss": 1.0482, + "step": 4954 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997091516066985e-05, + "loss": 1.1471, + "step": 4955 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997090317827215e-05, + "loss": 1.1224, + "step": 4956 + }, + { + "epoch": 0.0, + "learning_rate": 4.999708911934069e-05, + "loss": 1.1038, + "step": 4957 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997087920607395e-05, + "loss": 1.1545, + "step": 4958 + }, + { + "epoch": 0.0, + "learning_rate": 4.999708672162734e-05, + "loss": 1.0817, + "step": 4959 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997085522400525e-05, + "loss": 0.8527, + "step": 4960 + }, + { + "epoch": 0.0, + "learning_rate": 4.999708432292696e-05, + "loss": 1.2293, + "step": 4961 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997083123206624e-05, + "loss": 1.2644, + "step": 4962 + }, + { + "epoch": 0.0, + "learning_rate": 4.999708192323952e-05, + "loss": 1.1254, + "step": 4963 + }, + { + "epoch": 0.0, + "learning_rate": 4.999708072302567e-05, + "loss": 1.0951, + "step": 4964 + }, + { + "epoch": 0.0, + "learning_rate": 4.999707952256505e-05, + "loss": 0.8272, + "step": 4965 + }, + { + "epoch": 0.0, + "learning_rate": 4.999707832185767e-05, + "loss": 1.4679, + "step": 4966 + }, + { + "epoch": 0.0, + "learning_rate": 4.999707712090354e-05, + "loss": 1.8612, + "step": 4967 + }, + { + "epoch": 0.0, + "learning_rate": 4.999707591970264e-05, + "loss": 1.3245, + "step": 4968 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997074718254976e-05, + "loss": 1.3916, + "step": 4969 + }, + { + "epoch": 0.0, + "learning_rate": 4.999707351656056e-05, + "loss": 1.0122, + "step": 4970 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997072314619376e-05, + "loss": 0.8691, + "step": 4971 + }, + { + "epoch": 0.0, + "learning_rate": 4.999707111243144e-05, + "loss": 1.1927, + "step": 4972 + }, + { + "epoch": 0.0, + "learning_rate": 4.999706990999674e-05, + "loss": 0.959, + "step": 4973 + }, + { + "epoch": 0.0, + "learning_rate": 4.999706870731528e-05, + "loss": 1.1453, + "step": 4974 + }, + { + "epoch": 0.0, + "learning_rate": 4.999706750438705e-05, + "loss": 1.2579, + "step": 4975 + }, + { + "epoch": 0.0, + "learning_rate": 4.999706630121207e-05, + "loss": 1.1374, + "step": 4976 + }, + { + "epoch": 0.0, + "learning_rate": 4.999706509779033e-05, + "loss": 1.4763, + "step": 4977 + }, + { + "epoch": 0.0, + "learning_rate": 4.999706389412182e-05, + "loss": 1.2916, + "step": 4978 + }, + { + "epoch": 0.0, + "learning_rate": 4.999706269020656e-05, + "loss": 1.1468, + "step": 4979 + }, + { + "epoch": 0.0, + "learning_rate": 4.999706148604453e-05, + "loss": 0.9316, + "step": 4980 + }, + { + "epoch": 0.0, + "learning_rate": 4.999706028163575e-05, + "loss": 0.7751, + "step": 4981 + }, + { + "epoch": 0.0, + "learning_rate": 4.99970590769802e-05, + "loss": 1.495, + "step": 4982 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997057872077894e-05, + "loss": 1.0799, + "step": 4983 + }, + { + "epoch": 0.0, + "learning_rate": 4.999705666692883e-05, + "loss": 1.0632, + "step": 4984 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997055461533006e-05, + "loss": 1.1453, + "step": 4985 + }, + { + "epoch": 0.0, + "learning_rate": 4.999705425589042e-05, + "loss": 0.639, + "step": 4986 + }, + { + "epoch": 0.0, + "learning_rate": 4.999705305000106e-05, + "loss": 0.8285, + "step": 4987 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997051843864965e-05, + "loss": 1.1244, + "step": 4988 + }, + { + "epoch": 0.0, + "learning_rate": 4.999705063748209e-05, + "loss": 0.9547, + "step": 4989 + }, + { + "epoch": 0.0, + "learning_rate": 4.999704943085246e-05, + "loss": 0.4331, + "step": 4990 + }, + { + "epoch": 0.0, + "learning_rate": 4.999704822397607e-05, + "loss": 0.1439, + "step": 4991 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997047016852924e-05, + "loss": 1.0579, + "step": 4992 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997045809483013e-05, + "loss": 1.2167, + "step": 4993 + }, + { + "epoch": 0.0, + "learning_rate": 4.9997044601866345e-05, + "loss": 1.0966, + "step": 4994 + }, + { + "epoch": 0.0, + "learning_rate": 4.999704339400291e-05, + "loss": 1.6641, + "step": 4995 + }, + { + "epoch": 0.0, + "learning_rate": 4.999704218589272e-05, + "loss": 1.1495, + "step": 4996 + }, + { + "epoch": 0.0, + "learning_rate": 4.999704097753577e-05, + "loss": 1.1242, + "step": 4997 + }, + { + "epoch": 0.0, + "learning_rate": 4.999703976893207e-05, + "loss": 1.1382, + "step": 4998 + }, + { + "epoch": 0.0, + "learning_rate": 4.999703856008159e-05, + "loss": 1.2479, + "step": 4999 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997037350984355e-05, + "loss": 1.1091, + "step": 5000 + }, + { + "epoch": 0.01, + "eval_loss": 1.0793377161026, + "eval_runtime": 84.714, + "eval_samples_per_second": 16.349, + "eval_steps_per_second": 4.096, + "step": 5000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999703614164036e-05, + "loss": 1.0244, + "step": 5001 + }, + { + "epoch": 0.01, + "learning_rate": 4.999703493204961e-05, + "loss": 1.2538, + "step": 5002 + }, + { + "epoch": 0.01, + "learning_rate": 4.99970337222121e-05, + "loss": 0.9095, + "step": 5003 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997032512127826e-05, + "loss": 1.175, + "step": 5004 + }, + { + "epoch": 0.01, + "learning_rate": 4.999703130179679e-05, + "loss": 0.9529, + "step": 5005 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997030091219e-05, + "loss": 1.0424, + "step": 5006 + }, + { + "epoch": 0.01, + "learning_rate": 4.999702888039445e-05, + "loss": 0.7228, + "step": 5007 + }, + { + "epoch": 0.01, + "learning_rate": 4.999702766932314e-05, + "loss": 1.1532, + "step": 5008 + }, + { + "epoch": 0.01, + "learning_rate": 4.999702645800506e-05, + "loss": 0.8094, + "step": 5009 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997025246440236e-05, + "loss": 1.148, + "step": 5010 + }, + { + "epoch": 0.01, + "learning_rate": 4.999702403462864e-05, + "loss": 1.2554, + "step": 5011 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997022822570286e-05, + "loss": 1.2795, + "step": 5012 + }, + { + "epoch": 0.01, + "learning_rate": 4.999702161026517e-05, + "loss": 1.2422, + "step": 5013 + }, + { + "epoch": 0.01, + "learning_rate": 4.999702039771329e-05, + "loss": 1.2922, + "step": 5014 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701918491466e-05, + "loss": 1.2931, + "step": 5015 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701797186926e-05, + "loss": 1.2758, + "step": 5016 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701675857711e-05, + "loss": 1.284, + "step": 5017 + }, + { + "epoch": 0.01, + "learning_rate": 4.99970155450382e-05, + "loss": 1.0614, + "step": 5018 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701433125252e-05, + "loss": 1.303, + "step": 5019 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997013117220095e-05, + "loss": 1.4622, + "step": 5020 + }, + { + "epoch": 0.01, + "learning_rate": 4.99970119029409e-05, + "loss": 1.2645, + "step": 5021 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997010688414945e-05, + "loss": 1.2847, + "step": 5022 + }, + { + "epoch": 0.01, + "learning_rate": 4.999700947364223e-05, + "loss": 1.1669, + "step": 5023 + }, + { + "epoch": 0.01, + "learning_rate": 4.999700825862276e-05, + "loss": 0.8389, + "step": 5024 + }, + { + "epoch": 0.01, + "learning_rate": 4.999700704335653e-05, + "loss": 1.2053, + "step": 5025 + }, + { + "epoch": 0.01, + "learning_rate": 4.999700582784353e-05, + "loss": 0.8173, + "step": 5026 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997004612083775e-05, + "loss": 0.9234, + "step": 5027 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997003396077266e-05, + "loss": 1.0486, + "step": 5028 + }, + { + "epoch": 0.01, + "learning_rate": 4.999700217982399e-05, + "loss": 1.0777, + "step": 5029 + }, + { + "epoch": 0.01, + "learning_rate": 4.999700096332396e-05, + "loss": 1.1065, + "step": 5030 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996999746577164e-05, + "loss": 0.9859, + "step": 5031 + }, + { + "epoch": 0.01, + "learning_rate": 4.999699852958361e-05, + "loss": 1.3586, + "step": 5032 + }, + { + "epoch": 0.01, + "learning_rate": 4.99969973123433e-05, + "loss": 1.192, + "step": 5033 + }, + { + "epoch": 0.01, + "learning_rate": 4.999699609485622e-05, + "loss": 0.9577, + "step": 5034 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996994877122396e-05, + "loss": 0.8894, + "step": 5035 + }, + { + "epoch": 0.01, + "learning_rate": 4.99969936591418e-05, + "loss": 0.8399, + "step": 5036 + }, + { + "epoch": 0.01, + "learning_rate": 4.999699244091445e-05, + "loss": 0.7582, + "step": 5037 + }, + { + "epoch": 0.01, + "learning_rate": 4.999699122244034e-05, + "loss": 1.0065, + "step": 5038 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996990003719466e-05, + "loss": 0.5715, + "step": 5039 + }, + { + "epoch": 0.01, + "learning_rate": 4.999698878475184e-05, + "loss": 0.5834, + "step": 5040 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996987565537445e-05, + "loss": 0.8216, + "step": 5041 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996986346076294e-05, + "loss": 1.2597, + "step": 5042 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996985126368386e-05, + "loss": 1.2998, + "step": 5043 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996983906413714e-05, + "loss": 1.0993, + "step": 5044 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996982686212284e-05, + "loss": 0.9802, + "step": 5045 + }, + { + "epoch": 0.01, + "learning_rate": 4.999698146576409e-05, + "loss": 1.2207, + "step": 5046 + }, + { + "epoch": 0.01, + "learning_rate": 4.999698024506915e-05, + "loss": 0.8932, + "step": 5047 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996979024127434e-05, + "loss": 0.8309, + "step": 5048 + }, + { + "epoch": 0.01, + "learning_rate": 4.999697780293897e-05, + "loss": 0.8186, + "step": 5049 + }, + { + "epoch": 0.01, + "learning_rate": 4.999697658150374e-05, + "loss": 1.1159, + "step": 5050 + }, + { + "epoch": 0.01, + "learning_rate": 4.999697535982175e-05, + "loss": 1.0577, + "step": 5051 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996974137893005e-05, + "loss": 0.9394, + "step": 5052 + }, + { + "epoch": 0.01, + "learning_rate": 4.99969729157175e-05, + "loss": 1.1254, + "step": 5053 + }, + { + "epoch": 0.01, + "learning_rate": 4.999697169329523e-05, + "loss": 1.2276, + "step": 5054 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996970470626204e-05, + "loss": 0.9111, + "step": 5055 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996969247710426e-05, + "loss": 0.6828, + "step": 5056 + }, + { + "epoch": 0.01, + "learning_rate": 4.999696802454788e-05, + "loss": 0.4971, + "step": 5057 + }, + { + "epoch": 0.01, + "learning_rate": 4.999696680113857e-05, + "loss": 0.3282, + "step": 5058 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996965577482506e-05, + "loss": 0.7744, + "step": 5059 + }, + { + "epoch": 0.01, + "learning_rate": 4.999696435357968e-05, + "loss": 1.1189, + "step": 5060 + }, + { + "epoch": 0.01, + "learning_rate": 4.99969631294301e-05, + "loss": 0.795, + "step": 5061 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996961905033755e-05, + "loss": 1.2361, + "step": 5062 + }, + { + "epoch": 0.01, + "learning_rate": 4.999696068039065e-05, + "loss": 1.1867, + "step": 5063 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996959455500794e-05, + "loss": 1.2989, + "step": 5064 + }, + { + "epoch": 0.01, + "learning_rate": 4.999695823036417e-05, + "loss": 1.2148, + "step": 5065 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996957004980796e-05, + "loss": 0.9647, + "step": 5066 + }, + { + "epoch": 0.01, + "learning_rate": 4.999695577935065e-05, + "loss": 1.4714, + "step": 5067 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996954553473754e-05, + "loss": 1.3342, + "step": 5068 + }, + { + "epoch": 0.01, + "learning_rate": 4.999695332735009e-05, + "loss": 1.1808, + "step": 5069 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996952100979674e-05, + "loss": 1.1745, + "step": 5070 + }, + { + "epoch": 0.01, + "learning_rate": 4.99969508743625e-05, + "loss": 1.0786, + "step": 5071 + }, + { + "epoch": 0.01, + "learning_rate": 4.999694964749856e-05, + "loss": 1.0861, + "step": 5072 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996948420387866e-05, + "loss": 1.1887, + "step": 5073 + }, + { + "epoch": 0.01, + "learning_rate": 4.999694719303041e-05, + "loss": 1.0008, + "step": 5074 + }, + { + "epoch": 0.01, + "learning_rate": 4.99969459654262e-05, + "loss": 1.1709, + "step": 5075 + }, + { + "epoch": 0.01, + "learning_rate": 4.999694473757522e-05, + "loss": 1.1251, + "step": 5076 + }, + { + "epoch": 0.01, + "learning_rate": 4.999694350947749e-05, + "loss": 1.036, + "step": 5077 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996942281133e-05, + "loss": 1.4192, + "step": 5078 + }, + { + "epoch": 0.01, + "learning_rate": 4.999694105254174e-05, + "loss": 0.9987, + "step": 5079 + }, + { + "epoch": 0.01, + "learning_rate": 4.999693982370373e-05, + "loss": 1.015, + "step": 5080 + }, + { + "epoch": 0.01, + "learning_rate": 4.999693859461896e-05, + "loss": 0.9053, + "step": 5081 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996937365287434e-05, + "loss": 1.1885, + "step": 5082 + }, + { + "epoch": 0.01, + "learning_rate": 4.999693613570914e-05, + "loss": 1.2264, + "step": 5083 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996934905884094e-05, + "loss": 1.0292, + "step": 5084 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996933675812284e-05, + "loss": 0.6453, + "step": 5085 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996932445493716e-05, + "loss": 1.0079, + "step": 5086 + }, + { + "epoch": 0.01, + "learning_rate": 4.999693121492839e-05, + "loss": 1.0922, + "step": 5087 + }, + { + "epoch": 0.01, + "learning_rate": 4.99969299841163e-05, + "loss": 1.1088, + "step": 5088 + }, + { + "epoch": 0.01, + "learning_rate": 4.999692875305746e-05, + "loss": 1.046, + "step": 5089 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996927521751854e-05, + "loss": 1.7119, + "step": 5090 + }, + { + "epoch": 0.01, + "learning_rate": 4.99969262901995e-05, + "loss": 1.4077, + "step": 5091 + }, + { + "epoch": 0.01, + "learning_rate": 4.999692505840038e-05, + "loss": 1.9513, + "step": 5092 + }, + { + "epoch": 0.01, + "learning_rate": 4.999692382635449e-05, + "loss": 1.406, + "step": 5093 + }, + { + "epoch": 0.01, + "learning_rate": 4.999692259406186e-05, + "loss": 1.2516, + "step": 5094 + }, + { + "epoch": 0.01, + "learning_rate": 4.999692136152245e-05, + "loss": 1.1783, + "step": 5095 + }, + { + "epoch": 0.01, + "learning_rate": 4.99969201287363e-05, + "loss": 0.7014, + "step": 5096 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996918895703384e-05, + "loss": 0.8977, + "step": 5097 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996917662423704e-05, + "loss": 0.8868, + "step": 5098 + }, + { + "epoch": 0.01, + "learning_rate": 4.999691642889727e-05, + "loss": 1.018, + "step": 5099 + }, + { + "epoch": 0.01, + "learning_rate": 4.999691519512407e-05, + "loss": 1.0971, + "step": 5100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999691396110412e-05, + "loss": 1.1603, + "step": 5101 + }, + { + "epoch": 0.01, + "learning_rate": 4.999691272683741e-05, + "loss": 0.1859, + "step": 5102 + }, + { + "epoch": 0.01, + "learning_rate": 4.999691149232394e-05, + "loss": 0.1624, + "step": 5103 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996910257563715e-05, + "loss": 0.1466, + "step": 5104 + }, + { + "epoch": 0.01, + "learning_rate": 4.999690902255672e-05, + "loss": 0.1275, + "step": 5105 + }, + { + "epoch": 0.01, + "learning_rate": 4.999690778730297e-05, + "loss": 0.1011, + "step": 5106 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996906551802465e-05, + "loss": 0.0959, + "step": 5107 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996905316055195e-05, + "loss": 0.2996, + "step": 5108 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996904080061175e-05, + "loss": 1.068, + "step": 5109 + }, + { + "epoch": 0.01, + "learning_rate": 4.999690284382039e-05, + "loss": 1.4908, + "step": 5110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999690160733285e-05, + "loss": 1.1142, + "step": 5111 + }, + { + "epoch": 0.01, + "learning_rate": 4.999690037059854e-05, + "loss": 1.3284, + "step": 5112 + }, + { + "epoch": 0.01, + "learning_rate": 4.999689913361749e-05, + "loss": 1.0132, + "step": 5113 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996897896389666e-05, + "loss": 1.1871, + "step": 5114 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996896658915086e-05, + "loss": 0.8212, + "step": 5115 + }, + { + "epoch": 0.01, + "learning_rate": 4.999689542119375e-05, + "loss": 0.6875, + "step": 5116 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996894183225653e-05, + "loss": 1.264, + "step": 5117 + }, + { + "epoch": 0.01, + "learning_rate": 4.99968929450108e-05, + "loss": 1.0435, + "step": 5118 + }, + { + "epoch": 0.01, + "learning_rate": 4.999689170654919e-05, + "loss": 1.2933, + "step": 5119 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996890467840815e-05, + "loss": 1.3859, + "step": 5120 + }, + { + "epoch": 0.01, + "learning_rate": 4.999688922888568e-05, + "loss": 0.7856, + "step": 5121 + }, + { + "epoch": 0.01, + "learning_rate": 4.999688798968379e-05, + "loss": 0.5347, + "step": 5122 + }, + { + "epoch": 0.01, + "learning_rate": 4.999688675023515e-05, + "loss": 0.4216, + "step": 5123 + }, + { + "epoch": 0.01, + "learning_rate": 4.999688551053974e-05, + "loss": 1.0338, + "step": 5124 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996884270597576e-05, + "loss": 1.2116, + "step": 5125 + }, + { + "epoch": 0.01, + "learning_rate": 4.999688303040865e-05, + "loss": 1.1681, + "step": 5126 + }, + { + "epoch": 0.01, + "learning_rate": 4.999688178997296e-05, + "loss": 1.3969, + "step": 5127 + }, + { + "epoch": 0.01, + "learning_rate": 4.999688054929053e-05, + "loss": 1.1493, + "step": 5128 + }, + { + "epoch": 0.01, + "learning_rate": 4.999687930836132e-05, + "loss": 1.4235, + "step": 5129 + }, + { + "epoch": 0.01, + "learning_rate": 4.999687806718536e-05, + "loss": 0.9103, + "step": 5130 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996876825762646e-05, + "loss": 0.8201, + "step": 5131 + }, + { + "epoch": 0.01, + "learning_rate": 4.999687558409317e-05, + "loss": 1.1494, + "step": 5132 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996874342176934e-05, + "loss": 1.1671, + "step": 5133 + }, + { + "epoch": 0.01, + "learning_rate": 4.999687310001394e-05, + "loss": 1.1026, + "step": 5134 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996871857604186e-05, + "loss": 1.0921, + "step": 5135 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996870614947675e-05, + "loss": 1.3104, + "step": 5136 + }, + { + "epoch": 0.01, + "learning_rate": 4.999686937204441e-05, + "loss": 1.1806, + "step": 5137 + }, + { + "epoch": 0.01, + "learning_rate": 4.999686812889438e-05, + "loss": 1.0932, + "step": 5138 + }, + { + "epoch": 0.01, + "learning_rate": 4.99968668854976e-05, + "loss": 0.892, + "step": 5139 + }, + { + "epoch": 0.01, + "learning_rate": 4.999686564185405e-05, + "loss": 0.6406, + "step": 5140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999686439796374e-05, + "loss": 0.4112, + "step": 5141 + }, + { + "epoch": 0.01, + "learning_rate": 4.999686315382668e-05, + "loss": 0.5258, + "step": 5142 + }, + { + "epoch": 0.01, + "learning_rate": 4.999686190944286e-05, + "loss": 0.7285, + "step": 5143 + }, + { + "epoch": 0.01, + "learning_rate": 4.999686066481229e-05, + "loss": 1.1035, + "step": 5144 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996859419934944e-05, + "loss": 0.8942, + "step": 5145 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996858174810843e-05, + "loss": 1.0368, + "step": 5146 + }, + { + "epoch": 0.01, + "learning_rate": 4.999685692943999e-05, + "loss": 0.9548, + "step": 5147 + }, + { + "epoch": 0.01, + "learning_rate": 4.999685568382238e-05, + "loss": 1.049, + "step": 5148 + }, + { + "epoch": 0.01, + "learning_rate": 4.999685443795801e-05, + "loss": 0.7284, + "step": 5149 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996853191846885e-05, + "loss": 1.198, + "step": 5150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999685194548899e-05, + "loss": 1.3568, + "step": 5151 + }, + { + "epoch": 0.01, + "learning_rate": 4.999685069888434e-05, + "loss": 1.1284, + "step": 5152 + }, + { + "epoch": 0.01, + "learning_rate": 4.999684945203293e-05, + "loss": 0.92, + "step": 5153 + }, + { + "epoch": 0.01, + "learning_rate": 4.999684820493478e-05, + "loss": 1.5527, + "step": 5154 + }, + { + "epoch": 0.01, + "learning_rate": 4.999684695758986e-05, + "loss": 1.4233, + "step": 5155 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996845709998173e-05, + "loss": 1.3858, + "step": 5156 + }, + { + "epoch": 0.01, + "learning_rate": 4.999684446215973e-05, + "loss": 0.9554, + "step": 5157 + }, + { + "epoch": 0.01, + "learning_rate": 4.999684321407454e-05, + "loss": 1.1443, + "step": 5158 + }, + { + "epoch": 0.01, + "learning_rate": 4.999684196574258e-05, + "loss": 1.0987, + "step": 5159 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996840717163875e-05, + "loss": 0.961, + "step": 5160 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996839468338396e-05, + "loss": 0.8515, + "step": 5161 + }, + { + "epoch": 0.01, + "learning_rate": 4.999683821926617e-05, + "loss": 1.2304, + "step": 5162 + }, + { + "epoch": 0.01, + "learning_rate": 4.999683696994718e-05, + "loss": 1.3475, + "step": 5163 + }, + { + "epoch": 0.01, + "learning_rate": 4.999683572038143e-05, + "loss": 1.1656, + "step": 5164 + }, + { + "epoch": 0.01, + "learning_rate": 4.999683447056893e-05, + "loss": 1.0646, + "step": 5165 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996833220509665e-05, + "loss": 1.1677, + "step": 5166 + }, + { + "epoch": 0.01, + "learning_rate": 4.999683197020365e-05, + "loss": 1.1873, + "step": 5167 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996830719650865e-05, + "loss": 1.361, + "step": 5168 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996829468851325e-05, + "loss": 1.0357, + "step": 5169 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996828217805034e-05, + "loss": 1.2511, + "step": 5170 + }, + { + "epoch": 0.01, + "learning_rate": 4.999682696651198e-05, + "loss": 1.0471, + "step": 5171 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996825714972166e-05, + "loss": 1.0212, + "step": 5172 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996824463185596e-05, + "loss": 0.9291, + "step": 5173 + }, + { + "epoch": 0.01, + "learning_rate": 4.999682321115227e-05, + "loss": 0.6947, + "step": 5174 + }, + { + "epoch": 0.01, + "learning_rate": 4.999682195887219e-05, + "loss": 0.5708, + "step": 5175 + }, + { + "epoch": 0.01, + "learning_rate": 4.999682070634534e-05, + "loss": 0.3236, + "step": 5176 + }, + { + "epoch": 0.01, + "learning_rate": 4.999681945357174e-05, + "loss": 0.3532, + "step": 5177 + }, + { + "epoch": 0.01, + "learning_rate": 4.999681820055138e-05, + "loss": 0.3641, + "step": 5178 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996816947284256e-05, + "loss": 0.749, + "step": 5179 + }, + { + "epoch": 0.01, + "learning_rate": 4.999681569377038e-05, + "loss": 1.5034, + "step": 5180 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996814440009744e-05, + "loss": 1.1136, + "step": 5181 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996813186002355e-05, + "loss": 0.9345, + "step": 5182 + }, + { + "epoch": 0.01, + "learning_rate": 4.99968119317482e-05, + "loss": 1.3294, + "step": 5183 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996810677247296e-05, + "loss": 1.0999, + "step": 5184 + }, + { + "epoch": 0.01, + "learning_rate": 4.999680942249962e-05, + "loss": 0.8845, + "step": 5185 + }, + { + "epoch": 0.01, + "learning_rate": 4.999680816750521e-05, + "loss": 1.214, + "step": 5186 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996806912264024e-05, + "loss": 1.164, + "step": 5187 + }, + { + "epoch": 0.01, + "learning_rate": 4.999680565677608e-05, + "loss": 0.9096, + "step": 5188 + }, + { + "epoch": 0.01, + "learning_rate": 4.999680440104138e-05, + "loss": 0.6272, + "step": 5189 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996803145059925e-05, + "loss": 1.0698, + "step": 5190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999680188883171e-05, + "loss": 1.1675, + "step": 5191 + }, + { + "epoch": 0.01, + "learning_rate": 4.999680063235674e-05, + "loss": 1.1066, + "step": 5192 + }, + { + "epoch": 0.01, + "learning_rate": 4.999679937563501e-05, + "loss": 1.0981, + "step": 5193 + }, + { + "epoch": 0.01, + "learning_rate": 4.999679811866652e-05, + "loss": 0.9234, + "step": 5194 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996796861451275e-05, + "loss": 0.9038, + "step": 5195 + }, + { + "epoch": 0.01, + "learning_rate": 4.999679560398927e-05, + "loss": 1.1016, + "step": 5196 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996794346280505e-05, + "loss": 1.4688, + "step": 5197 + }, + { + "epoch": 0.01, + "learning_rate": 4.999679308832499e-05, + "loss": 1.0326, + "step": 5198 + }, + { + "epoch": 0.01, + "learning_rate": 4.999679183012271e-05, + "loss": 0.9382, + "step": 5199 + }, + { + "epoch": 0.01, + "learning_rate": 4.999679057167368e-05, + "loss": 1.2755, + "step": 5200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999678931297789e-05, + "loss": 1.295, + "step": 5201 + }, + { + "epoch": 0.01, + "learning_rate": 4.999678805403534e-05, + "loss": 1.2337, + "step": 5202 + }, + { + "epoch": 0.01, + "learning_rate": 4.999678679484603e-05, + "loss": 1.0238, + "step": 5203 + }, + { + "epoch": 0.01, + "learning_rate": 4.999678553540996e-05, + "loss": 1.218, + "step": 5204 + }, + { + "epoch": 0.01, + "learning_rate": 4.999678427572714e-05, + "loss": 1.5987, + "step": 5205 + }, + { + "epoch": 0.01, + "learning_rate": 4.999678301579755e-05, + "loss": 1.1188, + "step": 5206 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996781755621216e-05, + "loss": 1.1451, + "step": 5207 + }, + { + "epoch": 0.01, + "learning_rate": 4.999678049519812e-05, + "loss": 1.0454, + "step": 5208 + }, + { + "epoch": 0.01, + "learning_rate": 4.999677923452827e-05, + "loss": 1.0136, + "step": 5209 + }, + { + "epoch": 0.01, + "learning_rate": 4.999677797361165e-05, + "loss": 1.1909, + "step": 5210 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996776712448285e-05, + "loss": 0.9326, + "step": 5211 + }, + { + "epoch": 0.01, + "learning_rate": 4.999677545103816e-05, + "loss": 1.2454, + "step": 5212 + }, + { + "epoch": 0.01, + "learning_rate": 4.999677418938127e-05, + "loss": 0.8697, + "step": 5213 + }, + { + "epoch": 0.01, + "learning_rate": 4.999677292747763e-05, + "loss": 1.1012, + "step": 5214 + }, + { + "epoch": 0.01, + "learning_rate": 4.999677166532723e-05, + "loss": 0.8678, + "step": 5215 + }, + { + "epoch": 0.01, + "learning_rate": 4.999677040293007e-05, + "loss": 1.119, + "step": 5216 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996769140286156e-05, + "loss": 1.3689, + "step": 5217 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996767877395485e-05, + "loss": 1.0955, + "step": 5218 + }, + { + "epoch": 0.01, + "learning_rate": 4.999676661425805e-05, + "loss": 1.2735, + "step": 5219 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996765350873864e-05, + "loss": 1.2391, + "step": 5220 + }, + { + "epoch": 0.01, + "learning_rate": 4.999676408724292e-05, + "loss": 0.8565, + "step": 5221 + }, + { + "epoch": 0.01, + "learning_rate": 4.999676282336522e-05, + "loss": 0.9062, + "step": 5222 + }, + { + "epoch": 0.01, + "learning_rate": 4.999676155924076e-05, + "loss": 1.0853, + "step": 5223 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996760294869536e-05, + "loss": 1.4615, + "step": 5224 + }, + { + "epoch": 0.01, + "learning_rate": 4.999675903025156e-05, + "loss": 0.9734, + "step": 5225 + }, + { + "epoch": 0.01, + "learning_rate": 4.999675776538682e-05, + "loss": 1.1386, + "step": 5226 + }, + { + "epoch": 0.01, + "learning_rate": 4.999675650027534e-05, + "loss": 1.1606, + "step": 5227 + }, + { + "epoch": 0.01, + "learning_rate": 4.999675523491709e-05, + "loss": 1.2047, + "step": 5228 + }, + { + "epoch": 0.01, + "learning_rate": 4.999675396931208e-05, + "loss": 1.385, + "step": 5229 + }, + { + "epoch": 0.01, + "learning_rate": 4.999675270346032e-05, + "loss": 1.4627, + "step": 5230 + }, + { + "epoch": 0.01, + "learning_rate": 4.99967514373618e-05, + "loss": 0.9382, + "step": 5231 + }, + { + "epoch": 0.01, + "learning_rate": 4.999675017101653e-05, + "loss": 0.9209, + "step": 5232 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996748904424484e-05, + "loss": 0.8517, + "step": 5233 + }, + { + "epoch": 0.01, + "learning_rate": 4.99967476375857e-05, + "loss": 0.8616, + "step": 5234 + }, + { + "epoch": 0.01, + "learning_rate": 4.999674637050015e-05, + "loss": 1.0328, + "step": 5235 + }, + { + "epoch": 0.01, + "learning_rate": 4.999674510316784e-05, + "loss": 1.0035, + "step": 5236 + }, + { + "epoch": 0.01, + "learning_rate": 4.999674383558877e-05, + "loss": 0.9074, + "step": 5237 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996742567762956e-05, + "loss": 0.9313, + "step": 5238 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996741299690374e-05, + "loss": 1.2877, + "step": 5239 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996740031371035e-05, + "loss": 1.197, + "step": 5240 + }, + { + "epoch": 0.01, + "learning_rate": 4.999673876280494e-05, + "loss": 1.0386, + "step": 5241 + }, + { + "epoch": 0.01, + "learning_rate": 4.999673749399209e-05, + "loss": 1.2689, + "step": 5242 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996736224932486e-05, + "loss": 1.027, + "step": 5243 + }, + { + "epoch": 0.01, + "learning_rate": 4.999673495562612e-05, + "loss": 1.3627, + "step": 5244 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996733686072996e-05, + "loss": 1.1203, + "step": 5245 + }, + { + "epoch": 0.01, + "learning_rate": 4.999673241627312e-05, + "loss": 1.0976, + "step": 5246 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996731146226476e-05, + "loss": 1.2692, + "step": 5247 + }, + { + "epoch": 0.01, + "learning_rate": 4.999672987593308e-05, + "loss": 0.8317, + "step": 5248 + }, + { + "epoch": 0.01, + "learning_rate": 4.999672860539293e-05, + "loss": 0.8718, + "step": 5249 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996727334606016e-05, + "loss": 0.961, + "step": 5250 + }, + { + "epoch": 0.01, + "learning_rate": 4.999672606357236e-05, + "loss": 1.0874, + "step": 5251 + }, + { + "epoch": 0.01, + "learning_rate": 4.999672479229193e-05, + "loss": 1.4313, + "step": 5252 + }, + { + "epoch": 0.01, + "learning_rate": 4.999672352076475e-05, + "loss": 1.0807, + "step": 5253 + }, + { + "epoch": 0.01, + "learning_rate": 4.999672224899081e-05, + "loss": 1.7528, + "step": 5254 + }, + { + "epoch": 0.01, + "learning_rate": 4.999672097697012e-05, + "loss": 1.6995, + "step": 5255 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996719704702675e-05, + "loss": 1.2513, + "step": 5256 + }, + { + "epoch": 0.01, + "learning_rate": 4.999671843218846e-05, + "loss": 1.2308, + "step": 5257 + }, + { + "epoch": 0.01, + "learning_rate": 4.99967171594275e-05, + "loss": 1.3474, + "step": 5258 + }, + { + "epoch": 0.01, + "learning_rate": 4.999671588641978e-05, + "loss": 1.0074, + "step": 5259 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996714613165294e-05, + "loss": 1.315, + "step": 5260 + }, + { + "epoch": 0.01, + "learning_rate": 4.999671333966406e-05, + "loss": 1.0775, + "step": 5261 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996712065916065e-05, + "loss": 1.1714, + "step": 5262 + }, + { + "epoch": 0.01, + "learning_rate": 4.999671079192131e-05, + "loss": 1.1032, + "step": 5263 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996709517679805e-05, + "loss": 0.8256, + "step": 5264 + }, + { + "epoch": 0.01, + "learning_rate": 4.999670824319154e-05, + "loss": 1.2036, + "step": 5265 + }, + { + "epoch": 0.01, + "learning_rate": 4.999670696845652e-05, + "loss": 1.1551, + "step": 5266 + }, + { + "epoch": 0.01, + "learning_rate": 4.999670569347474e-05, + "loss": 1.088, + "step": 5267 + }, + { + "epoch": 0.01, + "learning_rate": 4.999670441824621e-05, + "loss": 1.1427, + "step": 5268 + }, + { + "epoch": 0.01, + "learning_rate": 4.99967031427709e-05, + "loss": 1.334, + "step": 5269 + }, + { + "epoch": 0.01, + "learning_rate": 4.999670186704886e-05, + "loss": 0.8837, + "step": 5270 + }, + { + "epoch": 0.01, + "learning_rate": 4.999670059108005e-05, + "loss": 1.1194, + "step": 5271 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996699314864494e-05, + "loss": 1.1684, + "step": 5272 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996698038402166e-05, + "loss": 0.5408, + "step": 5273 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996696761693095e-05, + "loss": 0.364, + "step": 5274 + }, + { + "epoch": 0.01, + "learning_rate": 4.999669548473726e-05, + "loss": 1.16, + "step": 5275 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996694207534665e-05, + "loss": 0.7768, + "step": 5276 + }, + { + "epoch": 0.01, + "learning_rate": 4.999669293008532e-05, + "loss": 1.2486, + "step": 5277 + }, + { + "epoch": 0.01, + "learning_rate": 4.999669165238922e-05, + "loss": 0.9353, + "step": 5278 + }, + { + "epoch": 0.01, + "learning_rate": 4.999669037444635e-05, + "loss": 1.4989, + "step": 5279 + }, + { + "epoch": 0.01, + "learning_rate": 4.999668909625674e-05, + "loss": 1.5388, + "step": 5280 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996687817820366e-05, + "loss": 1.2112, + "step": 5281 + }, + { + "epoch": 0.01, + "learning_rate": 4.999668653913723e-05, + "loss": 0.9339, + "step": 5282 + }, + { + "epoch": 0.01, + "learning_rate": 4.999668526020734e-05, + "loss": 1.2608, + "step": 5283 + }, + { + "epoch": 0.01, + "learning_rate": 4.99966839810307e-05, + "loss": 1.1584, + "step": 5284 + }, + { + "epoch": 0.01, + "learning_rate": 4.99966827016073e-05, + "loss": 0.8799, + "step": 5285 + }, + { + "epoch": 0.01, + "learning_rate": 4.999668142193714e-05, + "loss": 1.1249, + "step": 5286 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996680142020226e-05, + "loss": 0.9992, + "step": 5287 + }, + { + "epoch": 0.01, + "learning_rate": 4.999667886185655e-05, + "loss": 0.7114, + "step": 5288 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996677581446125e-05, + "loss": 1.2043, + "step": 5289 + }, + { + "epoch": 0.01, + "learning_rate": 4.999667630078894e-05, + "loss": 0.8246, + "step": 5290 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996675019885e-05, + "loss": 0.8577, + "step": 5291 + }, + { + "epoch": 0.01, + "learning_rate": 4.99966737387343e-05, + "loss": 0.9255, + "step": 5292 + }, + { + "epoch": 0.01, + "learning_rate": 4.999667245733684e-05, + "loss": 1.7593, + "step": 5293 + }, + { + "epoch": 0.01, + "learning_rate": 4.999667117569263e-05, + "loss": 1.4936, + "step": 5294 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996669893801665e-05, + "loss": 1.2999, + "step": 5295 + }, + { + "epoch": 0.01, + "learning_rate": 4.999666861166393e-05, + "loss": 1.7748, + "step": 5296 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996667329279455e-05, + "loss": 4.5491, + "step": 5297 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996666046648214e-05, + "loss": 1.2272, + "step": 5298 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996664763770215e-05, + "loss": 1.1773, + "step": 5299 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996663480645465e-05, + "loss": 1.0385, + "step": 5300 + }, + { + "epoch": 0.01, + "learning_rate": 4.999666219727396e-05, + "loss": 0.9458, + "step": 5301 + }, + { + "epoch": 0.01, + "learning_rate": 4.99966609136557e-05, + "loss": 1.2472, + "step": 5302 + }, + { + "epoch": 0.01, + "learning_rate": 4.999665962979068e-05, + "loss": 1.0255, + "step": 5303 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996658345678896e-05, + "loss": 0.7737, + "step": 5304 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996657061320365e-05, + "loss": 1.0313, + "step": 5305 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996655776715076e-05, + "loss": 1.3828, + "step": 5306 + }, + { + "epoch": 0.01, + "learning_rate": 4.999665449186303e-05, + "loss": 0.9499, + "step": 5307 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996653206764225e-05, + "loss": 1.0777, + "step": 5308 + }, + { + "epoch": 0.01, + "learning_rate": 4.999665192141867e-05, + "loss": 1.1835, + "step": 5309 + }, + { + "epoch": 0.01, + "learning_rate": 4.999665063582635e-05, + "loss": 0.9606, + "step": 5310 + }, + { + "epoch": 0.01, + "learning_rate": 4.999664934998728e-05, + "loss": 1.0589, + "step": 5311 + }, + { + "epoch": 0.01, + "learning_rate": 4.999664806390145e-05, + "loss": 1.1687, + "step": 5312 + }, + { + "epoch": 0.01, + "learning_rate": 4.999664677756887e-05, + "loss": 0.9827, + "step": 5313 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996645490989524e-05, + "loss": 0.867, + "step": 5314 + }, + { + "epoch": 0.01, + "learning_rate": 4.999664420416342e-05, + "loss": 1.2698, + "step": 5315 + }, + { + "epoch": 0.01, + "learning_rate": 4.999664291709057e-05, + "loss": 1.3419, + "step": 5316 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996641629770955e-05, + "loss": 1.3358, + "step": 5317 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996640342204595e-05, + "loss": 0.9363, + "step": 5318 + }, + { + "epoch": 0.01, + "learning_rate": 4.999663905439147e-05, + "loss": 0.6667, + "step": 5319 + }, + { + "epoch": 0.01, + "learning_rate": 4.999663776633159e-05, + "loss": 0.7523, + "step": 5320 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996636478024955e-05, + "loss": 1.2398, + "step": 5321 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996635189471564e-05, + "loss": 1.123, + "step": 5322 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996633900671416e-05, + "loss": 1.1395, + "step": 5323 + }, + { + "epoch": 0.01, + "learning_rate": 4.999663261162451e-05, + "loss": 1.1425, + "step": 5324 + }, + { + "epoch": 0.01, + "learning_rate": 4.999663132233085e-05, + "loss": 1.1347, + "step": 5325 + }, + { + "epoch": 0.01, + "learning_rate": 4.999663003279043e-05, + "loss": 0.7952, + "step": 5326 + }, + { + "epoch": 0.01, + "learning_rate": 4.999662874300326e-05, + "loss": 1.2129, + "step": 5327 + }, + { + "epoch": 0.01, + "learning_rate": 4.999662745296933e-05, + "loss": 1.2426, + "step": 5328 + }, + { + "epoch": 0.01, + "learning_rate": 4.999662616268864e-05, + "loss": 1.0447, + "step": 5329 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996624872161204e-05, + "loss": 1.2094, + "step": 5330 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996623581387e-05, + "loss": 1.3758, + "step": 5331 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996622290366054e-05, + "loss": 1.2301, + "step": 5332 + }, + { + "epoch": 0.01, + "learning_rate": 4.999662099909834e-05, + "loss": 1.4833, + "step": 5333 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996619707583874e-05, + "loss": 1.1072, + "step": 5334 + }, + { + "epoch": 0.01, + "learning_rate": 4.999661841582265e-05, + "loss": 1.3784, + "step": 5335 + }, + { + "epoch": 0.01, + "learning_rate": 4.999661712381467e-05, + "loss": 1.0261, + "step": 5336 + }, + { + "epoch": 0.01, + "learning_rate": 4.999661583155994e-05, + "loss": 0.9837, + "step": 5337 + }, + { + "epoch": 0.01, + "learning_rate": 4.999661453905845e-05, + "loss": 1.0831, + "step": 5338 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996613246310205e-05, + "loss": 1.2405, + "step": 5339 + }, + { + "epoch": 0.01, + "learning_rate": 4.99966119533152e-05, + "loss": 0.954, + "step": 5340 + }, + { + "epoch": 0.01, + "learning_rate": 4.999661066007344e-05, + "loss": 1.0908, + "step": 5341 + }, + { + "epoch": 0.01, + "learning_rate": 4.999660936658492e-05, + "loss": 1.2137, + "step": 5342 + }, + { + "epoch": 0.01, + "learning_rate": 4.999660807284966e-05, + "loss": 1.3878, + "step": 5343 + }, + { + "epoch": 0.01, + "learning_rate": 4.999660677886763e-05, + "loss": 0.8494, + "step": 5344 + }, + { + "epoch": 0.01, + "learning_rate": 4.999660548463885e-05, + "loss": 1.1138, + "step": 5345 + }, + { + "epoch": 0.01, + "learning_rate": 4.999660419016331e-05, + "loss": 1.2996, + "step": 5346 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996602895441016e-05, + "loss": 2.2952, + "step": 5347 + }, + { + "epoch": 0.01, + "learning_rate": 4.999660160047197e-05, + "loss": 3.8883, + "step": 5348 + }, + { + "epoch": 0.01, + "learning_rate": 4.999660030525616e-05, + "loss": 1.2231, + "step": 5349 + }, + { + "epoch": 0.01, + "learning_rate": 4.99965990097936e-05, + "loss": 1.2186, + "step": 5350 + }, + { + "epoch": 0.01, + "learning_rate": 4.999659771408428e-05, + "loss": 1.0336, + "step": 5351 + }, + { + "epoch": 0.01, + "learning_rate": 4.99965964181282e-05, + "loss": 1.1406, + "step": 5352 + }, + { + "epoch": 0.01, + "learning_rate": 4.999659512192537e-05, + "loss": 1.2545, + "step": 5353 + }, + { + "epoch": 0.01, + "learning_rate": 4.999659382547579e-05, + "loss": 0.7299, + "step": 5354 + }, + { + "epoch": 0.01, + "learning_rate": 4.999659252877945e-05, + "loss": 0.9641, + "step": 5355 + }, + { + "epoch": 0.01, + "learning_rate": 4.999659123183636e-05, + "loss": 0.8678, + "step": 5356 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996589934646496e-05, + "loss": 1.1279, + "step": 5357 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996588637209885e-05, + "loss": 0.6869, + "step": 5358 + }, + { + "epoch": 0.01, + "learning_rate": 4.999658733952652e-05, + "loss": 0.8084, + "step": 5359 + }, + { + "epoch": 0.01, + "learning_rate": 4.99965860415964e-05, + "loss": 1.1045, + "step": 5360 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996584743419525e-05, + "loss": 1.1457, + "step": 5361 + }, + { + "epoch": 0.01, + "learning_rate": 4.999658344499589e-05, + "loss": 1.0041, + "step": 5362 + }, + { + "epoch": 0.01, + "learning_rate": 4.999658214632551e-05, + "loss": 1.2116, + "step": 5363 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996580847408366e-05, + "loss": 1.0101, + "step": 5364 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996579548244465e-05, + "loss": 0.4227, + "step": 5365 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996578248833806e-05, + "loss": 0.871, + "step": 5366 + }, + { + "epoch": 0.01, + "learning_rate": 4.99965769491764e-05, + "loss": 1.2822, + "step": 5367 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996575649272235e-05, + "loss": 3.2187, + "step": 5368 + }, + { + "epoch": 0.01, + "learning_rate": 4.999657434912131e-05, + "loss": 1.2285, + "step": 5369 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996573048723633e-05, + "loss": 1.0293, + "step": 5370 + }, + { + "epoch": 0.01, + "learning_rate": 4.99965717480792e-05, + "loss": 1.2138, + "step": 5371 + }, + { + "epoch": 0.01, + "learning_rate": 4.999657044718801e-05, + "loss": 1.1882, + "step": 5372 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996569146050066e-05, + "loss": 1.2422, + "step": 5373 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996567844665366e-05, + "loss": 1.2266, + "step": 5374 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656654303391e-05, + "loss": 1.0654, + "step": 5375 + }, + { + "epoch": 0.01, + "learning_rate": 4.99965652411557e-05, + "loss": 1.1164, + "step": 5376 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996563939030734e-05, + "loss": 1.0866, + "step": 5377 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656263665901e-05, + "loss": 1.1725, + "step": 5378 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656133404053e-05, + "loss": 0.8584, + "step": 5379 + }, + { + "epoch": 0.01, + "learning_rate": 4.99965600311753e-05, + "loss": 1.1981, + "step": 5380 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996558728063314e-05, + "loss": 1.1273, + "step": 5381 + }, + { + "epoch": 0.01, + "learning_rate": 4.999655742470457e-05, + "loss": 1.2806, + "step": 5382 + }, + { + "epoch": 0.01, + "learning_rate": 4.999655612109907e-05, + "loss": 1.8106, + "step": 5383 + }, + { + "epoch": 0.01, + "learning_rate": 4.999655481724681e-05, + "loss": 1.6897, + "step": 5384 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996553513147806e-05, + "loss": 1.2792, + "step": 5385 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996552208802035e-05, + "loss": 1.7448, + "step": 5386 + }, + { + "epoch": 0.01, + "learning_rate": 4.999655090420951e-05, + "loss": 1.6907, + "step": 5387 + }, + { + "epoch": 0.01, + "learning_rate": 4.999654959937023e-05, + "loss": 1.7045, + "step": 5388 + }, + { + "epoch": 0.01, + "learning_rate": 4.999654829428421e-05, + "loss": 1.6589, + "step": 5389 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996546988951414e-05, + "loss": 1.6445, + "step": 5390 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996545683371875e-05, + "loss": 1.6535, + "step": 5391 + }, + { + "epoch": 0.01, + "learning_rate": 4.999654437754557e-05, + "loss": 1.6749, + "step": 5392 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996543071472524e-05, + "loss": 1.65, + "step": 5393 + }, + { + "epoch": 0.01, + "learning_rate": 4.999654176515271e-05, + "loss": 1.6437, + "step": 5394 + }, + { + "epoch": 0.01, + "learning_rate": 4.999654045858615e-05, + "loss": 1.6235, + "step": 5395 + }, + { + "epoch": 0.01, + "learning_rate": 4.999653915177283e-05, + "loss": 1.6279, + "step": 5396 + }, + { + "epoch": 0.01, + "learning_rate": 4.999653784471276e-05, + "loss": 1.645, + "step": 5397 + }, + { + "epoch": 0.01, + "learning_rate": 4.999653653740592e-05, + "loss": 1.6092, + "step": 5398 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996535229852335e-05, + "loss": 1.6206, + "step": 5399 + }, + { + "epoch": 0.01, + "learning_rate": 4.999653392205199e-05, + "loss": 1.6062, + "step": 5400 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996532614004896e-05, + "loss": 1.6103, + "step": 5401 + }, + { + "epoch": 0.01, + "learning_rate": 4.999653130571105e-05, + "loss": 1.5852, + "step": 5402 + }, + { + "epoch": 0.01, + "learning_rate": 4.999652999717044e-05, + "loss": 1.5889, + "step": 5403 + }, + { + "epoch": 0.01, + "learning_rate": 4.999652868838308e-05, + "loss": 1.632, + "step": 5404 + }, + { + "epoch": 0.01, + "learning_rate": 4.999652737934896e-05, + "loss": 1.5914, + "step": 5405 + }, + { + "epoch": 0.01, + "learning_rate": 4.999652607006808e-05, + "loss": 1.5206, + "step": 5406 + }, + { + "epoch": 0.01, + "learning_rate": 4.999652476054046e-05, + "loss": 1.5644, + "step": 5407 + }, + { + "epoch": 0.01, + "learning_rate": 4.999652345076608e-05, + "loss": 1.3976, + "step": 5408 + }, + { + "epoch": 0.01, + "learning_rate": 4.999652214074494e-05, + "loss": 1.5316, + "step": 5409 + }, + { + "epoch": 0.01, + "learning_rate": 4.999652083047705e-05, + "loss": 1.5824, + "step": 5410 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996519519962396e-05, + "loss": 1.4277, + "step": 5411 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996518209200994e-05, + "loss": 1.5752, + "step": 5412 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996516898192835e-05, + "loss": 1.5158, + "step": 5413 + }, + { + "epoch": 0.01, + "learning_rate": 4.999651558693792e-05, + "loss": 1.5819, + "step": 5414 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996514275436256e-05, + "loss": 1.4186, + "step": 5415 + }, + { + "epoch": 0.01, + "learning_rate": 4.999651296368783e-05, + "loss": 1.3164, + "step": 5416 + }, + { + "epoch": 0.01, + "learning_rate": 4.999651165169266e-05, + "loss": 1.4611, + "step": 5417 + }, + { + "epoch": 0.01, + "learning_rate": 4.999651033945072e-05, + "loss": 1.3652, + "step": 5418 + }, + { + "epoch": 0.01, + "learning_rate": 4.999650902696203e-05, + "loss": 1.2459, + "step": 5419 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996507714226584e-05, + "loss": 1.4631, + "step": 5420 + }, + { + "epoch": 0.01, + "learning_rate": 4.999650640124439e-05, + "loss": 1.4849, + "step": 5421 + }, + { + "epoch": 0.01, + "learning_rate": 4.999650508801543e-05, + "loss": 1.5499, + "step": 5422 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996503774539724e-05, + "loss": 1.5977, + "step": 5423 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996502460817264e-05, + "loss": 1.5931, + "step": 5424 + }, + { + "epoch": 0.01, + "learning_rate": 4.999650114684804e-05, + "loss": 1.5806, + "step": 5425 + }, + { + "epoch": 0.01, + "learning_rate": 4.999649983263207e-05, + "loss": 1.544, + "step": 5426 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996498518169345e-05, + "loss": 1.6057, + "step": 5427 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996497203459855e-05, + "loss": 1.6214, + "step": 5428 + }, + { + "epoch": 0.01, + "learning_rate": 4.999649588850363e-05, + "loss": 1.1424, + "step": 5429 + }, + { + "epoch": 0.01, + "learning_rate": 4.999649457330063e-05, + "loss": 1.5469, + "step": 5430 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996493257850885e-05, + "loss": 1.6572, + "step": 5431 + }, + { + "epoch": 0.01, + "learning_rate": 4.999649194215438e-05, + "loss": 1.6586, + "step": 5432 + }, + { + "epoch": 0.01, + "learning_rate": 4.999649062621112e-05, + "loss": 1.6485, + "step": 5433 + }, + { + "epoch": 0.01, + "learning_rate": 4.999648931002111e-05, + "loss": 1.6322, + "step": 5434 + }, + { + "epoch": 0.01, + "learning_rate": 4.999648799358434e-05, + "loss": 1.6366, + "step": 5435 + }, + { + "epoch": 0.01, + "learning_rate": 4.999648667690082e-05, + "loss": 1.6318, + "step": 5436 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996485359970544e-05, + "loss": 1.3802, + "step": 5437 + }, + { + "epoch": 0.01, + "learning_rate": 4.999648404279351e-05, + "loss": 1.635, + "step": 5438 + }, + { + "epoch": 0.01, + "learning_rate": 4.999648272536973e-05, + "loss": 1.6398, + "step": 5439 + }, + { + "epoch": 0.01, + "learning_rate": 4.999648140769918e-05, + "loss": 1.6355, + "step": 5440 + }, + { + "epoch": 0.01, + "learning_rate": 4.999648008978189e-05, + "loss": 1.455, + "step": 5441 + }, + { + "epoch": 0.01, + "learning_rate": 4.999647877161784e-05, + "loss": 1.6273, + "step": 5442 + }, + { + "epoch": 0.01, + "learning_rate": 4.999647745320704e-05, + "loss": 1.6324, + "step": 5443 + }, + { + "epoch": 0.01, + "learning_rate": 4.999647613454947e-05, + "loss": 1.6379, + "step": 5444 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996474815645156e-05, + "loss": 1.6175, + "step": 5445 + }, + { + "epoch": 0.01, + "learning_rate": 4.999647349649409e-05, + "loss": 1.4542, + "step": 5446 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996472177096264e-05, + "loss": 1.3211, + "step": 5447 + }, + { + "epoch": 0.01, + "learning_rate": 4.999647085745169e-05, + "loss": 1.626, + "step": 5448 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996469537560356e-05, + "loss": 0.9563, + "step": 5449 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996468217422265e-05, + "loss": 1.6406, + "step": 5450 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996466897037424e-05, + "loss": 1.5165, + "step": 5451 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996465576405825e-05, + "loss": 1.6257, + "step": 5452 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996464255527475e-05, + "loss": 1.6199, + "step": 5453 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996462934402374e-05, + "loss": 1.6214, + "step": 5454 + }, + { + "epoch": 0.01, + "learning_rate": 4.999646161303051e-05, + "loss": 1.6068, + "step": 5455 + }, + { + "epoch": 0.01, + "learning_rate": 4.999646029141189e-05, + "loss": 1.6175, + "step": 5456 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996458969546525e-05, + "loss": 1.6042, + "step": 5457 + }, + { + "epoch": 0.01, + "learning_rate": 4.99964576474344e-05, + "loss": 1.3519, + "step": 5458 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996456325075525e-05, + "loss": 1.6099, + "step": 5459 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996455002469885e-05, + "loss": 1.6056, + "step": 5460 + }, + { + "epoch": 0.01, + "learning_rate": 4.999645367961751e-05, + "loss": 1.6091, + "step": 5461 + }, + { + "epoch": 0.01, + "learning_rate": 4.999645235651836e-05, + "loss": 1.6222, + "step": 5462 + }, + { + "epoch": 0.01, + "learning_rate": 4.999645103317247e-05, + "loss": 1.601, + "step": 5463 + }, + { + "epoch": 0.01, + "learning_rate": 4.999644970957981e-05, + "loss": 1.5557, + "step": 5464 + }, + { + "epoch": 0.01, + "learning_rate": 4.999644838574041e-05, + "loss": 1.6104, + "step": 5465 + }, + { + "epoch": 0.01, + "learning_rate": 4.999644706165425e-05, + "loss": 1.616, + "step": 5466 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996445737321334e-05, + "loss": 1.611, + "step": 5467 + }, + { + "epoch": 0.01, + "learning_rate": 4.999644441274166e-05, + "loss": 1.6151, + "step": 5468 + }, + { + "epoch": 0.01, + "learning_rate": 4.999644308791524e-05, + "loss": 1.2763, + "step": 5469 + }, + { + "epoch": 0.01, + "learning_rate": 4.999644176284206e-05, + "loss": 1.4887, + "step": 5470 + }, + { + "epoch": 0.01, + "learning_rate": 4.999644043752213e-05, + "loss": 1.363, + "step": 5471 + }, + { + "epoch": 0.01, + "learning_rate": 4.999643911195545e-05, + "loss": 1.6844, + "step": 5472 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996437786142e-05, + "loss": 1.5473, + "step": 5473 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996436460081815e-05, + "loss": 1.5769, + "step": 5474 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996435133774865e-05, + "loss": 1.3247, + "step": 5475 + }, + { + "epoch": 0.01, + "learning_rate": 4.999643380722116e-05, + "loss": 0.9342, + "step": 5476 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996432480420705e-05, + "loss": 1.0491, + "step": 5477 + }, + { + "epoch": 0.01, + "learning_rate": 4.999643115337349e-05, + "loss": 1.2968, + "step": 5478 + }, + { + "epoch": 0.01, + "learning_rate": 4.999642982607953e-05, + "loss": 1.0755, + "step": 5479 + }, + { + "epoch": 0.01, + "learning_rate": 4.99964284985388e-05, + "loss": 1.312, + "step": 5480 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996427170751334e-05, + "loss": 1.1861, + "step": 5481 + }, + { + "epoch": 0.01, + "learning_rate": 4.99964258427171e-05, + "loss": 1.1286, + "step": 5482 + }, + { + "epoch": 0.01, + "learning_rate": 4.999642451443612e-05, + "loss": 1.3266, + "step": 5483 + }, + { + "epoch": 0.01, + "learning_rate": 4.999642318590838e-05, + "loss": 1.486, + "step": 5484 + }, + { + "epoch": 0.01, + "learning_rate": 4.999642185713389e-05, + "loss": 0.9346, + "step": 5485 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996420528112645e-05, + "loss": 0.8062, + "step": 5486 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996419198844643e-05, + "loss": 1.1346, + "step": 5487 + }, + { + "epoch": 0.01, + "learning_rate": 4.999641786932989e-05, + "loss": 1.3305, + "step": 5488 + }, + { + "epoch": 0.01, + "learning_rate": 4.999641653956839e-05, + "loss": 1.0559, + "step": 5489 + }, + { + "epoch": 0.01, + "learning_rate": 4.999641520956013e-05, + "loss": 0.8833, + "step": 5490 + }, + { + "epoch": 0.01, + "learning_rate": 4.999641387930511e-05, + "loss": 1.0513, + "step": 5491 + }, + { + "epoch": 0.01, + "learning_rate": 4.999641254880334e-05, + "loss": 1.0562, + "step": 5492 + }, + { + "epoch": 0.01, + "learning_rate": 4.999641121805482e-05, + "loss": 1.0458, + "step": 5493 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996409887059536e-05, + "loss": 1.0736, + "step": 5494 + }, + { + "epoch": 0.01, + "learning_rate": 4.999640855581751e-05, + "loss": 1.0123, + "step": 5495 + }, + { + "epoch": 0.01, + "learning_rate": 4.999640722432872e-05, + "loss": 1.218, + "step": 5496 + }, + { + "epoch": 0.01, + "learning_rate": 4.999640589259318e-05, + "loss": 1.1162, + "step": 5497 + }, + { + "epoch": 0.01, + "learning_rate": 4.999640456061089e-05, + "loss": 0.9334, + "step": 5498 + }, + { + "epoch": 0.01, + "learning_rate": 4.999640322838184e-05, + "loss": 1.1247, + "step": 5499 + }, + { + "epoch": 0.01, + "learning_rate": 4.999640189590604e-05, + "loss": 1.2491, + "step": 5500 + }, + { + "epoch": 0.01, + "eval_loss": 1.0697647333145142, + "eval_runtime": 83.3184, + "eval_samples_per_second": 16.623, + "eval_steps_per_second": 4.165, + "step": 5500 + }, + { + "epoch": 0.01, + "learning_rate": 4.999640056318349e-05, + "loss": 1.0866, + "step": 5501 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996399230214175e-05, + "loss": 1.1621, + "step": 5502 + }, + { + "epoch": 0.01, + "learning_rate": 4.999639789699811e-05, + "loss": 1.9449, + "step": 5503 + }, + { + "epoch": 0.01, + "learning_rate": 4.999639656353529e-05, + "loss": 1.3423, + "step": 5504 + }, + { + "epoch": 0.01, + "learning_rate": 4.999639522982572e-05, + "loss": 1.0964, + "step": 5505 + }, + { + "epoch": 0.01, + "learning_rate": 4.99963938958694e-05, + "loss": 1.1129, + "step": 5506 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996392561666313e-05, + "loss": 1.3408, + "step": 5507 + }, + { + "epoch": 0.01, + "learning_rate": 4.999639122721649e-05, + "loss": 0.9812, + "step": 5508 + }, + { + "epoch": 0.01, + "learning_rate": 4.99963898925199e-05, + "loss": 0.3642, + "step": 5509 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996388557576556e-05, + "loss": 0.8257, + "step": 5510 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996387222386466e-05, + "loss": 1.2286, + "step": 5511 + }, + { + "epoch": 0.01, + "learning_rate": 4.999638588694961e-05, + "loss": 1.333, + "step": 5512 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996384551266014e-05, + "loss": 1.2729, + "step": 5513 + }, + { + "epoch": 0.01, + "learning_rate": 4.999638321533566e-05, + "loss": 1.1928, + "step": 5514 + }, + { + "epoch": 0.01, + "learning_rate": 4.999638187915855e-05, + "loss": 1.3171, + "step": 5515 + }, + { + "epoch": 0.01, + "learning_rate": 4.999638054273468e-05, + "loss": 1.176, + "step": 5516 + }, + { + "epoch": 0.01, + "learning_rate": 4.999637920606407e-05, + "loss": 0.9842, + "step": 5517 + }, + { + "epoch": 0.01, + "learning_rate": 4.999637786914669e-05, + "loss": 1.0741, + "step": 5518 + }, + { + "epoch": 0.01, + "learning_rate": 4.999637653198257e-05, + "loss": 1.2538, + "step": 5519 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996375194571694e-05, + "loss": 0.947, + "step": 5520 + }, + { + "epoch": 0.01, + "learning_rate": 4.999637385691406e-05, + "loss": 3.6488, + "step": 5521 + }, + { + "epoch": 0.01, + "learning_rate": 4.999637251900968e-05, + "loss": 2.0481, + "step": 5522 + }, + { + "epoch": 0.01, + "learning_rate": 4.999637118085854e-05, + "loss": 1.2494, + "step": 5523 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996369842460646e-05, + "loss": 0.9502, + "step": 5524 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996368503816e-05, + "loss": 1.3348, + "step": 5525 + }, + { + "epoch": 0.01, + "learning_rate": 4.99963671649246e-05, + "loss": 1.2725, + "step": 5526 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996365825786446e-05, + "loss": 0.4649, + "step": 5527 + }, + { + "epoch": 0.01, + "learning_rate": 4.999636448640154e-05, + "loss": 0.2174, + "step": 5528 + }, + { + "epoch": 0.01, + "learning_rate": 4.999636314676988e-05, + "loss": 0.1566, + "step": 5529 + }, + { + "epoch": 0.01, + "learning_rate": 4.999636180689146e-05, + "loss": 0.0937, + "step": 5530 + }, + { + "epoch": 0.01, + "learning_rate": 4.999636046676629e-05, + "loss": 0.0804, + "step": 5531 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996359126394374e-05, + "loss": 0.0901, + "step": 5532 + }, + { + "epoch": 0.01, + "learning_rate": 4.999635778577569e-05, + "loss": 0.1146, + "step": 5533 + }, + { + "epoch": 0.01, + "learning_rate": 4.999635644491027e-05, + "loss": 0.0508, + "step": 5534 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996355103798085e-05, + "loss": 0.0421, + "step": 5535 + }, + { + "epoch": 0.01, + "learning_rate": 4.999635376243915e-05, + "loss": 0.0781, + "step": 5536 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996352420833456e-05, + "loss": 0.061, + "step": 5537 + }, + { + "epoch": 0.01, + "learning_rate": 4.999635107898102e-05, + "loss": 0.2096, + "step": 5538 + }, + { + "epoch": 0.01, + "learning_rate": 4.999634973688182e-05, + "loss": 0.1037, + "step": 5539 + }, + { + "epoch": 0.01, + "learning_rate": 4.999634839453588e-05, + "loss": 0.204, + "step": 5540 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996347051943174e-05, + "loss": 0.1553, + "step": 5541 + }, + { + "epoch": 0.01, + "learning_rate": 4.999634570910372e-05, + "loss": 0.2328, + "step": 5542 + }, + { + "epoch": 0.01, + "learning_rate": 4.999634436601751e-05, + "loss": 0.1369, + "step": 5543 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996343022684544e-05, + "loss": 0.1622, + "step": 5544 + }, + { + "epoch": 0.01, + "learning_rate": 4.999634167910483e-05, + "loss": 0.0946, + "step": 5545 + }, + { + "epoch": 0.01, + "learning_rate": 4.999634033527836e-05, + "loss": 0.812, + "step": 5546 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996338991205136e-05, + "loss": 1.375, + "step": 5547 + }, + { + "epoch": 0.01, + "learning_rate": 4.999633764688516e-05, + "loss": 1.1506, + "step": 5548 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996336302318425e-05, + "loss": 1.2242, + "step": 5549 + }, + { + "epoch": 0.01, + "learning_rate": 4.999633495750494e-05, + "loss": 1.2195, + "step": 5550 + }, + { + "epoch": 0.01, + "learning_rate": 4.999633361244471e-05, + "loss": 1.1318, + "step": 5551 + }, + { + "epoch": 0.01, + "learning_rate": 4.999633226713773e-05, + "loss": 1.2764, + "step": 5552 + }, + { + "epoch": 0.01, + "learning_rate": 4.999633092158398e-05, + "loss": 1.1542, + "step": 5553 + }, + { + "epoch": 0.01, + "learning_rate": 4.999632957578349e-05, + "loss": 1.4049, + "step": 5554 + }, + { + "epoch": 0.01, + "learning_rate": 4.999632822973623e-05, + "loss": 1.8587, + "step": 5555 + }, + { + "epoch": 0.01, + "learning_rate": 4.999632688344223e-05, + "loss": 1.7064, + "step": 5556 + }, + { + "epoch": 0.01, + "learning_rate": 4.999632553690148e-05, + "loss": 1.2494, + "step": 5557 + }, + { + "epoch": 0.01, + "learning_rate": 4.999632419011396e-05, + "loss": 1.3254, + "step": 5558 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996322843079703e-05, + "loss": 0.8808, + "step": 5559 + }, + { + "epoch": 0.01, + "learning_rate": 4.999632149579869e-05, + "loss": 1.0223, + "step": 5560 + }, + { + "epoch": 0.01, + "learning_rate": 4.999632014827092e-05, + "loss": 0.8446, + "step": 5561 + }, + { + "epoch": 0.01, + "learning_rate": 4.99963188004964e-05, + "loss": 1.0649, + "step": 5562 + }, + { + "epoch": 0.01, + "learning_rate": 4.999631745247513e-05, + "loss": 0.8702, + "step": 5563 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996316104207094e-05, + "loss": 1.0529, + "step": 5564 + }, + { + "epoch": 0.01, + "learning_rate": 4.999631475569232e-05, + "loss": 1.352, + "step": 5565 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996313406930786e-05, + "loss": 1.5323, + "step": 5566 + }, + { + "epoch": 0.01, + "learning_rate": 4.999631205792249e-05, + "loss": 1.368, + "step": 5567 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996310708667454e-05, + "loss": 1.0888, + "step": 5568 + }, + { + "epoch": 0.01, + "learning_rate": 4.999630935916566e-05, + "loss": 0.9347, + "step": 5569 + }, + { + "epoch": 0.01, + "learning_rate": 4.999630800941711e-05, + "loss": 0.9903, + "step": 5570 + }, + { + "epoch": 0.01, + "learning_rate": 4.999630665942182e-05, + "loss": 1.0515, + "step": 5571 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996305309179767e-05, + "loss": 1.1067, + "step": 5572 + }, + { + "epoch": 0.01, + "learning_rate": 4.999630395869096e-05, + "loss": 1.2955, + "step": 5573 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996302607955404e-05, + "loss": 1.2846, + "step": 5574 + }, + { + "epoch": 0.01, + "learning_rate": 4.999630125697309e-05, + "loss": 1.0916, + "step": 5575 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996299905744025e-05, + "loss": 1.1508, + "step": 5576 + }, + { + "epoch": 0.01, + "learning_rate": 4.999629855426821e-05, + "loss": 1.2063, + "step": 5577 + }, + { + "epoch": 0.01, + "learning_rate": 4.999629720254564e-05, + "loss": 1.0702, + "step": 5578 + }, + { + "epoch": 0.01, + "learning_rate": 4.999629585057632e-05, + "loss": 1.0311, + "step": 5579 + }, + { + "epoch": 0.01, + "learning_rate": 4.999629449836024e-05, + "loss": 1.3127, + "step": 5580 + }, + { + "epoch": 0.01, + "learning_rate": 4.999629314589741e-05, + "loss": 1.3089, + "step": 5581 + }, + { + "epoch": 0.01, + "learning_rate": 4.999629179318783e-05, + "loss": 1.0694, + "step": 5582 + }, + { + "epoch": 0.01, + "learning_rate": 4.99962904402315e-05, + "loss": 1.1529, + "step": 5583 + }, + { + "epoch": 0.01, + "learning_rate": 4.999628908702841e-05, + "loss": 1.1264, + "step": 5584 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996287733578575e-05, + "loss": 1.1999, + "step": 5585 + }, + { + "epoch": 0.01, + "learning_rate": 4.999628637988198e-05, + "loss": 1.1031, + "step": 5586 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996285025938636e-05, + "loss": 0.7861, + "step": 5587 + }, + { + "epoch": 0.01, + "learning_rate": 4.999628367174853e-05, + "loss": 0.4965, + "step": 5588 + }, + { + "epoch": 0.01, + "learning_rate": 4.999628231731168e-05, + "loss": 0.8337, + "step": 5589 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996280962628075e-05, + "loss": 0.8052, + "step": 5590 + }, + { + "epoch": 0.01, + "learning_rate": 4.999627960769772e-05, + "loss": 1.2772, + "step": 5591 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996278252520614e-05, + "loss": 0.9121, + "step": 5592 + }, + { + "epoch": 0.01, + "learning_rate": 4.999627689709675e-05, + "loss": 1.7697, + "step": 5593 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996275541426136e-05, + "loss": 1.6754, + "step": 5594 + }, + { + "epoch": 0.01, + "learning_rate": 4.999627418550877e-05, + "loss": 0.9327, + "step": 5595 + }, + { + "epoch": 0.01, + "learning_rate": 4.999627282934465e-05, + "loss": 0.7492, + "step": 5596 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996271472933773e-05, + "loss": 0.6132, + "step": 5597 + }, + { + "epoch": 0.01, + "learning_rate": 4.999627011627615e-05, + "loss": 0.7812, + "step": 5598 + }, + { + "epoch": 0.01, + "learning_rate": 4.999626875937177e-05, + "loss": 1.0239, + "step": 5599 + }, + { + "epoch": 0.01, + "learning_rate": 4.999626740222064e-05, + "loss": 1.3136, + "step": 5600 + }, + { + "epoch": 0.01, + "learning_rate": 4.999626604482276e-05, + "loss": 1.5179, + "step": 5601 + }, + { + "epoch": 0.01, + "learning_rate": 4.999626468717812e-05, + "loss": 1.271, + "step": 5602 + }, + { + "epoch": 0.01, + "learning_rate": 4.999626332928673e-05, + "loss": 1.0892, + "step": 5603 + }, + { + "epoch": 0.01, + "learning_rate": 4.99962619711486e-05, + "loss": 1.113, + "step": 5604 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996260612763704e-05, + "loss": 1.1502, + "step": 5605 + }, + { + "epoch": 0.01, + "learning_rate": 4.999625925413205e-05, + "loss": 1.1396, + "step": 5606 + }, + { + "epoch": 0.01, + "learning_rate": 4.999625789525365e-05, + "loss": 1.56, + "step": 5607 + }, + { + "epoch": 0.01, + "learning_rate": 4.99962565361285e-05, + "loss": 1.2087, + "step": 5608 + }, + { + "epoch": 0.01, + "learning_rate": 4.99962551767566e-05, + "loss": 1.268, + "step": 5609 + }, + { + "epoch": 0.01, + "learning_rate": 4.999625381713794e-05, + "loss": 1.2953, + "step": 5610 + }, + { + "epoch": 0.01, + "learning_rate": 4.999625245727253e-05, + "loss": 1.222, + "step": 5611 + }, + { + "epoch": 0.01, + "learning_rate": 4.999625109716037e-05, + "loss": 1.1168, + "step": 5612 + }, + { + "epoch": 0.01, + "learning_rate": 4.999624973680146e-05, + "loss": 1.1631, + "step": 5613 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996248376195784e-05, + "loss": 1.4415, + "step": 5614 + }, + { + "epoch": 0.01, + "learning_rate": 4.999624701534337e-05, + "loss": 1.1527, + "step": 5615 + }, + { + "epoch": 0.01, + "learning_rate": 4.99962456542442e-05, + "loss": 1.1625, + "step": 5616 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996244292898275e-05, + "loss": 1.3437, + "step": 5617 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996242931305606e-05, + "loss": 0.7632, + "step": 5618 + }, + { + "epoch": 0.01, + "learning_rate": 4.999624156946617e-05, + "loss": 1.2084, + "step": 5619 + }, + { + "epoch": 0.01, + "learning_rate": 4.999624020737999e-05, + "loss": 0.9181, + "step": 5620 + }, + { + "epoch": 0.01, + "learning_rate": 4.999623884504706e-05, + "loss": 1.1415, + "step": 5621 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996237482467374e-05, + "loss": 1.2355, + "step": 5622 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996236119640937e-05, + "loss": 1.2088, + "step": 5623 + }, + { + "epoch": 0.01, + "learning_rate": 4.999623475656774e-05, + "loss": 0.5592, + "step": 5624 + }, + { + "epoch": 0.01, + "learning_rate": 4.99962333932478e-05, + "loss": 1.0354, + "step": 5625 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996232029681114e-05, + "loss": 0.9641, + "step": 5626 + }, + { + "epoch": 0.01, + "learning_rate": 4.999623066586766e-05, + "loss": 1.1484, + "step": 5627 + }, + { + "epoch": 0.01, + "learning_rate": 4.999622930180746e-05, + "loss": 1.1756, + "step": 5628 + }, + { + "epoch": 0.01, + "learning_rate": 4.999622793750051e-05, + "loss": 1.022, + "step": 5629 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996226572946806e-05, + "loss": 1.2777, + "step": 5630 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996225208146355e-05, + "loss": 1.4627, + "step": 5631 + }, + { + "epoch": 0.01, + "learning_rate": 4.999622384309914e-05, + "loss": 1.3966, + "step": 5632 + }, + { + "epoch": 0.01, + "learning_rate": 4.999622247780518e-05, + "loss": 1.1712, + "step": 5633 + }, + { + "epoch": 0.01, + "learning_rate": 4.999622111226447e-05, + "loss": 1.1774, + "step": 5634 + }, + { + "epoch": 0.01, + "learning_rate": 4.999621974647701e-05, + "loss": 1.6404, + "step": 5635 + }, + { + "epoch": 0.01, + "learning_rate": 4.999621838044279e-05, + "loss": 1.4061, + "step": 5636 + }, + { + "epoch": 0.01, + "learning_rate": 4.999621701416182e-05, + "loss": 1.254, + "step": 5637 + }, + { + "epoch": 0.01, + "learning_rate": 4.99962156476341e-05, + "loss": 1.154, + "step": 5638 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996214280859624e-05, + "loss": 0.999, + "step": 5639 + }, + { + "epoch": 0.01, + "learning_rate": 4.99962129138384e-05, + "loss": 1.443, + "step": 5640 + }, + { + "epoch": 0.01, + "learning_rate": 4.999621154657043e-05, + "loss": 1.3345, + "step": 5641 + }, + { + "epoch": 0.01, + "learning_rate": 4.99962101790557e-05, + "loss": 1.4813, + "step": 5642 + }, + { + "epoch": 0.01, + "learning_rate": 4.999620881129422e-05, + "loss": 0.4938, + "step": 5643 + }, + { + "epoch": 0.01, + "learning_rate": 4.999620744328598e-05, + "loss": 0.8844, + "step": 5644 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996206075030996e-05, + "loss": 1.2019, + "step": 5645 + }, + { + "epoch": 0.01, + "learning_rate": 4.999620470652926e-05, + "loss": 1.3681, + "step": 5646 + }, + { + "epoch": 0.01, + "learning_rate": 4.999620333778077e-05, + "loss": 1.3369, + "step": 5647 + }, + { + "epoch": 0.01, + "learning_rate": 4.999620196878553e-05, + "loss": 0.9395, + "step": 5648 + }, + { + "epoch": 0.01, + "learning_rate": 4.999620059954353e-05, + "loss": 1.4062, + "step": 5649 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996199230054784e-05, + "loss": 1.3625, + "step": 5650 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996197860319295e-05, + "loss": 1.1258, + "step": 5651 + }, + { + "epoch": 0.01, + "learning_rate": 4.999619649033704e-05, + "loss": 0.9185, + "step": 5652 + }, + { + "epoch": 0.01, + "learning_rate": 4.999619512010804e-05, + "loss": 1.1147, + "step": 5653 + }, + { + "epoch": 0.01, + "learning_rate": 4.999619374963229e-05, + "loss": 1.2958, + "step": 5654 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996192378909786e-05, + "loss": 0.9991, + "step": 5655 + }, + { + "epoch": 0.01, + "learning_rate": 4.999619100794053e-05, + "loss": 0.9079, + "step": 5656 + }, + { + "epoch": 0.01, + "learning_rate": 4.999618963672452e-05, + "loss": 1.1405, + "step": 5657 + }, + { + "epoch": 0.01, + "learning_rate": 4.999618826526176e-05, + "loss": 1.2317, + "step": 5658 + }, + { + "epoch": 0.01, + "learning_rate": 4.999618689355224e-05, + "loss": 1.2066, + "step": 5659 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996185521595984e-05, + "loss": 1.3058, + "step": 5660 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996184149392967e-05, + "loss": 0.8117, + "step": 5661 + }, + { + "epoch": 0.01, + "learning_rate": 4.99961827769432e-05, + "loss": 0.6044, + "step": 5662 + }, + { + "epoch": 0.01, + "learning_rate": 4.999618140424668e-05, + "loss": 1.3677, + "step": 5663 + }, + { + "epoch": 0.01, + "learning_rate": 4.999618003130341e-05, + "loss": 1.2567, + "step": 5664 + }, + { + "epoch": 0.01, + "learning_rate": 4.999617865811338e-05, + "loss": 1.283, + "step": 5665 + }, + { + "epoch": 0.01, + "learning_rate": 4.999617728467661e-05, + "loss": 1.1749, + "step": 5666 + }, + { + "epoch": 0.01, + "learning_rate": 4.999617591099308e-05, + "loss": 0.912, + "step": 5667 + }, + { + "epoch": 0.01, + "learning_rate": 4.99961745370628e-05, + "loss": 1.248, + "step": 5668 + }, + { + "epoch": 0.01, + "learning_rate": 4.999617316288577e-05, + "loss": 1.3435, + "step": 5669 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996171788461996e-05, + "loss": 1.1158, + "step": 5670 + }, + { + "epoch": 0.01, + "learning_rate": 4.999617041379146e-05, + "loss": 0.7348, + "step": 5671 + }, + { + "epoch": 0.01, + "learning_rate": 4.999616903887417e-05, + "loss": 1.1834, + "step": 5672 + }, + { + "epoch": 0.01, + "learning_rate": 4.999616766371014e-05, + "loss": 1.0043, + "step": 5673 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996166288299354e-05, + "loss": 1.0576, + "step": 5674 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996164912641804e-05, + "loss": 1.4277, + "step": 5675 + }, + { + "epoch": 0.01, + "learning_rate": 4.999616353673752e-05, + "loss": 0.969, + "step": 5676 + }, + { + "epoch": 0.01, + "learning_rate": 4.999616216058647e-05, + "loss": 0.719, + "step": 5677 + }, + { + "epoch": 0.01, + "learning_rate": 4.999616078418868e-05, + "loss": 1.0071, + "step": 5678 + }, + { + "epoch": 0.01, + "learning_rate": 4.999615940754413e-05, + "loss": 1.0275, + "step": 5679 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996158030652834e-05, + "loss": 1.1718, + "step": 5680 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996156653514786e-05, + "loss": 1.0183, + "step": 5681 + }, + { + "epoch": 0.01, + "learning_rate": 4.999615527612999e-05, + "loss": 1.116, + "step": 5682 + }, + { + "epoch": 0.01, + "learning_rate": 4.999615389849843e-05, + "loss": 1.2664, + "step": 5683 + }, + { + "epoch": 0.01, + "learning_rate": 4.999615252062013e-05, + "loss": 1.2419, + "step": 5684 + }, + { + "epoch": 0.01, + "learning_rate": 4.999615114249507e-05, + "loss": 1.298, + "step": 5685 + }, + { + "epoch": 0.01, + "learning_rate": 4.999614976412327e-05, + "loss": 1.3061, + "step": 5686 + }, + { + "epoch": 0.01, + "learning_rate": 4.99961483855047e-05, + "loss": 1.0749, + "step": 5687 + }, + { + "epoch": 0.01, + "learning_rate": 4.99961470066394e-05, + "loss": 0.6447, + "step": 5688 + }, + { + "epoch": 0.01, + "learning_rate": 4.999614562752734e-05, + "loss": 0.4944, + "step": 5689 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996144248168525e-05, + "loss": 0.7365, + "step": 5690 + }, + { + "epoch": 0.01, + "learning_rate": 4.999614286856296e-05, + "loss": 1.5031, + "step": 5691 + }, + { + "epoch": 0.01, + "learning_rate": 4.999614148871065e-05, + "loss": 1.3854, + "step": 5692 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996140108611576e-05, + "loss": 1.0011, + "step": 5693 + }, + { + "epoch": 0.01, + "learning_rate": 4.999613872826576e-05, + "loss": 1.2076, + "step": 5694 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996137347673186e-05, + "loss": 0.9505, + "step": 5695 + }, + { + "epoch": 0.01, + "learning_rate": 4.999613596683387e-05, + "loss": 1.2965, + "step": 5696 + }, + { + "epoch": 0.01, + "learning_rate": 4.99961345857478e-05, + "loss": 1.14, + "step": 5697 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996133204414974e-05, + "loss": 1.3744, + "step": 5698 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996131822835403e-05, + "loss": 1.2792, + "step": 5699 + }, + { + "epoch": 0.01, + "learning_rate": 4.999613044100907e-05, + "loss": 1.2024, + "step": 5700 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996129058936e-05, + "loss": 1.1759, + "step": 5701 + }, + { + "epoch": 0.01, + "learning_rate": 4.999612767661617e-05, + "loss": 1.1439, + "step": 5702 + }, + { + "epoch": 0.01, + "learning_rate": 4.999612629404959e-05, + "loss": 1.1925, + "step": 5703 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996124911236255e-05, + "loss": 0.9742, + "step": 5704 + }, + { + "epoch": 0.01, + "learning_rate": 4.999612352817617e-05, + "loss": 1.1068, + "step": 5705 + }, + { + "epoch": 0.01, + "learning_rate": 4.999612214486934e-05, + "loss": 0.5886, + "step": 5706 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996120761315756e-05, + "loss": 0.991, + "step": 5707 + }, + { + "epoch": 0.01, + "learning_rate": 4.999611937751542e-05, + "loss": 1.0096, + "step": 5708 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996117993468336e-05, + "loss": 0.9139, + "step": 5709 + }, + { + "epoch": 0.01, + "learning_rate": 4.999611660917449e-05, + "loss": 1.185, + "step": 5710 + }, + { + "epoch": 0.01, + "learning_rate": 4.999611522463391e-05, + "loss": 1.2934, + "step": 5711 + }, + { + "epoch": 0.01, + "learning_rate": 4.999611383984657e-05, + "loss": 0.9533, + "step": 5712 + }, + { + "epoch": 0.01, + "learning_rate": 4.999611245481247e-05, + "loss": 1.0366, + "step": 5713 + }, + { + "epoch": 0.01, + "learning_rate": 4.999611106953164e-05, + "loss": 0.8981, + "step": 5714 + }, + { + "epoch": 0.01, + "learning_rate": 4.999610968400404e-05, + "loss": 0.8346, + "step": 5715 + }, + { + "epoch": 0.01, + "learning_rate": 4.999610829822969e-05, + "loss": 1.0552, + "step": 5716 + }, + { + "epoch": 0.01, + "learning_rate": 4.999610691220859e-05, + "loss": 1.1626, + "step": 5717 + }, + { + "epoch": 0.01, + "learning_rate": 4.999610552594075e-05, + "loss": 1.4871, + "step": 5718 + }, + { + "epoch": 0.01, + "learning_rate": 4.999610413942616e-05, + "loss": 1.0045, + "step": 5719 + }, + { + "epoch": 0.01, + "learning_rate": 4.999610275266481e-05, + "loss": 1.1701, + "step": 5720 + }, + { + "epoch": 0.01, + "learning_rate": 4.999610136565671e-05, + "loss": 1.1458, + "step": 5721 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609997840185e-05, + "loss": 1.342, + "step": 5722 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996098590900255e-05, + "loss": 1.2274, + "step": 5723 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609720315191e-05, + "loss": 0.5639, + "step": 5724 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996095815156794e-05, + "loss": 0.4767, + "step": 5725 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996094426914943e-05, + "loss": 1.0088, + "step": 5726 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609303842634e-05, + "loss": 1.4282, + "step": 5727 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609164969098e-05, + "loss": 0.7006, + "step": 5728 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996090260708874e-05, + "loss": 1.1023, + "step": 5729 + }, + { + "epoch": 0.01, + "learning_rate": 4.999608887148002e-05, + "loss": 1.2316, + "step": 5730 + }, + { + "epoch": 0.01, + "learning_rate": 4.999608748200441e-05, + "loss": 1.4644, + "step": 5731 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996086092282046e-05, + "loss": 0.9136, + "step": 5732 + }, + { + "epoch": 0.01, + "learning_rate": 4.999608470231294e-05, + "loss": 1.2339, + "step": 5733 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996083312097076e-05, + "loss": 1.3003, + "step": 5734 + }, + { + "epoch": 0.01, + "learning_rate": 4.999608192163446e-05, + "loss": 1.3405, + "step": 5735 + }, + { + "epoch": 0.01, + "learning_rate": 4.99960805309251e-05, + "loss": 1.1211, + "step": 5736 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996079139968986e-05, + "loss": 1.167, + "step": 5737 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996077748766125e-05, + "loss": 1.0671, + "step": 5738 + }, + { + "epoch": 0.01, + "learning_rate": 4.999607635731651e-05, + "loss": 1.2298, + "step": 5739 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996074965620144e-05, + "loss": 1.0296, + "step": 5740 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996073573677024e-05, + "loss": 1.244, + "step": 5741 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996072181487154e-05, + "loss": 1.1463, + "step": 5742 + }, + { + "epoch": 0.01, + "learning_rate": 4.999607078905054e-05, + "loss": 1.0963, + "step": 5743 + }, + { + "epoch": 0.01, + "learning_rate": 4.999606939636717e-05, + "loss": 1.1444, + "step": 5744 + }, + { + "epoch": 0.01, + "learning_rate": 4.999606800343705e-05, + "loss": 1.1883, + "step": 5745 + }, + { + "epoch": 0.01, + "learning_rate": 4.999606661026018e-05, + "loss": 1.0707, + "step": 5746 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996065216836563e-05, + "loss": 1.1605, + "step": 5747 + }, + { + "epoch": 0.01, + "learning_rate": 4.999606382316618e-05, + "loss": 1.0791, + "step": 5748 + }, + { + "epoch": 0.01, + "learning_rate": 4.999606242924907e-05, + "loss": 0.7398, + "step": 5749 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996061035085196e-05, + "loss": 1.1299, + "step": 5750 + }, + { + "epoch": 0.01, + "learning_rate": 4.999605964067457e-05, + "loss": 1.389, + "step": 5751 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996058246017195e-05, + "loss": 1.3896, + "step": 5752 + }, + { + "epoch": 0.01, + "learning_rate": 4.999605685111307e-05, + "loss": 0.9519, + "step": 5753 + }, + { + "epoch": 0.01, + "learning_rate": 4.99960554559622e-05, + "loss": 1.1582, + "step": 5754 + }, + { + "epoch": 0.01, + "learning_rate": 4.999605406056457e-05, + "loss": 1.1379, + "step": 5755 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996052664920195e-05, + "loss": 1.1026, + "step": 5756 + }, + { + "epoch": 0.01, + "learning_rate": 4.999605126902907e-05, + "loss": 0.9483, + "step": 5757 + }, + { + "epoch": 0.01, + "learning_rate": 4.999604987289119e-05, + "loss": 1.1033, + "step": 5758 + }, + { + "epoch": 0.01, + "learning_rate": 4.999604847650656e-05, + "loss": 1.5733, + "step": 5759 + }, + { + "epoch": 0.01, + "learning_rate": 4.999604707987519e-05, + "loss": 1.185, + "step": 5760 + }, + { + "epoch": 0.01, + "learning_rate": 4.999604568299706e-05, + "loss": 1.3739, + "step": 5761 + }, + { + "epoch": 0.01, + "learning_rate": 4.999604428587218e-05, + "loss": 1.0264, + "step": 5762 + }, + { + "epoch": 0.01, + "learning_rate": 4.999604288850055e-05, + "loss": 1.1799, + "step": 5763 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996041490882175e-05, + "loss": 1.2137, + "step": 5764 + }, + { + "epoch": 0.01, + "learning_rate": 4.999604009301704e-05, + "loss": 2.0663, + "step": 5765 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996038694905163e-05, + "loss": 1.1117, + "step": 5766 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996037296546535e-05, + "loss": 1.0114, + "step": 5767 + }, + { + "epoch": 0.01, + "learning_rate": 4.999603589794115e-05, + "loss": 0.7678, + "step": 5768 + }, + { + "epoch": 0.01, + "learning_rate": 4.999603449908902e-05, + "loss": 0.844, + "step": 5769 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996033099990136e-05, + "loss": 0.9896, + "step": 5770 + }, + { + "epoch": 0.01, + "learning_rate": 4.999603170064451e-05, + "loss": 0.3288, + "step": 5771 + }, + { + "epoch": 0.01, + "learning_rate": 4.999603030105212e-05, + "loss": 0.2919, + "step": 5772 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996028901212987e-05, + "loss": 0.9983, + "step": 5773 + }, + { + "epoch": 0.01, + "learning_rate": 4.999602750112711e-05, + "loss": 1.1373, + "step": 5774 + }, + { + "epoch": 0.01, + "learning_rate": 4.999602610079447e-05, + "loss": 1.32, + "step": 5775 + }, + { + "epoch": 0.01, + "learning_rate": 4.999602470021509e-05, + "loss": 1.1605, + "step": 5776 + }, + { + "epoch": 0.01, + "learning_rate": 4.999602329938896e-05, + "loss": 1.2259, + "step": 5777 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996021898316074e-05, + "loss": 0.7605, + "step": 5778 + }, + { + "epoch": 0.01, + "learning_rate": 4.999602049699644e-05, + "loss": 1.2776, + "step": 5779 + }, + { + "epoch": 0.01, + "learning_rate": 4.999601909543006e-05, + "loss": 1.2831, + "step": 5780 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996017693616924e-05, + "loss": 1.0186, + "step": 5781 + }, + { + "epoch": 0.01, + "learning_rate": 4.999601629155703e-05, + "loss": 1.2855, + "step": 5782 + }, + { + "epoch": 0.01, + "learning_rate": 4.99960148892504e-05, + "loss": 1.0829, + "step": 5783 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996013486697016e-05, + "loss": 1.1834, + "step": 5784 + }, + { + "epoch": 0.01, + "learning_rate": 4.999601208389688e-05, + "loss": 1.0825, + "step": 5785 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996010680850004e-05, + "loss": 1.3013, + "step": 5786 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996009277556364e-05, + "loss": 1.1277, + "step": 5787 + }, + { + "epoch": 0.01, + "learning_rate": 4.999600787401598e-05, + "loss": 1.436, + "step": 5788 + }, + { + "epoch": 0.01, + "learning_rate": 4.999600647022885e-05, + "loss": 1.2099, + "step": 5789 + }, + { + "epoch": 0.01, + "learning_rate": 4.999600506619496e-05, + "loss": 1.4521, + "step": 5790 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996003661914326e-05, + "loss": 1.0093, + "step": 5791 + }, + { + "epoch": 0.01, + "learning_rate": 4.9996002257386945e-05, + "loss": 1.2973, + "step": 5792 + }, + { + "epoch": 0.01, + "learning_rate": 4.99960008526128e-05, + "loss": 1.0852, + "step": 5793 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995999447591926e-05, + "loss": 1.2553, + "step": 5794 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995998042324286e-05, + "loss": 0.214, + "step": 5795 + }, + { + "epoch": 0.01, + "learning_rate": 4.99959966368099e-05, + "loss": 0.1374, + "step": 5796 + }, + { + "epoch": 0.01, + "learning_rate": 4.999599523104876e-05, + "loss": 0.4754, + "step": 5797 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995993825040876e-05, + "loss": 0.4334, + "step": 5798 + }, + { + "epoch": 0.01, + "learning_rate": 4.999599241878624e-05, + "loss": 0.441, + "step": 5799 + }, + { + "epoch": 0.01, + "learning_rate": 4.999599101228486e-05, + "loss": 0.3289, + "step": 5800 + }, + { + "epoch": 0.01, + "learning_rate": 4.999598960553672e-05, + "loss": 0.3035, + "step": 5801 + }, + { + "epoch": 0.01, + "learning_rate": 4.999598819854184e-05, + "loss": 0.2614, + "step": 5802 + }, + { + "epoch": 0.01, + "learning_rate": 4.999598679130021e-05, + "loss": 0.2639, + "step": 5803 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995985383811824e-05, + "loss": 0.3609, + "step": 5804 + }, + { + "epoch": 0.01, + "learning_rate": 4.999598397607669e-05, + "loss": 0.5269, + "step": 5805 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995982568094804e-05, + "loss": 0.2979, + "step": 5806 + }, + { + "epoch": 0.01, + "learning_rate": 4.999598115986617e-05, + "loss": 0.3059, + "step": 5807 + }, + { + "epoch": 0.01, + "learning_rate": 4.999597975139079e-05, + "loss": 0.6379, + "step": 5808 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995978342668656e-05, + "loss": 0.9649, + "step": 5809 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995976933699774e-05, + "loss": 0.9759, + "step": 5810 + }, + { + "epoch": 0.01, + "learning_rate": 4.999597552448414e-05, + "loss": 0.6076, + "step": 5811 + }, + { + "epoch": 0.01, + "learning_rate": 4.999597411502176e-05, + "loss": 1.2914, + "step": 5812 + }, + { + "epoch": 0.01, + "learning_rate": 4.999597270531262e-05, + "loss": 1.1692, + "step": 5813 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995971295356744e-05, + "loss": 1.1402, + "step": 5814 + }, + { + "epoch": 0.01, + "learning_rate": 4.999596988515411e-05, + "loss": 1.1395, + "step": 5815 + }, + { + "epoch": 0.01, + "learning_rate": 4.999596847470473e-05, + "loss": 1.0746, + "step": 5816 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995967064008596e-05, + "loss": 1.2578, + "step": 5817 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995965653065714e-05, + "loss": 1.0937, + "step": 5818 + }, + { + "epoch": 0.01, + "learning_rate": 4.999596424187609e-05, + "loss": 1.114, + "step": 5819 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995962830439704e-05, + "loss": 1.1062, + "step": 5820 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995961418756576e-05, + "loss": 0.3034, + "step": 5821 + }, + { + "epoch": 0.01, + "learning_rate": 4.99959600068267e-05, + "loss": 1.3744, + "step": 5822 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995958594650074e-05, + "loss": 1.0629, + "step": 5823 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995957182226693e-05, + "loss": 1.1352, + "step": 5824 + }, + { + "epoch": 0.01, + "learning_rate": 4.999595576955656e-05, + "loss": 1.1774, + "step": 5825 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995954356639686e-05, + "loss": 0.9296, + "step": 5826 + }, + { + "epoch": 0.01, + "learning_rate": 4.999595294347605e-05, + "loss": 1.1025, + "step": 5827 + }, + { + "epoch": 0.01, + "learning_rate": 4.999595153006568e-05, + "loss": 1.1803, + "step": 5828 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995950116408555e-05, + "loss": 1.2585, + "step": 5829 + }, + { + "epoch": 0.01, + "learning_rate": 4.999594870250468e-05, + "loss": 0.8967, + "step": 5830 + }, + { + "epoch": 0.01, + "learning_rate": 4.999594728835405e-05, + "loss": 1.0024, + "step": 5831 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995945873956686e-05, + "loss": 1.1998, + "step": 5832 + }, + { + "epoch": 0.01, + "learning_rate": 4.999594445931256e-05, + "loss": 1.1819, + "step": 5833 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995943044421685e-05, + "loss": 1.3396, + "step": 5834 + }, + { + "epoch": 0.01, + "learning_rate": 4.999594162928406e-05, + "loss": 1.2523, + "step": 5835 + }, + { + "epoch": 0.01, + "learning_rate": 4.999594021389969e-05, + "loss": 1.3324, + "step": 5836 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995938798268573e-05, + "loss": 1.0069, + "step": 5837 + }, + { + "epoch": 0.01, + "learning_rate": 4.99959373823907e-05, + "loss": 1.0306, + "step": 5838 + }, + { + "epoch": 0.01, + "learning_rate": 4.999593596626608e-05, + "loss": 0.9872, + "step": 5839 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995934549894704e-05, + "loss": 1.0294, + "step": 5840 + }, + { + "epoch": 0.01, + "learning_rate": 4.999593313327659e-05, + "loss": 1.067, + "step": 5841 + }, + { + "epoch": 0.01, + "learning_rate": 4.999593171641172e-05, + "loss": 1.1862, + "step": 5842 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995930299300104e-05, + "loss": 1.0927, + "step": 5843 + }, + { + "epoch": 0.01, + "learning_rate": 4.999592888194173e-05, + "loss": 1.2252, + "step": 5844 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995927464336625e-05, + "loss": 1.2999, + "step": 5845 + }, + { + "epoch": 0.01, + "learning_rate": 4.999592604648475e-05, + "loss": 0.9361, + "step": 5846 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995924628386136e-05, + "loss": 1.0318, + "step": 5847 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995923210040776e-05, + "loss": 1.0358, + "step": 5848 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995921791448664e-05, + "loss": 0.9336, + "step": 5849 + }, + { + "epoch": 0.01, + "learning_rate": 4.99959203726098e-05, + "loss": 0.7572, + "step": 5850 + }, + { + "epoch": 0.01, + "learning_rate": 4.999591895352419e-05, + "loss": 0.7159, + "step": 5851 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995917534191825e-05, + "loss": 1.3437, + "step": 5852 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995916114612717e-05, + "loss": 1.0615, + "step": 5853 + }, + { + "epoch": 0.01, + "learning_rate": 4.999591469478686e-05, + "loss": 1.2398, + "step": 5854 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995913274714254e-05, + "loss": 4.6326, + "step": 5855 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995911854394894e-05, + "loss": 3.5142, + "step": 5856 + }, + { + "epoch": 0.01, + "learning_rate": 4.999591043382878e-05, + "loss": 3.019, + "step": 5857 + }, + { + "epoch": 0.01, + "learning_rate": 4.999590901301593e-05, + "loss": 2.6754, + "step": 5858 + }, + { + "epoch": 0.01, + "learning_rate": 4.999590759195633e-05, + "loss": 2.7023, + "step": 5859 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995906170649976e-05, + "loss": 2.5355, + "step": 5860 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995904749096875e-05, + "loss": 2.4428, + "step": 5861 + }, + { + "epoch": 0.01, + "learning_rate": 4.999590332729702e-05, + "loss": 2.4379, + "step": 5862 + }, + { + "epoch": 0.01, + "learning_rate": 4.999590190525042e-05, + "loss": 2.3608, + "step": 5863 + }, + { + "epoch": 0.01, + "learning_rate": 4.999590048295707e-05, + "loss": 2.1782, + "step": 5864 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995899060416974e-05, + "loss": 2.2068, + "step": 5865 + }, + { + "epoch": 0.01, + "learning_rate": 4.999589763763013e-05, + "loss": 2.152, + "step": 5866 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995896214596525e-05, + "loss": 2.0718, + "step": 5867 + }, + { + "epoch": 0.01, + "learning_rate": 4.999589479131618e-05, + "loss": 2.2563, + "step": 5868 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995893367789086e-05, + "loss": 2.1127, + "step": 5869 + }, + { + "epoch": 0.01, + "learning_rate": 4.999589194401525e-05, + "loss": 2.1021, + "step": 5870 + }, + { + "epoch": 0.01, + "learning_rate": 4.999589051999465e-05, + "loss": 2.1175, + "step": 5871 + }, + { + "epoch": 0.01, + "learning_rate": 4.999588909572731e-05, + "loss": 1.365, + "step": 5872 + }, + { + "epoch": 0.01, + "learning_rate": 4.999588767121322e-05, + "loss": 1.2585, + "step": 5873 + }, + { + "epoch": 0.01, + "learning_rate": 4.999588624645238e-05, + "loss": 1.2671, + "step": 5874 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995884821444797e-05, + "loss": 0.9925, + "step": 5875 + }, + { + "epoch": 0.01, + "learning_rate": 4.999588339619046e-05, + "loss": 0.9696, + "step": 5876 + }, + { + "epoch": 0.01, + "learning_rate": 4.999588197068937e-05, + "loss": 1.2444, + "step": 5877 + }, + { + "epoch": 0.01, + "learning_rate": 4.999588054494154e-05, + "loss": 0.7877, + "step": 5878 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995879118946956e-05, + "loss": 1.2257, + "step": 5879 + }, + { + "epoch": 0.01, + "learning_rate": 4.999587769270562e-05, + "loss": 1.4817, + "step": 5880 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995876266217544e-05, + "loss": 1.2465, + "step": 5881 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995874839482715e-05, + "loss": 1.3474, + "step": 5882 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995873412501135e-05, + "loss": 1.6764, + "step": 5883 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995871985272804e-05, + "loss": 0.9174, + "step": 5884 + }, + { + "epoch": 0.01, + "learning_rate": 4.999587055779773e-05, + "loss": 0.7554, + "step": 5885 + }, + { + "epoch": 0.01, + "learning_rate": 4.999586913007591e-05, + "loss": 0.7866, + "step": 5886 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995867702107334e-05, + "loss": 1.0494, + "step": 5887 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995866273892014e-05, + "loss": 1.0432, + "step": 5888 + }, + { + "epoch": 0.01, + "learning_rate": 4.999586484542994e-05, + "loss": 1.3386, + "step": 5889 + }, + { + "epoch": 0.01, + "learning_rate": 4.999586341672112e-05, + "loss": 1.6875, + "step": 5890 + }, + { + "epoch": 0.01, + "learning_rate": 4.999586198776556e-05, + "loss": 1.0927, + "step": 5891 + }, + { + "epoch": 0.01, + "learning_rate": 4.999586055856324e-05, + "loss": 0.8526, + "step": 5892 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995859129114175e-05, + "loss": 0.8085, + "step": 5893 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995857699418357e-05, + "loss": 0.9571, + "step": 5894 + }, + { + "epoch": 0.01, + "learning_rate": 4.99958562694758e-05, + "loss": 0.8421, + "step": 5895 + }, + { + "epoch": 0.01, + "learning_rate": 4.999585483928649e-05, + "loss": 0.9443, + "step": 5896 + }, + { + "epoch": 0.01, + "learning_rate": 4.999585340885043e-05, + "loss": 0.76, + "step": 5897 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995851978167626e-05, + "loss": 0.7921, + "step": 5898 + }, + { + "epoch": 0.01, + "learning_rate": 4.999585054723807e-05, + "loss": 0.5879, + "step": 5899 + }, + { + "epoch": 0.01, + "learning_rate": 4.999584911606176e-05, + "loss": 0.6805, + "step": 5900 + }, + { + "epoch": 0.01, + "learning_rate": 4.999584768463871e-05, + "loss": 0.7923, + "step": 5901 + }, + { + "epoch": 0.01, + "learning_rate": 4.99958462529689e-05, + "loss": 0.6167, + "step": 5902 + }, + { + "epoch": 0.01, + "learning_rate": 4.999584482105236e-05, + "loss": 0.9948, + "step": 5903 + }, + { + "epoch": 0.01, + "learning_rate": 4.999584338888906e-05, + "loss": 0.3271, + "step": 5904 + }, + { + "epoch": 0.01, + "learning_rate": 4.999584195647901e-05, + "loss": 1.1693, + "step": 5905 + }, + { + "epoch": 0.01, + "learning_rate": 4.999584052382221e-05, + "loss": 0.9345, + "step": 5906 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995839090918673e-05, + "loss": 1.5019, + "step": 5907 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995837657768376e-05, + "loss": 1.1689, + "step": 5908 + }, + { + "epoch": 0.01, + "learning_rate": 4.999583622437134e-05, + "loss": 1.1574, + "step": 5909 + }, + { + "epoch": 0.01, + "learning_rate": 4.999583479072755e-05, + "loss": 1.1267, + "step": 5910 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995833356837014e-05, + "loss": 1.2111, + "step": 5911 + }, + { + "epoch": 0.01, + "learning_rate": 4.999583192269973e-05, + "loss": 1.0961, + "step": 5912 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995830488315696e-05, + "loss": 1.2409, + "step": 5913 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995829053684914e-05, + "loss": 1.0839, + "step": 5914 + }, + { + "epoch": 0.01, + "learning_rate": 4.999582761880738e-05, + "loss": 1.189, + "step": 5915 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995826183683104e-05, + "loss": 0.9599, + "step": 5916 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995824748312076e-05, + "loss": 1.2058, + "step": 5917 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995823312694304e-05, + "loss": 1.1992, + "step": 5918 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995821876829775e-05, + "loss": 0.9585, + "step": 5919 + }, + { + "epoch": 0.01, + "learning_rate": 4.999582044071851e-05, + "loss": 1.1868, + "step": 5920 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995819004360484e-05, + "loss": 1.0446, + "step": 5921 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995817567755715e-05, + "loss": 1.0714, + "step": 5922 + }, + { + "epoch": 0.01, + "learning_rate": 4.99958161309042e-05, + "loss": 1.1892, + "step": 5923 + }, + { + "epoch": 0.01, + "learning_rate": 4.999581469380593e-05, + "loss": 1.0839, + "step": 5924 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995813256460925e-05, + "loss": 1.4469, + "step": 5925 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995811818869166e-05, + "loss": 1.3577, + "step": 5926 + }, + { + "epoch": 0.01, + "learning_rate": 4.999581038103065e-05, + "loss": 1.2339, + "step": 5927 + }, + { + "epoch": 0.01, + "learning_rate": 4.99958089429454e-05, + "loss": 1.2053, + "step": 5928 + }, + { + "epoch": 0.01, + "learning_rate": 4.999580750461339e-05, + "loss": 1.0734, + "step": 5929 + }, + { + "epoch": 0.01, + "learning_rate": 4.999580606603463e-05, + "loss": 1.1296, + "step": 5930 + }, + { + "epoch": 0.01, + "learning_rate": 4.999580462720914e-05, + "loss": 1.1232, + "step": 5931 + }, + { + "epoch": 0.01, + "learning_rate": 4.999580318813689e-05, + "loss": 1.139, + "step": 5932 + }, + { + "epoch": 0.01, + "learning_rate": 4.999580174881789e-05, + "loss": 1.1215, + "step": 5933 + }, + { + "epoch": 0.01, + "learning_rate": 4.999580030925214e-05, + "loss": 0.9538, + "step": 5934 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995798869439647e-05, + "loss": 0.9857, + "step": 5935 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995797429380407e-05, + "loss": 1.0086, + "step": 5936 + }, + { + "epoch": 0.01, + "learning_rate": 4.999579598907442e-05, + "loss": 1.2054, + "step": 5937 + }, + { + "epoch": 0.01, + "learning_rate": 4.999579454852168e-05, + "loss": 1.0337, + "step": 5938 + }, + { + "epoch": 0.01, + "learning_rate": 4.99957931077222e-05, + "loss": 1.5357, + "step": 5939 + }, + { + "epoch": 0.01, + "learning_rate": 4.999579166667596e-05, + "loss": 1.2708, + "step": 5940 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995790225382985e-05, + "loss": 1.1459, + "step": 5941 + }, + { + "epoch": 0.01, + "learning_rate": 4.999578878384325e-05, + "loss": 2.3755, + "step": 5942 + }, + { + "epoch": 0.01, + "learning_rate": 4.999578734205678e-05, + "loss": 1.2678, + "step": 5943 + }, + { + "epoch": 0.01, + "learning_rate": 4.999578590002355e-05, + "loss": 1.4369, + "step": 5944 + }, + { + "epoch": 0.01, + "learning_rate": 4.999578445774358e-05, + "loss": 1.0646, + "step": 5945 + }, + { + "epoch": 0.01, + "learning_rate": 4.999578301521686e-05, + "loss": 1.3442, + "step": 5946 + }, + { + "epoch": 0.01, + "learning_rate": 4.999578157244339e-05, + "loss": 1.3115, + "step": 5947 + }, + { + "epoch": 0.01, + "learning_rate": 4.999578012942318e-05, + "loss": 1.0278, + "step": 5948 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995778686156217e-05, + "loss": 0.9083, + "step": 5949 + }, + { + "epoch": 0.01, + "learning_rate": 4.99957772426425e-05, + "loss": 1.231, + "step": 5950 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995775798882036e-05, + "loss": 1.0653, + "step": 5951 + }, + { + "epoch": 0.01, + "learning_rate": 4.999577435487484e-05, + "loss": 0.9043, + "step": 5952 + }, + { + "epoch": 0.01, + "learning_rate": 4.999577291062087e-05, + "loss": 1.0906, + "step": 5953 + }, + { + "epoch": 0.01, + "learning_rate": 4.999577146612018e-05, + "loss": 0.9863, + "step": 5954 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995770021372726e-05, + "loss": 1.097, + "step": 5955 + }, + { + "epoch": 0.01, + "learning_rate": 4.999576857637852e-05, + "loss": 1.0197, + "step": 5956 + }, + { + "epoch": 0.01, + "learning_rate": 4.999576713113758e-05, + "loss": 1.1653, + "step": 5957 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995765685649885e-05, + "loss": 1.1057, + "step": 5958 + }, + { + "epoch": 0.01, + "learning_rate": 4.999576423991544e-05, + "loss": 1.0866, + "step": 5959 + }, + { + "epoch": 0.01, + "learning_rate": 4.999576279393425e-05, + "loss": 1.7562, + "step": 5960 + }, + { + "epoch": 0.01, + "learning_rate": 4.999576134770631e-05, + "loss": 1.2862, + "step": 5961 + }, + { + "epoch": 0.01, + "learning_rate": 4.999575990123163e-05, + "loss": 1.2652, + "step": 5962 + }, + { + "epoch": 0.01, + "learning_rate": 4.99957584545102e-05, + "loss": 1.1406, + "step": 5963 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995757007542024e-05, + "loss": 1.3136, + "step": 5964 + }, + { + "epoch": 0.01, + "learning_rate": 4.99957555603271e-05, + "loss": 1.0399, + "step": 5965 + }, + { + "epoch": 0.01, + "learning_rate": 4.999575411286542e-05, + "loss": 1.0532, + "step": 5966 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995752665157005e-05, + "loss": 1.0086, + "step": 5967 + }, + { + "epoch": 0.01, + "learning_rate": 4.999575121720183e-05, + "loss": 1.0175, + "step": 5968 + }, + { + "epoch": 0.01, + "learning_rate": 4.999574976899991e-05, + "loss": 1.0899, + "step": 5969 + }, + { + "epoch": 0.01, + "learning_rate": 4.999574832055125e-05, + "loss": 1.1038, + "step": 5970 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995746871855836e-05, + "loss": 1.323, + "step": 5971 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995745422913674e-05, + "loss": 1.26, + "step": 5972 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995743973724774e-05, + "loss": 1.1442, + "step": 5973 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995742524289116e-05, + "loss": 1.0892, + "step": 5974 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995741074606715e-05, + "loss": 1.3687, + "step": 5975 + }, + { + "epoch": 0.01, + "learning_rate": 4.999573962467756e-05, + "loss": 1.1966, + "step": 5976 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995738174501666e-05, + "loss": 1.1295, + "step": 5977 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995736724079025e-05, + "loss": 1.2428, + "step": 5978 + }, + { + "epoch": 0.01, + "learning_rate": 4.999573527340963e-05, + "loss": 0.745, + "step": 5979 + }, + { + "epoch": 0.01, + "learning_rate": 4.99957338224935e-05, + "loss": 1.1817, + "step": 5980 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995732371330605e-05, + "loss": 1.0781, + "step": 5981 + }, + { + "epoch": 0.01, + "learning_rate": 4.999573091992097e-05, + "loss": 0.9299, + "step": 5982 + }, + { + "epoch": 0.01, + "learning_rate": 4.999572946826459e-05, + "loss": 0.9942, + "step": 5983 + }, + { + "epoch": 0.01, + "learning_rate": 4.999572801636147e-05, + "loss": 1.1955, + "step": 5984 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995726564211585e-05, + "loss": 1.2949, + "step": 5985 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995725111814964e-05, + "loss": 1.2096, + "step": 5986 + }, + { + "epoch": 0.01, + "learning_rate": 4.999572365917159e-05, + "loss": 1.4178, + "step": 5987 + }, + { + "epoch": 0.01, + "learning_rate": 4.999572220628148e-05, + "loss": 1.6477, + "step": 5988 + }, + { + "epoch": 0.01, + "learning_rate": 4.999572075314462e-05, + "loss": 1.3698, + "step": 5989 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995719299761e-05, + "loss": 0.9212, + "step": 5990 + }, + { + "epoch": 0.01, + "learning_rate": 4.999571784613064e-05, + "loss": 1.149, + "step": 5991 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995716392253535e-05, + "loss": 0.8656, + "step": 5992 + }, + { + "epoch": 0.01, + "learning_rate": 4.999571493812968e-05, + "loss": 1.2262, + "step": 5993 + }, + { + "epoch": 0.01, + "learning_rate": 4.999571348375908e-05, + "loss": 0.8487, + "step": 5994 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995712029141734e-05, + "loss": 0.3838, + "step": 5995 + }, + { + "epoch": 0.01, + "learning_rate": 4.999571057427764e-05, + "loss": 1.2446, + "step": 5996 + }, + { + "epoch": 0.01, + "learning_rate": 4.999570911916679e-05, + "loss": 0.8315, + "step": 5997 + }, + { + "epoch": 0.01, + "learning_rate": 4.999570766380921e-05, + "loss": 1.1521, + "step": 5998 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995706208204875e-05, + "loss": 1.2743, + "step": 5999 + }, + { + "epoch": 0.01, + "learning_rate": 4.999570475235379e-05, + "loss": 1.1449, + "step": 6000 + }, + { + "epoch": 0.01, + "eval_loss": 1.0794674158096313, + "eval_runtime": 86.1885, + "eval_samples_per_second": 16.069, + "eval_steps_per_second": 4.026, + "step": 6000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995703296255955e-05, + "loss": 1.0349, + "step": 6001 + }, + { + "epoch": 0.01, + "learning_rate": 4.999570183991138e-05, + "loss": 1.0534, + "step": 6002 + }, + { + "epoch": 0.01, + "learning_rate": 4.999570038332006e-05, + "loss": 1.3352, + "step": 6003 + }, + { + "epoch": 0.01, + "learning_rate": 4.999569892648198e-05, + "loss": 1.2638, + "step": 6004 + }, + { + "epoch": 0.01, + "learning_rate": 4.999569746939716e-05, + "loss": 1.185, + "step": 6005 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995696012065593e-05, + "loss": 1.1458, + "step": 6006 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995694554487286e-05, + "loss": 1.2757, + "step": 6007 + }, + { + "epoch": 0.01, + "learning_rate": 4.999569309666223e-05, + "loss": 1.2362, + "step": 6008 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995691638590426e-05, + "loss": 1.2912, + "step": 6009 + }, + { + "epoch": 0.01, + "learning_rate": 4.999569018027187e-05, + "loss": 1.4617, + "step": 6010 + }, + { + "epoch": 0.01, + "learning_rate": 4.999568872170657e-05, + "loss": 1.0121, + "step": 6011 + }, + { + "epoch": 0.01, + "learning_rate": 4.999568726289452e-05, + "loss": 1.678, + "step": 6012 + }, + { + "epoch": 0.01, + "learning_rate": 4.999568580383572e-05, + "loss": 0.97, + "step": 6013 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995684344530186e-05, + "loss": 0.9885, + "step": 6014 + }, + { + "epoch": 0.01, + "learning_rate": 4.99956828849779e-05, + "loss": 0.9913, + "step": 6015 + }, + { + "epoch": 0.01, + "learning_rate": 4.999568142517886e-05, + "loss": 1.0645, + "step": 6016 + }, + { + "epoch": 0.01, + "learning_rate": 4.999567996513308e-05, + "loss": 1.6482, + "step": 6017 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995678504840554e-05, + "loss": 0.8483, + "step": 6018 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995677044301284e-05, + "loss": 0.9729, + "step": 6019 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995675583515256e-05, + "loss": 0.7855, + "step": 6020 + }, + { + "epoch": 0.01, + "learning_rate": 4.999567412248249e-05, + "loss": 1.9137, + "step": 6021 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995672661202975e-05, + "loss": 1.5124, + "step": 6022 + }, + { + "epoch": 0.01, + "learning_rate": 4.999567119967671e-05, + "loss": 1.4559, + "step": 6023 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995669737903704e-05, + "loss": 1.3439, + "step": 6024 + }, + { + "epoch": 0.01, + "learning_rate": 4.999566827588394e-05, + "loss": 1.2469, + "step": 6025 + }, + { + "epoch": 0.01, + "learning_rate": 4.999566681361745e-05, + "loss": 1.208, + "step": 6026 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995665351104193e-05, + "loss": 1.1692, + "step": 6027 + }, + { + "epoch": 0.01, + "learning_rate": 4.99956638883442e-05, + "loss": 1.1804, + "step": 6028 + }, + { + "epoch": 0.01, + "learning_rate": 4.999566242533746e-05, + "loss": 1.3017, + "step": 6029 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995660962083966e-05, + "loss": 1.2244, + "step": 6030 + }, + { + "epoch": 0.01, + "learning_rate": 4.999565949858373e-05, + "loss": 1.324, + "step": 6031 + }, + { + "epoch": 0.01, + "learning_rate": 4.999565803483675e-05, + "loss": 1.6463, + "step": 6032 + }, + { + "epoch": 0.01, + "learning_rate": 4.999565657084302e-05, + "loss": 1.201, + "step": 6033 + }, + { + "epoch": 0.01, + "learning_rate": 4.999565510660255e-05, + "loss": 0.8303, + "step": 6034 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995653642115326e-05, + "loss": 0.3958, + "step": 6035 + }, + { + "epoch": 0.01, + "learning_rate": 4.999565217738136e-05, + "loss": 0.8038, + "step": 6036 + }, + { + "epoch": 0.01, + "learning_rate": 4.999565071240064e-05, + "loss": 1.1105, + "step": 6037 + }, + { + "epoch": 0.01, + "learning_rate": 4.999564924717318e-05, + "loss": 1.0792, + "step": 6038 + }, + { + "epoch": 0.01, + "learning_rate": 4.999564778169897e-05, + "loss": 0.9225, + "step": 6039 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995646315978016e-05, + "loss": 0.7761, + "step": 6040 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995644850010315e-05, + "loss": 0.7188, + "step": 6041 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995643383795864e-05, + "loss": 0.854, + "step": 6042 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995641917334674e-05, + "loss": 1.1032, + "step": 6043 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995640450626734e-05, + "loss": 0.8793, + "step": 6044 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995638983672044e-05, + "loss": 0.5732, + "step": 6045 + }, + { + "epoch": 0.01, + "learning_rate": 4.999563751647061e-05, + "loss": 1.0618, + "step": 6046 + }, + { + "epoch": 0.01, + "learning_rate": 4.999563604902243e-05, + "loss": 1.3174, + "step": 6047 + }, + { + "epoch": 0.01, + "learning_rate": 4.999563458132751e-05, + "loss": 1.0841, + "step": 6048 + }, + { + "epoch": 0.01, + "learning_rate": 4.999563311338583e-05, + "loss": 0.999, + "step": 6049 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995631645197414e-05, + "loss": 1.0769, + "step": 6050 + }, + { + "epoch": 0.01, + "learning_rate": 4.999563017676225e-05, + "loss": 1.007, + "step": 6051 + }, + { + "epoch": 0.01, + "learning_rate": 4.999562870808034e-05, + "loss": 1.052, + "step": 6052 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995627239151675e-05, + "loss": 1.1273, + "step": 6053 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995625769976274e-05, + "loss": 1.076, + "step": 6054 + }, + { + "epoch": 0.01, + "learning_rate": 4.999562430055412e-05, + "loss": 0.9607, + "step": 6055 + }, + { + "epoch": 0.01, + "learning_rate": 4.999562283088523e-05, + "loss": 1.3641, + "step": 6056 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995621360969586e-05, + "loss": 1.1847, + "step": 6057 + }, + { + "epoch": 0.01, + "learning_rate": 4.99956198908072e-05, + "loss": 1.026, + "step": 6058 + }, + { + "epoch": 0.01, + "learning_rate": 4.999561842039806e-05, + "loss": 1.3139, + "step": 6059 + }, + { + "epoch": 0.01, + "learning_rate": 4.999561694974218e-05, + "loss": 1.0536, + "step": 6060 + }, + { + "epoch": 0.01, + "learning_rate": 4.999561547883955e-05, + "loss": 0.956, + "step": 6061 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995614007690176e-05, + "loss": 1.3575, + "step": 6062 + }, + { + "epoch": 0.01, + "learning_rate": 4.999561253629406e-05, + "loss": 1.0711, + "step": 6063 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995611064651195e-05, + "loss": 1.0042, + "step": 6064 + }, + { + "epoch": 0.01, + "learning_rate": 4.999560959276158e-05, + "loss": 0.8043, + "step": 6065 + }, + { + "epoch": 0.01, + "learning_rate": 4.999560812062522e-05, + "loss": 1.4357, + "step": 6066 + }, + { + "epoch": 0.01, + "learning_rate": 4.999560664824212e-05, + "loss": 1.1579, + "step": 6067 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995605175612264e-05, + "loss": 1.1137, + "step": 6068 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995603702735675e-05, + "loss": 1.3972, + "step": 6069 + }, + { + "epoch": 0.01, + "learning_rate": 4.999560222961233e-05, + "loss": 0.9841, + "step": 6070 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995600756242236e-05, + "loss": 1.4509, + "step": 6071 + }, + { + "epoch": 0.01, + "learning_rate": 4.999559928262541e-05, + "loss": 1.2235, + "step": 6072 + }, + { + "epoch": 0.01, + "learning_rate": 4.999559780876182e-05, + "loss": 1.0759, + "step": 6073 + }, + { + "epoch": 0.01, + "learning_rate": 4.99955963346515e-05, + "loss": 1.0943, + "step": 6074 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995594860294424e-05, + "loss": 0.8859, + "step": 6075 + }, + { + "epoch": 0.01, + "learning_rate": 4.99955933856906e-05, + "loss": 1.0972, + "step": 6076 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995591910840036e-05, + "loss": 1.0955, + "step": 6077 + }, + { + "epoch": 0.01, + "learning_rate": 4.999559043574272e-05, + "loss": 1.2344, + "step": 6078 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995588960398665e-05, + "loss": 1.1871, + "step": 6079 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558748480786e-05, + "loss": 1.0826, + "step": 6080 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558600897032e-05, + "loss": 1.0216, + "step": 6081 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558453288602e-05, + "loss": 0.9615, + "step": 6082 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558305655499e-05, + "loss": 1.3907, + "step": 6083 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558157997719e-05, + "loss": 1.0384, + "step": 6084 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558010315266e-05, + "loss": 1.2994, + "step": 6085 + }, + { + "epoch": 0.01, + "learning_rate": 4.999557862608139e-05, + "loss": 1.2716, + "step": 6086 + }, + { + "epoch": 0.01, + "learning_rate": 4.999557714876336e-05, + "loss": 1.1883, + "step": 6087 + }, + { + "epoch": 0.01, + "learning_rate": 4.999557567119859e-05, + "loss": 1.1904, + "step": 6088 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995574193387077e-05, + "loss": 1.3526, + "step": 6089 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995572715328814e-05, + "loss": 1.2066, + "step": 6090 + }, + { + "epoch": 0.01, + "learning_rate": 4.999557123702381e-05, + "loss": 0.7818, + "step": 6091 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995569758472056e-05, + "loss": 0.7816, + "step": 6092 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995568279673554e-05, + "loss": 1.0388, + "step": 6093 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995566800628315e-05, + "loss": 1.3529, + "step": 6094 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995565321336325e-05, + "loss": 2.0495, + "step": 6095 + }, + { + "epoch": 0.01, + "learning_rate": 4.999556384179759e-05, + "loss": 1.6708, + "step": 6096 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995562362012106e-05, + "loss": 1.2122, + "step": 6097 + }, + { + "epoch": 0.01, + "learning_rate": 4.999556088197988e-05, + "loss": 1.3476, + "step": 6098 + }, + { + "epoch": 0.01, + "learning_rate": 4.999555940170091e-05, + "loss": 1.5162, + "step": 6099 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995557921175186e-05, + "loss": 1.7973, + "step": 6100 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995556440402725e-05, + "loss": 1.6417, + "step": 6101 + }, + { + "epoch": 0.01, + "learning_rate": 4.999555495938352e-05, + "loss": 1.5791, + "step": 6102 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995553478117563e-05, + "loss": 0.623, + "step": 6103 + }, + { + "epoch": 0.01, + "learning_rate": 4.999555199660486e-05, + "loss": 1.1451, + "step": 6104 + }, + { + "epoch": 0.01, + "learning_rate": 4.999555051484541e-05, + "loss": 1.2129, + "step": 6105 + }, + { + "epoch": 0.01, + "learning_rate": 4.999554903283922e-05, + "loss": 0.9466, + "step": 6106 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995547550586284e-05, + "loss": 0.4574, + "step": 6107 + }, + { + "epoch": 0.01, + "learning_rate": 4.99955460680866e-05, + "loss": 0.6577, + "step": 6108 + }, + { + "epoch": 0.01, + "learning_rate": 4.999554458534017e-05, + "loss": 1.2173, + "step": 6109 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995543102346995e-05, + "loss": 1.0804, + "step": 6110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999554161910708e-05, + "loss": 1.148, + "step": 6111 + }, + { + "epoch": 0.01, + "learning_rate": 4.999554013562041e-05, + "loss": 1.3866, + "step": 6112 + }, + { + "epoch": 0.01, + "learning_rate": 4.999553865188701e-05, + "loss": 1.078, + "step": 6113 + }, + { + "epoch": 0.01, + "learning_rate": 4.999553716790685e-05, + "loss": 0.9918, + "step": 6114 + }, + { + "epoch": 0.01, + "learning_rate": 4.999553568367995e-05, + "loss": 0.9809, + "step": 6115 + }, + { + "epoch": 0.01, + "learning_rate": 4.99955341992063e-05, + "loss": 0.9585, + "step": 6116 + }, + { + "epoch": 0.01, + "learning_rate": 4.999553271448591e-05, + "loss": 1.3153, + "step": 6117 + }, + { + "epoch": 0.01, + "learning_rate": 4.999553122951878e-05, + "loss": 1.0406, + "step": 6118 + }, + { + "epoch": 0.01, + "learning_rate": 4.999552974430489e-05, + "loss": 1.2737, + "step": 6119 + }, + { + "epoch": 0.01, + "learning_rate": 4.999552825884426e-05, + "loss": 1.3108, + "step": 6120 + }, + { + "epoch": 0.01, + "learning_rate": 4.999552677313689e-05, + "loss": 1.1334, + "step": 6121 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995525287182775e-05, + "loss": 1.6797, + "step": 6122 + }, + { + "epoch": 0.01, + "learning_rate": 4.999552380098191e-05, + "loss": 1.2596, + "step": 6123 + }, + { + "epoch": 0.01, + "learning_rate": 4.99955223145343e-05, + "loss": 0.8459, + "step": 6124 + }, + { + "epoch": 0.01, + "learning_rate": 4.999552082783995e-05, + "loss": 1.0164, + "step": 6125 + }, + { + "epoch": 0.01, + "learning_rate": 4.999551934089884e-05, + "loss": 0.5692, + "step": 6126 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995517853711e-05, + "loss": 0.6399, + "step": 6127 + }, + { + "epoch": 0.01, + "learning_rate": 4.999551636627641e-05, + "loss": 1.1292, + "step": 6128 + }, + { + "epoch": 0.01, + "learning_rate": 4.999551487859507e-05, + "loss": 0.9345, + "step": 6129 + }, + { + "epoch": 0.01, + "learning_rate": 4.999551339066699e-05, + "loss": 1.1914, + "step": 6130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999551190249217e-05, + "loss": 1.0834, + "step": 6131 + }, + { + "epoch": 0.01, + "learning_rate": 4.99955104140706e-05, + "loss": 1.3149, + "step": 6132 + }, + { + "epoch": 0.01, + "learning_rate": 4.999550892540229e-05, + "loss": 1.2369, + "step": 6133 + }, + { + "epoch": 0.01, + "learning_rate": 4.999550743648722e-05, + "loss": 1.0676, + "step": 6134 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995505947325415e-05, + "loss": 1.2102, + "step": 6135 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995504457916864e-05, + "loss": 1.1858, + "step": 6136 + }, + { + "epoch": 0.01, + "learning_rate": 4.999550296826157e-05, + "loss": 0.9689, + "step": 6137 + }, + { + "epoch": 0.01, + "learning_rate": 4.999550147835953e-05, + "loss": 1.1618, + "step": 6138 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995499988210744e-05, + "loss": 1.1267, + "step": 6139 + }, + { + "epoch": 0.01, + "learning_rate": 4.999549849781521e-05, + "loss": 0.9775, + "step": 6140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999549700717293e-05, + "loss": 1.2295, + "step": 6141 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995495516283914e-05, + "loss": 1.0337, + "step": 6142 + }, + { + "epoch": 0.01, + "learning_rate": 4.999549402514815e-05, + "loss": 0.7293, + "step": 6143 + }, + { + "epoch": 0.01, + "learning_rate": 4.999549253376563e-05, + "loss": 0.9217, + "step": 6144 + }, + { + "epoch": 0.01, + "learning_rate": 4.999549104213638e-05, + "loss": 0.9321, + "step": 6145 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995489550260374e-05, + "loss": 0.7627, + "step": 6146 + }, + { + "epoch": 0.01, + "learning_rate": 4.999548805813763e-05, + "loss": 0.8338, + "step": 6147 + }, + { + "epoch": 0.01, + "learning_rate": 4.999548656576814e-05, + "loss": 0.8278, + "step": 6148 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995485073151905e-05, + "loss": 1.0563, + "step": 6149 + }, + { + "epoch": 0.01, + "learning_rate": 4.999548358028893e-05, + "loss": 1.2953, + "step": 6150 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995482087179196e-05, + "loss": 0.9959, + "step": 6151 + }, + { + "epoch": 0.01, + "learning_rate": 4.999548059382273e-05, + "loss": 1.1194, + "step": 6152 + }, + { + "epoch": 0.01, + "learning_rate": 4.999547910021951e-05, + "loss": 1.236, + "step": 6153 + }, + { + "epoch": 0.01, + "learning_rate": 4.999547760636955e-05, + "loss": 1.0783, + "step": 6154 + }, + { + "epoch": 0.01, + "learning_rate": 4.999547611227284e-05, + "loss": 1.3052, + "step": 6155 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995474617929395e-05, + "loss": 0.9864, + "step": 6156 + }, + { + "epoch": 0.01, + "learning_rate": 4.99954731233392e-05, + "loss": 1.2228, + "step": 6157 + }, + { + "epoch": 0.01, + "learning_rate": 4.999547162850226e-05, + "loss": 1.0359, + "step": 6158 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995470133418576e-05, + "loss": 0.8855, + "step": 6159 + }, + { + "epoch": 0.01, + "learning_rate": 4.999546863808815e-05, + "loss": 1.1619, + "step": 6160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999546714251098e-05, + "loss": 1.0703, + "step": 6161 + }, + { + "epoch": 0.01, + "learning_rate": 4.999546564668705e-05, + "loss": 1.0541, + "step": 6162 + }, + { + "epoch": 0.01, + "learning_rate": 4.999546415061639e-05, + "loss": 1.301, + "step": 6163 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995462654298984e-05, + "loss": 1.2866, + "step": 6164 + }, + { + "epoch": 0.01, + "learning_rate": 4.999546115773483e-05, + "loss": 1.3945, + "step": 6165 + }, + { + "epoch": 0.01, + "learning_rate": 4.999545966092393e-05, + "loss": 0.7452, + "step": 6166 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995458163866294e-05, + "loss": 0.3172, + "step": 6167 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995456666561904e-05, + "loss": 0.2841, + "step": 6168 + }, + { + "epoch": 0.01, + "learning_rate": 4.999545516901078e-05, + "loss": 0.2657, + "step": 6169 + }, + { + "epoch": 0.01, + "learning_rate": 4.99954536712129e-05, + "loss": 1.2747, + "step": 6170 + }, + { + "epoch": 0.01, + "learning_rate": 4.999545217316828e-05, + "loss": 0.9224, + "step": 6171 + }, + { + "epoch": 0.01, + "learning_rate": 4.999545067487691e-05, + "loss": 1.1763, + "step": 6172 + }, + { + "epoch": 0.01, + "learning_rate": 4.99954491763388e-05, + "loss": 1.1748, + "step": 6173 + }, + { + "epoch": 0.01, + "learning_rate": 4.999544767755395e-05, + "loss": 1.0592, + "step": 6174 + }, + { + "epoch": 0.01, + "learning_rate": 4.999544617852235e-05, + "loss": 0.9498, + "step": 6175 + }, + { + "epoch": 0.01, + "learning_rate": 4.999544467924401e-05, + "loss": 1.3276, + "step": 6176 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995443179718914e-05, + "loss": 1.1507, + "step": 6177 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995441679947084e-05, + "loss": 0.4551, + "step": 6178 + }, + { + "epoch": 0.01, + "learning_rate": 4.999544017992851e-05, + "loss": 0.8058, + "step": 6179 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995438679663184e-05, + "loss": 0.847, + "step": 6180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999543717915112e-05, + "loss": 1.1359, + "step": 6181 + }, + { + "epoch": 0.01, + "learning_rate": 4.999543567839231e-05, + "loss": 0.873, + "step": 6182 + }, + { + "epoch": 0.01, + "learning_rate": 4.999543417738676e-05, + "loss": 0.9266, + "step": 6183 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995432676134455e-05, + "loss": 0.8686, + "step": 6184 + }, + { + "epoch": 0.01, + "learning_rate": 4.999543117463542e-05, + "loss": 1.9153, + "step": 6185 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995429672889626e-05, + "loss": 1.4137, + "step": 6186 + }, + { + "epoch": 0.01, + "learning_rate": 4.99954281708971e-05, + "loss": 1.2259, + "step": 6187 + }, + { + "epoch": 0.01, + "learning_rate": 4.999542666865782e-05, + "loss": 1.1254, + "step": 6188 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995425166171805e-05, + "loss": 1.0492, + "step": 6189 + }, + { + "epoch": 0.01, + "learning_rate": 4.999542366343903e-05, + "loss": 1.0931, + "step": 6190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999542216045953e-05, + "loss": 1.1489, + "step": 6191 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995420657233274e-05, + "loss": 1.3093, + "step": 6192 + }, + { + "epoch": 0.01, + "learning_rate": 4.999541915376028e-05, + "loss": 1.1749, + "step": 6193 + }, + { + "epoch": 0.01, + "learning_rate": 4.999541765004053e-05, + "loss": 1.2643, + "step": 6194 + }, + { + "epoch": 0.01, + "learning_rate": 4.999541614607405e-05, + "loss": 1.4281, + "step": 6195 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995414641860816e-05, + "loss": 1.284, + "step": 6196 + }, + { + "epoch": 0.01, + "learning_rate": 4.999541313740085e-05, + "loss": 1.1549, + "step": 6197 + }, + { + "epoch": 0.01, + "learning_rate": 4.999541163269412e-05, + "loss": 1.0731, + "step": 6198 + }, + { + "epoch": 0.01, + "learning_rate": 4.999541012774066e-05, + "loss": 0.891, + "step": 6199 + }, + { + "epoch": 0.01, + "learning_rate": 4.999540862254046e-05, + "loss": 0.7303, + "step": 6200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999540711709351e-05, + "loss": 0.8618, + "step": 6201 + }, + { + "epoch": 0.01, + "learning_rate": 4.999540561139981e-05, + "loss": 0.6789, + "step": 6202 + }, + { + "epoch": 0.01, + "learning_rate": 4.999540410545937e-05, + "loss": 0.9251, + "step": 6203 + }, + { + "epoch": 0.01, + "learning_rate": 4.999540259927219e-05, + "loss": 0.9536, + "step": 6204 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995401092838255e-05, + "loss": 0.9837, + "step": 6205 + }, + { + "epoch": 0.01, + "learning_rate": 4.999539958615759e-05, + "loss": 0.7343, + "step": 6206 + }, + { + "epoch": 0.01, + "learning_rate": 4.999539807923017e-05, + "loss": 0.9371, + "step": 6207 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995396572056015e-05, + "loss": 0.9017, + "step": 6208 + }, + { + "epoch": 0.01, + "learning_rate": 4.999539506463511e-05, + "loss": 0.9413, + "step": 6209 + }, + { + "epoch": 0.01, + "learning_rate": 4.999539355696746e-05, + "loss": 1.1699, + "step": 6210 + }, + { + "epoch": 0.01, + "learning_rate": 4.999539204905307e-05, + "loss": 1.277, + "step": 6211 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995390540891935e-05, + "loss": 1.2824, + "step": 6212 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995389032484055e-05, + "loss": 1.1497, + "step": 6213 + }, + { + "epoch": 0.01, + "learning_rate": 4.999538752382943e-05, + "loss": 1.1358, + "step": 6214 + }, + { + "epoch": 0.01, + "learning_rate": 4.999538601492806e-05, + "loss": 0.8836, + "step": 6215 + }, + { + "epoch": 0.01, + "learning_rate": 4.999538450577995e-05, + "loss": 0.3426, + "step": 6216 + }, + { + "epoch": 0.01, + "learning_rate": 4.99953829963851e-05, + "loss": 0.9849, + "step": 6217 + }, + { + "epoch": 0.01, + "learning_rate": 4.99953814867435e-05, + "loss": 0.6789, + "step": 6218 + }, + { + "epoch": 0.01, + "learning_rate": 4.999537997685515e-05, + "loss": 0.6195, + "step": 6219 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995378466720066e-05, + "loss": 0.9728, + "step": 6220 + }, + { + "epoch": 0.01, + "learning_rate": 4.999537695633823e-05, + "loss": 1.2863, + "step": 6221 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995375445709656e-05, + "loss": 1.1435, + "step": 6222 + }, + { + "epoch": 0.01, + "learning_rate": 4.999537393483434e-05, + "loss": 1.0346, + "step": 6223 + }, + { + "epoch": 0.01, + "learning_rate": 4.999537242371228e-05, + "loss": 1.0292, + "step": 6224 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995370912343474e-05, + "loss": 1.1251, + "step": 6225 + }, + { + "epoch": 0.01, + "learning_rate": 4.999536940072793e-05, + "loss": 1.1762, + "step": 6226 + }, + { + "epoch": 0.01, + "learning_rate": 4.999536788886563e-05, + "loss": 1.1432, + "step": 6227 + }, + { + "epoch": 0.01, + "learning_rate": 4.999536637675659e-05, + "loss": 2.3195, + "step": 6228 + }, + { + "epoch": 0.01, + "learning_rate": 4.999536486440081e-05, + "loss": 1.9143, + "step": 6229 + }, + { + "epoch": 0.01, + "learning_rate": 4.999536335179829e-05, + "loss": 1.2253, + "step": 6230 + }, + { + "epoch": 0.01, + "learning_rate": 4.999536183894902e-05, + "loss": 1.1785, + "step": 6231 + }, + { + "epoch": 0.01, + "learning_rate": 4.999536032585301e-05, + "loss": 1.0539, + "step": 6232 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995358812510254e-05, + "loss": 0.9327, + "step": 6233 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995357298920754e-05, + "loss": 0.5802, + "step": 6234 + }, + { + "epoch": 0.01, + "learning_rate": 4.999535578508451e-05, + "loss": 1.1302, + "step": 6235 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995354271001524e-05, + "loss": 1.0786, + "step": 6236 + }, + { + "epoch": 0.01, + "learning_rate": 4.999535275667179e-05, + "loss": 1.0324, + "step": 6237 + }, + { + "epoch": 0.01, + "learning_rate": 4.999535124209532e-05, + "loss": 1.0391, + "step": 6238 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995349727272104e-05, + "loss": 0.9431, + "step": 6239 + }, + { + "epoch": 0.01, + "learning_rate": 4.999534821220214e-05, + "loss": 0.9996, + "step": 6240 + }, + { + "epoch": 0.01, + "learning_rate": 4.999534669688544e-05, + "loss": 1.2086, + "step": 6241 + }, + { + "epoch": 0.01, + "learning_rate": 4.999534518132199e-05, + "loss": 1.4165, + "step": 6242 + }, + { + "epoch": 0.01, + "learning_rate": 4.99953436655118e-05, + "loss": 1.1405, + "step": 6243 + }, + { + "epoch": 0.01, + "learning_rate": 4.999534214945486e-05, + "loss": 1.2725, + "step": 6244 + }, + { + "epoch": 0.01, + "learning_rate": 4.999534063315118e-05, + "loss": 1.151, + "step": 6245 + }, + { + "epoch": 0.01, + "learning_rate": 4.999533911660076e-05, + "loss": 1.1695, + "step": 6246 + }, + { + "epoch": 0.01, + "learning_rate": 4.999533759980359e-05, + "loss": 1.0715, + "step": 6247 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995336082759683e-05, + "loss": 1.0895, + "step": 6248 + }, + { + "epoch": 0.01, + "learning_rate": 4.999533456546903e-05, + "loss": 1.3081, + "step": 6249 + }, + { + "epoch": 0.01, + "learning_rate": 4.999533304793164e-05, + "loss": 1.5056, + "step": 6250 + }, + { + "epoch": 0.01, + "learning_rate": 4.99953315301475e-05, + "loss": 1.3054, + "step": 6251 + }, + { + "epoch": 0.01, + "learning_rate": 4.999533001211662e-05, + "loss": 1.1802, + "step": 6252 + }, + { + "epoch": 0.01, + "learning_rate": 4.999532849383899e-05, + "loss": 1.2632, + "step": 6253 + }, + { + "epoch": 0.01, + "learning_rate": 4.999532697531462e-05, + "loss": 1.1067, + "step": 6254 + }, + { + "epoch": 0.01, + "learning_rate": 4.999532545654351e-05, + "loss": 1.1343, + "step": 6255 + }, + { + "epoch": 0.01, + "learning_rate": 4.999532393752565e-05, + "loss": 1.2952, + "step": 6256 + }, + { + "epoch": 0.01, + "learning_rate": 4.999532241826105e-05, + "loss": 1.4313, + "step": 6257 + }, + { + "epoch": 0.01, + "learning_rate": 4.999532089874971e-05, + "loss": 1.7484, + "step": 6258 + }, + { + "epoch": 0.01, + "learning_rate": 4.999531937899162e-05, + "loss": 1.668, + "step": 6259 + }, + { + "epoch": 0.01, + "learning_rate": 4.99953178589868e-05, + "loss": 1.3347, + "step": 6260 + }, + { + "epoch": 0.01, + "learning_rate": 4.999531633873522e-05, + "loss": 1.0661, + "step": 6261 + }, + { + "epoch": 0.01, + "learning_rate": 4.999531481823691e-05, + "loss": 0.9182, + "step": 6262 + }, + { + "epoch": 0.01, + "learning_rate": 4.999531329749185e-05, + "loss": 1.0667, + "step": 6263 + }, + { + "epoch": 0.01, + "learning_rate": 4.999531177650005e-05, + "loss": 1.1956, + "step": 6264 + }, + { + "epoch": 0.01, + "learning_rate": 4.99953102552615e-05, + "loss": 1.3031, + "step": 6265 + }, + { + "epoch": 0.01, + "learning_rate": 4.999530873377621e-05, + "loss": 1.0669, + "step": 6266 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995307212044186e-05, + "loss": 1.2452, + "step": 6267 + }, + { + "epoch": 0.01, + "learning_rate": 4.999530569006541e-05, + "loss": 1.4003, + "step": 6268 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995304167839893e-05, + "loss": 1.3337, + "step": 6269 + }, + { + "epoch": 0.01, + "learning_rate": 4.999530264536763e-05, + "loss": 1.2999, + "step": 6270 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995301122648624e-05, + "loss": 1.2661, + "step": 6271 + }, + { + "epoch": 0.01, + "learning_rate": 4.999529959968288e-05, + "loss": 0.9675, + "step": 6272 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995298076470385e-05, + "loss": 1.1439, + "step": 6273 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995296553011157e-05, + "loss": 1.282, + "step": 6274 + }, + { + "epoch": 0.01, + "learning_rate": 4.999529502930518e-05, + "loss": 1.2966, + "step": 6275 + }, + { + "epoch": 0.01, + "learning_rate": 4.999529350535246e-05, + "loss": 1.1969, + "step": 6276 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995291981153e-05, + "loss": 1.2212, + "step": 6277 + }, + { + "epoch": 0.01, + "learning_rate": 4.999529045670679e-05, + "loss": 0.8573, + "step": 6278 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995288932013845e-05, + "loss": 1.2187, + "step": 6279 + }, + { + "epoch": 0.01, + "learning_rate": 4.999528740707415e-05, + "loss": 1.0123, + "step": 6280 + }, + { + "epoch": 0.01, + "learning_rate": 4.999528588188772e-05, + "loss": 1.3662, + "step": 6281 + }, + { + "epoch": 0.01, + "learning_rate": 4.999528435645454e-05, + "loss": 1.915, + "step": 6282 + }, + { + "epoch": 0.01, + "learning_rate": 4.999528283077463e-05, + "loss": 0.9141, + "step": 6283 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995281304847964e-05, + "loss": 1.4028, + "step": 6284 + }, + { + "epoch": 0.01, + "learning_rate": 4.999527977867455e-05, + "loss": 1.143, + "step": 6285 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995278252254406e-05, + "loss": 1.1971, + "step": 6286 + }, + { + "epoch": 0.01, + "learning_rate": 4.999527672558751e-05, + "loss": 1.0219, + "step": 6287 + }, + { + "epoch": 0.01, + "learning_rate": 4.999527519867388e-05, + "loss": 0.7101, + "step": 6288 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995273671513496e-05, + "loss": 0.8893, + "step": 6289 + }, + { + "epoch": 0.01, + "learning_rate": 4.999527214410639e-05, + "loss": 1.2082, + "step": 6290 + }, + { + "epoch": 0.01, + "learning_rate": 4.999527061645252e-05, + "loss": 1.1676, + "step": 6291 + }, + { + "epoch": 0.01, + "learning_rate": 4.999526908855191e-05, + "loss": 0.8196, + "step": 6292 + }, + { + "epoch": 0.01, + "learning_rate": 4.999526756040457e-05, + "loss": 1.2572, + "step": 6293 + }, + { + "epoch": 0.01, + "learning_rate": 4.999526603201048e-05, + "loss": 1.1991, + "step": 6294 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995264503369645e-05, + "loss": 1.1967, + "step": 6295 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995262974482066e-05, + "loss": 0.994, + "step": 6296 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995261445347744e-05, + "loss": 1.1037, + "step": 6297 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995259915966684e-05, + "loss": 1.2701, + "step": 6298 + }, + { + "epoch": 0.01, + "learning_rate": 4.999525838633888e-05, + "loss": 0.863, + "step": 6299 + }, + { + "epoch": 0.01, + "learning_rate": 4.999525685646433e-05, + "loss": 1.1585, + "step": 6300 + }, + { + "epoch": 0.01, + "learning_rate": 4.999525532634304e-05, + "loss": 1.436, + "step": 6301 + }, + { + "epoch": 0.01, + "learning_rate": 4.999525379597501e-05, + "loss": 0.9212, + "step": 6302 + }, + { + "epoch": 0.01, + "learning_rate": 4.999525226536024e-05, + "loss": 1.8257, + "step": 6303 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995250734498713e-05, + "loss": 0.5991, + "step": 6304 + }, + { + "epoch": 0.01, + "learning_rate": 4.999524920339046e-05, + "loss": 1.1361, + "step": 6305 + }, + { + "epoch": 0.01, + "learning_rate": 4.999524767203545e-05, + "loss": 1.1269, + "step": 6306 + }, + { + "epoch": 0.01, + "learning_rate": 4.999524614043371e-05, + "loss": 1.1514, + "step": 6307 + }, + { + "epoch": 0.01, + "learning_rate": 4.999524460858522e-05, + "loss": 1.3033, + "step": 6308 + }, + { + "epoch": 0.01, + "learning_rate": 4.999524307648999e-05, + "loss": 0.9704, + "step": 6309 + }, + { + "epoch": 0.01, + "learning_rate": 4.999524154414802e-05, + "loss": 1.1856, + "step": 6310 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995240011559304e-05, + "loss": 1.4247, + "step": 6311 + }, + { + "epoch": 0.01, + "learning_rate": 4.999523847872385e-05, + "loss": 1.0136, + "step": 6312 + }, + { + "epoch": 0.01, + "learning_rate": 4.999523694564165e-05, + "loss": 1.3243, + "step": 6313 + }, + { + "epoch": 0.01, + "learning_rate": 4.99952354123127e-05, + "loss": 0.9522, + "step": 6314 + }, + { + "epoch": 0.01, + "learning_rate": 4.999523387873701e-05, + "loss": 1.2351, + "step": 6315 + }, + { + "epoch": 0.01, + "learning_rate": 4.999523234491459e-05, + "loss": 1.0311, + "step": 6316 + }, + { + "epoch": 0.01, + "learning_rate": 4.999523081084542e-05, + "loss": 1.3266, + "step": 6317 + }, + { + "epoch": 0.01, + "learning_rate": 4.999522927652951e-05, + "loss": 1.2291, + "step": 6318 + }, + { + "epoch": 0.01, + "learning_rate": 4.999522774196686e-05, + "loss": 1.4014, + "step": 6319 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995226207157455e-05, + "loss": 1.4934, + "step": 6320 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995224672101315e-05, + "loss": 1.2602, + "step": 6321 + }, + { + "epoch": 0.01, + "learning_rate": 4.999522313679843e-05, + "loss": 1.2214, + "step": 6322 + }, + { + "epoch": 0.01, + "learning_rate": 4.999522160124881e-05, + "loss": 1.1039, + "step": 6323 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995220065452445e-05, + "loss": 1.1839, + "step": 6324 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995218529409336e-05, + "loss": 1.3264, + "step": 6325 + }, + { + "epoch": 0.01, + "learning_rate": 4.999521699311949e-05, + "loss": 1.4254, + "step": 6326 + }, + { + "epoch": 0.01, + "learning_rate": 4.999521545658289e-05, + "loss": 0.9513, + "step": 6327 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995213919799556e-05, + "loss": 1.7214, + "step": 6328 + }, + { + "epoch": 0.01, + "learning_rate": 4.999521238276947e-05, + "loss": 2.0432, + "step": 6329 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995210845492654e-05, + "loss": 1.57, + "step": 6330 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995209307969094e-05, + "loss": 1.2588, + "step": 6331 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995207770198796e-05, + "loss": 1.3624, + "step": 6332 + }, + { + "epoch": 0.01, + "learning_rate": 4.999520623218175e-05, + "loss": 1.1314, + "step": 6333 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995204693917955e-05, + "loss": 0.9187, + "step": 6334 + }, + { + "epoch": 0.01, + "learning_rate": 4.999520315540742e-05, + "loss": 1.0166, + "step": 6335 + }, + { + "epoch": 0.01, + "learning_rate": 4.999520161665016e-05, + "loss": 0.987, + "step": 6336 + }, + { + "epoch": 0.01, + "learning_rate": 4.999520007764614e-05, + "loss": 1.1031, + "step": 6337 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995198538395385e-05, + "loss": 1.2838, + "step": 6338 + }, + { + "epoch": 0.01, + "learning_rate": 4.999519699889788e-05, + "loss": 0.6294, + "step": 6339 + }, + { + "epoch": 0.01, + "learning_rate": 4.999519545915364e-05, + "loss": 0.9347, + "step": 6340 + }, + { + "epoch": 0.01, + "learning_rate": 4.999519391916266e-05, + "loss": 0.9834, + "step": 6341 + }, + { + "epoch": 0.01, + "learning_rate": 4.999519237892493e-05, + "loss": 1.083, + "step": 6342 + }, + { + "epoch": 0.01, + "learning_rate": 4.999519083844047e-05, + "loss": 1.3272, + "step": 6343 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995189297709254e-05, + "loss": 1.0224, + "step": 6344 + }, + { + "epoch": 0.01, + "learning_rate": 4.99951877567313e-05, + "loss": 1.2144, + "step": 6345 + }, + { + "epoch": 0.01, + "learning_rate": 4.999518621550661e-05, + "loss": 1.216, + "step": 6346 + }, + { + "epoch": 0.01, + "learning_rate": 4.999518467403518e-05, + "loss": 1.0545, + "step": 6347 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995183132317e-05, + "loss": 1.6558, + "step": 6348 + }, + { + "epoch": 0.01, + "learning_rate": 4.999518159035208e-05, + "loss": 1.3072, + "step": 6349 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995180048140424e-05, + "loss": 1.5042, + "step": 6350 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995178505682015e-05, + "loss": 1.491, + "step": 6351 + }, + { + "epoch": 0.01, + "learning_rate": 4.999517696297688e-05, + "loss": 1.4511, + "step": 6352 + }, + { + "epoch": 0.01, + "learning_rate": 4.999517542002499e-05, + "loss": 1.2005, + "step": 6353 + }, + { + "epoch": 0.01, + "learning_rate": 4.999517387682636e-05, + "loss": 0.913, + "step": 6354 + }, + { + "epoch": 0.01, + "learning_rate": 4.999517233338099e-05, + "loss": 0.9846, + "step": 6355 + }, + { + "epoch": 0.01, + "learning_rate": 4.999517078968888e-05, + "loss": 0.995, + "step": 6356 + }, + { + "epoch": 0.01, + "learning_rate": 4.999516924575002e-05, + "loss": 1.0608, + "step": 6357 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995167701564425e-05, + "loss": 0.9245, + "step": 6358 + }, + { + "epoch": 0.01, + "learning_rate": 4.999516615713209e-05, + "loss": 0.947, + "step": 6359 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995164612453014e-05, + "loss": 0.8862, + "step": 6360 + }, + { + "epoch": 0.01, + "learning_rate": 4.99951630675272e-05, + "loss": 0.9554, + "step": 6361 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995161522354626e-05, + "loss": 1.039, + "step": 6362 + }, + { + "epoch": 0.01, + "learning_rate": 4.999515997693532e-05, + "loss": 1.1183, + "step": 6363 + }, + { + "epoch": 0.01, + "learning_rate": 4.999515843126928e-05, + "loss": 1.1794, + "step": 6364 + }, + { + "epoch": 0.01, + "learning_rate": 4.999515688535649e-05, + "loss": 1.4328, + "step": 6365 + }, + { + "epoch": 0.01, + "learning_rate": 4.999515533919696e-05, + "loss": 0.9539, + "step": 6366 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995153792790696e-05, + "loss": 0.9478, + "step": 6367 + }, + { + "epoch": 0.01, + "learning_rate": 4.999515224613768e-05, + "loss": 0.9248, + "step": 6368 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995150699237924e-05, + "loss": 0.7609, + "step": 6369 + }, + { + "epoch": 0.01, + "learning_rate": 4.999514915209143e-05, + "loss": 1.0044, + "step": 6370 + }, + { + "epoch": 0.01, + "learning_rate": 4.999514760469819e-05, + "loss": 1.1496, + "step": 6371 + }, + { + "epoch": 0.01, + "learning_rate": 4.999514605705822e-05, + "loss": 0.828, + "step": 6372 + }, + { + "epoch": 0.01, + "learning_rate": 4.999514450917149e-05, + "loss": 1.1735, + "step": 6373 + }, + { + "epoch": 0.01, + "learning_rate": 4.999514296103803e-05, + "loss": 1.0782, + "step": 6374 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995141412657834e-05, + "loss": 1.0884, + "step": 6375 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995139864030884e-05, + "loss": 0.8416, + "step": 6376 + }, + { + "epoch": 0.01, + "learning_rate": 4.99951383151572e-05, + "loss": 1.1527, + "step": 6377 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995136766036774e-05, + "loss": 1.0447, + "step": 6378 + }, + { + "epoch": 0.01, + "learning_rate": 4.99951352166696e-05, + "loss": 1.3278, + "step": 6379 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995133667055694e-05, + "loss": 0.9146, + "step": 6380 + }, + { + "epoch": 0.01, + "learning_rate": 4.999513211719504e-05, + "loss": 1.1876, + "step": 6381 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995130567087644e-05, + "loss": 1.1195, + "step": 6382 + }, + { + "epoch": 0.01, + "learning_rate": 4.999512901673351e-05, + "loss": 0.8774, + "step": 6383 + }, + { + "epoch": 0.01, + "learning_rate": 4.999512746613263e-05, + "loss": 1.2939, + "step": 6384 + }, + { + "epoch": 0.01, + "learning_rate": 4.999512591528501e-05, + "loss": 1.1973, + "step": 6385 + }, + { + "epoch": 0.01, + "learning_rate": 4.999512436419066e-05, + "loss": 1.1773, + "step": 6386 + }, + { + "epoch": 0.01, + "learning_rate": 4.999512281284956e-05, + "loss": 1.3966, + "step": 6387 + }, + { + "epoch": 0.01, + "learning_rate": 4.999512126126171e-05, + "loss": 0.8522, + "step": 6388 + }, + { + "epoch": 0.01, + "learning_rate": 4.999511970942713e-05, + "loss": 1.1953, + "step": 6389 + }, + { + "epoch": 0.01, + "learning_rate": 4.999511815734581e-05, + "loss": 1.2966, + "step": 6390 + }, + { + "epoch": 0.01, + "learning_rate": 4.999511660501775e-05, + "loss": 1.1662, + "step": 6391 + }, + { + "epoch": 0.01, + "learning_rate": 4.999511505244294e-05, + "loss": 1.4128, + "step": 6392 + }, + { + "epoch": 0.01, + "learning_rate": 4.999511349962139e-05, + "loss": 1.075, + "step": 6393 + }, + { + "epoch": 0.01, + "learning_rate": 4.99951119465531e-05, + "loss": 1.1241, + "step": 6394 + }, + { + "epoch": 0.01, + "learning_rate": 4.999511039323807e-05, + "loss": 1.1761, + "step": 6395 + }, + { + "epoch": 0.01, + "learning_rate": 4.99951088396763e-05, + "loss": 1.3527, + "step": 6396 + }, + { + "epoch": 0.01, + "learning_rate": 4.999510728586778e-05, + "loss": 1.2662, + "step": 6397 + }, + { + "epoch": 0.01, + "learning_rate": 4.999510573181253e-05, + "loss": 0.9103, + "step": 6398 + }, + { + "epoch": 0.01, + "learning_rate": 4.999510417751053e-05, + "loss": 0.9209, + "step": 6399 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995102622961796e-05, + "loss": 0.6531, + "step": 6400 + }, + { + "epoch": 0.01, + "learning_rate": 4.999510106816632e-05, + "loss": 0.145, + "step": 6401 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950995131241e-05, + "loss": 0.0826, + "step": 6402 + }, + { + "epoch": 0.01, + "learning_rate": 4.999509795783515e-05, + "loss": 0.0694, + "step": 6403 + }, + { + "epoch": 0.01, + "learning_rate": 4.999509640229944e-05, + "loss": 0.2337, + "step": 6404 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995094846517e-05, + "loss": 0.2951, + "step": 6405 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995093290487814e-05, + "loss": 1.0009, + "step": 6406 + }, + { + "epoch": 0.01, + "learning_rate": 4.999509173421189e-05, + "loss": 0.9331, + "step": 6407 + }, + { + "epoch": 0.01, + "learning_rate": 4.999509017768923e-05, + "loss": 1.3308, + "step": 6408 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995088620919825e-05, + "loss": 1.3025, + "step": 6409 + }, + { + "epoch": 0.01, + "learning_rate": 4.999508706390368e-05, + "loss": 0.9508, + "step": 6410 + }, + { + "epoch": 0.01, + "learning_rate": 4.999508550664079e-05, + "loss": 0.854, + "step": 6411 + }, + { + "epoch": 0.01, + "learning_rate": 4.999508394913116e-05, + "loss": 0.9445, + "step": 6412 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995082391374795e-05, + "loss": 1.2569, + "step": 6413 + }, + { + "epoch": 0.01, + "learning_rate": 4.999508083337168e-05, + "loss": 1.3479, + "step": 6414 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995079275121825e-05, + "loss": 1.3147, + "step": 6415 + }, + { + "epoch": 0.01, + "learning_rate": 4.999507771662524e-05, + "loss": 1.3282, + "step": 6416 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995076157881906e-05, + "loss": 1.3538, + "step": 6417 + }, + { + "epoch": 0.01, + "learning_rate": 4.999507459889183e-05, + "loss": 1.3551, + "step": 6418 + }, + { + "epoch": 0.01, + "learning_rate": 4.999507303965502e-05, + "loss": 1.2779, + "step": 6419 + }, + { + "epoch": 0.01, + "learning_rate": 4.999507148017146e-05, + "loss": 1.3151, + "step": 6420 + }, + { + "epoch": 0.01, + "learning_rate": 4.999506992044116e-05, + "loss": 1.1586, + "step": 6421 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995068360464125e-05, + "loss": 0.8012, + "step": 6422 + }, + { + "epoch": 0.01, + "learning_rate": 4.999506680024034e-05, + "loss": 0.5621, + "step": 6423 + }, + { + "epoch": 0.01, + "learning_rate": 4.999506523976982e-05, + "loss": 0.6656, + "step": 6424 + }, + { + "epoch": 0.01, + "learning_rate": 4.999506367905257e-05, + "loss": 0.6819, + "step": 6425 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995062118088564e-05, + "loss": 0.9057, + "step": 6426 + }, + { + "epoch": 0.01, + "learning_rate": 4.999506055687783e-05, + "loss": 1.3069, + "step": 6427 + }, + { + "epoch": 0.01, + "learning_rate": 4.999505899542034e-05, + "loss": 0.8545, + "step": 6428 + }, + { + "epoch": 0.01, + "learning_rate": 4.999505743371612e-05, + "loss": 1.0369, + "step": 6429 + }, + { + "epoch": 0.01, + "learning_rate": 4.999505587176516e-05, + "loss": 1.0941, + "step": 6430 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995054309567455e-05, + "loss": 1.3182, + "step": 6431 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995052747123006e-05, + "loss": 1.4654, + "step": 6432 + }, + { + "epoch": 0.01, + "learning_rate": 4.999505118443183e-05, + "loss": 1.0973, + "step": 6433 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950496214939e-05, + "loss": 0.9623, + "step": 6434 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995048058309236e-05, + "loss": 1.0791, + "step": 6435 + }, + { + "epoch": 0.01, + "learning_rate": 4.999504649487783e-05, + "loss": 1.4668, + "step": 6436 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995044931199676e-05, + "loss": 1.3924, + "step": 6437 + }, + { + "epoch": 0.01, + "learning_rate": 4.999504336727478e-05, + "loss": 0.6485, + "step": 6438 + }, + { + "epoch": 0.01, + "learning_rate": 4.999504180310316e-05, + "loss": 1.164, + "step": 6439 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995040238684785e-05, + "loss": 1.101, + "step": 6440 + }, + { + "epoch": 0.01, + "learning_rate": 4.999503867401968e-05, + "loss": 1.0594, + "step": 6441 + }, + { + "epoch": 0.01, + "learning_rate": 4.999503710910783e-05, + "loss": 1.0415, + "step": 6442 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995035543949245e-05, + "loss": 1.0195, + "step": 6443 + }, + { + "epoch": 0.01, + "learning_rate": 4.999503397854391e-05, + "loss": 0.9997, + "step": 6444 + }, + { + "epoch": 0.01, + "learning_rate": 4.999503241289184e-05, + "loss": 1.1615, + "step": 6445 + }, + { + "epoch": 0.01, + "learning_rate": 4.999503084699303e-05, + "loss": 0.9525, + "step": 6446 + }, + { + "epoch": 0.01, + "learning_rate": 4.999502928084747e-05, + "loss": 1.1436, + "step": 6447 + }, + { + "epoch": 0.01, + "learning_rate": 4.999502771445518e-05, + "loss": 0.9101, + "step": 6448 + }, + { + "epoch": 0.01, + "learning_rate": 4.999502614781615e-05, + "loss": 1.2262, + "step": 6449 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995024580930375e-05, + "loss": 0.9769, + "step": 6450 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995023013797856e-05, + "loss": 0.797, + "step": 6451 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995021446418614e-05, + "loss": 1.1696, + "step": 6452 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995019878792614e-05, + "loss": 1.226, + "step": 6453 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995018310919877e-05, + "loss": 1.2363, + "step": 6454 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950167428004e-05, + "loss": 1.0814, + "step": 6455 + }, + { + "epoch": 0.01, + "learning_rate": 4.999501517443419e-05, + "loss": 1.03, + "step": 6456 + }, + { + "epoch": 0.01, + "learning_rate": 4.999501360582123e-05, + "loss": 1.0106, + "step": 6457 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995012036961534e-05, + "loss": 0.7034, + "step": 6458 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995010467855097e-05, + "loss": 0.5473, + "step": 6459 + }, + { + "epoch": 0.01, + "learning_rate": 4.999500889850193e-05, + "loss": 0.5888, + "step": 6460 + }, + { + "epoch": 0.01, + "learning_rate": 4.999500732890201e-05, + "loss": 0.2767, + "step": 6461 + }, + { + "epoch": 0.01, + "learning_rate": 4.999500575905535e-05, + "loss": 0.2416, + "step": 6462 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995004188961954e-05, + "loss": 0.1992, + "step": 6463 + }, + { + "epoch": 0.01, + "learning_rate": 4.9995002618621816e-05, + "loss": 0.4523, + "step": 6464 + }, + { + "epoch": 0.01, + "learning_rate": 4.999500104803494e-05, + "loss": 0.262, + "step": 6465 + }, + { + "epoch": 0.01, + "learning_rate": 4.999499947720132e-05, + "loss": 0.177, + "step": 6466 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994997906120966e-05, + "loss": 0.1315, + "step": 6467 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994996334793866e-05, + "loss": 0.1851, + "step": 6468 + }, + { + "epoch": 0.01, + "learning_rate": 4.999499476322003e-05, + "loss": 0.3109, + "step": 6469 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994993191399453e-05, + "loss": 0.8243, + "step": 6470 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994991619332134e-05, + "loss": 1.1854, + "step": 6471 + }, + { + "epoch": 0.01, + "learning_rate": 4.999499004701808e-05, + "loss": 1.1112, + "step": 6472 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994988474457285e-05, + "loss": 1.0755, + "step": 6473 + }, + { + "epoch": 0.01, + "learning_rate": 4.999498690164974e-05, + "loss": 1.141, + "step": 6474 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994985328595465e-05, + "loss": 1.4014, + "step": 6475 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994983755294446e-05, + "loss": 1.1114, + "step": 6476 + }, + { + "epoch": 0.01, + "learning_rate": 4.999498218174668e-05, + "loss": 1.0627, + "step": 6477 + }, + { + "epoch": 0.01, + "learning_rate": 4.999498060795219e-05, + "loss": 1.2869, + "step": 6478 + }, + { + "epoch": 0.01, + "learning_rate": 4.999497903391096e-05, + "loss": 1.2929, + "step": 6479 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994977459622976e-05, + "loss": 1.0122, + "step": 6480 + }, + { + "epoch": 0.01, + "learning_rate": 4.999497588508826e-05, + "loss": 0.7117, + "step": 6481 + }, + { + "epoch": 0.01, + "learning_rate": 4.99949743103068e-05, + "loss": 0.7217, + "step": 6482 + }, + { + "epoch": 0.01, + "learning_rate": 4.999497273527861e-05, + "loss": 1.2543, + "step": 6483 + }, + { + "epoch": 0.01, + "learning_rate": 4.999497116000367e-05, + "loss": 1.2273, + "step": 6484 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994969584481994e-05, + "loss": 1.0454, + "step": 6485 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994968008713574e-05, + "loss": 1.1889, + "step": 6486 + }, + { + "epoch": 0.01, + "learning_rate": 4.999496643269842e-05, + "loss": 1.0451, + "step": 6487 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994964856436524e-05, + "loss": 1.039, + "step": 6488 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994963279927886e-05, + "loss": 0.8729, + "step": 6489 + }, + { + "epoch": 0.01, + "learning_rate": 4.999496170317252e-05, + "loss": 0.7305, + "step": 6490 + }, + { + "epoch": 0.01, + "learning_rate": 4.99949601261704e-05, + "loss": 1.1235, + "step": 6491 + }, + { + "epoch": 0.01, + "learning_rate": 4.999495854892154e-05, + "loss": 1.2587, + "step": 6492 + }, + { + "epoch": 0.01, + "learning_rate": 4.999495697142595e-05, + "loss": 1.1407, + "step": 6493 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994955393683616e-05, + "loss": 1.2415, + "step": 6494 + }, + { + "epoch": 0.01, + "learning_rate": 4.999495381569455e-05, + "loss": 1.3505, + "step": 6495 + }, + { + "epoch": 0.01, + "learning_rate": 4.999495223745873e-05, + "loss": 1.1493, + "step": 6496 + }, + { + "epoch": 0.01, + "learning_rate": 4.999495065897618e-05, + "loss": 1.2276, + "step": 6497 + }, + { + "epoch": 0.01, + "learning_rate": 4.999494908024688e-05, + "loss": 0.9747, + "step": 6498 + }, + { + "epoch": 0.01, + "learning_rate": 4.999494750127085e-05, + "loss": 1.2284, + "step": 6499 + }, + { + "epoch": 0.01, + "learning_rate": 4.999494592204808e-05, + "loss": 1.2709, + "step": 6500 + }, + { + "epoch": 0.01, + "eval_loss": 1.0758188962936401, + "eval_runtime": 94.9288, + "eval_samples_per_second": 14.59, + "eval_steps_per_second": 3.655, + "step": 6500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994944342578564e-05, + "loss": 1.2316, + "step": 6501 + }, + { + "epoch": 0.01, + "learning_rate": 4.999494276286232e-05, + "loss": 1.0427, + "step": 6502 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994941182899326e-05, + "loss": 1.5905, + "step": 6503 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994939602689595e-05, + "loss": 0.9685, + "step": 6504 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994938022233126e-05, + "loss": 1.0835, + "step": 6505 + }, + { + "epoch": 0.01, + "learning_rate": 4.999493644152991e-05, + "loss": 1.1555, + "step": 6506 + }, + { + "epoch": 0.01, + "learning_rate": 4.999493486057997e-05, + "loss": 1.1553, + "step": 6507 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994933279383275e-05, + "loss": 1.2677, + "step": 6508 + }, + { + "epoch": 0.01, + "learning_rate": 4.999493169793985e-05, + "loss": 1.3516, + "step": 6509 + }, + { + "epoch": 0.01, + "learning_rate": 4.999493011624968e-05, + "loss": 1.1805, + "step": 6510 + }, + { + "epoch": 0.01, + "learning_rate": 4.999492853431278e-05, + "loss": 1.0536, + "step": 6511 + }, + { + "epoch": 0.01, + "learning_rate": 4.999492695212913e-05, + "loss": 0.7601, + "step": 6512 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994925369698744e-05, + "loss": 1.234, + "step": 6513 + }, + { + "epoch": 0.01, + "learning_rate": 4.999492378702162e-05, + "loss": 1.1395, + "step": 6514 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994922204097764e-05, + "loss": 1.0047, + "step": 6515 + }, + { + "epoch": 0.01, + "learning_rate": 4.999492062092716e-05, + "loss": 1.2291, + "step": 6516 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994919037509814e-05, + "loss": 1.1659, + "step": 6517 + }, + { + "epoch": 0.01, + "learning_rate": 4.999491745384573e-05, + "loss": 1.0848, + "step": 6518 + }, + { + "epoch": 0.01, + "learning_rate": 4.999491586993491e-05, + "loss": 1.1353, + "step": 6519 + }, + { + "epoch": 0.01, + "learning_rate": 4.999491428577735e-05, + "loss": 0.9852, + "step": 6520 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994912701373046e-05, + "loss": 1.0104, + "step": 6521 + }, + { + "epoch": 0.01, + "learning_rate": 4.999491111672201e-05, + "loss": 1.2367, + "step": 6522 + }, + { + "epoch": 0.01, + "learning_rate": 4.999490953182423e-05, + "loss": 1.0964, + "step": 6523 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994907946679716e-05, + "loss": 0.6671, + "step": 6524 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994906361288454e-05, + "loss": 1.2634, + "step": 6525 + }, + { + "epoch": 0.01, + "learning_rate": 4.999490477565046e-05, + "loss": 1.1709, + "step": 6526 + }, + { + "epoch": 0.01, + "learning_rate": 4.999490318976573e-05, + "loss": 0.6647, + "step": 6527 + }, + { + "epoch": 0.01, + "learning_rate": 4.999490160363425e-05, + "loss": 0.5002, + "step": 6528 + }, + { + "epoch": 0.01, + "learning_rate": 4.999490001725604e-05, + "loss": 1.1257, + "step": 6529 + }, + { + "epoch": 0.01, + "learning_rate": 4.999489843063109e-05, + "loss": 1.2702, + "step": 6530 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994896843759396e-05, + "loss": 1.1731, + "step": 6531 + }, + { + "epoch": 0.01, + "learning_rate": 4.999489525664096e-05, + "loss": 1.0127, + "step": 6532 + }, + { + "epoch": 0.01, + "learning_rate": 4.999489366927579e-05, + "loss": 1.1434, + "step": 6533 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994892081663886e-05, + "loss": 1.0425, + "step": 6534 + }, + { + "epoch": 0.01, + "learning_rate": 4.999489049380524e-05, + "loss": 1.4964, + "step": 6535 + }, + { + "epoch": 0.01, + "learning_rate": 4.999488890569986e-05, + "loss": 1.5346, + "step": 6536 + }, + { + "epoch": 0.01, + "learning_rate": 4.999488731734773e-05, + "loss": 1.4415, + "step": 6537 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994885728748866e-05, + "loss": 1.3784, + "step": 6538 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994884139903254e-05, + "loss": 1.4162, + "step": 6539 + }, + { + "epoch": 0.01, + "learning_rate": 4.999488255081092e-05, + "loss": 1.6596, + "step": 6540 + }, + { + "epoch": 0.01, + "learning_rate": 4.999488096147183e-05, + "loss": 1.2537, + "step": 6541 + }, + { + "epoch": 0.01, + "learning_rate": 4.999487937188601e-05, + "loss": 0.9764, + "step": 6542 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994877782053453e-05, + "loss": 1.1744, + "step": 6543 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994876191974155e-05, + "loss": 1.3614, + "step": 6544 + }, + { + "epoch": 0.01, + "learning_rate": 4.999487460164812e-05, + "loss": 1.3881, + "step": 6545 + }, + { + "epoch": 0.01, + "learning_rate": 4.999487301107534e-05, + "loss": 1.0576, + "step": 6546 + }, + { + "epoch": 0.01, + "learning_rate": 4.999487142025583e-05, + "loss": 1.1086, + "step": 6547 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994869829189575e-05, + "loss": 1.1733, + "step": 6548 + }, + { + "epoch": 0.01, + "learning_rate": 4.999486823787658e-05, + "loss": 1.1962, + "step": 6549 + }, + { + "epoch": 0.01, + "learning_rate": 4.999486664631685e-05, + "loss": 1.044, + "step": 6550 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994865054510374e-05, + "loss": 0.9311, + "step": 6551 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994863462457164e-05, + "loss": 0.9222, + "step": 6552 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994861870157216e-05, + "loss": 1.154, + "step": 6553 + }, + { + "epoch": 0.01, + "learning_rate": 4.999486027761053e-05, + "loss": 1.0585, + "step": 6554 + }, + { + "epoch": 0.01, + "learning_rate": 4.999485868481711e-05, + "loss": 1.2625, + "step": 6555 + }, + { + "epoch": 0.01, + "learning_rate": 4.999485709177695e-05, + "loss": 1.243, + "step": 6556 + }, + { + "epoch": 0.01, + "learning_rate": 4.999485549849004e-05, + "loss": 1.2738, + "step": 6557 + }, + { + "epoch": 0.01, + "learning_rate": 4.99948539049564e-05, + "loss": 1.4934, + "step": 6558 + }, + { + "epoch": 0.01, + "learning_rate": 4.999485231117602e-05, + "loss": 1.1014, + "step": 6559 + }, + { + "epoch": 0.01, + "learning_rate": 4.99948507171489e-05, + "loss": 1.0812, + "step": 6560 + }, + { + "epoch": 0.01, + "learning_rate": 4.999484912287504e-05, + "loss": 1.0902, + "step": 6561 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994847528354446e-05, + "loss": 1.2505, + "step": 6562 + }, + { + "epoch": 0.01, + "learning_rate": 4.999484593358711e-05, + "loss": 1.286, + "step": 6563 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994844338573047e-05, + "loss": 1.432, + "step": 6564 + }, + { + "epoch": 0.01, + "learning_rate": 4.999484274331223e-05, + "loss": 1.2748, + "step": 6565 + }, + { + "epoch": 0.01, + "learning_rate": 4.999484114780468e-05, + "loss": 1.4985, + "step": 6566 + }, + { + "epoch": 0.01, + "learning_rate": 4.999483955205039e-05, + "loss": 1.5063, + "step": 6567 + }, + { + "epoch": 0.01, + "learning_rate": 4.999483795604936e-05, + "loss": 1.1407, + "step": 6568 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994836359801594e-05, + "loss": 1.2549, + "step": 6569 + }, + { + "epoch": 0.01, + "learning_rate": 4.999483476330709e-05, + "loss": 1.1062, + "step": 6570 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994833166565846e-05, + "loss": 1.0548, + "step": 6571 + }, + { + "epoch": 0.01, + "learning_rate": 4.999483156957787e-05, + "loss": 1.0383, + "step": 6572 + }, + { + "epoch": 0.01, + "learning_rate": 4.999482997234315e-05, + "loss": 1.208, + "step": 6573 + }, + { + "epoch": 0.01, + "learning_rate": 4.999482837486169e-05, + "loss": 0.8818, + "step": 6574 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994826777133494e-05, + "loss": 0.8663, + "step": 6575 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994825179158554e-05, + "loss": 1.0257, + "step": 6576 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994823580936884e-05, + "loss": 0.9159, + "step": 6577 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994821982468476e-05, + "loss": 1.2628, + "step": 6578 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994820383753324e-05, + "loss": 1.2665, + "step": 6579 + }, + { + "epoch": 0.01, + "learning_rate": 4.999481878479143e-05, + "loss": 0.9341, + "step": 6580 + }, + { + "epoch": 0.01, + "learning_rate": 4.999481718558281e-05, + "loss": 1.3301, + "step": 6581 + }, + { + "epoch": 0.01, + "learning_rate": 4.999481558612744e-05, + "loss": 0.5321, + "step": 6582 + }, + { + "epoch": 0.01, + "learning_rate": 4.999481398642534e-05, + "loss": 0.5291, + "step": 6583 + }, + { + "epoch": 0.01, + "learning_rate": 4.99948123864765e-05, + "loss": 0.745, + "step": 6584 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994810786280924e-05, + "loss": 1.0752, + "step": 6585 + }, + { + "epoch": 0.01, + "learning_rate": 4.99948091858386e-05, + "loss": 1.2102, + "step": 6586 + }, + { + "epoch": 0.01, + "learning_rate": 4.999480758514954e-05, + "loss": 0.5819, + "step": 6587 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994805984213746e-05, + "loss": 0.9426, + "step": 6588 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994804383031215e-05, + "loss": 1.0205, + "step": 6589 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994802781601946e-05, + "loss": 1.0928, + "step": 6590 + }, + { + "epoch": 0.01, + "learning_rate": 4.999480117992593e-05, + "loss": 1.1653, + "step": 6591 + }, + { + "epoch": 0.01, + "learning_rate": 4.999479957800319e-05, + "loss": 1.3666, + "step": 6592 + }, + { + "epoch": 0.01, + "learning_rate": 4.999479797583371e-05, + "loss": 1.3309, + "step": 6593 + }, + { + "epoch": 0.01, + "learning_rate": 4.999479637341748e-05, + "loss": 0.9612, + "step": 6594 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994794770754515e-05, + "loss": 1.2552, + "step": 6595 + }, + { + "epoch": 0.01, + "learning_rate": 4.999479316784482e-05, + "loss": 1.3941, + "step": 6596 + }, + { + "epoch": 0.01, + "learning_rate": 4.999479156468838e-05, + "loss": 1.3753, + "step": 6597 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994789961285204e-05, + "loss": 1.4621, + "step": 6598 + }, + { + "epoch": 0.01, + "learning_rate": 4.999478835763529e-05, + "loss": 0.9614, + "step": 6599 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994786753738644e-05, + "loss": 1.1641, + "step": 6600 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994785149595244e-05, + "loss": 1.1776, + "step": 6601 + }, + { + "epoch": 0.01, + "learning_rate": 4.999478354520513e-05, + "loss": 1.2815, + "step": 6602 + }, + { + "epoch": 0.01, + "learning_rate": 4.999478194056826e-05, + "loss": 0.6628, + "step": 6603 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994780335684656e-05, + "loss": 0.8115, + "step": 6604 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994778730554314e-05, + "loss": 1.2254, + "step": 6605 + }, + { + "epoch": 0.01, + "learning_rate": 4.999477712517723e-05, + "loss": 1.3675, + "step": 6606 + }, + { + "epoch": 0.01, + "learning_rate": 4.999477551955342e-05, + "loss": 1.2994, + "step": 6607 + }, + { + "epoch": 0.01, + "learning_rate": 4.999477391368286e-05, + "loss": 0.9946, + "step": 6608 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994772307565566e-05, + "loss": 1.0522, + "step": 6609 + }, + { + "epoch": 0.01, + "learning_rate": 4.999477070120153e-05, + "loss": 1.0744, + "step": 6610 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994769094590764e-05, + "loss": 1.4907, + "step": 6611 + }, + { + "epoch": 0.01, + "learning_rate": 4.999476748773326e-05, + "loss": 1.18, + "step": 6612 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994765880629014e-05, + "loss": 0.9152, + "step": 6613 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994764273278036e-05, + "loss": 1.127, + "step": 6614 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994762665680314e-05, + "loss": 1.1186, + "step": 6615 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994761057835855e-05, + "loss": 1.0582, + "step": 6616 + }, + { + "epoch": 0.01, + "learning_rate": 4.999475944974466e-05, + "loss": 1.4079, + "step": 6617 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994757841406724e-05, + "loss": 1.3987, + "step": 6618 + }, + { + "epoch": 0.01, + "learning_rate": 4.999475623282205e-05, + "loss": 1.1375, + "step": 6619 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994754623990645e-05, + "loss": 1.0294, + "step": 6620 + }, + { + "epoch": 0.01, + "learning_rate": 4.99947530149125e-05, + "loss": 1.163, + "step": 6621 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994751405587616e-05, + "loss": 0.1906, + "step": 6622 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994749796015995e-05, + "loss": 0.6676, + "step": 6623 + }, + { + "epoch": 0.01, + "learning_rate": 4.999474818619763e-05, + "loss": 1.3446, + "step": 6624 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994746576132536e-05, + "loss": 1.1012, + "step": 6625 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994744965820703e-05, + "loss": 1.0001, + "step": 6626 + }, + { + "epoch": 0.01, + "learning_rate": 4.999474335526213e-05, + "loss": 1.0914, + "step": 6627 + }, + { + "epoch": 0.01, + "learning_rate": 4.999474174445682e-05, + "loss": 0.9416, + "step": 6628 + }, + { + "epoch": 0.01, + "learning_rate": 4.999474013340477e-05, + "loss": 1.1493, + "step": 6629 + }, + { + "epoch": 0.01, + "learning_rate": 4.999473852210599e-05, + "loss": 1.2614, + "step": 6630 + }, + { + "epoch": 0.01, + "learning_rate": 4.999473691056046e-05, + "loss": 1.3037, + "step": 6631 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994735298768206e-05, + "loss": 0.8561, + "step": 6632 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994733686729205e-05, + "loss": 1.146, + "step": 6633 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994732074443474e-05, + "loss": 1.0051, + "step": 6634 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994730461911e-05, + "loss": 1.1089, + "step": 6635 + }, + { + "epoch": 0.01, + "learning_rate": 4.999472884913179e-05, + "loss": 1.3316, + "step": 6636 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994727236105845e-05, + "loss": 1.1779, + "step": 6637 + }, + { + "epoch": 0.01, + "learning_rate": 4.999472562283316e-05, + "loss": 1.2062, + "step": 6638 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994724009313734e-05, + "loss": 1.2757, + "step": 6639 + }, + { + "epoch": 0.01, + "learning_rate": 4.999472239554758e-05, + "loss": 1.1214, + "step": 6640 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994720781534673e-05, + "loss": 1.0394, + "step": 6641 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994719167275044e-05, + "loss": 1.045, + "step": 6642 + }, + { + "epoch": 0.01, + "learning_rate": 4.999471755276867e-05, + "loss": 0.9549, + "step": 6643 + }, + { + "epoch": 0.01, + "learning_rate": 4.999471593801556e-05, + "loss": 0.9414, + "step": 6644 + }, + { + "epoch": 0.01, + "learning_rate": 4.999471432301572e-05, + "loss": 1.0907, + "step": 6645 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994712707769134e-05, + "loss": 1.0651, + "step": 6646 + }, + { + "epoch": 0.01, + "learning_rate": 4.999471109227582e-05, + "loss": 0.9648, + "step": 6647 + }, + { + "epoch": 0.01, + "learning_rate": 4.999470947653576e-05, + "loss": 1.051, + "step": 6648 + }, + { + "epoch": 0.01, + "learning_rate": 4.999470786054896e-05, + "loss": 0.9602, + "step": 6649 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994706244315426e-05, + "loss": 1.0035, + "step": 6650 + }, + { + "epoch": 0.01, + "learning_rate": 4.999470462783516e-05, + "loss": 0.6232, + "step": 6651 + }, + { + "epoch": 0.01, + "learning_rate": 4.999470301110815e-05, + "loss": 0.6789, + "step": 6652 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994701394134405e-05, + "loss": 0.8334, + "step": 6653 + }, + { + "epoch": 0.01, + "learning_rate": 4.999469977691392e-05, + "loss": 1.5565, + "step": 6654 + }, + { + "epoch": 0.01, + "learning_rate": 4.999469815944671e-05, + "loss": 1.1051, + "step": 6655 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994696541732756e-05, + "loss": 1.239, + "step": 6656 + }, + { + "epoch": 0.01, + "learning_rate": 4.999469492377205e-05, + "loss": 1.2687, + "step": 6657 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994693305564634e-05, + "loss": 1.0522, + "step": 6658 + }, + { + "epoch": 0.01, + "learning_rate": 4.999469168711046e-05, + "loss": 1.0463, + "step": 6659 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994690068409556e-05, + "loss": 1.0889, + "step": 6660 + }, + { + "epoch": 0.01, + "learning_rate": 4.999468844946192e-05, + "loss": 0.9888, + "step": 6661 + }, + { + "epoch": 0.01, + "learning_rate": 4.999468683026754e-05, + "loss": 1.3083, + "step": 6662 + }, + { + "epoch": 0.01, + "learning_rate": 4.999468521082642e-05, + "loss": 0.9751, + "step": 6663 + }, + { + "epoch": 0.01, + "learning_rate": 4.999468359113857e-05, + "loss": 0.9221, + "step": 6664 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994681971203985e-05, + "loss": 0.7942, + "step": 6665 + }, + { + "epoch": 0.01, + "learning_rate": 4.999468035102266e-05, + "loss": 0.6858, + "step": 6666 + }, + { + "epoch": 0.01, + "learning_rate": 4.99946787305946e-05, + "loss": 1.0173, + "step": 6667 + }, + { + "epoch": 0.01, + "learning_rate": 4.99946771099198e-05, + "loss": 0.4216, + "step": 6668 + }, + { + "epoch": 0.01, + "learning_rate": 4.999467548899826e-05, + "loss": 0.2308, + "step": 6669 + }, + { + "epoch": 0.01, + "learning_rate": 4.999467386782999e-05, + "loss": 0.1515, + "step": 6670 + }, + { + "epoch": 0.01, + "learning_rate": 4.999467224641498e-05, + "loss": 0.0879, + "step": 6671 + }, + { + "epoch": 0.01, + "learning_rate": 4.999467062475323e-05, + "loss": 0.0501, + "step": 6672 + }, + { + "epoch": 0.01, + "learning_rate": 4.999466900284474e-05, + "loss": 0.0293, + "step": 6673 + }, + { + "epoch": 0.01, + "learning_rate": 4.999466738068952e-05, + "loss": 0.0257, + "step": 6674 + }, + { + "epoch": 0.01, + "learning_rate": 4.999466575828756e-05, + "loss": 0.0204, + "step": 6675 + }, + { + "epoch": 0.01, + "learning_rate": 4.999466413563887e-05, + "loss": 0.0162, + "step": 6676 + }, + { + "epoch": 0.01, + "learning_rate": 4.999466251274344e-05, + "loss": 0.0418, + "step": 6677 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994660889601274e-05, + "loss": 0.0339, + "step": 6678 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994659266212364e-05, + "loss": 0.0208, + "step": 6679 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994657642576724e-05, + "loss": 0.0155, + "step": 6680 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994656018694346e-05, + "loss": 0.0171, + "step": 6681 + }, + { + "epoch": 0.01, + "learning_rate": 4.999465439456523e-05, + "loss": 0.012, + "step": 6682 + }, + { + "epoch": 0.01, + "learning_rate": 4.999465277018938e-05, + "loss": 0.2595, + "step": 6683 + }, + { + "epoch": 0.01, + "learning_rate": 4.999465114556679e-05, + "loss": 0.4083, + "step": 6684 + }, + { + "epoch": 0.01, + "learning_rate": 4.999464952069747e-05, + "loss": 0.3142, + "step": 6685 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994647895581406e-05, + "loss": 0.191, + "step": 6686 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994646270218604e-05, + "loss": 0.1369, + "step": 6687 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994644644609065e-05, + "loss": 0.0933, + "step": 6688 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994643018752796e-05, + "loss": 0.0805, + "step": 6689 + }, + { + "epoch": 0.01, + "learning_rate": 4.999464139264979e-05, + "loss": 0.0519, + "step": 6690 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994639766300046e-05, + "loss": 0.0392, + "step": 6691 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994638139703564e-05, + "loss": 0.0669, + "step": 6692 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994636512860346e-05, + "loss": 0.0817, + "step": 6693 + }, + { + "epoch": 0.01, + "learning_rate": 4.999463488577039e-05, + "loss": 0.0558, + "step": 6694 + }, + { + "epoch": 0.01, + "learning_rate": 4.99946332584337e-05, + "loss": 0.0373, + "step": 6695 + }, + { + "epoch": 0.01, + "learning_rate": 4.999463163085027e-05, + "loss": 0.0279, + "step": 6696 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994630003020106e-05, + "loss": 0.0529, + "step": 6697 + }, + { + "epoch": 0.01, + "learning_rate": 4.999462837494321e-05, + "loss": 0.7594, + "step": 6698 + }, + { + "epoch": 0.01, + "learning_rate": 4.999462674661957e-05, + "loss": 1.082, + "step": 6699 + }, + { + "epoch": 0.01, + "learning_rate": 4.99946251180492e-05, + "loss": 1.4619, + "step": 6700 + }, + { + "epoch": 0.01, + "learning_rate": 4.999462348923208e-05, + "loss": 1.1149, + "step": 6701 + }, + { + "epoch": 0.01, + "learning_rate": 4.999462186016825e-05, + "loss": 1.2684, + "step": 6702 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994620230857656e-05, + "loss": 0.9541, + "step": 6703 + }, + { + "epoch": 0.01, + "learning_rate": 4.999461860130034e-05, + "loss": 1.1064, + "step": 6704 + }, + { + "epoch": 0.01, + "learning_rate": 4.999461697149629e-05, + "loss": 1.3397, + "step": 6705 + }, + { + "epoch": 0.01, + "learning_rate": 4.999461534144549e-05, + "loss": 1.1285, + "step": 6706 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994613711147964e-05, + "loss": 1.1078, + "step": 6707 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994612080603706e-05, + "loss": 1.2667, + "step": 6708 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994610449812704e-05, + "loss": 1.1437, + "step": 6709 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994608818774965e-05, + "loss": 1.0543, + "step": 6710 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994607187490495e-05, + "loss": 1.1706, + "step": 6711 + }, + { + "epoch": 0.01, + "learning_rate": 4.999460555595928e-05, + "loss": 1.1993, + "step": 6712 + }, + { + "epoch": 0.01, + "learning_rate": 4.999460392418134e-05, + "loss": 1.1689, + "step": 6713 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994602292156655e-05, + "loss": 0.9531, + "step": 6714 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994600659885237e-05, + "loss": 0.4351, + "step": 6715 + }, + { + "epoch": 0.01, + "learning_rate": 4.999459902736709e-05, + "loss": 0.3077, + "step": 6716 + }, + { + "epoch": 0.01, + "learning_rate": 4.999459739460219e-05, + "loss": 1.1547, + "step": 6717 + }, + { + "epoch": 0.01, + "learning_rate": 4.999459576159057e-05, + "loss": 1.0603, + "step": 6718 + }, + { + "epoch": 0.01, + "learning_rate": 4.999459412833221e-05, + "loss": 1.2145, + "step": 6719 + }, + { + "epoch": 0.01, + "learning_rate": 4.999459249482711e-05, + "loss": 1.0326, + "step": 6720 + }, + { + "epoch": 0.01, + "learning_rate": 4.999459086107527e-05, + "loss": 4.1597, + "step": 6721 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994589227076706e-05, + "loss": 4.1984, + "step": 6722 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994587592831396e-05, + "loss": 3.4404, + "step": 6723 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994585958339354e-05, + "loss": 4.4833, + "step": 6724 + }, + { + "epoch": 0.01, + "learning_rate": 4.999458432360058e-05, + "loss": 4.0146, + "step": 6725 + }, + { + "epoch": 0.01, + "learning_rate": 4.999458268861506e-05, + "loss": 1.5474, + "step": 6726 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994581053382814e-05, + "loss": 0.9322, + "step": 6727 + }, + { + "epoch": 0.01, + "learning_rate": 4.999457941790382e-05, + "loss": 0.4904, + "step": 6728 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994577782178095e-05, + "loss": 0.3933, + "step": 6729 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994576146205644e-05, + "loss": 0.3312, + "step": 6730 + }, + { + "epoch": 0.01, + "learning_rate": 4.999457450998645e-05, + "loss": 0.2082, + "step": 6731 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994572873520515e-05, + "loss": 1.03, + "step": 6732 + }, + { + "epoch": 0.01, + "learning_rate": 4.999457123680785e-05, + "loss": 1.4107, + "step": 6733 + }, + { + "epoch": 0.01, + "learning_rate": 4.999456959984845e-05, + "loss": 1.2318, + "step": 6734 + }, + { + "epoch": 0.01, + "learning_rate": 4.999456796264231e-05, + "loss": 0.5894, + "step": 6735 + }, + { + "epoch": 0.01, + "learning_rate": 4.999456632518944e-05, + "loss": 0.5863, + "step": 6736 + }, + { + "epoch": 0.01, + "learning_rate": 4.999456468748983e-05, + "loss": 1.0186, + "step": 6737 + }, + { + "epoch": 0.01, + "learning_rate": 4.999456304954349e-05, + "loss": 1.0029, + "step": 6738 + }, + { + "epoch": 0.01, + "learning_rate": 4.999456141135041e-05, + "loss": 0.848, + "step": 6739 + }, + { + "epoch": 0.01, + "learning_rate": 4.999455977291059e-05, + "loss": 1.549, + "step": 6740 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994558134224035e-05, + "loss": 1.1585, + "step": 6741 + }, + { + "epoch": 0.01, + "learning_rate": 4.999455649529074e-05, + "loss": 1.1006, + "step": 6742 + }, + { + "epoch": 0.01, + "learning_rate": 4.999455485611072e-05, + "loss": 1.3462, + "step": 6743 + }, + { + "epoch": 0.01, + "learning_rate": 4.999455321668396e-05, + "loss": 1.1244, + "step": 6744 + }, + { + "epoch": 0.01, + "learning_rate": 4.999455157701046e-05, + "loss": 1.3523, + "step": 6745 + }, + { + "epoch": 0.01, + "learning_rate": 4.999454993709023e-05, + "loss": 1.0386, + "step": 6746 + }, + { + "epoch": 0.01, + "learning_rate": 4.999454829692327e-05, + "loss": 1.4276, + "step": 6747 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994546656509565e-05, + "loss": 1.3696, + "step": 6748 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994545015849125e-05, + "loss": 1.2155, + "step": 6749 + }, + { + "epoch": 0.01, + "learning_rate": 4.999454337494195e-05, + "loss": 1.1756, + "step": 6750 + }, + { + "epoch": 0.01, + "learning_rate": 4.999454173378804e-05, + "loss": 1.3096, + "step": 6751 + }, + { + "epoch": 0.01, + "learning_rate": 4.99945400923874e-05, + "loss": 1.0379, + "step": 6752 + }, + { + "epoch": 0.01, + "learning_rate": 4.999453845074001e-05, + "loss": 1.0734, + "step": 6753 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994536808845906e-05, + "loss": 0.8223, + "step": 6754 + }, + { + "epoch": 0.01, + "learning_rate": 4.999453516670505e-05, + "loss": 0.8731, + "step": 6755 + }, + { + "epoch": 0.01, + "learning_rate": 4.999453352431746e-05, + "loss": 0.7837, + "step": 6756 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994531881683136e-05, + "loss": 0.9755, + "step": 6757 + }, + { + "epoch": 0.01, + "learning_rate": 4.999453023880209e-05, + "loss": 1.4549, + "step": 6758 + }, + { + "epoch": 0.01, + "learning_rate": 4.999452859567428e-05, + "loss": 1.209, + "step": 6759 + }, + { + "epoch": 0.01, + "learning_rate": 4.999452695229976e-05, + "loss": 0.7333, + "step": 6760 + }, + { + "epoch": 0.01, + "learning_rate": 4.99945253086785e-05, + "loss": 1.0987, + "step": 6761 + }, + { + "epoch": 0.01, + "learning_rate": 4.999452366481049e-05, + "loss": 1.315, + "step": 6762 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994522020695757e-05, + "loss": 1.3147, + "step": 6763 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994520376334284e-05, + "loss": 1.0884, + "step": 6764 + }, + { + "epoch": 0.01, + "learning_rate": 4.999451873172608e-05, + "loss": 1.1147, + "step": 6765 + }, + { + "epoch": 0.01, + "learning_rate": 4.999451708687114e-05, + "loss": 1.1792, + "step": 6766 + }, + { + "epoch": 0.01, + "learning_rate": 4.999451544176946e-05, + "loss": 1.3728, + "step": 6767 + }, + { + "epoch": 0.01, + "learning_rate": 4.999451379642105e-05, + "loss": 1.1996, + "step": 6768 + }, + { + "epoch": 0.01, + "learning_rate": 4.99945121508259e-05, + "loss": 0.7847, + "step": 6769 + }, + { + "epoch": 0.01, + "learning_rate": 4.999451050498402e-05, + "loss": 0.4295, + "step": 6770 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994508858895406e-05, + "loss": 0.6031, + "step": 6771 + }, + { + "epoch": 0.01, + "learning_rate": 4.999450721256005e-05, + "loss": 0.8572, + "step": 6772 + }, + { + "epoch": 0.01, + "learning_rate": 4.999450556597796e-05, + "loss": 1.1249, + "step": 6773 + }, + { + "epoch": 0.01, + "learning_rate": 4.999450391914914e-05, + "loss": 0.8992, + "step": 6774 + }, + { + "epoch": 0.01, + "learning_rate": 4.999450227207358e-05, + "loss": 1.182, + "step": 6775 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994500624751285e-05, + "loss": 1.1329, + "step": 6776 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994498977182255e-05, + "loss": 1.2008, + "step": 6777 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994497329366494e-05, + "loss": 1.1274, + "step": 6778 + }, + { + "epoch": 0.01, + "learning_rate": 4.999449568130399e-05, + "loss": 4.991, + "step": 6779 + }, + { + "epoch": 0.01, + "learning_rate": 4.999449403299475e-05, + "loss": 2.3363, + "step": 6780 + }, + { + "epoch": 0.01, + "learning_rate": 4.999449238443879e-05, + "loss": 1.1702, + "step": 6781 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994490735636076e-05, + "loss": 1.3478, + "step": 6782 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994489086586636e-05, + "loss": 1.3045, + "step": 6783 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994487437290464e-05, + "loss": 0.9367, + "step": 6784 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994485787747556e-05, + "loss": 1.5058, + "step": 6785 + }, + { + "epoch": 0.01, + "learning_rate": 4.999448413795791e-05, + "loss": 1.0185, + "step": 6786 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994482487921526e-05, + "loss": 1.3135, + "step": 6787 + }, + { + "epoch": 0.01, + "learning_rate": 4.999448083763841e-05, + "loss": 1.1197, + "step": 6788 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447918710857e-05, + "loss": 0.989, + "step": 6789 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447753633198e-05, + "loss": 1.0585, + "step": 6790 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994475885308654e-05, + "loss": 1.1427, + "step": 6791 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994474234038604e-05, + "loss": 1.2104, + "step": 6792 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447258252181e-05, + "loss": 1.1049, + "step": 6793 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447093075829e-05, + "loss": 1.2225, + "step": 6794 + }, + { + "epoch": 0.01, + "learning_rate": 4.999446927874803e-05, + "loss": 1.271, + "step": 6795 + }, + { + "epoch": 0.01, + "learning_rate": 4.999446762649104e-05, + "loss": 1.402, + "step": 6796 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994465973987304e-05, + "loss": 1.1503, + "step": 6797 + }, + { + "epoch": 0.01, + "learning_rate": 4.999446432123684e-05, + "loss": 1.2106, + "step": 6798 + }, + { + "epoch": 0.01, + "learning_rate": 4.999446266823964e-05, + "loss": 1.0387, + "step": 6799 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994461014995706e-05, + "loss": 1.1426, + "step": 6800 + }, + { + "epoch": 0.01, + "learning_rate": 4.999445936150504e-05, + "loss": 1.159, + "step": 6801 + }, + { + "epoch": 0.01, + "learning_rate": 4.999445770776764e-05, + "loss": 1.0738, + "step": 6802 + }, + { + "epoch": 0.01, + "learning_rate": 4.99944560537835e-05, + "loss": 1.0195, + "step": 6803 + }, + { + "epoch": 0.01, + "learning_rate": 4.999445439955263e-05, + "loss": 1.1082, + "step": 6804 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994452745075015e-05, + "loss": 1.1216, + "step": 6805 + }, + { + "epoch": 0.01, + "learning_rate": 4.999445109035068e-05, + "loss": 0.9553, + "step": 6806 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994449435379595e-05, + "loss": 0.9602, + "step": 6807 + }, + { + "epoch": 0.01, + "learning_rate": 4.999444778016178e-05, + "loss": 1.156, + "step": 6808 + }, + { + "epoch": 0.01, + "learning_rate": 4.999444612469724e-05, + "loss": 0.9277, + "step": 6809 + }, + { + "epoch": 0.01, + "learning_rate": 4.999444446898596e-05, + "loss": 0.9691, + "step": 6810 + }, + { + "epoch": 0.01, + "learning_rate": 4.999444281302794e-05, + "loss": 1.2191, + "step": 6811 + }, + { + "epoch": 0.01, + "learning_rate": 4.999444115682319e-05, + "loss": 1.2958, + "step": 6812 + }, + { + "epoch": 0.01, + "learning_rate": 4.999443950037171e-05, + "loss": 1.5685, + "step": 6813 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994437843673484e-05, + "loss": 1.3699, + "step": 6814 + }, + { + "epoch": 0.01, + "learning_rate": 4.999443618672853e-05, + "loss": 1.0349, + "step": 6815 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994434529536847e-05, + "loss": 1.0596, + "step": 6816 + }, + { + "epoch": 0.01, + "learning_rate": 4.999443287209842e-05, + "loss": 0.853, + "step": 6817 + }, + { + "epoch": 0.01, + "learning_rate": 4.999443121441326e-05, + "loss": 0.9858, + "step": 6818 + }, + { + "epoch": 0.01, + "learning_rate": 4.999442955648137e-05, + "loss": 1.1507, + "step": 6819 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994427898302744e-05, + "loss": 1.123, + "step": 6820 + }, + { + "epoch": 0.01, + "learning_rate": 4.999442623987739e-05, + "loss": 0.9609, + "step": 6821 + }, + { + "epoch": 0.01, + "learning_rate": 4.999442458120529e-05, + "loss": 1.2603, + "step": 6822 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994422922286455e-05, + "loss": 0.9679, + "step": 6823 + }, + { + "epoch": 0.01, + "learning_rate": 4.999442126312089e-05, + "loss": 1.1821, + "step": 6824 + }, + { + "epoch": 0.01, + "learning_rate": 4.99944196037086e-05, + "loss": 1.0709, + "step": 6825 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994417944049563e-05, + "loss": 1.222, + "step": 6826 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994416284143796e-05, + "loss": 1.2295, + "step": 6827 + }, + { + "epoch": 0.01, + "learning_rate": 4.999441462399129e-05, + "loss": 0.9857, + "step": 6828 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994412963592057e-05, + "loss": 0.7959, + "step": 6829 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994411302946084e-05, + "loss": 0.9756, + "step": 6830 + }, + { + "epoch": 0.01, + "learning_rate": 4.999440964205338e-05, + "loss": 1.1994, + "step": 6831 + }, + { + "epoch": 0.01, + "learning_rate": 4.999440798091394e-05, + "loss": 1.4401, + "step": 6832 + }, + { + "epoch": 0.01, + "learning_rate": 4.999440631952777e-05, + "loss": 0.9325, + "step": 6833 + }, + { + "epoch": 0.01, + "learning_rate": 4.999440465789486e-05, + "loss": 0.7858, + "step": 6834 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994402996015223e-05, + "loss": 0.8887, + "step": 6835 + }, + { + "epoch": 0.01, + "learning_rate": 4.999440133388884e-05, + "loss": 1.0236, + "step": 6836 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994399671515734e-05, + "loss": 1.1955, + "step": 6837 + }, + { + "epoch": 0.01, + "learning_rate": 4.999439800889589e-05, + "loss": 0.9023, + "step": 6838 + }, + { + "epoch": 0.01, + "learning_rate": 4.999439634602931e-05, + "loss": 1.3026, + "step": 6839 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994394682916005e-05, + "loss": 1.0394, + "step": 6840 + }, + { + "epoch": 0.01, + "learning_rate": 4.999439301955595e-05, + "loss": 0.9914, + "step": 6841 + }, + { + "epoch": 0.01, + "learning_rate": 4.999439135594917e-05, + "loss": 1.3485, + "step": 6842 + }, + { + "epoch": 0.01, + "learning_rate": 4.999438969209565e-05, + "loss": 1.207, + "step": 6843 + }, + { + "epoch": 0.01, + "learning_rate": 4.999438802799541e-05, + "loss": 0.8097, + "step": 6844 + }, + { + "epoch": 0.01, + "learning_rate": 4.999438636364843e-05, + "loss": 1.2124, + "step": 6845 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994384699054706e-05, + "loss": 1.2324, + "step": 6846 + }, + { + "epoch": 0.01, + "learning_rate": 4.999438303421425e-05, + "loss": 1.0811, + "step": 6847 + }, + { + "epoch": 0.01, + "learning_rate": 4.999438136912707e-05, + "loss": 0.9873, + "step": 6848 + }, + { + "epoch": 0.01, + "learning_rate": 4.999437970379315e-05, + "loss": 1.0589, + "step": 6849 + }, + { + "epoch": 0.01, + "learning_rate": 4.99943780382125e-05, + "loss": 1.2425, + "step": 6850 + }, + { + "epoch": 0.01, + "learning_rate": 4.999437637238511e-05, + "loss": 0.6193, + "step": 6851 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994374706310985e-05, + "loss": 0.4063, + "step": 6852 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994373039990136e-05, + "loss": 0.3648, + "step": 6853 + }, + { + "epoch": 0.01, + "learning_rate": 4.999437137342254e-05, + "loss": 0.3846, + "step": 6854 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994369706608226e-05, + "loss": 0.3362, + "step": 6855 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994368039547165e-05, + "loss": 0.3245, + "step": 6856 + }, + { + "epoch": 0.01, + "learning_rate": 4.999436637223937e-05, + "loss": 0.3606, + "step": 6857 + }, + { + "epoch": 0.01, + "learning_rate": 4.999436470468485e-05, + "loss": 0.3314, + "step": 6858 + }, + { + "epoch": 0.01, + "learning_rate": 4.999436303688359e-05, + "loss": 0.4419, + "step": 6859 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994361368835595e-05, + "loss": 0.9462, + "step": 6860 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994359700540874e-05, + "loss": 1.5972, + "step": 6861 + }, + { + "epoch": 0.01, + "learning_rate": 4.999435803199941e-05, + "loss": 1.2746, + "step": 6862 + }, + { + "epoch": 0.01, + "learning_rate": 4.999435636321122e-05, + "loss": 1.2198, + "step": 6863 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994354694176295e-05, + "loss": 0.869, + "step": 6864 + }, + { + "epoch": 0.01, + "learning_rate": 4.999435302489463e-05, + "loss": 0.4223, + "step": 6865 + }, + { + "epoch": 0.01, + "learning_rate": 4.999435135536624e-05, + "loss": 0.3452, + "step": 6866 + }, + { + "epoch": 0.01, + "learning_rate": 4.999434968559111e-05, + "loss": 0.6081, + "step": 6867 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994348015569246e-05, + "loss": 1.0303, + "step": 6868 + }, + { + "epoch": 0.01, + "learning_rate": 4.999434634530065e-05, + "loss": 1.0843, + "step": 6869 + }, + { + "epoch": 0.01, + "learning_rate": 4.999434467478532e-05, + "loss": 0.9482, + "step": 6870 + }, + { + "epoch": 0.01, + "learning_rate": 4.999434300402326e-05, + "loss": 0.6892, + "step": 6871 + }, + { + "epoch": 0.01, + "learning_rate": 4.999434133301446e-05, + "loss": 0.5223, + "step": 6872 + }, + { + "epoch": 0.01, + "learning_rate": 4.999433966175893e-05, + "loss": 1.151, + "step": 6873 + }, + { + "epoch": 0.01, + "learning_rate": 4.999433799025667e-05, + "loss": 1.3094, + "step": 6874 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994336318507667e-05, + "loss": 1.0836, + "step": 6875 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994334646511934e-05, + "loss": 1.6371, + "step": 6876 + }, + { + "epoch": 0.01, + "learning_rate": 4.999433297426948e-05, + "loss": 1.6683, + "step": 6877 + }, + { + "epoch": 0.01, + "learning_rate": 4.999433130178027e-05, + "loss": 1.7682, + "step": 6878 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994329629044345e-05, + "loss": 1.4243, + "step": 6879 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994327956061684e-05, + "loss": 1.7879, + "step": 6880 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994326282832285e-05, + "loss": 1.7397, + "step": 6881 + }, + { + "epoch": 0.01, + "learning_rate": 4.999432460935615e-05, + "loss": 1.1371, + "step": 6882 + }, + { + "epoch": 0.01, + "learning_rate": 4.999432293563328e-05, + "loss": 1.1943, + "step": 6883 + }, + { + "epoch": 0.01, + "learning_rate": 4.999432126166368e-05, + "loss": 1.2871, + "step": 6884 + }, + { + "epoch": 0.01, + "learning_rate": 4.999431958744735e-05, + "loss": 1.2688, + "step": 6885 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994317912984284e-05, + "loss": 1.5491, + "step": 6886 + }, + { + "epoch": 0.01, + "learning_rate": 4.999431623827449e-05, + "loss": 1.1822, + "step": 6887 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994314563317955e-05, + "loss": 1.2313, + "step": 6888 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994312888114685e-05, + "loss": 1.0805, + "step": 6889 + }, + { + "epoch": 0.01, + "learning_rate": 4.999431121266469e-05, + "loss": 1.0169, + "step": 6890 + }, + { + "epoch": 0.01, + "learning_rate": 4.999430953696795e-05, + "loss": 1.4716, + "step": 6891 + }, + { + "epoch": 0.01, + "learning_rate": 4.999430786102449e-05, + "loss": 1.3546, + "step": 6892 + }, + { + "epoch": 0.01, + "learning_rate": 4.999430618483429e-05, + "loss": 1.2015, + "step": 6893 + }, + { + "epoch": 0.01, + "learning_rate": 4.999430450839736e-05, + "loss": 0.8801, + "step": 6894 + }, + { + "epoch": 0.01, + "learning_rate": 4.999430283171369e-05, + "loss": 1.2549, + "step": 6895 + }, + { + "epoch": 0.01, + "learning_rate": 4.99943011547833e-05, + "loss": 1.0718, + "step": 6896 + }, + { + "epoch": 0.01, + "learning_rate": 4.999429947760617e-05, + "loss": 1.2091, + "step": 6897 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994297800182296e-05, + "loss": 1.1126, + "step": 6898 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994296122511694e-05, + "loss": 0.8508, + "step": 6899 + }, + { + "epoch": 0.01, + "learning_rate": 4.999429444459437e-05, + "loss": 1.2032, + "step": 6900 + }, + { + "epoch": 0.01, + "learning_rate": 4.99942927664303e-05, + "loss": 1.1239, + "step": 6901 + }, + { + "epoch": 0.01, + "learning_rate": 4.999429108801951e-05, + "loss": 1.1106, + "step": 6902 + }, + { + "epoch": 0.01, + "learning_rate": 4.999428940936197e-05, + "loss": 1.2155, + "step": 6903 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994287730457715e-05, + "loss": 1.0073, + "step": 6904 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994286051306715e-05, + "loss": 0.7326, + "step": 6905 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994284371908985e-05, + "loss": 1.1721, + "step": 6906 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994282692264525e-05, + "loss": 1.2096, + "step": 6907 + }, + { + "epoch": 0.01, + "learning_rate": 4.999428101237332e-05, + "loss": 0.9284, + "step": 6908 + }, + { + "epoch": 0.01, + "learning_rate": 4.999427933223539e-05, + "loss": 1.0399, + "step": 6909 + }, + { + "epoch": 0.01, + "learning_rate": 4.999427765185073e-05, + "loss": 1.6415, + "step": 6910 + }, + { + "epoch": 0.01, + "learning_rate": 4.999427597121934e-05, + "loss": 1.1169, + "step": 6911 + }, + { + "epoch": 0.01, + "learning_rate": 4.999427429034121e-05, + "loss": 1.2315, + "step": 6912 + }, + { + "epoch": 0.01, + "learning_rate": 4.999427260921635e-05, + "loss": 1.3721, + "step": 6913 + }, + { + "epoch": 0.01, + "learning_rate": 4.999427092784476e-05, + "loss": 1.1605, + "step": 6914 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994269246226425e-05, + "loss": 1.0359, + "step": 6915 + }, + { + "epoch": 0.01, + "learning_rate": 4.999426756436137e-05, + "loss": 1.2378, + "step": 6916 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994265882249576e-05, + "loss": 1.1071, + "step": 6917 + }, + { + "epoch": 0.01, + "learning_rate": 4.999426419989105e-05, + "loss": 0.9853, + "step": 6918 + }, + { + "epoch": 0.01, + "learning_rate": 4.999426251728579e-05, + "loss": 0.9752, + "step": 6919 + }, + { + "epoch": 0.01, + "learning_rate": 4.99942608344338e-05, + "loss": 1.0607, + "step": 6920 + }, + { + "epoch": 0.01, + "learning_rate": 4.999425915133508e-05, + "loss": 1.1958, + "step": 6921 + }, + { + "epoch": 0.01, + "learning_rate": 4.999425746798962e-05, + "loss": 1.3917, + "step": 6922 + }, + { + "epoch": 0.01, + "learning_rate": 4.999425578439743e-05, + "loss": 0.859, + "step": 6923 + }, + { + "epoch": 0.01, + "learning_rate": 4.999425410055851e-05, + "loss": 0.5413, + "step": 6924 + }, + { + "epoch": 0.01, + "learning_rate": 4.999425241647285e-05, + "loss": 0.7141, + "step": 6925 + }, + { + "epoch": 0.01, + "learning_rate": 4.999425073214047e-05, + "loss": 0.6767, + "step": 6926 + }, + { + "epoch": 0.01, + "learning_rate": 4.999424904756135e-05, + "loss": 1.3069, + "step": 6927 + }, + { + "epoch": 0.01, + "learning_rate": 4.999424736273549e-05, + "loss": 1.1934, + "step": 6928 + }, + { + "epoch": 0.01, + "learning_rate": 4.99942456776629e-05, + "loss": 1.2299, + "step": 6929 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994243992343585e-05, + "loss": 0.6811, + "step": 6930 + }, + { + "epoch": 0.01, + "learning_rate": 4.999424230677754e-05, + "loss": 0.117, + "step": 6931 + }, + { + "epoch": 0.01, + "learning_rate": 4.999424062096475e-05, + "loss": 0.1434, + "step": 6932 + }, + { + "epoch": 0.01, + "learning_rate": 4.999423893490524e-05, + "loss": 0.0817, + "step": 6933 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994237248598984e-05, + "loss": 0.0638, + "step": 6934 + }, + { + "epoch": 0.01, + "learning_rate": 4.999423556204601e-05, + "loss": 0.234, + "step": 6935 + }, + { + "epoch": 0.01, + "learning_rate": 4.999423387524629e-05, + "loss": 0.0815, + "step": 6936 + }, + { + "epoch": 0.01, + "learning_rate": 4.999423218819984e-05, + "loss": 0.3098, + "step": 6937 + }, + { + "epoch": 0.01, + "learning_rate": 4.999423050090667e-05, + "loss": 0.9545, + "step": 6938 + }, + { + "epoch": 0.01, + "learning_rate": 4.999422881336675e-05, + "loss": 1.0009, + "step": 6939 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994227125580106e-05, + "loss": 0.3902, + "step": 6940 + }, + { + "epoch": 0.01, + "learning_rate": 4.999422543754673e-05, + "loss": 0.1013, + "step": 6941 + }, + { + "epoch": 0.01, + "learning_rate": 4.999422374926662e-05, + "loss": 3.111, + "step": 6942 + }, + { + "epoch": 0.01, + "learning_rate": 4.999422206073978e-05, + "loss": 5.4106, + "step": 6943 + }, + { + "epoch": 0.01, + "learning_rate": 4.99942203719662e-05, + "loss": 4.7675, + "step": 6944 + }, + { + "epoch": 0.01, + "learning_rate": 4.999421868294591e-05, + "loss": 4.1092, + "step": 6945 + }, + { + "epoch": 0.01, + "learning_rate": 4.999421699367887e-05, + "loss": 0.9569, + "step": 6946 + }, + { + "epoch": 0.01, + "learning_rate": 4.999421530416509e-05, + "loss": 1.146, + "step": 6947 + }, + { + "epoch": 0.01, + "learning_rate": 4.999421361440458e-05, + "loss": 0.9052, + "step": 6948 + }, + { + "epoch": 0.01, + "learning_rate": 4.999421192439735e-05, + "loss": 0.9786, + "step": 6949 + }, + { + "epoch": 0.01, + "learning_rate": 4.999421023414338e-05, + "loss": 1.1034, + "step": 6950 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994208543642685e-05, + "loss": 1.2519, + "step": 6951 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994206852895255e-05, + "loss": 1.1061, + "step": 6952 + }, + { + "epoch": 0.01, + "learning_rate": 4.999420516190109e-05, + "loss": 0.933, + "step": 6953 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994203470660184e-05, + "loss": 0.9922, + "step": 6954 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994201779172556e-05, + "loss": 0.9366, + "step": 6955 + }, + { + "epoch": 0.01, + "learning_rate": 4.999420008743819e-05, + "loss": 1.1203, + "step": 6956 + }, + { + "epoch": 0.01, + "learning_rate": 4.99941983954571e-05, + "loss": 1.1666, + "step": 6957 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994196703229276e-05, + "loss": 1.0677, + "step": 6958 + }, + { + "epoch": 0.01, + "learning_rate": 4.999419501075472e-05, + "loss": 1.1357, + "step": 6959 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994193318033425e-05, + "loss": 1.1193, + "step": 6960 + }, + { + "epoch": 0.01, + "learning_rate": 4.99941916250654e-05, + "loss": 1.3423, + "step": 6961 + }, + { + "epoch": 0.01, + "learning_rate": 4.999418993185065e-05, + "loss": 1.2765, + "step": 6962 + }, + { + "epoch": 0.01, + "learning_rate": 4.999418823838916e-05, + "loss": 0.8398, + "step": 6963 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994186544680943e-05, + "loss": 0.3674, + "step": 6964 + }, + { + "epoch": 0.01, + "learning_rate": 4.999418485072599e-05, + "loss": 0.4106, + "step": 6965 + }, + { + "epoch": 0.01, + "learning_rate": 4.999418315652431e-05, + "loss": 0.5539, + "step": 6966 + }, + { + "epoch": 0.01, + "learning_rate": 4.99941814620759e-05, + "loss": 1.1158, + "step": 6967 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994179767380746e-05, + "loss": 1.0879, + "step": 6968 + }, + { + "epoch": 0.01, + "learning_rate": 4.999417807243887e-05, + "loss": 0.9524, + "step": 6969 + }, + { + "epoch": 0.01, + "learning_rate": 4.999417637725026e-05, + "loss": 0.9135, + "step": 6970 + }, + { + "epoch": 0.01, + "learning_rate": 4.999417468181491e-05, + "loss": 0.9985, + "step": 6971 + }, + { + "epoch": 0.01, + "learning_rate": 4.999417298613284e-05, + "loss": 1.372, + "step": 6972 + }, + { + "epoch": 0.01, + "learning_rate": 4.999417129020403e-05, + "loss": 1.4207, + "step": 6973 + }, + { + "epoch": 0.01, + "learning_rate": 4.999416959402849e-05, + "loss": 1.0777, + "step": 6974 + }, + { + "epoch": 0.01, + "learning_rate": 4.999416789760622e-05, + "loss": 1.1585, + "step": 6975 + }, + { + "epoch": 0.01, + "learning_rate": 4.999416620093722e-05, + "loss": 1.0714, + "step": 6976 + }, + { + "epoch": 0.01, + "learning_rate": 4.999416450402148e-05, + "loss": 1.1122, + "step": 6977 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994162806859014e-05, + "loss": 1.3217, + "step": 6978 + }, + { + "epoch": 0.01, + "learning_rate": 4.999416110944982e-05, + "loss": 1.0181, + "step": 6979 + }, + { + "epoch": 0.01, + "learning_rate": 4.999415941179388e-05, + "loss": 1.3369, + "step": 6980 + }, + { + "epoch": 0.01, + "learning_rate": 4.999415771389122e-05, + "loss": 1.1256, + "step": 6981 + }, + { + "epoch": 0.01, + "learning_rate": 4.999415601574183e-05, + "loss": 1.1828, + "step": 6982 + }, + { + "epoch": 0.01, + "learning_rate": 4.99941543173457e-05, + "loss": 1.3077, + "step": 6983 + }, + { + "epoch": 0.01, + "learning_rate": 4.999415261870285e-05, + "loss": 0.9496, + "step": 6984 + }, + { + "epoch": 0.01, + "learning_rate": 4.999415091981326e-05, + "loss": 1.2384, + "step": 6985 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994149220676935e-05, + "loss": 0.9296, + "step": 6986 + }, + { + "epoch": 0.01, + "learning_rate": 4.999414752129389e-05, + "loss": 1.3176, + "step": 6987 + }, + { + "epoch": 0.01, + "learning_rate": 4.99941458216641e-05, + "loss": 0.3747, + "step": 6988 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994144121787586e-05, + "loss": 0.1655, + "step": 6989 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994142421664336e-05, + "loss": 0.2205, + "step": 6990 + }, + { + "epoch": 0.01, + "learning_rate": 4.999414072129436e-05, + "loss": 0.2001, + "step": 6991 + }, + { + "epoch": 0.01, + "learning_rate": 4.999413902067764e-05, + "loss": 0.2171, + "step": 6992 + }, + { + "epoch": 0.01, + "learning_rate": 4.999413731981421e-05, + "loss": 0.8767, + "step": 6993 + }, + { + "epoch": 0.01, + "learning_rate": 4.999413561870403e-05, + "loss": 1.1458, + "step": 6994 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994133917347124e-05, + "loss": 1.0598, + "step": 6995 + }, + { + "epoch": 0.01, + "learning_rate": 4.999413221574348e-05, + "loss": 0.576, + "step": 6996 + }, + { + "epoch": 0.01, + "learning_rate": 4.999413051389312e-05, + "loss": 0.6216, + "step": 6997 + }, + { + "epoch": 0.01, + "learning_rate": 4.999412881179602e-05, + "loss": 0.5405, + "step": 6998 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994127109452185e-05, + "loss": 0.9278, + "step": 6999 + }, + { + "epoch": 0.01, + "learning_rate": 4.999412540686162e-05, + "loss": 0.918, + "step": 7000 + }, + { + "epoch": 0.01, + "eval_loss": 1.0878294706344604, + "eval_runtime": 84.2959, + "eval_samples_per_second": 16.43, + "eval_steps_per_second": 4.116, + "step": 7000 + }, + { + "epoch": 0.01, + "learning_rate": 4.999412370402433e-05, + "loss": 1.0767, + "step": 7001 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994122000940305e-05, + "loss": 1.2599, + "step": 7002 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994120297609543e-05, + "loss": 1.2067, + "step": 7003 + }, + { + "epoch": 0.01, + "learning_rate": 4.999411859403206e-05, + "loss": 1.2545, + "step": 7004 + }, + { + "epoch": 0.01, + "learning_rate": 4.999411689020783e-05, + "loss": 1.119, + "step": 7005 + }, + { + "epoch": 0.01, + "learning_rate": 4.999411518613688e-05, + "loss": 1.1328, + "step": 7006 + }, + { + "epoch": 0.01, + "learning_rate": 4.99941134818192e-05, + "loss": 1.1584, + "step": 7007 + }, + { + "epoch": 0.01, + "learning_rate": 4.999411177725478e-05, + "loss": 1.1062, + "step": 7008 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994110072443635e-05, + "loss": 1.0393, + "step": 7009 + }, + { + "epoch": 0.01, + "learning_rate": 4.999410836738576e-05, + "loss": 0.9956, + "step": 7010 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994106662081155e-05, + "loss": 1.1625, + "step": 7011 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994104956529805e-05, + "loss": 0.7091, + "step": 7012 + }, + { + "epoch": 0.01, + "learning_rate": 4.999410325073174e-05, + "loss": 1.0893, + "step": 7013 + }, + { + "epoch": 0.01, + "learning_rate": 4.999410154468694e-05, + "loss": 1.008, + "step": 7014 + }, + { + "epoch": 0.01, + "learning_rate": 4.99940998383954e-05, + "loss": 1.069, + "step": 7015 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994098131857135e-05, + "loss": 1.0903, + "step": 7016 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994096425072147e-05, + "loss": 1.1329, + "step": 7017 + }, + { + "epoch": 0.01, + "learning_rate": 4.999409471804042e-05, + "loss": 1.099, + "step": 7018 + }, + { + "epoch": 0.01, + "learning_rate": 4.999409301076196e-05, + "loss": 0.8939, + "step": 7019 + }, + { + "epoch": 0.01, + "learning_rate": 4.999409130323677e-05, + "loss": 1.1571, + "step": 7020 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994089595464846e-05, + "loss": 1.0265, + "step": 7021 + }, + { + "epoch": 0.01, + "learning_rate": 4.99940878874462e-05, + "loss": 1.7861, + "step": 7022 + }, + { + "epoch": 0.01, + "learning_rate": 4.999408617918081e-05, + "loss": 1.5894, + "step": 7023 + }, + { + "epoch": 0.01, + "learning_rate": 4.99940844706687e-05, + "loss": 1.3719, + "step": 7024 + }, + { + "epoch": 0.01, + "learning_rate": 4.999408276190986e-05, + "loss": 1.1614, + "step": 7025 + }, + { + "epoch": 0.01, + "learning_rate": 4.999408105290427e-05, + "loss": 1.1558, + "step": 7026 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994079343651965e-05, + "loss": 1.1941, + "step": 7027 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994077634152934e-05, + "loss": 1.3192, + "step": 7028 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994075924407165e-05, + "loss": 1.0487, + "step": 7029 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994074214414666e-05, + "loss": 1.1189, + "step": 7030 + }, + { + "epoch": 0.01, + "learning_rate": 4.999407250417543e-05, + "loss": 1.2138, + "step": 7031 + }, + { + "epoch": 0.01, + "learning_rate": 4.999407079368947e-05, + "loss": 0.9906, + "step": 7032 + }, + { + "epoch": 0.01, + "learning_rate": 4.999406908295677e-05, + "loss": 1.2017, + "step": 7033 + }, + { + "epoch": 0.01, + "learning_rate": 4.999406737197735e-05, + "loss": 1.2657, + "step": 7034 + }, + { + "epoch": 0.01, + "learning_rate": 4.99940656607512e-05, + "loss": 1.2592, + "step": 7035 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994063949278316e-05, + "loss": 1.2166, + "step": 7036 + }, + { + "epoch": 0.01, + "learning_rate": 4.999406223755869e-05, + "loss": 1.1549, + "step": 7037 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994060525592347e-05, + "loss": 0.8705, + "step": 7038 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994058813379266e-05, + "loss": 1.2059, + "step": 7039 + }, + { + "epoch": 0.01, + "learning_rate": 4.999405710091946e-05, + "loss": 1.0886, + "step": 7040 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994055388212926e-05, + "loss": 0.1385, + "step": 7041 + }, + { + "epoch": 0.01, + "learning_rate": 4.999405367525965e-05, + "loss": 0.1784, + "step": 7042 + }, + { + "epoch": 0.01, + "learning_rate": 4.999405196205965e-05, + "loss": 1.1128, + "step": 7043 + }, + { + "epoch": 0.01, + "learning_rate": 4.999405024861292e-05, + "loss": 1.0444, + "step": 7044 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994048534919454e-05, + "loss": 0.9553, + "step": 7045 + }, + { + "epoch": 0.01, + "learning_rate": 4.999404682097926e-05, + "loss": 1.2984, + "step": 7046 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994045106792334e-05, + "loss": 1.1051, + "step": 7047 + }, + { + "epoch": 0.01, + "learning_rate": 4.999404339235868e-05, + "loss": 1.0497, + "step": 7048 + }, + { + "epoch": 0.01, + "learning_rate": 4.999404167767829e-05, + "loss": 0.6193, + "step": 7049 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994039962751176e-05, + "loss": 1.0276, + "step": 7050 + }, + { + "epoch": 0.01, + "learning_rate": 4.999403824757733e-05, + "loss": 1.0596, + "step": 7051 + }, + { + "epoch": 0.01, + "learning_rate": 4.999403653215675e-05, + "loss": 0.7923, + "step": 7052 + }, + { + "epoch": 0.01, + "learning_rate": 4.999403481648944e-05, + "loss": 1.1786, + "step": 7053 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994033100575404e-05, + "loss": 1.2927, + "step": 7054 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994031384414635e-05, + "loss": 1.0812, + "step": 7055 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994029668007135e-05, + "loss": 1.1084, + "step": 7056 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994027951352905e-05, + "loss": 1.2078, + "step": 7057 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994026234451944e-05, + "loss": 0.9318, + "step": 7058 + }, + { + "epoch": 0.01, + "learning_rate": 4.999402451730425e-05, + "loss": 1.1528, + "step": 7059 + }, + { + "epoch": 0.01, + "learning_rate": 4.999402279990984e-05, + "loss": 1.398, + "step": 7060 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994021082268685e-05, + "loss": 1.0382, + "step": 7061 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994019364380795e-05, + "loss": 0.694, + "step": 7062 + }, + { + "epoch": 0.01, + "learning_rate": 4.999401764624618e-05, + "loss": 0.7811, + "step": 7063 + }, + { + "epoch": 0.01, + "learning_rate": 4.999401592786484e-05, + "loss": 1.1132, + "step": 7064 + }, + { + "epoch": 0.01, + "learning_rate": 4.999401420923676e-05, + "loss": 0.9212, + "step": 7065 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994012490361965e-05, + "loss": 0.4159, + "step": 7066 + }, + { + "epoch": 0.01, + "learning_rate": 4.999401077124043e-05, + "loss": 0.5159, + "step": 7067 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994009051872156e-05, + "loss": 1.156, + "step": 7068 + }, + { + "epoch": 0.01, + "learning_rate": 4.9994007332257166e-05, + "loss": 0.8949, + "step": 7069 + }, + { + "epoch": 0.01, + "learning_rate": 4.999400561239544e-05, + "loss": 0.7439, + "step": 7070 + }, + { + "epoch": 0.01, + "learning_rate": 4.999400389228698e-05, + "loss": 1.122, + "step": 7071 + }, + { + "epoch": 0.01, + "learning_rate": 4.99940021719318e-05, + "loss": 1.1978, + "step": 7072 + }, + { + "epoch": 0.01, + "learning_rate": 4.999400045132988e-05, + "loss": 1.0629, + "step": 7073 + }, + { + "epoch": 0.01, + "learning_rate": 4.999399873048123e-05, + "loss": 0.4868, + "step": 7074 + }, + { + "epoch": 0.01, + "learning_rate": 4.999399700938586e-05, + "loss": 1.0516, + "step": 7075 + }, + { + "epoch": 0.01, + "learning_rate": 4.999399528804375e-05, + "loss": 1.1544, + "step": 7076 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993993566454913e-05, + "loss": 1.2188, + "step": 7077 + }, + { + "epoch": 0.01, + "learning_rate": 4.999399184461935e-05, + "loss": 0.8363, + "step": 7078 + }, + { + "epoch": 0.01, + "learning_rate": 4.999399012253705e-05, + "loss": 0.7173, + "step": 7079 + }, + { + "epoch": 0.01, + "learning_rate": 4.999398840020802e-05, + "loss": 0.535, + "step": 7080 + }, + { + "epoch": 0.01, + "learning_rate": 4.999398667763226e-05, + "loss": 0.6233, + "step": 7081 + }, + { + "epoch": 0.01, + "learning_rate": 4.999398495480977e-05, + "loss": 0.4848, + "step": 7082 + }, + { + "epoch": 0.01, + "learning_rate": 4.999398323174055e-05, + "loss": 1.0284, + "step": 7083 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993981508424605e-05, + "loss": 1.0248, + "step": 7084 + }, + { + "epoch": 0.01, + "learning_rate": 4.999397978486193e-05, + "loss": 1.3033, + "step": 7085 + }, + { + "epoch": 0.01, + "learning_rate": 4.999397806105252e-05, + "loss": 1.3483, + "step": 7086 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993976336996385e-05, + "loss": 1.154, + "step": 7087 + }, + { + "epoch": 0.01, + "learning_rate": 4.999397461269351e-05, + "loss": 1.2915, + "step": 7088 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993972888143914e-05, + "loss": 1.0423, + "step": 7089 + }, + { + "epoch": 0.01, + "learning_rate": 4.999397116334758e-05, + "loss": 1.1034, + "step": 7090 + }, + { + "epoch": 0.01, + "learning_rate": 4.999396943830453e-05, + "loss": 0.9868, + "step": 7091 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993967713014736e-05, + "loss": 0.2809, + "step": 7092 + }, + { + "epoch": 0.01, + "learning_rate": 4.999396598747822e-05, + "loss": 0.3869, + "step": 7093 + }, + { + "epoch": 0.01, + "learning_rate": 4.999396426169497e-05, + "loss": 1.2138, + "step": 7094 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993962535665e-05, + "loss": 1.1352, + "step": 7095 + }, + { + "epoch": 0.01, + "learning_rate": 4.999396080938828e-05, + "loss": 0.7317, + "step": 7096 + }, + { + "epoch": 0.01, + "learning_rate": 4.999395908286485e-05, + "loss": 1.0277, + "step": 7097 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993957356094676e-05, + "loss": 1.166, + "step": 7098 + }, + { + "epoch": 0.01, + "learning_rate": 4.999395562907778e-05, + "loss": 1.1118, + "step": 7099 + }, + { + "epoch": 0.01, + "learning_rate": 4.999395390181416e-05, + "loss": 1.0271, + "step": 7100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999395217430381e-05, + "loss": 1.1238, + "step": 7101 + }, + { + "epoch": 0.01, + "learning_rate": 4.999395044654671e-05, + "loss": 0.994, + "step": 7102 + }, + { + "epoch": 0.01, + "learning_rate": 4.99939487185429e-05, + "loss": 1.1137, + "step": 7103 + }, + { + "epoch": 0.01, + "learning_rate": 4.999394699029235e-05, + "loss": 1.2896, + "step": 7104 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993945261795074e-05, + "loss": 1.1667, + "step": 7105 + }, + { + "epoch": 0.01, + "learning_rate": 4.999394353305107e-05, + "loss": 1.2486, + "step": 7106 + }, + { + "epoch": 0.01, + "learning_rate": 4.999394180406034e-05, + "loss": 1.0502, + "step": 7107 + }, + { + "epoch": 0.01, + "learning_rate": 4.999394007482287e-05, + "loss": 1.0564, + "step": 7108 + }, + { + "epoch": 0.01, + "learning_rate": 4.999393834533867e-05, + "loss": 2.3266, + "step": 7109 + }, + { + "epoch": 0.01, + "learning_rate": 4.999393661560775e-05, + "loss": 1.1655, + "step": 7110 + }, + { + "epoch": 0.01, + "learning_rate": 4.99939348856301e-05, + "loss": 1.1056, + "step": 7111 + }, + { + "epoch": 0.01, + "learning_rate": 4.999393315540572e-05, + "loss": 0.9551, + "step": 7112 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993931424934604e-05, + "loss": 1.2253, + "step": 7113 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993929694216754e-05, + "loss": 1.2538, + "step": 7114 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993927963252186e-05, + "loss": 1.1117, + "step": 7115 + }, + { + "epoch": 0.01, + "learning_rate": 4.999392623204089e-05, + "loss": 1.1508, + "step": 7116 + }, + { + "epoch": 0.01, + "learning_rate": 4.999392450058285e-05, + "loss": 1.3282, + "step": 7117 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993922768878094e-05, + "loss": 1.2517, + "step": 7118 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993921036926605e-05, + "loss": 0.919, + "step": 7119 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993919304728385e-05, + "loss": 1.1984, + "step": 7120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993917572283434e-05, + "loss": 1.0078, + "step": 7121 + }, + { + "epoch": 0.01, + "learning_rate": 4.999391583959175e-05, + "loss": 1.0748, + "step": 7122 + }, + { + "epoch": 0.01, + "learning_rate": 4.999391410665335e-05, + "loss": 1.374, + "step": 7123 + }, + { + "epoch": 0.01, + "learning_rate": 4.999391237346821e-05, + "loss": 1.2001, + "step": 7124 + }, + { + "epoch": 0.01, + "learning_rate": 4.999391064003635e-05, + "loss": 1.3737, + "step": 7125 + }, + { + "epoch": 0.01, + "learning_rate": 4.999390890635775e-05, + "loss": 1.2602, + "step": 7126 + }, + { + "epoch": 0.01, + "learning_rate": 4.999390717243242e-05, + "loss": 1.1102, + "step": 7127 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993905438260366e-05, + "loss": 1.1347, + "step": 7128 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993903703841584e-05, + "loss": 0.888, + "step": 7129 + }, + { + "epoch": 0.01, + "learning_rate": 4.999390196917607e-05, + "loss": 0.718, + "step": 7130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999390023426383e-05, + "loss": 0.4841, + "step": 7131 + }, + { + "epoch": 0.01, + "learning_rate": 4.999389849910486e-05, + "loss": 0.3925, + "step": 7132 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993896763699154e-05, + "loss": 0.3584, + "step": 7133 + }, + { + "epoch": 0.01, + "learning_rate": 4.999389502804673e-05, + "loss": 0.2742, + "step": 7134 + }, + { + "epoch": 0.01, + "learning_rate": 4.999389329214756e-05, + "loss": 0.5736, + "step": 7135 + }, + { + "epoch": 0.01, + "learning_rate": 4.999389155600168e-05, + "loss": 1.1826, + "step": 7136 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388981960906e-05, + "loss": 1.0934, + "step": 7137 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388808296971e-05, + "loss": 1.0974, + "step": 7138 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388634608364e-05, + "loss": 1.0185, + "step": 7139 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993884608950836e-05, + "loss": 1.0648, + "step": 7140 + }, + { + "epoch": 0.01, + "learning_rate": 4.99938828715713e-05, + "loss": 1.1015, + "step": 7141 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388113394504e-05, + "loss": 1.2112, + "step": 7142 + }, + { + "epoch": 0.01, + "learning_rate": 4.999387939607205e-05, + "loss": 1.2462, + "step": 7143 + }, + { + "epoch": 0.01, + "learning_rate": 4.999387765795232e-05, + "loss": 1.2127, + "step": 7144 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993875919585874e-05, + "loss": 1.1119, + "step": 7145 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993874180972694e-05, + "loss": 1.1006, + "step": 7146 + }, + { + "epoch": 0.01, + "learning_rate": 4.999387244211279e-05, + "loss": 1.3689, + "step": 7147 + }, + { + "epoch": 0.01, + "learning_rate": 4.999387070300615e-05, + "loss": 1.1952, + "step": 7148 + }, + { + "epoch": 0.01, + "learning_rate": 4.999386896365278e-05, + "loss": 1.1658, + "step": 7149 + }, + { + "epoch": 0.01, + "learning_rate": 4.999386722405269e-05, + "loss": 1.2052, + "step": 7150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999386548420586e-05, + "loss": 1.1605, + "step": 7151 + }, + { + "epoch": 0.01, + "learning_rate": 4.999386374411231e-05, + "loss": 0.5754, + "step": 7152 + }, + { + "epoch": 0.01, + "learning_rate": 4.999386200377203e-05, + "loss": 1.0458, + "step": 7153 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993860263185015e-05, + "loss": 1.1622, + "step": 7154 + }, + { + "epoch": 0.01, + "learning_rate": 4.999385852235128e-05, + "loss": 1.0383, + "step": 7155 + }, + { + "epoch": 0.01, + "learning_rate": 4.999385678127081e-05, + "loss": 0.9998, + "step": 7156 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993855039943613e-05, + "loss": 0.9812, + "step": 7157 + }, + { + "epoch": 0.01, + "learning_rate": 4.999385329836969e-05, + "loss": 1.1803, + "step": 7158 + }, + { + "epoch": 0.01, + "learning_rate": 4.999385155654903e-05, + "loss": 1.0188, + "step": 7159 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993849814481644e-05, + "loss": 0.9353, + "step": 7160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999384807216754e-05, + "loss": 0.4585, + "step": 7161 + }, + { + "epoch": 0.01, + "learning_rate": 4.999384632960669e-05, + "loss": 0.2882, + "step": 7162 + }, + { + "epoch": 0.01, + "learning_rate": 4.999384458679912e-05, + "loss": 0.2708, + "step": 7163 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993842843744823e-05, + "loss": 0.2178, + "step": 7164 + }, + { + "epoch": 0.01, + "learning_rate": 4.99938411004438e-05, + "loss": 0.3011, + "step": 7165 + }, + { + "epoch": 0.01, + "learning_rate": 4.999383935689605e-05, + "loss": 0.2672, + "step": 7166 + }, + { + "epoch": 0.01, + "learning_rate": 4.999383761310156e-05, + "loss": 0.1637, + "step": 7167 + }, + { + "epoch": 0.01, + "learning_rate": 4.999383586906035e-05, + "loss": 0.156, + "step": 7168 + }, + { + "epoch": 0.01, + "learning_rate": 4.99938341247724e-05, + "loss": 0.1112, + "step": 7169 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993832380237735e-05, + "loss": 0.0966, + "step": 7170 + }, + { + "epoch": 0.01, + "learning_rate": 4.999383063545633e-05, + "loss": 0.071, + "step": 7171 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993828890428206e-05, + "loss": 0.6251, + "step": 7172 + }, + { + "epoch": 0.01, + "learning_rate": 4.999382714515335e-05, + "loss": 0.5264, + "step": 7173 + }, + { + "epoch": 0.01, + "learning_rate": 4.999382539963177e-05, + "loss": 0.9327, + "step": 7174 + }, + { + "epoch": 0.01, + "learning_rate": 4.999382365386345e-05, + "loss": 1.2255, + "step": 7175 + }, + { + "epoch": 0.01, + "learning_rate": 4.999382190784841e-05, + "loss": 1.1865, + "step": 7176 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993820161586636e-05, + "loss": 1.1373, + "step": 7177 + }, + { + "epoch": 0.01, + "learning_rate": 4.999381841507814e-05, + "loss": 0.9976, + "step": 7178 + }, + { + "epoch": 0.01, + "learning_rate": 4.999381666832291e-05, + "loss": 1.1255, + "step": 7179 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993814921320956e-05, + "loss": 1.2367, + "step": 7180 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993813174072275e-05, + "loss": 1.0031, + "step": 7181 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993811426576864e-05, + "loss": 1.0087, + "step": 7182 + }, + { + "epoch": 0.01, + "learning_rate": 4.999380967883472e-05, + "loss": 1.1194, + "step": 7183 + }, + { + "epoch": 0.01, + "learning_rate": 4.999380793084585e-05, + "loss": 1.1359, + "step": 7184 + }, + { + "epoch": 0.01, + "learning_rate": 4.999380618261025e-05, + "loss": 1.5818, + "step": 7185 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993804434127926e-05, + "loss": 1.4419, + "step": 7186 + }, + { + "epoch": 0.01, + "learning_rate": 4.999380268539887e-05, + "loss": 1.1241, + "step": 7187 + }, + { + "epoch": 0.01, + "learning_rate": 4.999380093642309e-05, + "loss": 1.1122, + "step": 7188 + }, + { + "epoch": 0.01, + "learning_rate": 4.999379918720058e-05, + "loss": 1.3919, + "step": 7189 + }, + { + "epoch": 0.01, + "learning_rate": 4.999379743773134e-05, + "loss": 1.2955, + "step": 7190 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993795688015375e-05, + "loss": 1.2744, + "step": 7191 + }, + { + "epoch": 0.01, + "learning_rate": 4.999379393805268e-05, + "loss": 1.4393, + "step": 7192 + }, + { + "epoch": 0.01, + "learning_rate": 4.999379218784326e-05, + "loss": 1.3003, + "step": 7193 + }, + { + "epoch": 0.01, + "learning_rate": 4.999379043738711e-05, + "loss": 1.6819, + "step": 7194 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993788686684226e-05, + "loss": 1.8338, + "step": 7195 + }, + { + "epoch": 0.01, + "learning_rate": 4.999378693573462e-05, + "loss": 1.4827, + "step": 7196 + }, + { + "epoch": 0.01, + "learning_rate": 4.999378518453828e-05, + "loss": 0.9903, + "step": 7197 + }, + { + "epoch": 0.01, + "learning_rate": 4.999378343309522e-05, + "loss": 1.3554, + "step": 7198 + }, + { + "epoch": 0.01, + "learning_rate": 4.999378168140543e-05, + "loss": 1.2596, + "step": 7199 + }, + { + "epoch": 0.01, + "learning_rate": 4.99937799294689e-05, + "loss": 1.0637, + "step": 7200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999377817728566e-05, + "loss": 1.4157, + "step": 7201 + }, + { + "epoch": 0.01, + "learning_rate": 4.999377642485568e-05, + "loss": 1.2872, + "step": 7202 + }, + { + "epoch": 0.01, + "learning_rate": 4.999377467217897e-05, + "loss": 5.7376, + "step": 7203 + }, + { + "epoch": 0.01, + "learning_rate": 4.999377291925554e-05, + "loss": 5.4455, + "step": 7204 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993771166085376e-05, + "loss": 1.0564, + "step": 7205 + }, + { + "epoch": 0.01, + "learning_rate": 4.999376941266849e-05, + "loss": 0.7091, + "step": 7206 + }, + { + "epoch": 0.01, + "learning_rate": 4.999376765900487e-05, + "loss": 1.0153, + "step": 7207 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993765905094535e-05, + "loss": 1.5328, + "step": 7208 + }, + { + "epoch": 0.01, + "learning_rate": 4.999376415093746e-05, + "loss": 0.9528, + "step": 7209 + }, + { + "epoch": 0.01, + "learning_rate": 4.999376239653366e-05, + "loss": 1.1227, + "step": 7210 + }, + { + "epoch": 0.01, + "learning_rate": 4.999376064188313e-05, + "loss": 0.9175, + "step": 7211 + }, + { + "epoch": 0.01, + "learning_rate": 4.999375888698587e-05, + "loss": 1.1229, + "step": 7212 + }, + { + "epoch": 0.01, + "learning_rate": 4.999375713184189e-05, + "loss": 1.1384, + "step": 7213 + }, + { + "epoch": 0.01, + "learning_rate": 4.999375537645118e-05, + "loss": 1.18, + "step": 7214 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993753620813744e-05, + "loss": 1.2178, + "step": 7215 + }, + { + "epoch": 0.01, + "learning_rate": 4.999375186492957e-05, + "loss": 1.4092, + "step": 7216 + }, + { + "epoch": 0.01, + "learning_rate": 4.999375010879868e-05, + "loss": 0.9861, + "step": 7217 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993748352421055e-05, + "loss": 1.0413, + "step": 7218 + }, + { + "epoch": 0.01, + "learning_rate": 4.99937465957967e-05, + "loss": 1.1335, + "step": 7219 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993744838925625e-05, + "loss": 0.6983, + "step": 7220 + }, + { + "epoch": 0.01, + "learning_rate": 4.999374308180782e-05, + "loss": 0.8774, + "step": 7221 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993741324443286e-05, + "loss": 1.1254, + "step": 7222 + }, + { + "epoch": 0.01, + "learning_rate": 4.999373956683202e-05, + "loss": 0.427, + "step": 7223 + }, + { + "epoch": 0.01, + "learning_rate": 4.999373780897404e-05, + "loss": 0.367, + "step": 7224 + }, + { + "epoch": 0.01, + "learning_rate": 4.999373605086932e-05, + "loss": 0.356, + "step": 7225 + }, + { + "epoch": 0.01, + "learning_rate": 4.999373429251788e-05, + "loss": 0.3391, + "step": 7226 + }, + { + "epoch": 0.01, + "learning_rate": 4.99937325339197e-05, + "loss": 0.377, + "step": 7227 + }, + { + "epoch": 0.01, + "learning_rate": 4.99937307750748e-05, + "loss": 0.323, + "step": 7228 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993729015983174e-05, + "loss": 0.293, + "step": 7229 + }, + { + "epoch": 0.01, + "learning_rate": 4.999372725664482e-05, + "loss": 0.2573, + "step": 7230 + }, + { + "epoch": 0.01, + "learning_rate": 4.999372549705974e-05, + "loss": 0.9169, + "step": 7231 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993723737227924e-05, + "loss": 0.7361, + "step": 7232 + }, + { + "epoch": 0.01, + "learning_rate": 4.999372197714939e-05, + "loss": 0.9766, + "step": 7233 + }, + { + "epoch": 0.01, + "learning_rate": 4.999372021682413e-05, + "loss": 1.1306, + "step": 7234 + }, + { + "epoch": 0.01, + "learning_rate": 4.999371845625213e-05, + "loss": 1.3027, + "step": 7235 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993716695433415e-05, + "loss": 0.7542, + "step": 7236 + }, + { + "epoch": 0.01, + "learning_rate": 4.999371493436796e-05, + "loss": 1.1492, + "step": 7237 + }, + { + "epoch": 0.01, + "learning_rate": 4.999371317305579e-05, + "loss": 1.1588, + "step": 7238 + }, + { + "epoch": 0.01, + "learning_rate": 4.999371141149689e-05, + "loss": 0.913, + "step": 7239 + }, + { + "epoch": 0.01, + "learning_rate": 4.999370964969126e-05, + "loss": 1.1554, + "step": 7240 + }, + { + "epoch": 0.01, + "learning_rate": 4.999370788763891e-05, + "loss": 1.176, + "step": 7241 + }, + { + "epoch": 0.01, + "learning_rate": 4.999370612533982e-05, + "loss": 1.0936, + "step": 7242 + }, + { + "epoch": 0.01, + "learning_rate": 4.999370436279401e-05, + "loss": 1.0287, + "step": 7243 + }, + { + "epoch": 0.01, + "learning_rate": 4.999370260000147e-05, + "loss": 1.0089, + "step": 7244 + }, + { + "epoch": 0.01, + "learning_rate": 4.99937008369622e-05, + "loss": 1.0935, + "step": 7245 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993699073676206e-05, + "loss": 1.1778, + "step": 7246 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993697310143486e-05, + "loss": 0.9498, + "step": 7247 + }, + { + "epoch": 0.01, + "learning_rate": 4.999369554636404e-05, + "loss": 1.0388, + "step": 7248 + }, + { + "epoch": 0.01, + "learning_rate": 4.999369378233787e-05, + "loss": 1.2277, + "step": 7249 + }, + { + "epoch": 0.01, + "learning_rate": 4.999369201806496e-05, + "loss": 1.0388, + "step": 7250 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993690253545335e-05, + "loss": 1.2701, + "step": 7251 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993688488778976e-05, + "loss": 1.0766, + "step": 7252 + }, + { + "epoch": 0.01, + "learning_rate": 4.99936867237659e-05, + "loss": 0.5179, + "step": 7253 + }, + { + "epoch": 0.01, + "learning_rate": 4.999368495850608e-05, + "loss": 0.7307, + "step": 7254 + }, + { + "epoch": 0.01, + "learning_rate": 4.999368319299954e-05, + "loss": 1.1805, + "step": 7255 + }, + { + "epoch": 0.01, + "learning_rate": 4.999368142724628e-05, + "loss": 0.9093, + "step": 7256 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993679661246284e-05, + "loss": 1.1146, + "step": 7257 + }, + { + "epoch": 0.01, + "learning_rate": 4.999367789499957e-05, + "loss": 1.3203, + "step": 7258 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993676128506116e-05, + "loss": 1.4558, + "step": 7259 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993674361765947e-05, + "loss": 0.7744, + "step": 7260 + }, + { + "epoch": 0.01, + "learning_rate": 4.999367259477905e-05, + "loss": 0.8, + "step": 7261 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993670827545416e-05, + "loss": 0.8254, + "step": 7262 + }, + { + "epoch": 0.01, + "learning_rate": 4.999366906006506e-05, + "loss": 0.9704, + "step": 7263 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993667292337984e-05, + "loss": 1.1857, + "step": 7264 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993665524364175e-05, + "loss": 1.0887, + "step": 7265 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993663756143636e-05, + "loss": 1.0521, + "step": 7266 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993661987676373e-05, + "loss": 1.1798, + "step": 7267 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993660218962394e-05, + "loss": 0.878, + "step": 7268 + }, + { + "epoch": 0.01, + "learning_rate": 4.999365845000167e-05, + "loss": 1.4214, + "step": 7269 + }, + { + "epoch": 0.01, + "learning_rate": 4.999365668079423e-05, + "loss": 1.3913, + "step": 7270 + }, + { + "epoch": 0.01, + "learning_rate": 4.999365491134006e-05, + "loss": 1.4542, + "step": 7271 + }, + { + "epoch": 0.01, + "learning_rate": 4.999365314163916e-05, + "loss": 1.3582, + "step": 7272 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993651371691543e-05, + "loss": 1.3169, + "step": 7273 + }, + { + "epoch": 0.01, + "learning_rate": 4.999364960149719e-05, + "loss": 0.9608, + "step": 7274 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993647831056114e-05, + "loss": 1.1639, + "step": 7275 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993646060368303e-05, + "loss": 1.1301, + "step": 7276 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993644289433776e-05, + "loss": 1.0542, + "step": 7277 + }, + { + "epoch": 0.01, + "learning_rate": 4.999364251825252e-05, + "loss": 1.14, + "step": 7278 + }, + { + "epoch": 0.01, + "learning_rate": 4.999364074682453e-05, + "loss": 1.0161, + "step": 7279 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993638975149824e-05, + "loss": 0.9362, + "step": 7280 + }, + { + "epoch": 0.01, + "learning_rate": 4.999363720322839e-05, + "loss": 0.9912, + "step": 7281 + }, + { + "epoch": 0.01, + "learning_rate": 4.999363543106023e-05, + "loss": 0.5056, + "step": 7282 + }, + { + "epoch": 0.01, + "learning_rate": 4.999363365864533e-05, + "loss": 0.7172, + "step": 7283 + }, + { + "epoch": 0.01, + "learning_rate": 4.999363188598372e-05, + "loss": 1.1353, + "step": 7284 + }, + { + "epoch": 0.01, + "learning_rate": 4.999363011307537e-05, + "loss": 0.7113, + "step": 7285 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993628339920304e-05, + "loss": 0.8874, + "step": 7286 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993626566518505e-05, + "loss": 0.7167, + "step": 7287 + }, + { + "epoch": 0.01, + "learning_rate": 4.999362479286999e-05, + "loss": 1.0762, + "step": 7288 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993623018974736e-05, + "loss": 1.2297, + "step": 7289 + }, + { + "epoch": 0.01, + "learning_rate": 4.999362124483276e-05, + "loss": 1.1268, + "step": 7290 + }, + { + "epoch": 0.01, + "learning_rate": 4.999361947044405e-05, + "loss": 1.0713, + "step": 7291 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993617695808626e-05, + "loss": 0.9842, + "step": 7292 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993615920926465e-05, + "loss": 1.1844, + "step": 7293 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993614145797586e-05, + "loss": 1.1295, + "step": 7294 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993612370421977e-05, + "loss": 1.3198, + "step": 7295 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993610594799643e-05, + "loss": 1.1933, + "step": 7296 + }, + { + "epoch": 0.01, + "learning_rate": 4.999360881893058e-05, + "loss": 1.1275, + "step": 7297 + }, + { + "epoch": 0.01, + "learning_rate": 4.999360704281479e-05, + "loss": 0.9787, + "step": 7298 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993605266452275e-05, + "loss": 0.9672, + "step": 7299 + }, + { + "epoch": 0.01, + "learning_rate": 4.999360348984303e-05, + "loss": 1.1196, + "step": 7300 + }, + { + "epoch": 0.01, + "learning_rate": 4.999360171298707e-05, + "loss": 1.2345, + "step": 7301 + }, + { + "epoch": 0.01, + "learning_rate": 4.999359993588437e-05, + "loss": 1.1671, + "step": 7302 + }, + { + "epoch": 0.01, + "learning_rate": 4.999359815853495e-05, + "loss": 0.6403, + "step": 7303 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993596380938815e-05, + "loss": 1.2603, + "step": 7304 + }, + { + "epoch": 0.01, + "learning_rate": 4.999359460309594e-05, + "loss": 1.2867, + "step": 7305 + }, + { + "epoch": 0.01, + "learning_rate": 4.999359282500634e-05, + "loss": 1.336, + "step": 7306 + }, + { + "epoch": 0.01, + "learning_rate": 4.999359104667002e-05, + "loss": 1.2513, + "step": 7307 + }, + { + "epoch": 0.01, + "learning_rate": 4.999358926808696e-05, + "loss": 1.0588, + "step": 7308 + }, + { + "epoch": 0.01, + "learning_rate": 4.999358748925719e-05, + "loss": 1.2615, + "step": 7309 + }, + { + "epoch": 0.01, + "learning_rate": 4.999358571018069e-05, + "loss": 1.0442, + "step": 7310 + }, + { + "epoch": 0.01, + "learning_rate": 4.999358393085746e-05, + "loss": 1.1173, + "step": 7311 + }, + { + "epoch": 0.01, + "learning_rate": 4.99935821512875e-05, + "loss": 0.9781, + "step": 7312 + }, + { + "epoch": 0.01, + "learning_rate": 4.999358037147082e-05, + "loss": 1.3245, + "step": 7313 + }, + { + "epoch": 0.01, + "learning_rate": 4.999357859140741e-05, + "loss": 1.2609, + "step": 7314 + }, + { + "epoch": 0.01, + "learning_rate": 4.999357681109727e-05, + "loss": 0.613, + "step": 7315 + }, + { + "epoch": 0.01, + "learning_rate": 4.999357503054042e-05, + "loss": 0.7257, + "step": 7316 + }, + { + "epoch": 0.01, + "learning_rate": 4.999357324973683e-05, + "loss": 1.2196, + "step": 7317 + }, + { + "epoch": 0.01, + "learning_rate": 4.999357146868652e-05, + "loss": 1.1914, + "step": 7318 + }, + { + "epoch": 0.01, + "learning_rate": 4.999356968738948e-05, + "loss": 1.2337, + "step": 7319 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993567905845716e-05, + "loss": 1.654, + "step": 7320 + }, + { + "epoch": 0.01, + "learning_rate": 4.999356612405522e-05, + "loss": 1.7418, + "step": 7321 + }, + { + "epoch": 0.01, + "learning_rate": 4.999356434201801e-05, + "loss": 1.6275, + "step": 7322 + }, + { + "epoch": 0.01, + "learning_rate": 4.999356255973407e-05, + "loss": 1.6289, + "step": 7323 + }, + { + "epoch": 0.01, + "learning_rate": 4.99935607772034e-05, + "loss": 1.5845, + "step": 7324 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993558994426005e-05, + "loss": 1.559, + "step": 7325 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993557211401884e-05, + "loss": 1.5541, + "step": 7326 + }, + { + "epoch": 0.01, + "learning_rate": 4.999355542813105e-05, + "loss": 1.5677, + "step": 7327 + }, + { + "epoch": 0.01, + "learning_rate": 4.999355364461347e-05, + "loss": 1.5506, + "step": 7328 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993551860849174e-05, + "loss": 1.5629, + "step": 7329 + }, + { + "epoch": 0.01, + "learning_rate": 4.999355007683815e-05, + "loss": 1.5482, + "step": 7330 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993548292580406e-05, + "loss": 1.5299, + "step": 7331 + }, + { + "epoch": 0.01, + "learning_rate": 4.999354650807593e-05, + "loss": 1.5587, + "step": 7332 + }, + { + "epoch": 0.01, + "learning_rate": 4.999354472332473e-05, + "loss": 1.4898, + "step": 7333 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993542938326805e-05, + "loss": 1.5037, + "step": 7334 + }, + { + "epoch": 0.01, + "learning_rate": 4.999354115308215e-05, + "loss": 1.1999, + "step": 7335 + }, + { + "epoch": 0.01, + "learning_rate": 4.999353936759077e-05, + "loss": 1.4152, + "step": 7336 + }, + { + "epoch": 0.01, + "learning_rate": 4.999353758185267e-05, + "loss": 1.5625, + "step": 7337 + }, + { + "epoch": 0.01, + "learning_rate": 4.999353579586784e-05, + "loss": 1.3958, + "step": 7338 + }, + { + "epoch": 0.01, + "learning_rate": 4.999353400963629e-05, + "loss": 1.3154, + "step": 7339 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993532223158015e-05, + "loss": 1.3841, + "step": 7340 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993530436433e-05, + "loss": 1.541, + "step": 7341 + }, + { + "epoch": 0.01, + "learning_rate": 4.999352864946128e-05, + "loss": 1.4549, + "step": 7342 + }, + { + "epoch": 0.01, + "learning_rate": 4.999352686224281e-05, + "loss": 1.405, + "step": 7343 + }, + { + "epoch": 0.01, + "learning_rate": 4.999352507477764e-05, + "loss": 1.5148, + "step": 7344 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993523287065725e-05, + "loss": 1.5382, + "step": 7345 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993521499107096e-05, + "loss": 1.3132, + "step": 7346 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993519710901736e-05, + "loss": 1.3158, + "step": 7347 + }, + { + "epoch": 0.01, + "learning_rate": 4.999351792244965e-05, + "loss": 1.178, + "step": 7348 + }, + { + "epoch": 0.01, + "learning_rate": 4.999351613375084e-05, + "loss": 1.2829, + "step": 7349 + }, + { + "epoch": 0.01, + "learning_rate": 4.99935143448053e-05, + "loss": 1.3511, + "step": 7350 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993512555613045e-05, + "loss": 1.2952, + "step": 7351 + }, + { + "epoch": 0.01, + "learning_rate": 4.999351076617406e-05, + "loss": 1.3797, + "step": 7352 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993508976488344e-05, + "loss": 1.5269, + "step": 7353 + }, + { + "epoch": 0.01, + "learning_rate": 4.999350718655591e-05, + "loss": 1.5188, + "step": 7354 + }, + { + "epoch": 0.01, + "learning_rate": 4.999350539637675e-05, + "loss": 1.4032, + "step": 7355 + }, + { + "epoch": 0.01, + "learning_rate": 4.999350360595086e-05, + "loss": 1.6251, + "step": 7356 + }, + { + "epoch": 0.01, + "learning_rate": 4.999350181527824e-05, + "loss": 1.6092, + "step": 7357 + }, + { + "epoch": 0.01, + "learning_rate": 4.999350002435891e-05, + "loss": 1.2604, + "step": 7358 + }, + { + "epoch": 0.01, + "learning_rate": 4.999349823319284e-05, + "loss": 1.0308, + "step": 7359 + }, + { + "epoch": 0.01, + "learning_rate": 4.999349644178005e-05, + "loss": 1.3368, + "step": 7360 + }, + { + "epoch": 0.01, + "learning_rate": 4.999349465012054e-05, + "loss": 1.6052, + "step": 7361 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993492858214304e-05, + "loss": 1.3298, + "step": 7362 + }, + { + "epoch": 0.01, + "learning_rate": 4.999349106606134e-05, + "loss": 1.5927, + "step": 7363 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993489273661646e-05, + "loss": 1.3533, + "step": 7364 + }, + { + "epoch": 0.01, + "learning_rate": 4.999348748101523e-05, + "loss": 1.6058, + "step": 7365 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993485688122086e-05, + "loss": 1.5934, + "step": 7366 + }, + { + "epoch": 0.01, + "learning_rate": 4.999348389498223e-05, + "loss": 1.3531, + "step": 7367 + }, + { + "epoch": 0.01, + "learning_rate": 4.999348210159563e-05, + "loss": 1.366, + "step": 7368 + }, + { + "epoch": 0.01, + "learning_rate": 4.999348030796232e-05, + "loss": 1.3443, + "step": 7369 + }, + { + "epoch": 0.01, + "learning_rate": 4.999347851408228e-05, + "loss": 1.5869, + "step": 7370 + }, + { + "epoch": 0.01, + "learning_rate": 4.999347671995551e-05, + "loss": 1.5461, + "step": 7371 + }, + { + "epoch": 0.01, + "learning_rate": 4.999347492558202e-05, + "loss": 1.593, + "step": 7372 + }, + { + "epoch": 0.01, + "learning_rate": 4.999347313096181e-05, + "loss": 1.4488, + "step": 7373 + }, + { + "epoch": 0.01, + "learning_rate": 4.999347133609486e-05, + "loss": 1.613, + "step": 7374 + }, + { + "epoch": 0.01, + "learning_rate": 4.99934695409812e-05, + "loss": 1.4047, + "step": 7375 + }, + { + "epoch": 0.01, + "learning_rate": 4.999346774562081e-05, + "loss": 1.6145, + "step": 7376 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993465950013694e-05, + "loss": 1.3279, + "step": 7377 + }, + { + "epoch": 0.01, + "learning_rate": 4.999346415415985e-05, + "loss": 1.509, + "step": 7378 + }, + { + "epoch": 0.01, + "learning_rate": 4.999346235805928e-05, + "loss": 1.4429, + "step": 7379 + }, + { + "epoch": 0.01, + "learning_rate": 4.999346056171199e-05, + "loss": 1.5884, + "step": 7380 + }, + { + "epoch": 0.01, + "learning_rate": 4.999345876511798e-05, + "loss": 1.5937, + "step": 7381 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993456968277245e-05, + "loss": 1.607, + "step": 7382 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993455171189775e-05, + "loss": 1.5965, + "step": 7383 + }, + { + "epoch": 0.01, + "learning_rate": 4.999345337385558e-05, + "loss": 1.5968, + "step": 7384 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993451576274665e-05, + "loss": 1.5513, + "step": 7385 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993449778447025e-05, + "loss": 1.222, + "step": 7386 + }, + { + "epoch": 0.01, + "learning_rate": 4.999344798037267e-05, + "loss": 1.3366, + "step": 7387 + }, + { + "epoch": 0.01, + "learning_rate": 4.999344618205157e-05, + "loss": 1.3441, + "step": 7388 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993444383483754e-05, + "loss": 1.6091, + "step": 7389 + }, + { + "epoch": 0.01, + "learning_rate": 4.999344258466921e-05, + "loss": 1.3621, + "step": 7390 + }, + { + "epoch": 0.01, + "learning_rate": 4.999344078560795e-05, + "loss": 1.5896, + "step": 7391 + }, + { + "epoch": 0.01, + "learning_rate": 4.999343898629997e-05, + "loss": 1.5968, + "step": 7392 + }, + { + "epoch": 0.01, + "learning_rate": 4.999343718674525e-05, + "loss": 1.3465, + "step": 7393 + }, + { + "epoch": 0.01, + "learning_rate": 4.999343538694382e-05, + "loss": 1.5791, + "step": 7394 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993433586895656e-05, + "loss": 1.3193, + "step": 7395 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993431786600765e-05, + "loss": 1.5984, + "step": 7396 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993429986059156e-05, + "loss": 1.0591, + "step": 7397 + }, + { + "epoch": 0.01, + "learning_rate": 4.999342818527082e-05, + "loss": 1.6761, + "step": 7398 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993426384235754e-05, + "loss": 1.3947, + "step": 7399 + }, + { + "epoch": 0.01, + "learning_rate": 4.999342458295397e-05, + "loss": 1.146, + "step": 7400 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993422781425456e-05, + "loss": 1.0649, + "step": 7401 + }, + { + "epoch": 0.01, + "learning_rate": 4.999342097965022e-05, + "loss": 1.2759, + "step": 7402 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993419177628264e-05, + "loss": 1.3245, + "step": 7403 + }, + { + "epoch": 0.01, + "learning_rate": 4.999341737535958e-05, + "loss": 1.3419, + "step": 7404 + }, + { + "epoch": 0.01, + "learning_rate": 4.999341557284417e-05, + "loss": 1.2456, + "step": 7405 + }, + { + "epoch": 0.01, + "learning_rate": 4.999341377008204e-05, + "loss": 1.2134, + "step": 7406 + }, + { + "epoch": 0.01, + "learning_rate": 4.999341196707318e-05, + "loss": 1.0812, + "step": 7407 + }, + { + "epoch": 0.01, + "learning_rate": 4.99934101638176e-05, + "loss": 0.9918, + "step": 7408 + }, + { + "epoch": 0.01, + "learning_rate": 4.99934083603153e-05, + "loss": 0.9686, + "step": 7409 + }, + { + "epoch": 0.01, + "learning_rate": 4.999340655656627e-05, + "loss": 1.0861, + "step": 7410 + }, + { + "epoch": 0.01, + "learning_rate": 4.999340475257051e-05, + "loss": 1.0647, + "step": 7411 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993402948328025e-05, + "loss": 1.311, + "step": 7412 + }, + { + "epoch": 0.01, + "learning_rate": 4.999340114383882e-05, + "loss": 1.3783, + "step": 7413 + }, + { + "epoch": 0.01, + "learning_rate": 4.99933993391029e-05, + "loss": 1.2996, + "step": 7414 + }, + { + "epoch": 0.01, + "learning_rate": 4.999339753412025e-05, + "loss": 1.0855, + "step": 7415 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993395728890866e-05, + "loss": 1.189, + "step": 7416 + }, + { + "epoch": 0.01, + "learning_rate": 4.999339392341477e-05, + "loss": 1.3219, + "step": 7417 + }, + { + "epoch": 0.01, + "learning_rate": 4.999339211769195e-05, + "loss": 0.8666, + "step": 7418 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993390311722396e-05, + "loss": 1.1259, + "step": 7419 + }, + { + "epoch": 0.01, + "learning_rate": 4.999338850550612e-05, + "loss": 1.1693, + "step": 7420 + }, + { + "epoch": 0.01, + "learning_rate": 4.999338669904312e-05, + "loss": 1.8688, + "step": 7421 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993384892333405e-05, + "loss": 1.2217, + "step": 7422 + }, + { + "epoch": 0.01, + "learning_rate": 4.999338308537695e-05, + "loss": 0.9835, + "step": 7423 + }, + { + "epoch": 0.01, + "learning_rate": 4.999338127817379e-05, + "loss": 1.1256, + "step": 7424 + }, + { + "epoch": 0.01, + "learning_rate": 4.999337947072389e-05, + "loss": 1.0414, + "step": 7425 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993377663027276e-05, + "loss": 0.865, + "step": 7426 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993375855083934e-05, + "loss": 1.0807, + "step": 7427 + }, + { + "epoch": 0.01, + "learning_rate": 4.999337404689386e-05, + "loss": 1.2146, + "step": 7428 + }, + { + "epoch": 0.01, + "learning_rate": 4.999337223845707e-05, + "loss": 1.1836, + "step": 7429 + }, + { + "epoch": 0.01, + "learning_rate": 4.999337042977355e-05, + "loss": 1.1424, + "step": 7430 + }, + { + "epoch": 0.01, + "learning_rate": 4.999336862084332e-05, + "loss": 1.043, + "step": 7431 + }, + { + "epoch": 0.01, + "learning_rate": 4.999336681166636e-05, + "loss": 1.2966, + "step": 7432 + }, + { + "epoch": 0.01, + "learning_rate": 4.999336500224268e-05, + "loss": 1.5118, + "step": 7433 + }, + { + "epoch": 0.01, + "learning_rate": 4.999336319257226e-05, + "loss": 1.3014, + "step": 7434 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993361382655125e-05, + "loss": 1.1033, + "step": 7435 + }, + { + "epoch": 0.01, + "learning_rate": 4.999335957249127e-05, + "loss": 1.3528, + "step": 7436 + }, + { + "epoch": 0.01, + "learning_rate": 4.999335776208068e-05, + "loss": 1.185, + "step": 7437 + }, + { + "epoch": 0.01, + "learning_rate": 4.999335595142338e-05, + "loss": 1.1749, + "step": 7438 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993354140519346e-05, + "loss": 1.037, + "step": 7439 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993352329368596e-05, + "loss": 0.9035, + "step": 7440 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993350517971115e-05, + "loss": 1.3873, + "step": 7441 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993348706326916e-05, + "loss": 1.1147, + "step": 7442 + }, + { + "epoch": 0.01, + "learning_rate": 4.999334689443599e-05, + "loss": 1.264, + "step": 7443 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993345082298335e-05, + "loss": 2.0064, + "step": 7444 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993343269913966e-05, + "loss": 1.4161, + "step": 7445 + }, + { + "epoch": 0.01, + "learning_rate": 4.999334145728287e-05, + "loss": 0.7776, + "step": 7446 + }, + { + "epoch": 0.01, + "learning_rate": 4.999333964440505e-05, + "loss": 0.6111, + "step": 7447 + }, + { + "epoch": 0.01, + "learning_rate": 4.999333783128051e-05, + "loss": 0.6493, + "step": 7448 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993336017909244e-05, + "loss": 0.6455, + "step": 7449 + }, + { + "epoch": 0.01, + "learning_rate": 4.999333420429125e-05, + "loss": 0.602, + "step": 7450 + }, + { + "epoch": 0.01, + "learning_rate": 4.999333239042653e-05, + "loss": 0.603, + "step": 7451 + }, + { + "epoch": 0.01, + "learning_rate": 4.999333057631509e-05, + "loss": 0.5794, + "step": 7452 + }, + { + "epoch": 0.01, + "learning_rate": 4.999332876195693e-05, + "loss": 0.5581, + "step": 7453 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993326947352046e-05, + "loss": 0.5843, + "step": 7454 + }, + { + "epoch": 0.01, + "learning_rate": 4.999332513250043e-05, + "loss": 0.5669, + "step": 7455 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993323317402094e-05, + "loss": 0.5727, + "step": 7456 + }, + { + "epoch": 0.01, + "learning_rate": 4.999332150205705e-05, + "loss": 0.5761, + "step": 7457 + }, + { + "epoch": 0.01, + "learning_rate": 4.999331968646526e-05, + "loss": 0.5629, + "step": 7458 + }, + { + "epoch": 0.01, + "learning_rate": 4.999331787062676e-05, + "loss": 0.5799, + "step": 7459 + }, + { + "epoch": 0.01, + "learning_rate": 4.999331605454153e-05, + "loss": 0.5618, + "step": 7460 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993314238209585e-05, + "loss": 0.8993, + "step": 7461 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993312421630904e-05, + "loss": 1.4123, + "step": 7462 + }, + { + "epoch": 0.01, + "learning_rate": 4.999331060480551e-05, + "loss": 1.0034, + "step": 7463 + }, + { + "epoch": 0.01, + "learning_rate": 4.999330878773339e-05, + "loss": 1.3514, + "step": 7464 + }, + { + "epoch": 0.01, + "learning_rate": 4.999330697041454e-05, + "loss": 1.3569, + "step": 7465 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993305152848974e-05, + "loss": 1.0425, + "step": 7466 + }, + { + "epoch": 0.01, + "learning_rate": 4.999330333503668e-05, + "loss": 1.0502, + "step": 7467 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993301516977666e-05, + "loss": 1.1806, + "step": 7468 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993299698671933e-05, + "loss": 1.0627, + "step": 7469 + }, + { + "epoch": 0.01, + "learning_rate": 4.999329788011947e-05, + "loss": 0.986, + "step": 7470 + }, + { + "epoch": 0.01, + "learning_rate": 4.999329606132028e-05, + "loss": 0.9962, + "step": 7471 + }, + { + "epoch": 0.01, + "learning_rate": 4.999329424227437e-05, + "loss": 1.2365, + "step": 7472 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993292422981744e-05, + "loss": 1.1068, + "step": 7473 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993290603442386e-05, + "loss": 1.3851, + "step": 7474 + }, + { + "epoch": 0.01, + "learning_rate": 4.999328878365631e-05, + "loss": 1.3683, + "step": 7475 + }, + { + "epoch": 0.01, + "learning_rate": 4.999328696362351e-05, + "loss": 1.3053, + "step": 7476 + }, + { + "epoch": 0.01, + "learning_rate": 4.999328514334398e-05, + "loss": 0.757, + "step": 7477 + }, + { + "epoch": 0.01, + "learning_rate": 4.999328332281774e-05, + "loss": 0.6441, + "step": 7478 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993281502044765e-05, + "loss": 0.9481, + "step": 7479 + }, + { + "epoch": 0.01, + "learning_rate": 4.999327968102507e-05, + "loss": 1.1469, + "step": 7480 + }, + { + "epoch": 0.01, + "learning_rate": 4.999327785975866e-05, + "loss": 1.0571, + "step": 7481 + }, + { + "epoch": 0.01, + "learning_rate": 4.999327603824552e-05, + "loss": 0.7546, + "step": 7482 + }, + { + "epoch": 0.01, + "learning_rate": 4.999327421648565e-05, + "loss": 0.6546, + "step": 7483 + }, + { + "epoch": 0.01, + "learning_rate": 4.999327239447907e-05, + "loss": 1.1642, + "step": 7484 + }, + { + "epoch": 0.01, + "learning_rate": 4.999327057222576e-05, + "loss": 1.0698, + "step": 7485 + }, + { + "epoch": 0.01, + "learning_rate": 4.999326874972573e-05, + "loss": 1.1704, + "step": 7486 + }, + { + "epoch": 0.01, + "learning_rate": 4.999326692697897e-05, + "loss": 1.2712, + "step": 7487 + }, + { + "epoch": 0.01, + "learning_rate": 4.999326510398549e-05, + "loss": 0.9271, + "step": 7488 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993263280745296e-05, + "loss": 1.1715, + "step": 7489 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993261457258364e-05, + "loss": 1.149, + "step": 7490 + }, + { + "epoch": 0.01, + "learning_rate": 4.999325963352472e-05, + "loss": 1.0354, + "step": 7491 + }, + { + "epoch": 0.01, + "learning_rate": 4.999325780954435e-05, + "loss": 1.1546, + "step": 7492 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993255985317254e-05, + "loss": 1.2658, + "step": 7493 + }, + { + "epoch": 0.01, + "learning_rate": 4.999325416084344e-05, + "loss": 1.1783, + "step": 7494 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993252336122904e-05, + "loss": 1.0157, + "step": 7495 + }, + { + "epoch": 0.01, + "learning_rate": 4.999325051115564e-05, + "loss": 1.0256, + "step": 7496 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324868594166e-05, + "loss": 1.5544, + "step": 7497 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993246860480944e-05, + "loss": 1.1515, + "step": 7498 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324503477352e-05, + "loss": 1.2725, + "step": 7499 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324320881936e-05, + "loss": 1.0838, + "step": 7500 + }, + { + "epoch": 0.01, + "eval_loss": 1.067463994026184, + "eval_runtime": 84.948, + "eval_samples_per_second": 16.304, + "eval_steps_per_second": 4.085, + "step": 7500 + }, + { + "epoch": 0.01, + "learning_rate": 4.99932413826185e-05, + "loss": 1.2704, + "step": 7501 + }, + { + "epoch": 0.01, + "learning_rate": 4.999323955617089e-05, + "loss": 1.2755, + "step": 7502 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993237729476574e-05, + "loss": 1.0452, + "step": 7503 + }, + { + "epoch": 0.01, + "learning_rate": 4.999323590253553e-05, + "loss": 1.1438, + "step": 7504 + }, + { + "epoch": 0.01, + "learning_rate": 4.999323407534776e-05, + "loss": 1.1556, + "step": 7505 + }, + { + "epoch": 0.01, + "learning_rate": 4.999323224791327e-05, + "loss": 1.3714, + "step": 7506 + }, + { + "epoch": 0.01, + "learning_rate": 4.999323042023206e-05, + "loss": 1.3532, + "step": 7507 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993228592304124e-05, + "loss": 0.9893, + "step": 7508 + }, + { + "epoch": 0.01, + "learning_rate": 4.999322676412947e-05, + "loss": 0.9889, + "step": 7509 + }, + { + "epoch": 0.01, + "learning_rate": 4.999322493570809e-05, + "loss": 1.1921, + "step": 7510 + }, + { + "epoch": 0.01, + "learning_rate": 4.999322310703999e-05, + "loss": 1.367, + "step": 7511 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993221278125164e-05, + "loss": 0.8645, + "step": 7512 + }, + { + "epoch": 0.01, + "learning_rate": 4.999321944896361e-05, + "loss": 0.8964, + "step": 7513 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993217619555345e-05, + "loss": 1.1824, + "step": 7514 + }, + { + "epoch": 0.01, + "learning_rate": 4.999321578990035e-05, + "loss": 1.1325, + "step": 7515 + }, + { + "epoch": 0.01, + "learning_rate": 4.999321395999864e-05, + "loss": 0.7096, + "step": 7516 + }, + { + "epoch": 0.01, + "learning_rate": 4.99932121298502e-05, + "loss": 1.5663, + "step": 7517 + }, + { + "epoch": 0.01, + "learning_rate": 4.999321029945504e-05, + "loss": 1.1504, + "step": 7518 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993208468813156e-05, + "loss": 1.3688, + "step": 7519 + }, + { + "epoch": 0.01, + "learning_rate": 4.999320663792455e-05, + "loss": 1.1016, + "step": 7520 + }, + { + "epoch": 0.01, + "learning_rate": 4.999320480678923e-05, + "loss": 1.009, + "step": 7521 + }, + { + "epoch": 0.01, + "learning_rate": 4.999320297540718e-05, + "loss": 1.3089, + "step": 7522 + }, + { + "epoch": 0.01, + "learning_rate": 4.99932011437784e-05, + "loss": 1.3837, + "step": 7523 + }, + { + "epoch": 0.01, + "learning_rate": 4.999319931190291e-05, + "loss": 0.9332, + "step": 7524 + }, + { + "epoch": 0.01, + "learning_rate": 4.99931974797807e-05, + "loss": 1.2406, + "step": 7525 + }, + { + "epoch": 0.01, + "learning_rate": 4.999319564741176e-05, + "loss": 1.1936, + "step": 7526 + }, + { + "epoch": 0.01, + "learning_rate": 4.999319381479609e-05, + "loss": 1.6506, + "step": 7527 + }, + { + "epoch": 0.01, + "learning_rate": 4.999319198193371e-05, + "loss": 0.859, + "step": 7528 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993190148824604e-05, + "loss": 1.2011, + "step": 7529 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993188315468776e-05, + "loss": 0.672, + "step": 7530 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993186481866225e-05, + "loss": 0.8646, + "step": 7531 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993184648016956e-05, + "loss": 0.9417, + "step": 7532 + }, + { + "epoch": 0.01, + "learning_rate": 4.999318281392096e-05, + "loss": 1.31, + "step": 7533 + }, + { + "epoch": 0.01, + "learning_rate": 4.999318097957824e-05, + "loss": 1.1293, + "step": 7534 + }, + { + "epoch": 0.01, + "learning_rate": 4.99931791449888e-05, + "loss": 1.1137, + "step": 7535 + }, + { + "epoch": 0.01, + "learning_rate": 4.999317731015264e-05, + "loss": 1.2222, + "step": 7536 + }, + { + "epoch": 0.01, + "learning_rate": 4.999317547506976e-05, + "loss": 0.9789, + "step": 7537 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993173639740153e-05, + "loss": 1.0231, + "step": 7538 + }, + { + "epoch": 0.01, + "learning_rate": 4.999317180416383e-05, + "loss": 0.7891, + "step": 7539 + }, + { + "epoch": 0.01, + "learning_rate": 4.999316996834078e-05, + "loss": 1.3479, + "step": 7540 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993168132271005e-05, + "loss": 1.1883, + "step": 7541 + }, + { + "epoch": 0.01, + "learning_rate": 4.999316629595451e-05, + "loss": 1.1731, + "step": 7542 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993164459391296e-05, + "loss": 0.9551, + "step": 7543 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993162622581356e-05, + "loss": 1.1011, + "step": 7544 + }, + { + "epoch": 0.01, + "learning_rate": 4.99931607855247e-05, + "loss": 1.026, + "step": 7545 + }, + { + "epoch": 0.01, + "learning_rate": 4.999315894822132e-05, + "loss": 1.1553, + "step": 7546 + }, + { + "epoch": 0.01, + "learning_rate": 4.999315711067122e-05, + "loss": 1.0465, + "step": 7547 + }, + { + "epoch": 0.01, + "learning_rate": 4.999315527287439e-05, + "loss": 0.8922, + "step": 7548 + }, + { + "epoch": 0.01, + "learning_rate": 4.999315343483084e-05, + "loss": 1.1713, + "step": 7549 + }, + { + "epoch": 0.01, + "learning_rate": 4.999315159654058e-05, + "loss": 1.1499, + "step": 7550 + }, + { + "epoch": 0.01, + "learning_rate": 4.999314975800358e-05, + "loss": 1.4271, + "step": 7551 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993147919219876e-05, + "loss": 1.0735, + "step": 7552 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993146080189436e-05, + "loss": 1.0994, + "step": 7553 + }, + { + "epoch": 0.01, + "learning_rate": 4.999314424091228e-05, + "loss": 1.016, + "step": 7554 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993142401388404e-05, + "loss": 0.9924, + "step": 7555 + }, + { + "epoch": 0.01, + "learning_rate": 4.99931405616178e-05, + "loss": 1.2288, + "step": 7556 + }, + { + "epoch": 0.01, + "learning_rate": 4.999313872160048e-05, + "loss": 1.1756, + "step": 7557 + }, + { + "epoch": 0.01, + "learning_rate": 4.999313688133643e-05, + "loss": 1.1061, + "step": 7558 + }, + { + "epoch": 0.01, + "learning_rate": 4.999313504082567e-05, + "loss": 1.1704, + "step": 7559 + }, + { + "epoch": 0.01, + "learning_rate": 4.999313320006819e-05, + "loss": 0.9434, + "step": 7560 + }, + { + "epoch": 0.01, + "learning_rate": 4.999313135906398e-05, + "loss": 0.926, + "step": 7561 + }, + { + "epoch": 0.01, + "learning_rate": 4.999312951781305e-05, + "loss": 1.0371, + "step": 7562 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993127676315396e-05, + "loss": 0.769, + "step": 7563 + }, + { + "epoch": 0.01, + "learning_rate": 4.999312583457102e-05, + "loss": 1.1982, + "step": 7564 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993123992579923e-05, + "loss": 1.1619, + "step": 7565 + }, + { + "epoch": 0.01, + "learning_rate": 4.999312215034211e-05, + "loss": 1.3264, + "step": 7566 + }, + { + "epoch": 0.01, + "learning_rate": 4.999312030785757e-05, + "loss": 1.7573, + "step": 7567 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993118465126307e-05, + "loss": 0.8679, + "step": 7568 + }, + { + "epoch": 0.01, + "learning_rate": 4.999311662214833e-05, + "loss": 1.2439, + "step": 7569 + }, + { + "epoch": 0.01, + "learning_rate": 4.999311477892363e-05, + "loss": 1.1812, + "step": 7570 + }, + { + "epoch": 0.01, + "learning_rate": 4.99931129354522e-05, + "loss": 1.0988, + "step": 7571 + }, + { + "epoch": 0.01, + "learning_rate": 4.999311109173406e-05, + "loss": 1.2373, + "step": 7572 + }, + { + "epoch": 0.01, + "learning_rate": 4.999310924776919e-05, + "loss": 1.9465, + "step": 7573 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993107403557606e-05, + "loss": 1.4244, + "step": 7574 + }, + { + "epoch": 0.01, + "learning_rate": 4.99931055590993e-05, + "loss": 1.3393, + "step": 7575 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993103714394265e-05, + "loss": 1.2637, + "step": 7576 + }, + { + "epoch": 0.01, + "learning_rate": 4.99931018694425e-05, + "loss": 1.245, + "step": 7577 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993100024244035e-05, + "loss": 0.4278, + "step": 7578 + }, + { + "epoch": 0.01, + "learning_rate": 4.999309817879884e-05, + "loss": 1.1911, + "step": 7579 + }, + { + "epoch": 0.01, + "learning_rate": 4.999309633310692e-05, + "loss": 0.746, + "step": 7580 + }, + { + "epoch": 0.01, + "learning_rate": 4.999309448716828e-05, + "loss": 1.1593, + "step": 7581 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993092640982925e-05, + "loss": 1.1381, + "step": 7582 + }, + { + "epoch": 0.01, + "learning_rate": 4.999309079455085e-05, + "loss": 1.0607, + "step": 7583 + }, + { + "epoch": 0.01, + "learning_rate": 4.999308894787204e-05, + "loss": 1.0424, + "step": 7584 + }, + { + "epoch": 0.01, + "learning_rate": 4.999308710094652e-05, + "loss": 1.1306, + "step": 7585 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993085253774276e-05, + "loss": 1.1298, + "step": 7586 + }, + { + "epoch": 0.01, + "learning_rate": 4.999308340635531e-05, + "loss": 0.9243, + "step": 7587 + }, + { + "epoch": 0.01, + "learning_rate": 4.999308155868962e-05, + "loss": 0.8916, + "step": 7588 + }, + { + "epoch": 0.01, + "learning_rate": 4.999307971077721e-05, + "loss": 0.6633, + "step": 7589 + }, + { + "epoch": 0.01, + "learning_rate": 4.999307786261808e-05, + "loss": 1.0586, + "step": 7590 + }, + { + "epoch": 0.01, + "learning_rate": 4.999307601421224e-05, + "loss": 1.1927, + "step": 7591 + }, + { + "epoch": 0.01, + "learning_rate": 4.999307416555966e-05, + "loss": 0.7294, + "step": 7592 + }, + { + "epoch": 0.01, + "learning_rate": 4.999307231666037e-05, + "loss": 0.5216, + "step": 7593 + }, + { + "epoch": 0.01, + "learning_rate": 4.999307046751436e-05, + "loss": 1.1658, + "step": 7594 + }, + { + "epoch": 0.01, + "learning_rate": 4.999306861812162e-05, + "loss": 1.1383, + "step": 7595 + }, + { + "epoch": 0.01, + "learning_rate": 4.999306676848217e-05, + "loss": 0.7021, + "step": 7596 + }, + { + "epoch": 0.01, + "learning_rate": 4.999306491859599e-05, + "loss": 1.2803, + "step": 7597 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993063068463084e-05, + "loss": 1.1629, + "step": 7598 + }, + { + "epoch": 0.01, + "learning_rate": 4.999306121808347e-05, + "loss": 0.7347, + "step": 7599 + }, + { + "epoch": 0.01, + "learning_rate": 4.999305936745713e-05, + "loss": 1.154, + "step": 7600 + }, + { + "epoch": 0.01, + "learning_rate": 4.999305751658407e-05, + "loss": 1.0441, + "step": 7601 + }, + { + "epoch": 0.01, + "learning_rate": 4.999305566546429e-05, + "loss": 1.0921, + "step": 7602 + }, + { + "epoch": 0.01, + "learning_rate": 4.999305381409779e-05, + "loss": 1.0165, + "step": 7603 + }, + { + "epoch": 0.01, + "learning_rate": 4.999305196248456e-05, + "loss": 0.9674, + "step": 7604 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993050110624614e-05, + "loss": 1.2161, + "step": 7605 + }, + { + "epoch": 0.01, + "learning_rate": 4.999304825851796e-05, + "loss": 0.8684, + "step": 7606 + }, + { + "epoch": 0.01, + "learning_rate": 4.999304640616457e-05, + "loss": 1.3836, + "step": 7607 + }, + { + "epoch": 0.01, + "learning_rate": 4.999304455356446e-05, + "loss": 1.0503, + "step": 7608 + }, + { + "epoch": 0.01, + "learning_rate": 4.999304270071763e-05, + "loss": 1.0424, + "step": 7609 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993040847624085e-05, + "loss": 0.7922, + "step": 7610 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993038994283816e-05, + "loss": 1.1937, + "step": 7611 + }, + { + "epoch": 0.01, + "learning_rate": 4.999303714069682e-05, + "loss": 1.0771, + "step": 7612 + }, + { + "epoch": 0.01, + "learning_rate": 4.999303528686311e-05, + "loss": 0.7628, + "step": 7613 + }, + { + "epoch": 0.01, + "learning_rate": 4.999303343278268e-05, + "loss": 1.0711, + "step": 7614 + }, + { + "epoch": 0.01, + "learning_rate": 4.999303157845553e-05, + "loss": 0.9608, + "step": 7615 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993029723881655e-05, + "loss": 1.0593, + "step": 7616 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993027869061063e-05, + "loss": 1.1047, + "step": 7617 + }, + { + "epoch": 0.01, + "learning_rate": 4.999302601399375e-05, + "loss": 0.8421, + "step": 7618 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993024158679716e-05, + "loss": 0.4275, + "step": 7619 + }, + { + "epoch": 0.01, + "learning_rate": 4.999302230311896e-05, + "loss": 0.4701, + "step": 7620 + }, + { + "epoch": 0.01, + "learning_rate": 4.999302044731149e-05, + "loss": 1.1274, + "step": 7621 + }, + { + "epoch": 0.01, + "learning_rate": 4.999301859125729e-05, + "loss": 1.1504, + "step": 7622 + }, + { + "epoch": 0.01, + "learning_rate": 4.999301673495637e-05, + "loss": 1.0298, + "step": 7623 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993014878408734e-05, + "loss": 1.2913, + "step": 7624 + }, + { + "epoch": 0.01, + "learning_rate": 4.999301302161438e-05, + "loss": 1.1684, + "step": 7625 + }, + { + "epoch": 0.01, + "learning_rate": 4.99930111645733e-05, + "loss": 1.1173, + "step": 7626 + }, + { + "epoch": 0.01, + "learning_rate": 4.99930093072855e-05, + "loss": 1.2299, + "step": 7627 + }, + { + "epoch": 0.01, + "learning_rate": 4.999300744975098e-05, + "loss": 1.2688, + "step": 7628 + }, + { + "epoch": 0.01, + "learning_rate": 4.999300559196974e-05, + "loss": 0.7244, + "step": 7629 + }, + { + "epoch": 0.01, + "learning_rate": 4.999300373394178e-05, + "loss": 1.1436, + "step": 7630 + }, + { + "epoch": 0.01, + "learning_rate": 4.99930018756671e-05, + "loss": 1.3276, + "step": 7631 + }, + { + "epoch": 0.01, + "learning_rate": 4.9993000017145696e-05, + "loss": 1.2979, + "step": 7632 + }, + { + "epoch": 0.01, + "learning_rate": 4.999299815837758e-05, + "loss": 1.2512, + "step": 7633 + }, + { + "epoch": 0.01, + "learning_rate": 4.999299629936274e-05, + "loss": 1.1245, + "step": 7634 + }, + { + "epoch": 0.01, + "learning_rate": 4.999299444010117e-05, + "loss": 1.122, + "step": 7635 + }, + { + "epoch": 0.01, + "learning_rate": 4.999299258059289e-05, + "loss": 1.0844, + "step": 7636 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992990720837886e-05, + "loss": 0.9044, + "step": 7637 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992988860836163e-05, + "loss": 1.0386, + "step": 7638 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992987000587724e-05, + "loss": 1.1474, + "step": 7639 + }, + { + "epoch": 0.01, + "learning_rate": 4.999298514009256e-05, + "loss": 1.2707, + "step": 7640 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992983279350674e-05, + "loss": 0.9895, + "step": 7641 + }, + { + "epoch": 0.01, + "learning_rate": 4.999298141836207e-05, + "loss": 1.152, + "step": 7642 + }, + { + "epoch": 0.01, + "learning_rate": 4.999297955712674e-05, + "loss": 1.1866, + "step": 7643 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992977695644705e-05, + "loss": 1.3486, + "step": 7644 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992975833915936e-05, + "loss": 0.8838, + "step": 7645 + }, + { + "epoch": 0.01, + "learning_rate": 4.999297397194045e-05, + "loss": 1.1444, + "step": 7646 + }, + { + "epoch": 0.01, + "learning_rate": 4.999297210971825e-05, + "loss": 1.7278, + "step": 7647 + }, + { + "epoch": 0.01, + "learning_rate": 4.999297024724933e-05, + "loss": 1.3322, + "step": 7648 + }, + { + "epoch": 0.01, + "learning_rate": 4.999296838453368e-05, + "loss": 1.0926, + "step": 7649 + }, + { + "epoch": 0.01, + "learning_rate": 4.999296652157132e-05, + "loss": 1.049, + "step": 7650 + }, + { + "epoch": 0.01, + "learning_rate": 4.999296465836223e-05, + "loss": 1.2226, + "step": 7651 + }, + { + "epoch": 0.01, + "learning_rate": 4.999296279490642e-05, + "loss": 1.1249, + "step": 7652 + }, + { + "epoch": 0.01, + "learning_rate": 4.99929609312039e-05, + "loss": 1.1267, + "step": 7653 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992959067254655e-05, + "loss": 1.1067, + "step": 7654 + }, + { + "epoch": 0.01, + "learning_rate": 4.999295720305869e-05, + "loss": 1.2643, + "step": 7655 + }, + { + "epoch": 0.01, + "learning_rate": 4.999295533861601e-05, + "loss": 1.0183, + "step": 7656 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992953473926595e-05, + "loss": 1.0721, + "step": 7657 + }, + { + "epoch": 0.01, + "learning_rate": 4.999295160899047e-05, + "loss": 1.1275, + "step": 7658 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992949743807625e-05, + "loss": 1.1531, + "step": 7659 + }, + { + "epoch": 0.01, + "learning_rate": 4.999294787837807e-05, + "loss": 1.0552, + "step": 7660 + }, + { + "epoch": 0.01, + "learning_rate": 4.999294601270178e-05, + "loss": 1.0566, + "step": 7661 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992944146778776e-05, + "loss": 1.3829, + "step": 7662 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992942280609054e-05, + "loss": 1.2207, + "step": 7663 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992940414192616e-05, + "loss": 1.0252, + "step": 7664 + }, + { + "epoch": 0.01, + "learning_rate": 4.999293854752945e-05, + "loss": 0.7548, + "step": 7665 + }, + { + "epoch": 0.01, + "learning_rate": 4.999293668061956e-05, + "loss": 1.0979, + "step": 7666 + }, + { + "epoch": 0.01, + "learning_rate": 4.999293481346296e-05, + "loss": 1.8026, + "step": 7667 + }, + { + "epoch": 0.01, + "learning_rate": 4.999293294605964e-05, + "loss": 1.3354, + "step": 7668 + }, + { + "epoch": 0.01, + "learning_rate": 4.99929310784096e-05, + "loss": 1.3458, + "step": 7669 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992929210512834e-05, + "loss": 1.0722, + "step": 7670 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992927342369356e-05, + "loss": 0.8646, + "step": 7671 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992925473979155e-05, + "loss": 1.4073, + "step": 7672 + }, + { + "epoch": 0.01, + "learning_rate": 4.999292360534223e-05, + "loss": 1.0497, + "step": 7673 + }, + { + "epoch": 0.01, + "learning_rate": 4.999292173645859e-05, + "loss": 0.5976, + "step": 7674 + }, + { + "epoch": 0.01, + "learning_rate": 4.999291986732823e-05, + "loss": 1.0724, + "step": 7675 + }, + { + "epoch": 0.01, + "learning_rate": 4.999291799795115e-05, + "loss": 1.0314, + "step": 7676 + }, + { + "epoch": 0.01, + "learning_rate": 4.999291612832735e-05, + "loss": 1.1537, + "step": 7677 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992914258456836e-05, + "loss": 1.3026, + "step": 7678 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992912388339595e-05, + "loss": 0.8799, + "step": 7679 + }, + { + "epoch": 0.01, + "learning_rate": 4.999291051797563e-05, + "loss": 1.2481, + "step": 7680 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992908647364956e-05, + "loss": 1.0373, + "step": 7681 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992906776507565e-05, + "loss": 1.3132, + "step": 7682 + }, + { + "epoch": 0.01, + "learning_rate": 4.999290490540345e-05, + "loss": 1.036, + "step": 7683 + }, + { + "epoch": 0.01, + "learning_rate": 4.999290303405261e-05, + "loss": 0.9312, + "step": 7684 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992901162455054e-05, + "loss": 1.787, + "step": 7685 + }, + { + "epoch": 0.01, + "learning_rate": 4.999289929061078e-05, + "loss": 1.1312, + "step": 7686 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992897418519784e-05, + "loss": 1.0114, + "step": 7687 + }, + { + "epoch": 0.01, + "learning_rate": 4.999289554618207e-05, + "loss": 1.1453, + "step": 7688 + }, + { + "epoch": 0.01, + "learning_rate": 4.999289367359764e-05, + "loss": 1.143, + "step": 7689 + }, + { + "epoch": 0.01, + "learning_rate": 4.999289180076649e-05, + "loss": 1.1481, + "step": 7690 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992889927688614e-05, + "loss": 0.7681, + "step": 7691 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992888054364026e-05, + "loss": 0.9479, + "step": 7692 + }, + { + "epoch": 0.01, + "learning_rate": 4.999288618079272e-05, + "loss": 1.0966, + "step": 7693 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992884306974684e-05, + "loss": 1.1311, + "step": 7694 + }, + { + "epoch": 0.01, + "learning_rate": 4.999288243290994e-05, + "loss": 1.9402, + "step": 7695 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992880558598475e-05, + "loss": 0.9394, + "step": 7696 + }, + { + "epoch": 0.01, + "learning_rate": 4.999287868404029e-05, + "loss": 1.3466, + "step": 7697 + }, + { + "epoch": 0.01, + "learning_rate": 4.999287680923538e-05, + "loss": 0.9499, + "step": 7698 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992874934183756e-05, + "loss": 1.1177, + "step": 7699 + }, + { + "epoch": 0.01, + "learning_rate": 4.999287305888541e-05, + "loss": 1.1028, + "step": 7700 + }, + { + "epoch": 0.01, + "learning_rate": 4.999287118334035e-05, + "loss": 1.7157, + "step": 7701 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992869307548565e-05, + "loss": 1.1457, + "step": 7702 + }, + { + "epoch": 0.01, + "learning_rate": 4.999286743151006e-05, + "loss": 0.9201, + "step": 7703 + }, + { + "epoch": 0.01, + "learning_rate": 4.999286555522484e-05, + "loss": 0.9718, + "step": 7704 + }, + { + "epoch": 0.01, + "learning_rate": 4.99928636786929e-05, + "loss": 1.4427, + "step": 7705 + }, + { + "epoch": 0.01, + "learning_rate": 4.999286180191425e-05, + "loss": 1.1255, + "step": 7706 + }, + { + "epoch": 0.01, + "learning_rate": 4.999285992488887e-05, + "loss": 1.0025, + "step": 7707 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992858047616776e-05, + "loss": 0.9853, + "step": 7708 + }, + { + "epoch": 0.01, + "learning_rate": 4.999285617009796e-05, + "loss": 0.8761, + "step": 7709 + }, + { + "epoch": 0.01, + "learning_rate": 4.999285429233242e-05, + "loss": 0.9686, + "step": 7710 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992852414320165e-05, + "loss": 1.4701, + "step": 7711 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992850536061196e-05, + "loss": 1.2018, + "step": 7712 + }, + { + "epoch": 0.01, + "learning_rate": 4.999284865755551e-05, + "loss": 1.1506, + "step": 7713 + }, + { + "epoch": 0.01, + "learning_rate": 4.99928467788031e-05, + "loss": 1.236, + "step": 7714 + }, + { + "epoch": 0.01, + "learning_rate": 4.999284489980397e-05, + "loss": 1.0325, + "step": 7715 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992843020558124e-05, + "loss": 1.0386, + "step": 7716 + }, + { + "epoch": 0.01, + "learning_rate": 4.999284114106556e-05, + "loss": 1.0008, + "step": 7717 + }, + { + "epoch": 0.01, + "learning_rate": 4.999283926132627e-05, + "loss": 0.8698, + "step": 7718 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992837381340265e-05, + "loss": 0.6912, + "step": 7719 + }, + { + "epoch": 0.01, + "learning_rate": 4.999283550110754e-05, + "loss": 0.861, + "step": 7720 + }, + { + "epoch": 0.01, + "learning_rate": 4.99928336206281e-05, + "loss": 0.6516, + "step": 7721 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992831739901946e-05, + "loss": 0.9092, + "step": 7722 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992829858929064e-05, + "loss": 0.9458, + "step": 7723 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992827977709464e-05, + "loss": 1.2814, + "step": 7724 + }, + { + "epoch": 0.01, + "learning_rate": 4.999282609624315e-05, + "loss": 1.0289, + "step": 7725 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992824214530114e-05, + "loss": 1.1974, + "step": 7726 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992822332570363e-05, + "loss": 0.9542, + "step": 7727 + }, + { + "epoch": 0.01, + "learning_rate": 4.999282045036389e-05, + "loss": 1.0783, + "step": 7728 + }, + { + "epoch": 0.01, + "learning_rate": 4.99928185679107e-05, + "loss": 1.1048, + "step": 7729 + }, + { + "epoch": 0.01, + "learning_rate": 4.999281668521078e-05, + "loss": 1.5022, + "step": 7730 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992814802264164e-05, + "loss": 0.9485, + "step": 7731 + }, + { + "epoch": 0.01, + "learning_rate": 4.999281291907082e-05, + "loss": 1.0487, + "step": 7732 + }, + { + "epoch": 0.01, + "learning_rate": 4.999281103563075e-05, + "loss": 0.9813, + "step": 7733 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992809151943967e-05, + "loss": 1.4835, + "step": 7734 + }, + { + "epoch": 0.01, + "learning_rate": 4.999280726801047e-05, + "loss": 0.905, + "step": 7735 + }, + { + "epoch": 0.01, + "learning_rate": 4.999280538383024e-05, + "loss": 1.3004, + "step": 7736 + }, + { + "epoch": 0.01, + "learning_rate": 4.999280349940331e-05, + "loss": 1.1508, + "step": 7737 + }, + { + "epoch": 0.01, + "learning_rate": 4.999280161472965e-05, + "loss": 0.7234, + "step": 7738 + }, + { + "epoch": 0.01, + "learning_rate": 4.999279972980927e-05, + "loss": 0.7698, + "step": 7739 + }, + { + "epoch": 0.01, + "learning_rate": 4.999279784464218e-05, + "loss": 1.1764, + "step": 7740 + }, + { + "epoch": 0.01, + "learning_rate": 4.999279595922837e-05, + "loss": 1.2074, + "step": 7741 + }, + { + "epoch": 0.01, + "learning_rate": 4.999279407356784e-05, + "loss": 1.1786, + "step": 7742 + }, + { + "epoch": 0.01, + "learning_rate": 4.999279218766059e-05, + "loss": 1.2808, + "step": 7743 + }, + { + "epoch": 0.01, + "learning_rate": 4.999279030150662e-05, + "loss": 1.3466, + "step": 7744 + }, + { + "epoch": 0.01, + "learning_rate": 4.999278841510593e-05, + "loss": 0.9158, + "step": 7745 + }, + { + "epoch": 0.01, + "learning_rate": 4.999278652845854e-05, + "loss": 1.2887, + "step": 7746 + }, + { + "epoch": 0.01, + "learning_rate": 4.999278464156441e-05, + "loss": 1.0082, + "step": 7747 + }, + { + "epoch": 0.01, + "learning_rate": 4.999278275442357e-05, + "loss": 0.1872, + "step": 7748 + }, + { + "epoch": 0.01, + "learning_rate": 4.999278086703601e-05, + "loss": 1.0771, + "step": 7749 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992778979401734e-05, + "loss": 1.3316, + "step": 7750 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992777091520735e-05, + "loss": 1.2208, + "step": 7751 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992775203393026e-05, + "loss": 1.2078, + "step": 7752 + }, + { + "epoch": 0.01, + "learning_rate": 4.999277331501859e-05, + "loss": 1.1975, + "step": 7753 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992771426397444e-05, + "loss": 0.9454, + "step": 7754 + }, + { + "epoch": 0.01, + "learning_rate": 4.999276953752958e-05, + "loss": 1.2826, + "step": 7755 + }, + { + "epoch": 0.01, + "learning_rate": 4.999276764841499e-05, + "loss": 1.4365, + "step": 7756 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992765759053686e-05, + "loss": 1.0539, + "step": 7757 + }, + { + "epoch": 0.01, + "learning_rate": 4.999276386944567e-05, + "loss": 1.0879, + "step": 7758 + }, + { + "epoch": 0.01, + "learning_rate": 4.999276197959093e-05, + "loss": 0.7525, + "step": 7759 + }, + { + "epoch": 0.01, + "learning_rate": 4.999276008948947e-05, + "loss": 1.0603, + "step": 7760 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992758199141293e-05, + "loss": 1.0382, + "step": 7761 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992756308546394e-05, + "loss": 1.4256, + "step": 7762 + }, + { + "epoch": 0.01, + "learning_rate": 4.999275441770479e-05, + "loss": 1.701, + "step": 7763 + }, + { + "epoch": 0.01, + "learning_rate": 4.999275252661646e-05, + "loss": 1.3756, + "step": 7764 + }, + { + "epoch": 0.01, + "learning_rate": 4.999275063528141e-05, + "loss": 1.5012, + "step": 7765 + }, + { + "epoch": 0.01, + "learning_rate": 4.999274874369965e-05, + "loss": 1.3719, + "step": 7766 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992746851871165e-05, + "loss": 0.9829, + "step": 7767 + }, + { + "epoch": 0.01, + "learning_rate": 4.999274495979596e-05, + "loss": 1.0734, + "step": 7768 + }, + { + "epoch": 0.01, + "learning_rate": 4.999274306747404e-05, + "loss": 1.3196, + "step": 7769 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992741174905404e-05, + "loss": 1.0388, + "step": 7770 + }, + { + "epoch": 0.01, + "learning_rate": 4.999273928209005e-05, + "loss": 1.3656, + "step": 7771 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992737389027977e-05, + "loss": 1.1544, + "step": 7772 + }, + { + "epoch": 0.01, + "learning_rate": 4.999273549571919e-05, + "loss": 0.8635, + "step": 7773 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992733602163674e-05, + "loss": 1.4682, + "step": 7774 + }, + { + "epoch": 0.01, + "learning_rate": 4.999273170836145e-05, + "loss": 1.3557, + "step": 7775 + }, + { + "epoch": 0.01, + "learning_rate": 4.999272981431251e-05, + "loss": 1.0475, + "step": 7776 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992727920016844e-05, + "loss": 1.0465, + "step": 7777 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992726025474466e-05, + "loss": 1.0858, + "step": 7778 + }, + { + "epoch": 0.01, + "learning_rate": 4.999272413068538e-05, + "loss": 1.0212, + "step": 7779 + }, + { + "epoch": 0.01, + "learning_rate": 4.999272223564956e-05, + "loss": 1.0863, + "step": 7780 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992720340367025e-05, + "loss": 0.8423, + "step": 7781 + }, + { + "epoch": 0.01, + "learning_rate": 4.999271844483778e-05, + "loss": 0.7978, + "step": 7782 + }, + { + "epoch": 0.01, + "learning_rate": 4.999271654906181e-05, + "loss": 1.2256, + "step": 7783 + }, + { + "epoch": 0.01, + "learning_rate": 4.999271465303913e-05, + "loss": 0.8826, + "step": 7784 + }, + { + "epoch": 0.01, + "learning_rate": 4.999271275676972e-05, + "loss": 0.8992, + "step": 7785 + }, + { + "epoch": 0.01, + "learning_rate": 4.999271086025361e-05, + "loss": 1.2419, + "step": 7786 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992708963490764e-05, + "loss": 1.3075, + "step": 7787 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992707066481216e-05, + "loss": 0.6001, + "step": 7788 + }, + { + "epoch": 0.01, + "learning_rate": 4.999270516922494e-05, + "loss": 0.762, + "step": 7789 + }, + { + "epoch": 0.01, + "learning_rate": 4.999270327172195e-05, + "loss": 0.955, + "step": 7790 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992701373972245e-05, + "loss": 1.1614, + "step": 7791 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992699475975817e-05, + "loss": 1.1102, + "step": 7792 + }, + { + "epoch": 0.01, + "learning_rate": 4.999269757773268e-05, + "loss": 1.5231, + "step": 7793 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992695679242815e-05, + "loss": 0.9765, + "step": 7794 + }, + { + "epoch": 0.01, + "learning_rate": 4.999269378050624e-05, + "loss": 0.8491, + "step": 7795 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992691881522945e-05, + "loss": 1.0887, + "step": 7796 + }, + { + "epoch": 0.01, + "learning_rate": 4.999268998229294e-05, + "loss": 1.1864, + "step": 7797 + }, + { + "epoch": 0.01, + "learning_rate": 4.999268808281621e-05, + "loss": 1.0629, + "step": 7798 + }, + { + "epoch": 0.01, + "learning_rate": 4.999268618309276e-05, + "loss": 1.305, + "step": 7799 + }, + { + "epoch": 0.01, + "learning_rate": 4.99926842831226e-05, + "loss": 1.5089, + "step": 7800 + }, + { + "epoch": 0.01, + "learning_rate": 4.999268238290572e-05, + "loss": 1.2749, + "step": 7801 + }, + { + "epoch": 0.01, + "learning_rate": 4.999268048244212e-05, + "loss": 1.1039, + "step": 7802 + }, + { + "epoch": 0.01, + "learning_rate": 4.999267858173181e-05, + "loss": 1.1027, + "step": 7803 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992676680774773e-05, + "loss": 1.5077, + "step": 7804 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992674779571024e-05, + "loss": 1.3028, + "step": 7805 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992672878120564e-05, + "loss": 1.2525, + "step": 7806 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992670976423374e-05, + "loss": 1.3055, + "step": 7807 + }, + { + "epoch": 0.01, + "learning_rate": 4.999266907447947e-05, + "loss": 1.0859, + "step": 7808 + }, + { + "epoch": 0.01, + "learning_rate": 4.999266717228886e-05, + "loss": 0.8956, + "step": 7809 + }, + { + "epoch": 0.01, + "learning_rate": 4.999266526985152e-05, + "loss": 0.9879, + "step": 7810 + }, + { + "epoch": 0.01, + "learning_rate": 4.999266336716747e-05, + "loss": 1.046, + "step": 7811 + }, + { + "epoch": 0.01, + "learning_rate": 4.99926614642367e-05, + "loss": 1.4059, + "step": 7812 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992659561059216e-05, + "loss": 1.278, + "step": 7813 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992657657635014e-05, + "loss": 1.0187, + "step": 7814 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992655753964094e-05, + "loss": 1.1006, + "step": 7815 + }, + { + "epoch": 0.01, + "learning_rate": 4.999265385004646e-05, + "loss": 1.1579, + "step": 7816 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992651945882105e-05, + "loss": 1.092, + "step": 7817 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992650041471034e-05, + "loss": 0.9898, + "step": 7818 + }, + { + "epoch": 0.01, + "learning_rate": 4.999264813681325e-05, + "loss": 1.1935, + "step": 7819 + }, + { + "epoch": 0.01, + "learning_rate": 4.999264623190874e-05, + "loss": 0.9533, + "step": 7820 + }, + { + "epoch": 0.01, + "learning_rate": 4.999264432675752e-05, + "loss": 1.0438, + "step": 7821 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992642421359583e-05, + "loss": 1.2504, + "step": 7822 + }, + { + "epoch": 0.01, + "learning_rate": 4.999264051571493e-05, + "loss": 0.7561, + "step": 7823 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992638609823556e-05, + "loss": 1.0104, + "step": 7824 + }, + { + "epoch": 0.01, + "learning_rate": 4.999263670368547e-05, + "loss": 1.2529, + "step": 7825 + }, + { + "epoch": 0.01, + "learning_rate": 4.999263479730066e-05, + "loss": 1.1248, + "step": 7826 + }, + { + "epoch": 0.01, + "learning_rate": 4.999263289066914e-05, + "loss": 1.0451, + "step": 7827 + }, + { + "epoch": 0.01, + "learning_rate": 4.99926309837909e-05, + "loss": 1.1424, + "step": 7828 + }, + { + "epoch": 0.01, + "learning_rate": 4.999262907666594e-05, + "loss": 0.9508, + "step": 7829 + }, + { + "epoch": 0.01, + "learning_rate": 4.999262716929427e-05, + "loss": 1.1694, + "step": 7830 + }, + { + "epoch": 0.01, + "learning_rate": 4.999262526167588e-05, + "loss": 1.0555, + "step": 7831 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992623353810774e-05, + "loss": 1.0308, + "step": 7832 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992621445698956e-05, + "loss": 0.9165, + "step": 7833 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992619537340414e-05, + "loss": 1.1295, + "step": 7834 + }, + { + "epoch": 0.01, + "learning_rate": 4.999261762873516e-05, + "loss": 0.9402, + "step": 7835 + }, + { + "epoch": 0.01, + "learning_rate": 4.999261571988319e-05, + "loss": 1.0604, + "step": 7836 + }, + { + "epoch": 0.01, + "learning_rate": 4.99926138107845e-05, + "loss": 1.0774, + "step": 7837 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992611901439096e-05, + "loss": 0.8003, + "step": 7838 + }, + { + "epoch": 0.01, + "learning_rate": 4.999260999184697e-05, + "loss": 0.8127, + "step": 7839 + }, + { + "epoch": 0.01, + "learning_rate": 4.999260808200813e-05, + "loss": 0.5362, + "step": 7840 + }, + { + "epoch": 0.01, + "learning_rate": 4.999260617192258e-05, + "loss": 0.4679, + "step": 7841 + }, + { + "epoch": 0.01, + "learning_rate": 4.999260426159031e-05, + "loss": 1.2592, + "step": 7842 + }, + { + "epoch": 0.01, + "learning_rate": 4.999260235101132e-05, + "loss": 1.0108, + "step": 7843 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992600440185614e-05, + "loss": 0.7332, + "step": 7844 + }, + { + "epoch": 0.01, + "learning_rate": 4.99925985291132e-05, + "loss": 1.0624, + "step": 7845 + }, + { + "epoch": 0.01, + "learning_rate": 4.999259661779406e-05, + "loss": 0.7453, + "step": 7846 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992594706228204e-05, + "loss": 0.8955, + "step": 7847 + }, + { + "epoch": 0.01, + "learning_rate": 4.999259279441564e-05, + "loss": 0.8746, + "step": 7848 + }, + { + "epoch": 0.01, + "learning_rate": 4.999259088235635e-05, + "loss": 1.0011, + "step": 7849 + }, + { + "epoch": 0.01, + "learning_rate": 4.999258897005035e-05, + "loss": 0.8298, + "step": 7850 + }, + { + "epoch": 0.01, + "learning_rate": 4.999258705749763e-05, + "loss": 0.9943, + "step": 7851 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992585144698197e-05, + "loss": 1.1311, + "step": 7852 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992583231652046e-05, + "loss": 0.849, + "step": 7853 + }, + { + "epoch": 0.01, + "learning_rate": 4.999258131835918e-05, + "loss": 0.9639, + "step": 7854 + }, + { + "epoch": 0.01, + "learning_rate": 4.999257940481959e-05, + "loss": 1.1424, + "step": 7855 + }, + { + "epoch": 0.01, + "learning_rate": 4.999257749103329e-05, + "loss": 1.0167, + "step": 7856 + }, + { + "epoch": 0.01, + "learning_rate": 4.999257557700028e-05, + "loss": 1.1265, + "step": 7857 + }, + { + "epoch": 0.01, + "learning_rate": 4.999257366272054e-05, + "loss": 0.9859, + "step": 7858 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992571748194096e-05, + "loss": 1.2524, + "step": 7859 + }, + { + "epoch": 0.01, + "learning_rate": 4.999256983342093e-05, + "loss": 1.0596, + "step": 7860 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992567918401046e-05, + "loss": 0.7429, + "step": 7861 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992566003134456e-05, + "loss": 1.1427, + "step": 7862 + }, + { + "epoch": 0.01, + "learning_rate": 4.999256408762114e-05, + "loss": 1.18, + "step": 7863 + }, + { + "epoch": 0.01, + "learning_rate": 4.999256217186111e-05, + "loss": 1.3083, + "step": 7864 + }, + { + "epoch": 0.01, + "learning_rate": 4.999256025585436e-05, + "loss": 1.0804, + "step": 7865 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992558339600904e-05, + "loss": 1.179, + "step": 7866 + }, + { + "epoch": 0.01, + "learning_rate": 4.999255642310073e-05, + "loss": 1.2044, + "step": 7867 + }, + { + "epoch": 0.01, + "learning_rate": 4.999255450635384e-05, + "loss": 0.9728, + "step": 7868 + }, + { + "epoch": 0.01, + "learning_rate": 4.999255258936023e-05, + "loss": 1.3624, + "step": 7869 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992550672119894e-05, + "loss": 1.4786, + "step": 7870 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992548754632865e-05, + "loss": 1.0779, + "step": 7871 + }, + { + "epoch": 0.01, + "learning_rate": 4.99925468368991e-05, + "loss": 1.2894, + "step": 7872 + }, + { + "epoch": 0.01, + "learning_rate": 4.999254491891863e-05, + "loss": 1.2372, + "step": 7873 + }, + { + "epoch": 0.01, + "learning_rate": 4.999254300069144e-05, + "loss": 1.0072, + "step": 7874 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992541082217536e-05, + "loss": 1.0588, + "step": 7875 + }, + { + "epoch": 0.01, + "learning_rate": 4.999253916349692e-05, + "loss": 1.3491, + "step": 7876 + }, + { + "epoch": 0.01, + "learning_rate": 4.999253724452958e-05, + "loss": 1.5759, + "step": 7877 + }, + { + "epoch": 0.01, + "learning_rate": 4.999253532531553e-05, + "loss": 0.7801, + "step": 7878 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992533405854756e-05, + "loss": 1.0617, + "step": 7879 + }, + { + "epoch": 0.01, + "learning_rate": 4.999253148614727e-05, + "loss": 1.0505, + "step": 7880 + }, + { + "epoch": 0.01, + "learning_rate": 4.999252956619308e-05, + "loss": 1.1608, + "step": 7881 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992527645992164e-05, + "loss": 1.2429, + "step": 7882 + }, + { + "epoch": 0.01, + "learning_rate": 4.999252572554454e-05, + "loss": 1.1126, + "step": 7883 + }, + { + "epoch": 0.01, + "learning_rate": 4.999252380485019e-05, + "loss": 0.9354, + "step": 7884 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992521883909127e-05, + "loss": 1.076, + "step": 7885 + }, + { + "epoch": 0.01, + "learning_rate": 4.999251996272135e-05, + "loss": 1.2101, + "step": 7886 + }, + { + "epoch": 0.01, + "learning_rate": 4.999251804128685e-05, + "loss": 1.2844, + "step": 7887 + }, + { + "epoch": 0.01, + "learning_rate": 4.999251611960565e-05, + "loss": 1.1769, + "step": 7888 + }, + { + "epoch": 0.01, + "learning_rate": 4.999251419767772e-05, + "loss": 1.2664, + "step": 7889 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992512275503076e-05, + "loss": 1.0522, + "step": 7890 + }, + { + "epoch": 0.01, + "learning_rate": 4.999251035308172e-05, + "loss": 1.0897, + "step": 7891 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992508430413654e-05, + "loss": 1.0961, + "step": 7892 + }, + { + "epoch": 0.01, + "learning_rate": 4.999250650749887e-05, + "loss": 1.3068, + "step": 7893 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992504584337365e-05, + "loss": 0.4432, + "step": 7894 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992502660929155e-05, + "loss": 0.7508, + "step": 7895 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992500737274214e-05, + "loss": 0.9558, + "step": 7896 + }, + { + "epoch": 0.01, + "learning_rate": 4.999249881337257e-05, + "loss": 0.8569, + "step": 7897 + }, + { + "epoch": 0.01, + "learning_rate": 4.99924968892242e-05, + "loss": 0.9947, + "step": 7898 + }, + { + "epoch": 0.01, + "learning_rate": 4.999249496482913e-05, + "loss": 1.2156, + "step": 7899 + }, + { + "epoch": 0.01, + "learning_rate": 4.999249304018733e-05, + "loss": 1.2722, + "step": 7900 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992491115298824e-05, + "loss": 0.9461, + "step": 7901 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992489190163595e-05, + "loss": 0.937, + "step": 7902 + }, + { + "epoch": 0.01, + "learning_rate": 4.999248726478165e-05, + "loss": 1.1194, + "step": 7903 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992485339152993e-05, + "loss": 1.1772, + "step": 7904 + }, + { + "epoch": 0.01, + "learning_rate": 4.999248341327763e-05, + "loss": 1.2923, + "step": 7905 + }, + { + "epoch": 0.01, + "learning_rate": 4.999248148715554e-05, + "loss": 1.09, + "step": 7906 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992479560786744e-05, + "loss": 2.6286, + "step": 7907 + }, + { + "epoch": 0.01, + "learning_rate": 4.999247763417122e-05, + "loss": 3.4125, + "step": 7908 + }, + { + "epoch": 0.01, + "learning_rate": 4.999247570730899e-05, + "loss": 1.1838, + "step": 7909 + }, + { + "epoch": 0.01, + "learning_rate": 4.999247378020004e-05, + "loss": 1.379, + "step": 7910 + }, + { + "epoch": 0.01, + "learning_rate": 4.999247185284438e-05, + "loss": 1.099, + "step": 7911 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992469925242e-05, + "loss": 1.0116, + "step": 7912 + }, + { + "epoch": 0.01, + "learning_rate": 4.999246799739291e-05, + "loss": 1.0267, + "step": 7913 + }, + { + "epoch": 0.01, + "learning_rate": 4.999246606929711e-05, + "loss": 1.4954, + "step": 7914 + }, + { + "epoch": 0.01, + "learning_rate": 4.999246414095458e-05, + "loss": 1.6992, + "step": 7915 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992462212365345e-05, + "loss": 1.3373, + "step": 7916 + }, + { + "epoch": 0.01, + "learning_rate": 4.999246028352939e-05, + "loss": 0.8221, + "step": 7917 + }, + { + "epoch": 0.01, + "learning_rate": 4.999245835444672e-05, + "loss": 1.0465, + "step": 7918 + }, + { + "epoch": 0.01, + "learning_rate": 4.999245642511734e-05, + "loss": 1.1051, + "step": 7919 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992454495541244e-05, + "loss": 1.0927, + "step": 7920 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992452565718426e-05, + "loss": 1.1845, + "step": 7921 + }, + { + "epoch": 0.01, + "learning_rate": 4.99924506356489e-05, + "loss": 1.0775, + "step": 7922 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992448705332654e-05, + "loss": 1.3101, + "step": 7923 + }, + { + "epoch": 0.01, + "learning_rate": 4.99924467747697e-05, + "loss": 1.2129, + "step": 7924 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992444843960027e-05, + "loss": 1.3299, + "step": 7925 + }, + { + "epoch": 0.01, + "learning_rate": 4.999244291290364e-05, + "loss": 1.1885, + "step": 7926 + }, + { + "epoch": 0.01, + "learning_rate": 4.999244098160053e-05, + "loss": 0.8202, + "step": 7927 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992439050050715e-05, + "loss": 1.169, + "step": 7928 + }, + { + "epoch": 0.01, + "learning_rate": 4.999243711825419e-05, + "loss": 0.9432, + "step": 7929 + }, + { + "epoch": 0.01, + "learning_rate": 4.999243518621094e-05, + "loss": 1.1669, + "step": 7930 + }, + { + "epoch": 0.01, + "learning_rate": 4.999243325392098e-05, + "loss": 1.0737, + "step": 7931 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992431321384294e-05, + "loss": 1.1336, + "step": 7932 + }, + { + "epoch": 0.01, + "learning_rate": 4.99924293886009e-05, + "loss": 1.4216, + "step": 7933 + }, + { + "epoch": 0.01, + "learning_rate": 4.99924274555708e-05, + "loss": 1.34, + "step": 7934 + }, + { + "epoch": 0.01, + "learning_rate": 4.999242552229398e-05, + "loss": 0.8725, + "step": 7935 + }, + { + "epoch": 0.01, + "learning_rate": 4.999242358877044e-05, + "loss": 1.1615, + "step": 7936 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992421655000194e-05, + "loss": 1.2269, + "step": 7937 + }, + { + "epoch": 0.01, + "learning_rate": 4.999241972098323e-05, + "loss": 1.1951, + "step": 7938 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992417786719545e-05, + "loss": 1.058, + "step": 7939 + }, + { + "epoch": 0.01, + "learning_rate": 4.999241585220915e-05, + "loss": 1.3327, + "step": 7940 + }, + { + "epoch": 0.01, + "learning_rate": 4.999241391745205e-05, + "loss": 1.2085, + "step": 7941 + }, + { + "epoch": 0.01, + "learning_rate": 4.999241198244822e-05, + "loss": 0.9829, + "step": 7942 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992410047197685e-05, + "loss": 1.2403, + "step": 7943 + }, + { + "epoch": 0.01, + "learning_rate": 4.999240811170043e-05, + "loss": 1.1999, + "step": 7944 + }, + { + "epoch": 0.01, + "learning_rate": 4.999240617595646e-05, + "loss": 0.9268, + "step": 7945 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992404239965786e-05, + "loss": 1.1453, + "step": 7946 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992402303728395e-05, + "loss": 0.9478, + "step": 7947 + }, + { + "epoch": 0.01, + "learning_rate": 4.999240036724427e-05, + "loss": 0.852, + "step": 7948 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992398430513454e-05, + "loss": 1.402, + "step": 7949 + }, + { + "epoch": 0.01, + "learning_rate": 4.999239649353591e-05, + "loss": 1.0803, + "step": 7950 + }, + { + "epoch": 0.01, + "learning_rate": 4.999239455631166e-05, + "loss": 1.0025, + "step": 7951 + }, + { + "epoch": 0.01, + "learning_rate": 4.999239261884069e-05, + "loss": 1.3306, + "step": 7952 + }, + { + "epoch": 0.01, + "learning_rate": 4.999239068112302e-05, + "loss": 1.1896, + "step": 7953 + }, + { + "epoch": 0.01, + "learning_rate": 4.999238874315861e-05, + "loss": 0.9684, + "step": 7954 + }, + { + "epoch": 0.01, + "learning_rate": 4.99923868049475e-05, + "loss": 1.3475, + "step": 7955 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992384866489676e-05, + "loss": 1.0657, + "step": 7956 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992382927785135e-05, + "loss": 1.1366, + "step": 7957 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992380988833884e-05, + "loss": 1.3813, + "step": 7958 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992379049635916e-05, + "loss": 1.1277, + "step": 7959 + }, + { + "epoch": 0.01, + "learning_rate": 4.999237711019124e-05, + "loss": 1.2634, + "step": 7960 + }, + { + "epoch": 0.01, + "learning_rate": 4.999237517049984e-05, + "loss": 1.0193, + "step": 7961 + }, + { + "epoch": 0.01, + "learning_rate": 4.999237323056173e-05, + "loss": 1.0104, + "step": 7962 + }, + { + "epoch": 0.01, + "learning_rate": 4.99923712903769e-05, + "loss": 1.8146, + "step": 7963 + }, + { + "epoch": 0.01, + "learning_rate": 4.999236934994536e-05, + "loss": 0.9461, + "step": 7964 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992367409267105e-05, + "loss": 1.1662, + "step": 7965 + }, + { + "epoch": 0.01, + "learning_rate": 4.999236546834214e-05, + "loss": 0.9277, + "step": 7966 + }, + { + "epoch": 0.01, + "learning_rate": 4.999236352717046e-05, + "loss": 1.0035, + "step": 7967 + }, + { + "epoch": 0.01, + "learning_rate": 4.999236158575206e-05, + "loss": 0.9615, + "step": 7968 + }, + { + "epoch": 0.01, + "learning_rate": 4.999235964408695e-05, + "loss": 0.8536, + "step": 7969 + }, + { + "epoch": 0.01, + "learning_rate": 4.999235770217513e-05, + "loss": 1.0511, + "step": 7970 + }, + { + "epoch": 0.01, + "learning_rate": 4.999235576001659e-05, + "loss": 0.8384, + "step": 7971 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992353817611336e-05, + "loss": 0.7573, + "step": 7972 + }, + { + "epoch": 0.01, + "learning_rate": 4.999235187495938e-05, + "loss": 1.2414, + "step": 7973 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992349932060696e-05, + "loss": 1.2377, + "step": 7974 + }, + { + "epoch": 0.01, + "learning_rate": 4.99923479889153e-05, + "loss": 1.0569, + "step": 7975 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992346045523194e-05, + "loss": 0.9342, + "step": 7976 + }, + { + "epoch": 0.01, + "learning_rate": 4.999234410188437e-05, + "loss": 1.0519, + "step": 7977 + }, + { + "epoch": 0.01, + "learning_rate": 4.999234215799884e-05, + "loss": 1.0635, + "step": 7978 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992340213866585e-05, + "loss": 0.9977, + "step": 7979 + }, + { + "epoch": 0.01, + "learning_rate": 4.999233826948763e-05, + "loss": 1.084, + "step": 7980 + }, + { + "epoch": 0.01, + "learning_rate": 4.999233632486194e-05, + "loss": 1.1179, + "step": 7981 + }, + { + "epoch": 0.01, + "learning_rate": 4.999233437998956e-05, + "loss": 1.1019, + "step": 7982 + }, + { + "epoch": 0.01, + "learning_rate": 4.999233243487045e-05, + "loss": 0.6534, + "step": 7983 + }, + { + "epoch": 0.01, + "learning_rate": 4.999233048950464e-05, + "loss": 1.2761, + "step": 7984 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992328543892096e-05, + "loss": 0.9804, + "step": 7985 + }, + { + "epoch": 0.01, + "learning_rate": 4.999232659803286e-05, + "loss": 0.9749, + "step": 7986 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992324651926895e-05, + "loss": 1.5774, + "step": 7987 + }, + { + "epoch": 0.01, + "learning_rate": 4.999232270557422e-05, + "loss": 1.1378, + "step": 7988 + }, + { + "epoch": 0.01, + "learning_rate": 4.999232075897483e-05, + "loss": 0.9541, + "step": 7989 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992318812128734e-05, + "loss": 1.1965, + "step": 7990 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992316865035924e-05, + "loss": 0.8706, + "step": 7991 + }, + { + "epoch": 0.01, + "learning_rate": 4.999231491769639e-05, + "loss": 0.7041, + "step": 7992 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992312970110153e-05, + "loss": 0.6082, + "step": 7993 + }, + { + "epoch": 0.01, + "learning_rate": 4.999231102227719e-05, + "loss": 1.1324, + "step": 7994 + }, + { + "epoch": 0.01, + "learning_rate": 4.999230907419753e-05, + "loss": 1.0204, + "step": 7995 + }, + { + "epoch": 0.01, + "learning_rate": 4.999230712587115e-05, + "loss": 0.9703, + "step": 7996 + }, + { + "epoch": 0.01, + "learning_rate": 4.999230517729805e-05, + "loss": 1.0797, + "step": 7997 + }, + { + "epoch": 0.01, + "learning_rate": 4.999230322847824e-05, + "loss": 1.1712, + "step": 7998 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992301279411716e-05, + "loss": 1.0651, + "step": 7999 + }, + { + "epoch": 0.01, + "learning_rate": 4.999229933009848e-05, + "loss": 0.9631, + "step": 8000 + }, + { + "epoch": 0.01, + "eval_loss": 1.054444432258606, + "eval_runtime": 84.4105, + "eval_samples_per_second": 16.408, + "eval_steps_per_second": 4.111, + "step": 8000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992297380538534e-05, + "loss": 1.1107, + "step": 8001 + }, + { + "epoch": 0.01, + "learning_rate": 4.999229543073187e-05, + "loss": 1.0455, + "step": 8002 + }, + { + "epoch": 0.01, + "learning_rate": 4.999229348067849e-05, + "loss": 1.0669, + "step": 8003 + }, + { + "epoch": 0.01, + "learning_rate": 4.99922915303784e-05, + "loss": 1.3531, + "step": 8004 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992289579831595e-05, + "loss": 1.266, + "step": 8005 + }, + { + "epoch": 0.01, + "learning_rate": 4.999228762903808e-05, + "loss": 1.009, + "step": 8006 + }, + { + "epoch": 0.01, + "learning_rate": 4.999228567799785e-05, + "loss": 0.7729, + "step": 8007 + }, + { + "epoch": 0.01, + "learning_rate": 4.999228372671091e-05, + "loss": 1.1114, + "step": 8008 + }, + { + "epoch": 0.01, + "learning_rate": 4.999228177517725e-05, + "loss": 0.8466, + "step": 8009 + }, + { + "epoch": 0.01, + "learning_rate": 4.999227982339688e-05, + "loss": 1.5055, + "step": 8010 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992277871369806e-05, + "loss": 1.2978, + "step": 8011 + }, + { + "epoch": 0.01, + "learning_rate": 4.999227591909601e-05, + "loss": 1.2087, + "step": 8012 + }, + { + "epoch": 0.01, + "learning_rate": 4.99922739665755e-05, + "loss": 1.776, + "step": 8013 + }, + { + "epoch": 0.01, + "learning_rate": 4.999227201380827e-05, + "loss": 1.6039, + "step": 8014 + }, + { + "epoch": 0.01, + "learning_rate": 4.999227006079434e-05, + "loss": 1.1489, + "step": 8015 + }, + { + "epoch": 0.01, + "learning_rate": 4.999226810753369e-05, + "loss": 1.2858, + "step": 8016 + }, + { + "epoch": 0.01, + "learning_rate": 4.999226615402632e-05, + "loss": 1.231, + "step": 8017 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992264200272254e-05, + "loss": 1.1254, + "step": 8018 + }, + { + "epoch": 0.01, + "learning_rate": 4.999226224627146e-05, + "loss": 1.0541, + "step": 8019 + }, + { + "epoch": 0.01, + "learning_rate": 4.999226029202396e-05, + "loss": 1.0562, + "step": 8020 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992258337529745e-05, + "loss": 1.3727, + "step": 8021 + }, + { + "epoch": 0.01, + "learning_rate": 4.999225638278882e-05, + "loss": 1.0765, + "step": 8022 + }, + { + "epoch": 0.01, + "learning_rate": 4.999225442780118e-05, + "loss": 1.0304, + "step": 8023 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992252472566825e-05, + "loss": 0.962, + "step": 8024 + }, + { + "epoch": 0.01, + "learning_rate": 4.999225051708576e-05, + "loss": 0.9971, + "step": 8025 + }, + { + "epoch": 0.01, + "learning_rate": 4.999224856135798e-05, + "loss": 1.0339, + "step": 8026 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992246605383485e-05, + "loss": 1.0693, + "step": 8027 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992244649162276e-05, + "loss": 1.1078, + "step": 8028 + }, + { + "epoch": 0.01, + "learning_rate": 4.999224269269436e-05, + "loss": 1.3662, + "step": 8029 + }, + { + "epoch": 0.01, + "learning_rate": 4.999224073597973e-05, + "loss": 1.4999, + "step": 8030 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992238779018386e-05, + "loss": 1.6307, + "step": 8031 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992236821810336e-05, + "loss": 1.8442, + "step": 8032 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992234864355556e-05, + "loss": 1.3682, + "step": 8033 + }, + { + "epoch": 0.01, + "learning_rate": 4.999223290665408e-05, + "loss": 1.2394, + "step": 8034 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992230948705884e-05, + "loss": 1.0143, + "step": 8035 + }, + { + "epoch": 0.01, + "learning_rate": 4.999222899051097e-05, + "loss": 0.952, + "step": 8036 + }, + { + "epoch": 0.01, + "learning_rate": 4.999222703206936e-05, + "loss": 0.9989, + "step": 8037 + }, + { + "epoch": 0.01, + "learning_rate": 4.999222507338102e-05, + "loss": 1.2007, + "step": 8038 + }, + { + "epoch": 0.01, + "learning_rate": 4.999222311444597e-05, + "loss": 1.1594, + "step": 8039 + }, + { + "epoch": 0.01, + "learning_rate": 4.999222115526422e-05, + "loss": 1.6533, + "step": 8040 + }, + { + "epoch": 0.01, + "learning_rate": 4.999221919583574e-05, + "loss": 1.2455, + "step": 8041 + }, + { + "epoch": 0.01, + "learning_rate": 4.999221723616056e-05, + "loss": 1.1915, + "step": 8042 + }, + { + "epoch": 0.01, + "learning_rate": 4.999221527623866e-05, + "loss": 0.9804, + "step": 8043 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992213316070054e-05, + "loss": 1.0802, + "step": 8044 + }, + { + "epoch": 0.01, + "learning_rate": 4.999221135565473e-05, + "loss": 1.2212, + "step": 8045 + }, + { + "epoch": 0.01, + "learning_rate": 4.99922093949927e-05, + "loss": 1.0551, + "step": 8046 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992207434083946e-05, + "loss": 1.2246, + "step": 8047 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992205472928485e-05, + "loss": 1.357, + "step": 8048 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992203511526314e-05, + "loss": 1.0874, + "step": 8049 + }, + { + "epoch": 0.01, + "learning_rate": 4.999220154987743e-05, + "loss": 1.0338, + "step": 8050 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992199587981835e-05, + "loss": 1.3102, + "step": 8051 + }, + { + "epoch": 0.01, + "learning_rate": 4.999219762583952e-05, + "loss": 0.9823, + "step": 8052 + }, + { + "epoch": 0.01, + "learning_rate": 4.99921956634505e-05, + "loss": 1.0771, + "step": 8053 + }, + { + "epoch": 0.01, + "learning_rate": 4.999219370081477e-05, + "loss": 0.9605, + "step": 8054 + }, + { + "epoch": 0.01, + "learning_rate": 4.999219173793232e-05, + "loss": 0.8811, + "step": 8055 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992189774803163e-05, + "loss": 3.6941, + "step": 8056 + }, + { + "epoch": 0.01, + "learning_rate": 4.999218781142729e-05, + "loss": 6.1163, + "step": 8057 + }, + { + "epoch": 0.01, + "learning_rate": 4.999218584780471e-05, + "loss": 4.5305, + "step": 8058 + }, + { + "epoch": 0.01, + "learning_rate": 4.999218388393541e-05, + "loss": 5.9376, + "step": 8059 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992181919819403e-05, + "loss": 5.7547, + "step": 8060 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992179955456676e-05, + "loss": 5.3154, + "step": 8061 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992177990847245e-05, + "loss": 3.2164, + "step": 8062 + }, + { + "epoch": 0.01, + "learning_rate": 4.99921760259911e-05, + "loss": 1.0417, + "step": 8063 + }, + { + "epoch": 0.01, + "learning_rate": 4.999217406088824e-05, + "loss": 1.563, + "step": 8064 + }, + { + "epoch": 0.01, + "learning_rate": 4.999217209553867e-05, + "loss": 1.1756, + "step": 8065 + }, + { + "epoch": 0.01, + "learning_rate": 4.999217012994239e-05, + "loss": 1.4225, + "step": 8066 + }, + { + "epoch": 0.01, + "learning_rate": 4.99921681640994e-05, + "loss": 1.0189, + "step": 8067 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992166198009685e-05, + "loss": 0.7917, + "step": 8068 + }, + { + "epoch": 0.01, + "learning_rate": 4.999216423167327e-05, + "loss": 1.1507, + "step": 8069 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992162265090137e-05, + "loss": 1.0214, + "step": 8070 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992160298260294e-05, + "loss": 0.8676, + "step": 8071 + }, + { + "epoch": 0.01, + "learning_rate": 4.999215833118374e-05, + "loss": 1.2787, + "step": 8072 + }, + { + "epoch": 0.01, + "learning_rate": 4.999215636386048e-05, + "loss": 1.5972, + "step": 8073 + }, + { + "epoch": 0.01, + "learning_rate": 4.999215439629049e-05, + "loss": 1.3106, + "step": 8074 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992152428473806e-05, + "loss": 1.2539, + "step": 8075 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992150460410405e-05, + "loss": 1.1408, + "step": 8076 + }, + { + "epoch": 0.01, + "learning_rate": 4.999214849210029e-05, + "loss": 1.209, + "step": 8077 + }, + { + "epoch": 0.01, + "learning_rate": 4.999214652354346e-05, + "loss": 1.2816, + "step": 8078 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992144554739915e-05, + "loss": 1.1278, + "step": 8079 + }, + { + "epoch": 0.01, + "learning_rate": 4.999214258568967e-05, + "loss": 1.2074, + "step": 8080 + }, + { + "epoch": 0.01, + "learning_rate": 4.99921406163927e-05, + "loss": 1.1355, + "step": 8081 + }, + { + "epoch": 0.01, + "learning_rate": 4.999213864684903e-05, + "loss": 0.933, + "step": 8082 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992136677058646e-05, + "loss": 1.1354, + "step": 8083 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992134707021544e-05, + "loss": 1.0388, + "step": 8084 + }, + { + "epoch": 0.01, + "learning_rate": 4.999213273673774e-05, + "loss": 3.3175, + "step": 8085 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992130766207214e-05, + "loss": 1.2026, + "step": 8086 + }, + { + "epoch": 0.01, + "learning_rate": 4.999212879542998e-05, + "loss": 1.2803, + "step": 8087 + }, + { + "epoch": 0.01, + "learning_rate": 4.999212682440604e-05, + "loss": 1.1053, + "step": 8088 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992124853135384e-05, + "loss": 1.3526, + "step": 8089 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992122881618006e-05, + "loss": 1.3599, + "step": 8090 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992120909853925e-05, + "loss": 0.9951, + "step": 8091 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992118937843134e-05, + "loss": 1.264, + "step": 8092 + }, + { + "epoch": 0.01, + "learning_rate": 4.999211696558563e-05, + "loss": 0.7493, + "step": 8093 + }, + { + "epoch": 0.01, + "learning_rate": 4.999211499308142e-05, + "loss": 1.1717, + "step": 8094 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992113020330486e-05, + "loss": 1.0709, + "step": 8095 + }, + { + "epoch": 0.01, + "learning_rate": 4.999211104733285e-05, + "loss": 1.1257, + "step": 8096 + }, + { + "epoch": 0.01, + "learning_rate": 4.99921090740885e-05, + "loss": 0.7699, + "step": 8097 + }, + { + "epoch": 0.01, + "learning_rate": 4.999210710059743e-05, + "loss": 2.4386, + "step": 8098 + }, + { + "epoch": 0.01, + "learning_rate": 4.999210512685966e-05, + "loss": 1.8469, + "step": 8099 + }, + { + "epoch": 0.01, + "learning_rate": 4.999210315287518e-05, + "loss": 1.0731, + "step": 8100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999210117864398e-05, + "loss": 1.175, + "step": 8101 + }, + { + "epoch": 0.01, + "learning_rate": 4.999209920416607e-05, + "loss": 1.1062, + "step": 8102 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992097229441456e-05, + "loss": 0.981, + "step": 8103 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992095254470115e-05, + "loss": 0.514, + "step": 8104 + }, + { + "epoch": 0.01, + "learning_rate": 4.999209327925208e-05, + "loss": 0.6874, + "step": 8105 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992091303787324e-05, + "loss": 1.045, + "step": 8106 + }, + { + "epoch": 0.01, + "learning_rate": 4.999208932807586e-05, + "loss": 1.1178, + "step": 8107 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992087352117684e-05, + "loss": 1.1132, + "step": 8108 + }, + { + "epoch": 0.01, + "learning_rate": 4.999208537591279e-05, + "loss": 0.8647, + "step": 8109 + }, + { + "epoch": 0.01, + "learning_rate": 4.99920833994612e-05, + "loss": 1.5936, + "step": 8110 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992081422762885e-05, + "loss": 0.9714, + "step": 8111 + }, + { + "epoch": 0.01, + "learning_rate": 4.999207944581786e-05, + "loss": 0.8233, + "step": 8112 + }, + { + "epoch": 0.01, + "learning_rate": 4.999207746862613e-05, + "loss": 1.031, + "step": 8113 + }, + { + "epoch": 0.01, + "learning_rate": 4.999207549118768e-05, + "loss": 0.4453, + "step": 8114 + }, + { + "epoch": 0.01, + "learning_rate": 4.999207351350252e-05, + "loss": 0.3886, + "step": 8115 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992071535570665e-05, + "loss": 1.0795, + "step": 8116 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992069557392085e-05, + "loss": 0.8687, + "step": 8117 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992067578966794e-05, + "loss": 1.0988, + "step": 8118 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992065600294794e-05, + "loss": 1.2491, + "step": 8119 + }, + { + "epoch": 0.01, + "learning_rate": 4.999206362137608e-05, + "loss": 1.3787, + "step": 8120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992061642210655e-05, + "loss": 1.224, + "step": 8121 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992059662798524e-05, + "loss": 1.234, + "step": 8122 + }, + { + "epoch": 0.01, + "learning_rate": 4.999205768313968e-05, + "loss": 1.1286, + "step": 8123 + }, + { + "epoch": 0.01, + "learning_rate": 4.999205570323412e-05, + "loss": 0.8692, + "step": 8124 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992053723081856e-05, + "loss": 1.1534, + "step": 8125 + }, + { + "epoch": 0.01, + "learning_rate": 4.999205174268288e-05, + "loss": 1.1841, + "step": 8126 + }, + { + "epoch": 0.01, + "learning_rate": 4.999204976203719e-05, + "loss": 1.2964, + "step": 8127 + }, + { + "epoch": 0.01, + "learning_rate": 4.999204778114479e-05, + "loss": 1.3143, + "step": 8128 + }, + { + "epoch": 0.01, + "learning_rate": 4.999204580000568e-05, + "loss": 1.3841, + "step": 8129 + }, + { + "epoch": 0.01, + "learning_rate": 4.999204381861985e-05, + "loss": 1.3286, + "step": 8130 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992041836987323e-05, + "loss": 1.1715, + "step": 8131 + }, + { + "epoch": 0.01, + "learning_rate": 4.999203985510808e-05, + "loss": 1.2359, + "step": 8132 + }, + { + "epoch": 0.01, + "learning_rate": 4.999203787298212e-05, + "loss": 1.3515, + "step": 8133 + }, + { + "epoch": 0.01, + "learning_rate": 4.999203589060946e-05, + "loss": 1.2288, + "step": 8134 + }, + { + "epoch": 0.01, + "learning_rate": 4.999203390799008e-05, + "loss": 1.0974, + "step": 8135 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992031925123994e-05, + "loss": 1.0515, + "step": 8136 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992029942011197e-05, + "loss": 1.1022, + "step": 8137 + }, + { + "epoch": 0.01, + "learning_rate": 4.999202795865169e-05, + "loss": 1.0654, + "step": 8138 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992025975045464e-05, + "loss": 1.0587, + "step": 8139 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992023991192535e-05, + "loss": 0.9211, + "step": 8140 + }, + { + "epoch": 0.01, + "learning_rate": 4.99920220070929e-05, + "loss": 1.0319, + "step": 8141 + }, + { + "epoch": 0.01, + "learning_rate": 4.999202002274654e-05, + "loss": 1.1965, + "step": 8142 + }, + { + "epoch": 0.01, + "learning_rate": 4.999201803815348e-05, + "loss": 1.1252, + "step": 8143 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992016053313706e-05, + "loss": 1.1956, + "step": 8144 + }, + { + "epoch": 0.01, + "learning_rate": 4.999201406822722e-05, + "loss": 1.0656, + "step": 8145 + }, + { + "epoch": 0.01, + "learning_rate": 4.999201208289403e-05, + "loss": 1.1604, + "step": 8146 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992010097314124e-05, + "loss": 0.9163, + "step": 8147 + }, + { + "epoch": 0.01, + "learning_rate": 4.999200811148751e-05, + "loss": 1.1694, + "step": 8148 + }, + { + "epoch": 0.01, + "learning_rate": 4.999200612541418e-05, + "loss": 1.1699, + "step": 8149 + }, + { + "epoch": 0.01, + "learning_rate": 4.999200413909414e-05, + "loss": 1.3381, + "step": 8150 + }, + { + "epoch": 0.01, + "learning_rate": 4.9992002152527395e-05, + "loss": 1.2251, + "step": 8151 + }, + { + "epoch": 0.01, + "learning_rate": 4.999200016571394e-05, + "loss": 1.0544, + "step": 8152 + }, + { + "epoch": 0.01, + "learning_rate": 4.999199817865377e-05, + "loss": 1.4957, + "step": 8153 + }, + { + "epoch": 0.01, + "learning_rate": 4.999199619134689e-05, + "loss": 1.6528, + "step": 8154 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991994203793303e-05, + "loss": 1.618, + "step": 8155 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991992215993e-05, + "loss": 1.1554, + "step": 8156 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991990227945996e-05, + "loss": 1.1553, + "step": 8157 + }, + { + "epoch": 0.01, + "learning_rate": 4.999198823965228e-05, + "loss": 1.0059, + "step": 8158 + }, + { + "epoch": 0.01, + "learning_rate": 4.999198625111184e-05, + "loss": 1.2516, + "step": 8159 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991984262324696e-05, + "loss": 1.2199, + "step": 8160 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991982273290846e-05, + "loss": 1.0004, + "step": 8161 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991980284010287e-05, + "loss": 1.0749, + "step": 8162 + }, + { + "epoch": 0.01, + "learning_rate": 4.999197829448302e-05, + "loss": 0.9781, + "step": 8163 + }, + { + "epoch": 0.01, + "learning_rate": 4.999197630470903e-05, + "loss": 0.3949, + "step": 8164 + }, + { + "epoch": 0.01, + "learning_rate": 4.999197431468834e-05, + "loss": 1.1743, + "step": 8165 + }, + { + "epoch": 0.01, + "learning_rate": 4.999197232442093e-05, + "loss": 0.8457, + "step": 8166 + }, + { + "epoch": 0.01, + "learning_rate": 4.999197033390682e-05, + "loss": 0.8697, + "step": 8167 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991968343146e-05, + "loss": 1.1964, + "step": 8168 + }, + { + "epoch": 0.01, + "learning_rate": 4.999196635213846e-05, + "loss": 0.86, + "step": 8169 + }, + { + "epoch": 0.01, + "learning_rate": 4.999196436088422e-05, + "loss": 1.0926, + "step": 8170 + }, + { + "epoch": 0.01, + "learning_rate": 4.999196236938327e-05, + "loss": 1.0888, + "step": 8171 + }, + { + "epoch": 0.01, + "learning_rate": 4.999196037763561e-05, + "loss": 0.7222, + "step": 8172 + }, + { + "epoch": 0.01, + "learning_rate": 4.999195838564123e-05, + "loss": 1.1379, + "step": 8173 + }, + { + "epoch": 0.01, + "learning_rate": 4.999195639340015e-05, + "loss": 1.0324, + "step": 8174 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991954400912355e-05, + "loss": 1.1589, + "step": 8175 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991952408177846e-05, + "loss": 1.0928, + "step": 8176 + }, + { + "epoch": 0.01, + "learning_rate": 4.999195041519663e-05, + "loss": 0.9772, + "step": 8177 + }, + { + "epoch": 0.01, + "learning_rate": 4.999194842196871e-05, + "loss": 1.2171, + "step": 8178 + }, + { + "epoch": 0.01, + "learning_rate": 4.999194642849408e-05, + "loss": 1.1504, + "step": 8179 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991944434772734e-05, + "loss": 1.19, + "step": 8180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999194244080467e-05, + "loss": 1.1003, + "step": 8181 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991940446589916e-05, + "loss": 1.0247, + "step": 8182 + }, + { + "epoch": 0.01, + "learning_rate": 4.999193845212844e-05, + "loss": 1.1476, + "step": 8183 + }, + { + "epoch": 0.01, + "learning_rate": 4.999193645742025e-05, + "loss": 1.2773, + "step": 8184 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991934462465364e-05, + "loss": 1.5125, + "step": 8185 + }, + { + "epoch": 0.01, + "learning_rate": 4.999193246726376e-05, + "loss": 1.5328, + "step": 8186 + }, + { + "epoch": 0.01, + "learning_rate": 4.999193047181544e-05, + "loss": 0.9536, + "step": 8187 + }, + { + "epoch": 0.01, + "learning_rate": 4.999192847612042e-05, + "loss": 0.8888, + "step": 8188 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991926480178684e-05, + "loss": 1.2458, + "step": 8189 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991924483990246e-05, + "loss": 1.0091, + "step": 8190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999192248755509e-05, + "loss": 0.1777, + "step": 8191 + }, + { + "epoch": 0.01, + "learning_rate": 4.999192049087323e-05, + "loss": 0.1513, + "step": 8192 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991918493944655e-05, + "loss": 0.2262, + "step": 8193 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991916496769376e-05, + "loss": 1.1995, + "step": 8194 + }, + { + "epoch": 0.01, + "learning_rate": 4.999191449934738e-05, + "loss": 1.1373, + "step": 8195 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991912501678686e-05, + "loss": 1.0447, + "step": 8196 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991910503763276e-05, + "loss": 1.281, + "step": 8197 + }, + { + "epoch": 0.01, + "learning_rate": 4.999190850560115e-05, + "loss": 0.925, + "step": 8198 + }, + { + "epoch": 0.01, + "learning_rate": 4.999190650719232e-05, + "loss": 1.2642, + "step": 8199 + }, + { + "epoch": 0.01, + "learning_rate": 4.999190450853678e-05, + "loss": 1.0075, + "step": 8200 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991902509634534e-05, + "loss": 1.1623, + "step": 8201 + }, + { + "epoch": 0.01, + "learning_rate": 4.999190051048557e-05, + "loss": 0.9961, + "step": 8202 + }, + { + "epoch": 0.01, + "learning_rate": 4.999189851108991e-05, + "loss": 1.1757, + "step": 8203 + }, + { + "epoch": 0.01, + "learning_rate": 4.999189651144753e-05, + "loss": 1.3607, + "step": 8204 + }, + { + "epoch": 0.01, + "learning_rate": 4.999189451155844e-05, + "loss": 1.2876, + "step": 8205 + }, + { + "epoch": 0.01, + "learning_rate": 4.999189251142265e-05, + "loss": 1.4278, + "step": 8206 + }, + { + "epoch": 0.01, + "learning_rate": 4.999189051104014e-05, + "loss": 1.3018, + "step": 8207 + }, + { + "epoch": 0.01, + "learning_rate": 4.999188851041092e-05, + "loss": 1.1068, + "step": 8208 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991886509535e-05, + "loss": 1.1355, + "step": 8209 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991884508412364e-05, + "loss": 0.9839, + "step": 8210 + }, + { + "epoch": 0.01, + "learning_rate": 4.999188250704302e-05, + "loss": 1.1312, + "step": 8211 + }, + { + "epoch": 0.01, + "learning_rate": 4.999188050542697e-05, + "loss": 0.9444, + "step": 8212 + }, + { + "epoch": 0.01, + "learning_rate": 4.99918785035642e-05, + "loss": 0.7048, + "step": 8213 + }, + { + "epoch": 0.01, + "learning_rate": 4.999187650145474e-05, + "loss": 0.9837, + "step": 8214 + }, + { + "epoch": 0.01, + "learning_rate": 4.999187449909856e-05, + "loss": 1.1194, + "step": 8215 + }, + { + "epoch": 0.01, + "learning_rate": 4.999187249649566e-05, + "loss": 1.1048, + "step": 8216 + }, + { + "epoch": 0.01, + "learning_rate": 4.999187049364607e-05, + "loss": 0.9613, + "step": 8217 + }, + { + "epoch": 0.01, + "learning_rate": 4.999186849054975e-05, + "loss": 1.1043, + "step": 8218 + }, + { + "epoch": 0.01, + "learning_rate": 4.999186648720674e-05, + "loss": 1.1788, + "step": 8219 + }, + { + "epoch": 0.01, + "learning_rate": 4.999186448361701e-05, + "loss": 1.0607, + "step": 8220 + }, + { + "epoch": 0.01, + "learning_rate": 4.999186247978058e-05, + "loss": 1.3091, + "step": 8221 + }, + { + "epoch": 0.01, + "learning_rate": 4.999186047569743e-05, + "loss": 1.0853, + "step": 8222 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991858471367584e-05, + "loss": 0.5792, + "step": 8223 + }, + { + "epoch": 0.01, + "learning_rate": 4.999185646679101e-05, + "loss": 0.6422, + "step": 8224 + }, + { + "epoch": 0.01, + "learning_rate": 4.999185446196774e-05, + "loss": 2.3584, + "step": 8225 + }, + { + "epoch": 0.01, + "learning_rate": 4.999185245689776e-05, + "loss": 2.8526, + "step": 8226 + }, + { + "epoch": 0.01, + "learning_rate": 4.999185045158107e-05, + "loss": 2.4602, + "step": 8227 + }, + { + "epoch": 0.01, + "learning_rate": 4.999184844601767e-05, + "loss": 2.0139, + "step": 8228 + }, + { + "epoch": 0.01, + "learning_rate": 4.999184644020756e-05, + "loss": 1.1379, + "step": 8229 + }, + { + "epoch": 0.01, + "learning_rate": 4.999184443415075e-05, + "loss": 1.3758, + "step": 8230 + }, + { + "epoch": 0.01, + "learning_rate": 4.999184242784722e-05, + "loss": 1.0097, + "step": 8231 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991840421296984e-05, + "loss": 1.2272, + "step": 8232 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991838414500034e-05, + "loss": 1.3314, + "step": 8233 + }, + { + "epoch": 0.01, + "learning_rate": 4.999183640745638e-05, + "loss": 0.9933, + "step": 8234 + }, + { + "epoch": 0.01, + "learning_rate": 4.999183440016603e-05, + "loss": 0.9973, + "step": 8235 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991832392628955e-05, + "loss": 1.0917, + "step": 8236 + }, + { + "epoch": 0.01, + "learning_rate": 4.999183038484517e-05, + "loss": 0.9456, + "step": 8237 + }, + { + "epoch": 0.01, + "learning_rate": 4.999182837681469e-05, + "loss": 1.409, + "step": 8238 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991826368537485e-05, + "loss": 1.0647, + "step": 8239 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991824360013584e-05, + "loss": 0.9981, + "step": 8240 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991822351242965e-05, + "loss": 1.1877, + "step": 8241 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991820342225644e-05, + "loss": 1.3668, + "step": 8242 + }, + { + "epoch": 0.01, + "learning_rate": 4.999181833296162e-05, + "loss": 1.2226, + "step": 8243 + }, + { + "epoch": 0.01, + "learning_rate": 4.999181632345087e-05, + "loss": 1.1048, + "step": 8244 + }, + { + "epoch": 0.01, + "learning_rate": 4.999181431369342e-05, + "loss": 0.9423, + "step": 8245 + }, + { + "epoch": 0.01, + "learning_rate": 4.999181230368926e-05, + "loss": 1.2384, + "step": 8246 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991810293438395e-05, + "loss": 1.7832, + "step": 8247 + }, + { + "epoch": 0.01, + "learning_rate": 4.999180828294082e-05, + "loss": 0.9334, + "step": 8248 + }, + { + "epoch": 0.01, + "learning_rate": 4.999180627219653e-05, + "loss": 1.1745, + "step": 8249 + }, + { + "epoch": 0.01, + "learning_rate": 4.999180426120554e-05, + "loss": 0.8047, + "step": 8250 + }, + { + "epoch": 0.01, + "learning_rate": 4.999180224996784e-05, + "loss": 1.095, + "step": 8251 + }, + { + "epoch": 0.01, + "learning_rate": 4.999180023848343e-05, + "loss": 0.8205, + "step": 8252 + }, + { + "epoch": 0.01, + "learning_rate": 4.999179822675231e-05, + "loss": 1.1301, + "step": 8253 + }, + { + "epoch": 0.01, + "learning_rate": 4.999179621477449e-05, + "loss": 1.3518, + "step": 8254 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991794202549954e-05, + "loss": 1.0477, + "step": 8255 + }, + { + "epoch": 0.01, + "learning_rate": 4.999179219007871e-05, + "loss": 0.4998, + "step": 8256 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991790177360755e-05, + "loss": 1.0548, + "step": 8257 + }, + { + "epoch": 0.01, + "learning_rate": 4.999178816439609e-05, + "loss": 1.2165, + "step": 8258 + }, + { + "epoch": 0.01, + "learning_rate": 4.999178615118473e-05, + "loss": 0.8835, + "step": 8259 + }, + { + "epoch": 0.01, + "learning_rate": 4.999178413772665e-05, + "loss": 1.2443, + "step": 8260 + }, + { + "epoch": 0.01, + "learning_rate": 4.999178212402186e-05, + "loss": 0.9742, + "step": 8261 + }, + { + "epoch": 0.01, + "learning_rate": 4.999178011007037e-05, + "loss": 1.2147, + "step": 8262 + }, + { + "epoch": 0.01, + "learning_rate": 4.999177809587217e-05, + "loss": 1.1196, + "step": 8263 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991776081427255e-05, + "loss": 1.172, + "step": 8264 + }, + { + "epoch": 0.01, + "learning_rate": 4.999177406673564e-05, + "loss": 1.1525, + "step": 8265 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991772051797306e-05, + "loss": 0.9987, + "step": 8266 + }, + { + "epoch": 0.01, + "learning_rate": 4.999177003661227e-05, + "loss": 1.1509, + "step": 8267 + }, + { + "epoch": 0.01, + "learning_rate": 4.999176802118053e-05, + "loss": 1.2156, + "step": 8268 + }, + { + "epoch": 0.01, + "learning_rate": 4.999176600550208e-05, + "loss": 1.0428, + "step": 8269 + }, + { + "epoch": 0.01, + "learning_rate": 4.999176398957692e-05, + "loss": 1.0568, + "step": 8270 + }, + { + "epoch": 0.01, + "learning_rate": 4.999176197340505e-05, + "loss": 1.1073, + "step": 8271 + }, + { + "epoch": 0.01, + "learning_rate": 4.999175995698647e-05, + "loss": 1.2529, + "step": 8272 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991757940321186e-05, + "loss": 1.3799, + "step": 8273 + }, + { + "epoch": 0.01, + "learning_rate": 4.999175592340919e-05, + "loss": 1.3727, + "step": 8274 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991753906250494e-05, + "loss": 1.2017, + "step": 8275 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991751888845086e-05, + "loss": 0.9024, + "step": 8276 + }, + { + "epoch": 0.01, + "learning_rate": 4.999174987119297e-05, + "loss": 0.7042, + "step": 8277 + }, + { + "epoch": 0.01, + "learning_rate": 4.999174785329415e-05, + "loss": 0.962, + "step": 8278 + }, + { + "epoch": 0.01, + "learning_rate": 4.999174583514861e-05, + "loss": 1.1191, + "step": 8279 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991743816756367e-05, + "loss": 1.3123, + "step": 8280 + }, + { + "epoch": 0.01, + "learning_rate": 4.999174179811742e-05, + "loss": 0.7662, + "step": 8281 + }, + { + "epoch": 0.01, + "learning_rate": 4.999173977923176e-05, + "loss": 0.8643, + "step": 8282 + }, + { + "epoch": 0.01, + "learning_rate": 4.99917377600994e-05, + "loss": 1.2898, + "step": 8283 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991735740720324e-05, + "loss": 1.2237, + "step": 8284 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991733721094545e-05, + "loss": 1.3147, + "step": 8285 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991731701222056e-05, + "loss": 1.1869, + "step": 8286 + }, + { + "epoch": 0.01, + "learning_rate": 4.999172968110286e-05, + "loss": 1.265, + "step": 8287 + }, + { + "epoch": 0.01, + "learning_rate": 4.999172766073695e-05, + "loss": 1.3549, + "step": 8288 + }, + { + "epoch": 0.01, + "learning_rate": 4.999172564012434e-05, + "loss": 1.2528, + "step": 8289 + }, + { + "epoch": 0.01, + "learning_rate": 4.999172361926502e-05, + "loss": 1.1335, + "step": 8290 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991721598158996e-05, + "loss": 1.05, + "step": 8291 + }, + { + "epoch": 0.01, + "learning_rate": 4.999171957680626e-05, + "loss": 1.0144, + "step": 8292 + }, + { + "epoch": 0.01, + "learning_rate": 4.999171755520682e-05, + "loss": 0.9581, + "step": 8293 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991715533360674e-05, + "loss": 1.1402, + "step": 8294 + }, + { + "epoch": 0.01, + "learning_rate": 4.999171351126781e-05, + "loss": 1.1666, + "step": 8295 + }, + { + "epoch": 0.01, + "learning_rate": 4.999171148892824e-05, + "loss": 0.9455, + "step": 8296 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991709466341974e-05, + "loss": 1.3822, + "step": 8297 + }, + { + "epoch": 0.01, + "learning_rate": 4.999170744350899e-05, + "loss": 1.3498, + "step": 8298 + }, + { + "epoch": 0.01, + "learning_rate": 4.99917054204293e-05, + "loss": 1.2778, + "step": 8299 + }, + { + "epoch": 0.01, + "learning_rate": 4.999170339710291e-05, + "loss": 1.682, + "step": 8300 + }, + { + "epoch": 0.01, + "learning_rate": 4.99917013735298e-05, + "loss": 1.2964, + "step": 8301 + }, + { + "epoch": 0.01, + "learning_rate": 4.999169934970999e-05, + "loss": 1.1784, + "step": 8302 + }, + { + "epoch": 0.01, + "learning_rate": 4.999169732564347e-05, + "loss": 1.1179, + "step": 8303 + }, + { + "epoch": 0.01, + "learning_rate": 4.999169530133024e-05, + "loss": 1.1623, + "step": 8304 + }, + { + "epoch": 0.01, + "learning_rate": 4.999169327677031e-05, + "loss": 1.2041, + "step": 8305 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991691251963665e-05, + "loss": 1.1615, + "step": 8306 + }, + { + "epoch": 0.01, + "learning_rate": 4.999168922691032e-05, + "loss": 1.0726, + "step": 8307 + }, + { + "epoch": 0.01, + "learning_rate": 4.999168720161026e-05, + "loss": 0.8384, + "step": 8308 + }, + { + "epoch": 0.01, + "learning_rate": 4.99916851760635e-05, + "loss": 0.9102, + "step": 8309 + }, + { + "epoch": 0.01, + "learning_rate": 4.999168315027003e-05, + "loss": 0.8444, + "step": 8310 + }, + { + "epoch": 0.01, + "learning_rate": 4.999168112422985e-05, + "loss": 0.7279, + "step": 8311 + }, + { + "epoch": 0.01, + "learning_rate": 4.999167909794296e-05, + "loss": 0.5946, + "step": 8312 + }, + { + "epoch": 0.01, + "learning_rate": 4.999167707140937e-05, + "loss": 0.8794, + "step": 8313 + }, + { + "epoch": 0.01, + "learning_rate": 4.999167504462907e-05, + "loss": 1.1536, + "step": 8314 + }, + { + "epoch": 0.01, + "learning_rate": 4.999167301760206e-05, + "loss": 0.9535, + "step": 8315 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991670990328346e-05, + "loss": 1.1651, + "step": 8316 + }, + { + "epoch": 0.01, + "learning_rate": 4.999166896280793e-05, + "loss": 0.8064, + "step": 8317 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991666935040793e-05, + "loss": 0.8431, + "step": 8318 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991664907026955e-05, + "loss": 1.3013, + "step": 8319 + }, + { + "epoch": 0.01, + "learning_rate": 4.999166287876641e-05, + "loss": 1.0501, + "step": 8320 + }, + { + "epoch": 0.01, + "learning_rate": 4.999166085025916e-05, + "loss": 0.9354, + "step": 8321 + }, + { + "epoch": 0.01, + "learning_rate": 4.999165882150521e-05, + "loss": 0.9781, + "step": 8322 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991656792504535e-05, + "loss": 0.8342, + "step": 8323 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991654763257166e-05, + "loss": 1.1559, + "step": 8324 + }, + { + "epoch": 0.01, + "learning_rate": 4.999165273376309e-05, + "loss": 1.1642, + "step": 8325 + }, + { + "epoch": 0.01, + "learning_rate": 4.99916507040223e-05, + "loss": 1.2031, + "step": 8326 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991648674034805e-05, + "loss": 1.2286, + "step": 8327 + }, + { + "epoch": 0.01, + "learning_rate": 4.99916466438006e-05, + "loss": 1.1127, + "step": 8328 + }, + { + "epoch": 0.01, + "learning_rate": 4.99916446133197e-05, + "loss": 1.3282, + "step": 8329 + }, + { + "epoch": 0.01, + "learning_rate": 4.999164258259208e-05, + "loss": 1.2678, + "step": 8330 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991640551617766e-05, + "loss": 1.3532, + "step": 8331 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991638520396736e-05, + "loss": 1.1879, + "step": 8332 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991636488928996e-05, + "loss": 1.2062, + "step": 8333 + }, + { + "epoch": 0.01, + "learning_rate": 4.999163445721455e-05, + "loss": 1.2149, + "step": 8334 + }, + { + "epoch": 0.01, + "learning_rate": 4.999163242525341e-05, + "loss": 1.1491, + "step": 8335 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991630393045554e-05, + "loss": 1.0911, + "step": 8336 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991628360590986e-05, + "loss": 1.0811, + "step": 8337 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991626327889715e-05, + "loss": 1.2723, + "step": 8338 + }, + { + "epoch": 0.01, + "learning_rate": 4.999162429494174e-05, + "loss": 0.9685, + "step": 8339 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991622261747056e-05, + "loss": 0.91, + "step": 8340 + }, + { + "epoch": 0.01, + "learning_rate": 4.999162022830567e-05, + "loss": 1.0759, + "step": 8341 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991618194617577e-05, + "loss": 1.2971, + "step": 8342 + }, + { + "epoch": 0.01, + "learning_rate": 4.999161616068276e-05, + "loss": 1.0533, + "step": 8343 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991614126501256e-05, + "loss": 0.9694, + "step": 8344 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991612092073034e-05, + "loss": 1.0244, + "step": 8345 + }, + { + "epoch": 0.01, + "learning_rate": 4.999161005739812e-05, + "loss": 1.284, + "step": 8346 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991608022476486e-05, + "loss": 1.184, + "step": 8347 + }, + { + "epoch": 0.01, + "learning_rate": 4.999160598730815e-05, + "loss": 1.2793, + "step": 8348 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991603951893104e-05, + "loss": 0.9557, + "step": 8349 + }, + { + "epoch": 0.01, + "learning_rate": 4.999160191623136e-05, + "loss": 1.1593, + "step": 8350 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991599880322895e-05, + "loss": 1.137, + "step": 8351 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991597844167736e-05, + "loss": 0.6558, + "step": 8352 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991595807765866e-05, + "loss": 1.1623, + "step": 8353 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991593771117286e-05, + "loss": 1.269, + "step": 8354 + }, + { + "epoch": 0.01, + "learning_rate": 4.999159173422201e-05, + "loss": 1.1349, + "step": 8355 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991589697080015e-05, + "loss": 1.1626, + "step": 8356 + }, + { + "epoch": 0.01, + "learning_rate": 4.999158765969132e-05, + "loss": 1.279, + "step": 8357 + }, + { + "epoch": 0.01, + "learning_rate": 4.999158562205592e-05, + "loss": 1.2476, + "step": 8358 + }, + { + "epoch": 0.01, + "learning_rate": 4.999158358417381e-05, + "loss": 1.1814, + "step": 8359 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991581546045e-05, + "loss": 1.097, + "step": 8360 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991579507669475e-05, + "loss": 1.1707, + "step": 8361 + }, + { + "epoch": 0.01, + "learning_rate": 4.999157746904725e-05, + "loss": 1.0193, + "step": 8362 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991575430178316e-05, + "loss": 0.79, + "step": 8363 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991573391062674e-05, + "loss": 1.1222, + "step": 8364 + }, + { + "epoch": 0.01, + "learning_rate": 4.999157135170033e-05, + "loss": 1.1173, + "step": 8365 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991569312091274e-05, + "loss": 1.5073, + "step": 8366 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991567272235516e-05, + "loss": 0.9067, + "step": 8367 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991565232133054e-05, + "loss": 0.6524, + "step": 8368 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991563191783875e-05, + "loss": 0.7113, + "step": 8369 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991561151188e-05, + "loss": 0.7702, + "step": 8370 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991559110345413e-05, + "loss": 1.1213, + "step": 8371 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991557069256124e-05, + "loss": 1.0213, + "step": 8372 + }, + { + "epoch": 0.01, + "learning_rate": 4.999155502792013e-05, + "loss": 1.1007, + "step": 8373 + }, + { + "epoch": 0.01, + "learning_rate": 4.999155298633743e-05, + "loss": 1.0572, + "step": 8374 + }, + { + "epoch": 0.01, + "learning_rate": 4.999155094450802e-05, + "loss": 0.9129, + "step": 8375 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991548902431906e-05, + "loss": 1.0989, + "step": 8376 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991546860109086e-05, + "loss": 0.918, + "step": 8377 + }, + { + "epoch": 0.01, + "learning_rate": 4.999154481753956e-05, + "loss": 1.1115, + "step": 8378 + }, + { + "epoch": 0.01, + "learning_rate": 4.999154277472333e-05, + "loss": 1.1857, + "step": 8379 + }, + { + "epoch": 0.01, + "learning_rate": 4.999154073166039e-05, + "loss": 1.066, + "step": 8380 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991538688350745e-05, + "loss": 0.7562, + "step": 8381 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991536644794394e-05, + "loss": 1.1435, + "step": 8382 + }, + { + "epoch": 0.01, + "learning_rate": 4.999153460099134e-05, + "loss": 0.8951, + "step": 8383 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991532556941576e-05, + "loss": 1.1395, + "step": 8384 + }, + { + "epoch": 0.01, + "learning_rate": 4.999153051264511e-05, + "loss": 0.9641, + "step": 8385 + }, + { + "epoch": 0.01, + "learning_rate": 4.999152846810193e-05, + "loss": 1.1511, + "step": 8386 + }, + { + "epoch": 0.01, + "learning_rate": 4.999152642331205e-05, + "loss": 0.9422, + "step": 8387 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991524378275464e-05, + "loss": 1.1244, + "step": 8388 + }, + { + "epoch": 0.01, + "learning_rate": 4.999152233299218e-05, + "loss": 1.1136, + "step": 8389 + }, + { + "epoch": 0.01, + "learning_rate": 4.999152028746218e-05, + "loss": 1.1821, + "step": 8390 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991518241685476e-05, + "loss": 1.1484, + "step": 8391 + }, + { + "epoch": 0.01, + "learning_rate": 4.999151619566207e-05, + "loss": 0.9898, + "step": 8392 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991514149391955e-05, + "loss": 0.9929, + "step": 8393 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991512102875137e-05, + "loss": 1.2335, + "step": 8394 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991510056111614e-05, + "loss": 0.8588, + "step": 8395 + }, + { + "epoch": 0.01, + "learning_rate": 4.999150800910138e-05, + "loss": 0.5444, + "step": 8396 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991505961844446e-05, + "loss": 0.6808, + "step": 8397 + }, + { + "epoch": 0.01, + "learning_rate": 4.99915039143408e-05, + "loss": 0.3225, + "step": 8398 + }, + { + "epoch": 0.01, + "learning_rate": 4.999150186659045e-05, + "loss": 0.5494, + "step": 8399 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991499818593404e-05, + "loss": 0.6306, + "step": 8400 + }, + { + "epoch": 0.01, + "learning_rate": 4.999149777034964e-05, + "loss": 1.2615, + "step": 8401 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991495721859174e-05, + "loss": 1.2836, + "step": 8402 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991493673122004e-05, + "loss": 1.2379, + "step": 8403 + }, + { + "epoch": 0.01, + "learning_rate": 4.999149162413813e-05, + "loss": 1.2976, + "step": 8404 + }, + { + "epoch": 0.01, + "learning_rate": 4.999148957490755e-05, + "loss": 0.9837, + "step": 8405 + }, + { + "epoch": 0.01, + "learning_rate": 4.999148752543026e-05, + "loss": 1.1341, + "step": 8406 + }, + { + "epoch": 0.01, + "learning_rate": 4.999148547570627e-05, + "loss": 0.6938, + "step": 8407 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991483425735575e-05, + "loss": 1.1328, + "step": 8408 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991481375518165e-05, + "loss": 1.086, + "step": 8409 + }, + { + "epoch": 0.01, + "learning_rate": 4.999147932505406e-05, + "loss": 1.2981, + "step": 8410 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991477274343246e-05, + "loss": 1.4599, + "step": 8411 + }, + { + "epoch": 0.01, + "learning_rate": 4.999147522338573e-05, + "loss": 1.4413, + "step": 8412 + }, + { + "epoch": 0.01, + "learning_rate": 4.99914731721815e-05, + "loss": 0.7609, + "step": 8413 + }, + { + "epoch": 0.01, + "learning_rate": 4.999147112073057e-05, + "loss": 0.608, + "step": 8414 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991469069032934e-05, + "loss": 0.3055, + "step": 8415 + }, + { + "epoch": 0.01, + "learning_rate": 4.99914670170886e-05, + "loss": 0.4677, + "step": 8416 + }, + { + "epoch": 0.01, + "learning_rate": 4.999146496489755e-05, + "loss": 0.4433, + "step": 8417 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991462912459805e-05, + "loss": 0.3726, + "step": 8418 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991460859775346e-05, + "loss": 0.2769, + "step": 8419 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991458806844184e-05, + "loss": 0.3368, + "step": 8420 + }, + { + "epoch": 0.01, + "learning_rate": 4.999145675366632e-05, + "loss": 0.3348, + "step": 8421 + }, + { + "epoch": 0.01, + "learning_rate": 4.999145470024175e-05, + "loss": 0.3991, + "step": 8422 + }, + { + "epoch": 0.01, + "learning_rate": 4.999145264657047e-05, + "loss": 0.9928, + "step": 8423 + }, + { + "epoch": 0.01, + "learning_rate": 4.999145059265249e-05, + "loss": 1.1942, + "step": 8424 + }, + { + "epoch": 0.01, + "learning_rate": 4.999144853848781e-05, + "loss": 1.1893, + "step": 8425 + }, + { + "epoch": 0.01, + "learning_rate": 4.999144648407641e-05, + "loss": 0.9798, + "step": 8426 + }, + { + "epoch": 0.01, + "learning_rate": 4.999144442941832e-05, + "loss": 0.7083, + "step": 8427 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991442374513516e-05, + "loss": 0.942, + "step": 8428 + }, + { + "epoch": 0.01, + "learning_rate": 4.999144031936201e-05, + "loss": 1.0437, + "step": 8429 + }, + { + "epoch": 0.01, + "learning_rate": 4.99914382639638e-05, + "loss": 0.7463, + "step": 8430 + }, + { + "epoch": 0.01, + "learning_rate": 4.999143620831888e-05, + "loss": 0.7416, + "step": 8431 + }, + { + "epoch": 0.01, + "learning_rate": 4.999143415242726e-05, + "loss": 1.4202, + "step": 8432 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991432096288935e-05, + "loss": 2.1545, + "step": 8433 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991430039903904e-05, + "loss": 2.3004, + "step": 8434 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991427983272164e-05, + "loss": 1.0673, + "step": 8435 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991425926393726e-05, + "loss": 1.0957, + "step": 8436 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991423869268585e-05, + "loss": 0.9421, + "step": 8437 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991421811896734e-05, + "loss": 0.3061, + "step": 8438 + }, + { + "epoch": 0.01, + "learning_rate": 4.999141975427817e-05, + "loss": 1.0725, + "step": 8439 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991417696412915e-05, + "loss": 1.3373, + "step": 8440 + }, + { + "epoch": 0.01, + "learning_rate": 4.999141563830095e-05, + "loss": 1.6996, + "step": 8441 + }, + { + "epoch": 0.01, + "learning_rate": 4.999141357994228e-05, + "loss": 1.1349, + "step": 8442 + }, + { + "epoch": 0.01, + "learning_rate": 4.999141152133691e-05, + "loss": 1.2943, + "step": 8443 + }, + { + "epoch": 0.01, + "learning_rate": 4.999140946248483e-05, + "loss": 1.1657, + "step": 8444 + }, + { + "epoch": 0.01, + "learning_rate": 4.999140740338604e-05, + "loss": 1.0395, + "step": 8445 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991405344040555e-05, + "loss": 1.4422, + "step": 8446 + }, + { + "epoch": 0.01, + "learning_rate": 4.999140328444836e-05, + "loss": 1.5469, + "step": 8447 + }, + { + "epoch": 0.01, + "learning_rate": 4.999140122460946e-05, + "loss": 1.4338, + "step": 8448 + }, + { + "epoch": 0.01, + "learning_rate": 4.999139916452386e-05, + "loss": 1.5494, + "step": 8449 + }, + { + "epoch": 0.01, + "learning_rate": 4.999139710419155e-05, + "loss": 1.0026, + "step": 8450 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991395043612544e-05, + "loss": 0.7611, + "step": 8451 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991392982786825e-05, + "loss": 0.9159, + "step": 8452 + }, + { + "epoch": 0.01, + "learning_rate": 4.999139092171441e-05, + "loss": 1.0654, + "step": 8453 + }, + { + "epoch": 0.01, + "learning_rate": 4.999138886039528e-05, + "loss": 1.3249, + "step": 8454 + }, + { + "epoch": 0.01, + "learning_rate": 4.999138679882945e-05, + "loss": 1.4767, + "step": 8455 + }, + { + "epoch": 0.01, + "learning_rate": 4.999138473701692e-05, + "loss": 1.1754, + "step": 8456 + }, + { + "epoch": 0.01, + "learning_rate": 4.999138267495769e-05, + "loss": 1.2345, + "step": 8457 + }, + { + "epoch": 0.01, + "learning_rate": 4.999138061265174e-05, + "loss": 1.1498, + "step": 8458 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991378550099093e-05, + "loss": 1.229, + "step": 8459 + }, + { + "epoch": 0.01, + "learning_rate": 4.999137648729975e-05, + "loss": 1.3139, + "step": 8460 + }, + { + "epoch": 0.01, + "learning_rate": 4.999137442425369e-05, + "loss": 0.9882, + "step": 8461 + }, + { + "epoch": 0.01, + "learning_rate": 4.999137236096093e-05, + "loss": 1.2208, + "step": 8462 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991370297421466e-05, + "loss": 1.1347, + "step": 8463 + }, + { + "epoch": 0.01, + "learning_rate": 4.999136823363529e-05, + "loss": 1.2023, + "step": 8464 + }, + { + "epoch": 0.01, + "learning_rate": 4.999136616960243e-05, + "loss": 1.6499, + "step": 8465 + }, + { + "epoch": 0.01, + "learning_rate": 4.999136410532285e-05, + "loss": 1.1429, + "step": 8466 + }, + { + "epoch": 0.01, + "learning_rate": 4.999136204079656e-05, + "loss": 1.0542, + "step": 8467 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991359976023575e-05, + "loss": 1.0439, + "step": 8468 + }, + { + "epoch": 0.01, + "learning_rate": 4.999135791100389e-05, + "loss": 0.9817, + "step": 8469 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991355845737496e-05, + "loss": 0.9858, + "step": 8470 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991353780224405e-05, + "loss": 1.2853, + "step": 8471 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991351714464596e-05, + "loss": 1.2696, + "step": 8472 + }, + { + "epoch": 0.01, + "learning_rate": 4.999134964845809e-05, + "loss": 1.5929, + "step": 8473 + }, + { + "epoch": 0.01, + "learning_rate": 4.999134758220488e-05, + "loss": 1.0557, + "step": 8474 + }, + { + "epoch": 0.01, + "learning_rate": 4.999134551570497e-05, + "loss": 1.1169, + "step": 8475 + }, + { + "epoch": 0.01, + "learning_rate": 4.999134344895835e-05, + "loss": 0.8522, + "step": 8476 + }, + { + "epoch": 0.01, + "learning_rate": 4.999134138196503e-05, + "loss": 1.2347, + "step": 8477 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991339314725e-05, + "loss": 1.1851, + "step": 8478 + }, + { + "epoch": 0.01, + "learning_rate": 4.999133724723827e-05, + "loss": 1.2193, + "step": 8479 + }, + { + "epoch": 0.01, + "learning_rate": 4.999133517950484e-05, + "loss": 0.9337, + "step": 8480 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991333111524695e-05, + "loss": 1.1871, + "step": 8481 + }, + { + "epoch": 0.01, + "learning_rate": 4.999133104329786e-05, + "loss": 1.1762, + "step": 8482 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991328974824314e-05, + "loss": 1.2306, + "step": 8483 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991326906104064e-05, + "loss": 0.8215, + "step": 8484 + }, + { + "epoch": 0.01, + "learning_rate": 4.999132483713711e-05, + "loss": 1.1186, + "step": 8485 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991322767923455e-05, + "loss": 1.0112, + "step": 8486 + }, + { + "epoch": 0.01, + "learning_rate": 4.999132069846309e-05, + "loss": 1.1783, + "step": 8487 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991318628756026e-05, + "loss": 1.3128, + "step": 8488 + }, + { + "epoch": 0.01, + "learning_rate": 4.999131655880226e-05, + "loss": 1.1166, + "step": 8489 + }, + { + "epoch": 0.01, + "learning_rate": 4.999131448860178e-05, + "loss": 0.995, + "step": 8490 + }, + { + "epoch": 0.01, + "learning_rate": 4.999131241815461e-05, + "loss": 1.2413, + "step": 8491 + }, + { + "epoch": 0.01, + "learning_rate": 4.999131034746073e-05, + "loss": 1.0379, + "step": 8492 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991308276520145e-05, + "loss": 1.1876, + "step": 8493 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991306205332855e-05, + "loss": 1.2118, + "step": 8494 + }, + { + "epoch": 0.01, + "learning_rate": 4.999130413389887e-05, + "loss": 1.316, + "step": 8495 + }, + { + "epoch": 0.01, + "learning_rate": 4.999130206221817e-05, + "loss": 0.973, + "step": 8496 + }, + { + "epoch": 0.01, + "learning_rate": 4.999129999029078e-05, + "loss": 1.3465, + "step": 8497 + }, + { + "epoch": 0.01, + "learning_rate": 4.999129791811667e-05, + "loss": 0.9186, + "step": 8498 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991295845695865e-05, + "loss": 1.2283, + "step": 8499 + }, + { + "epoch": 0.01, + "learning_rate": 4.999129377302836e-05, + "loss": 1.1846, + "step": 8500 + }, + { + "epoch": 0.01, + "eval_loss": 1.0542309284210205, + "eval_runtime": 99.0013, + "eval_samples_per_second": 13.99, + "eval_steps_per_second": 3.505, + "step": 8500 + }, + { + "epoch": 0.01, + "learning_rate": 4.999129170011415e-05, + "loss": 0.6902, + "step": 8501 + }, + { + "epoch": 0.01, + "learning_rate": 4.999128962695323e-05, + "loss": 0.8112, + "step": 8502 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991287553545614e-05, + "loss": 1.0512, + "step": 8503 + }, + { + "epoch": 0.01, + "learning_rate": 4.999128547989129e-05, + "loss": 1.0738, + "step": 8504 + }, + { + "epoch": 0.01, + "learning_rate": 4.999128340599026e-05, + "loss": 1.0696, + "step": 8505 + }, + { + "epoch": 0.01, + "learning_rate": 4.999128133184253e-05, + "loss": 0.9503, + "step": 8506 + }, + { + "epoch": 0.01, + "learning_rate": 4.99912792574481e-05, + "loss": 1.1886, + "step": 8507 + }, + { + "epoch": 0.01, + "learning_rate": 4.999127718280697e-05, + "loss": 1.2016, + "step": 8508 + }, + { + "epoch": 0.01, + "learning_rate": 4.999127510791913e-05, + "loss": 1.1265, + "step": 8509 + }, + { + "epoch": 0.01, + "learning_rate": 4.999127303278458e-05, + "loss": 1.1183, + "step": 8510 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991270957403334e-05, + "loss": 1.6124, + "step": 8511 + }, + { + "epoch": 0.01, + "learning_rate": 4.999126888177539e-05, + "loss": 1.1347, + "step": 8512 + }, + { + "epoch": 0.01, + "learning_rate": 4.999126680590074e-05, + "loss": 1.2228, + "step": 8513 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991264729779374e-05, + "loss": 1.1211, + "step": 8514 + }, + { + "epoch": 0.01, + "learning_rate": 4.999126265341132e-05, + "loss": 1.1596, + "step": 8515 + }, + { + "epoch": 0.01, + "learning_rate": 4.999126057679655e-05, + "loss": 0.8949, + "step": 8516 + }, + { + "epoch": 0.01, + "learning_rate": 4.999125849993509e-05, + "loss": 0.7996, + "step": 8517 + }, + { + "epoch": 0.01, + "learning_rate": 4.999125642282692e-05, + "loss": 0.7547, + "step": 8518 + }, + { + "epoch": 0.01, + "learning_rate": 4.999125434547205e-05, + "loss": 0.7877, + "step": 8519 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991252267870475e-05, + "loss": 0.7012, + "step": 8520 + }, + { + "epoch": 0.01, + "learning_rate": 4.99912501900222e-05, + "loss": 0.7266, + "step": 8521 + }, + { + "epoch": 0.01, + "learning_rate": 4.999124811192721e-05, + "loss": 0.7987, + "step": 8522 + }, + { + "epoch": 0.01, + "learning_rate": 4.999124603358553e-05, + "loss": 0.6141, + "step": 8523 + }, + { + "epoch": 0.01, + "learning_rate": 4.999124395499715e-05, + "loss": 0.6994, + "step": 8524 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991241876162053e-05, + "loss": 0.6509, + "step": 8525 + }, + { + "epoch": 0.01, + "learning_rate": 4.999123979708026e-05, + "loss": 0.8349, + "step": 8526 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991237717751757e-05, + "loss": 0.3005, + "step": 8527 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991235638176556e-05, + "loss": 1.373, + "step": 8528 + }, + { + "epoch": 0.01, + "learning_rate": 4.999123355835466e-05, + "loss": 1.057, + "step": 8529 + }, + { + "epoch": 0.01, + "learning_rate": 4.999123147828605e-05, + "loss": 1.438, + "step": 8530 + }, + { + "epoch": 0.01, + "learning_rate": 4.999122939797075e-05, + "loss": 1.1025, + "step": 8531 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991227317408735e-05, + "loss": 1.017, + "step": 8532 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991225236600024e-05, + "loss": 1.1234, + "step": 8533 + }, + { + "epoch": 0.01, + "learning_rate": 4.999122315554461e-05, + "loss": 1.251, + "step": 8534 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991221074242486e-05, + "loss": 1.1081, + "step": 8535 + }, + { + "epoch": 0.01, + "learning_rate": 4.999121899269366e-05, + "loss": 1.1494, + "step": 8536 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991216910898134e-05, + "loss": 1.2125, + "step": 8537 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991214828855907e-05, + "loss": 1.5262, + "step": 8538 + }, + { + "epoch": 0.01, + "learning_rate": 4.999121274656698e-05, + "loss": 0.9857, + "step": 8539 + }, + { + "epoch": 0.01, + "learning_rate": 4.999121066403134e-05, + "loss": 1.1487, + "step": 8540 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991208581249e-05, + "loss": 1.0883, + "step": 8541 + }, + { + "epoch": 0.01, + "learning_rate": 4.999120649821997e-05, + "loss": 1.5234, + "step": 8542 + }, + { + "epoch": 0.01, + "learning_rate": 4.999120441494422e-05, + "loss": 2.2477, + "step": 8543 + }, + { + "epoch": 0.01, + "learning_rate": 4.999120233142178e-05, + "loss": 2.9172, + "step": 8544 + }, + { + "epoch": 0.01, + "learning_rate": 4.999120024765264e-05, + "loss": 2.8604, + "step": 8545 + }, + { + "epoch": 0.01, + "learning_rate": 4.999119816363679e-05, + "loss": 2.2149, + "step": 8546 + }, + { + "epoch": 0.01, + "learning_rate": 4.999119607937424e-05, + "loss": 0.9414, + "step": 8547 + }, + { + "epoch": 0.01, + "learning_rate": 4.999119399486498e-05, + "loss": 1.1388, + "step": 8548 + }, + { + "epoch": 0.01, + "learning_rate": 4.999119191010902e-05, + "loss": 0.8004, + "step": 8549 + }, + { + "epoch": 0.01, + "learning_rate": 4.999118982510637e-05, + "loss": 1.2408, + "step": 8550 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991187739857e-05, + "loss": 1.2502, + "step": 8551 + }, + { + "epoch": 0.01, + "learning_rate": 4.999118565436094e-05, + "loss": 1.3305, + "step": 8552 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991183568618175e-05, + "loss": 0.9993, + "step": 8553 + }, + { + "epoch": 0.01, + "learning_rate": 4.99911814826287e-05, + "loss": 0.1552, + "step": 8554 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991179396392526e-05, + "loss": 0.1205, + "step": 8555 + }, + { + "epoch": 0.01, + "learning_rate": 4.999117730990965e-05, + "loss": 0.1116, + "step": 8556 + }, + { + "epoch": 0.01, + "learning_rate": 4.999117522318008e-05, + "loss": 1.049, + "step": 8557 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991173136203795e-05, + "loss": 1.0186, + "step": 8558 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991171048980816e-05, + "loss": 1.154, + "step": 8559 + }, + { + "epoch": 0.01, + "learning_rate": 4.999116896151114e-05, + "loss": 1.5328, + "step": 8560 + }, + { + "epoch": 0.01, + "learning_rate": 4.999116687379475e-05, + "loss": 1.1713, + "step": 8561 + }, + { + "epoch": 0.01, + "learning_rate": 4.999116478583166e-05, + "loss": 1.2756, + "step": 8562 + }, + { + "epoch": 0.01, + "learning_rate": 4.999116269762187e-05, + "loss": 1.2244, + "step": 8563 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991160609165374e-05, + "loss": 1.2125, + "step": 8564 + }, + { + "epoch": 0.01, + "learning_rate": 4.999115852046218e-05, + "loss": 1.0166, + "step": 8565 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991156431512285e-05, + "loss": 0.795, + "step": 8566 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991154342315685e-05, + "loss": 1.2471, + "step": 8567 + }, + { + "epoch": 0.01, + "learning_rate": 4.999115225287239e-05, + "loss": 0.8797, + "step": 8568 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991150163182374e-05, + "loss": 0.3646, + "step": 8569 + }, + { + "epoch": 0.01, + "learning_rate": 4.999114807324568e-05, + "loss": 1.0403, + "step": 8570 + }, + { + "epoch": 0.01, + "learning_rate": 4.999114598306227e-05, + "loss": 1.2882, + "step": 8571 + }, + { + "epoch": 0.01, + "learning_rate": 4.999114389263216e-05, + "loss": 1.0653, + "step": 8572 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991141801955347e-05, + "loss": 1.1562, + "step": 8573 + }, + { + "epoch": 0.01, + "learning_rate": 4.999113971103183e-05, + "loss": 1.317, + "step": 8574 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991137619861615e-05, + "loss": 1.2079, + "step": 8575 + }, + { + "epoch": 0.01, + "learning_rate": 4.99911355284447e-05, + "loss": 1.2094, + "step": 8576 + }, + { + "epoch": 0.01, + "learning_rate": 4.999113343678108e-05, + "loss": 1.0105, + "step": 8577 + }, + { + "epoch": 0.01, + "learning_rate": 4.999113134487075e-05, + "loss": 0.6547, + "step": 8578 + }, + { + "epoch": 0.01, + "learning_rate": 4.999112925271373e-05, + "loss": 0.8973, + "step": 8579 + }, + { + "epoch": 0.01, + "learning_rate": 4.999112716031001e-05, + "loss": 1.044, + "step": 8580 + }, + { + "epoch": 0.01, + "learning_rate": 4.999112506765958e-05, + "loss": 1.189, + "step": 8581 + }, + { + "epoch": 0.01, + "learning_rate": 4.999112297476245e-05, + "loss": 1.3591, + "step": 8582 + }, + { + "epoch": 0.01, + "learning_rate": 4.999112088161862e-05, + "loss": 1.0588, + "step": 8583 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991118788228084e-05, + "loss": 0.9702, + "step": 8584 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991116694590856e-05, + "loss": 0.9531, + "step": 8585 + }, + { + "epoch": 0.01, + "learning_rate": 4.999111460070691e-05, + "loss": 0.8241, + "step": 8586 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991112506576276e-05, + "loss": 1.1639, + "step": 8587 + }, + { + "epoch": 0.01, + "learning_rate": 4.999111041219894e-05, + "loss": 1.12, + "step": 8588 + }, + { + "epoch": 0.01, + "learning_rate": 4.99911083175749e-05, + "loss": 1.3793, + "step": 8589 + }, + { + "epoch": 0.01, + "learning_rate": 4.999110622270415e-05, + "loss": 0.9946, + "step": 8590 + }, + { + "epoch": 0.01, + "learning_rate": 4.999110412758671e-05, + "loss": 0.8347, + "step": 8591 + }, + { + "epoch": 0.01, + "learning_rate": 4.999110203222256e-05, + "loss": 0.4633, + "step": 8592 + }, + { + "epoch": 0.01, + "learning_rate": 4.999109993661171e-05, + "loss": 0.2479, + "step": 8593 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991097840754165e-05, + "loss": 0.373, + "step": 8594 + }, + { + "epoch": 0.01, + "learning_rate": 4.999109574464991e-05, + "loss": 0.8449, + "step": 8595 + }, + { + "epoch": 0.01, + "learning_rate": 4.999109364829896e-05, + "loss": 1.3162, + "step": 8596 + }, + { + "epoch": 0.01, + "learning_rate": 4.99910915517013e-05, + "loss": 1.2068, + "step": 8597 + }, + { + "epoch": 0.01, + "learning_rate": 4.999108945485695e-05, + "loss": 1.4203, + "step": 8598 + }, + { + "epoch": 0.01, + "learning_rate": 4.999108735776589e-05, + "loss": 1.4168, + "step": 8599 + }, + { + "epoch": 0.01, + "learning_rate": 4.999108526042813e-05, + "loss": 1.3545, + "step": 8600 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991083162843666e-05, + "loss": 0.9488, + "step": 8601 + }, + { + "epoch": 0.01, + "learning_rate": 4.999108106501251e-05, + "loss": 1.1733, + "step": 8602 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991078966934645e-05, + "loss": 1.254, + "step": 8603 + }, + { + "epoch": 0.01, + "learning_rate": 4.999107686861008e-05, + "loss": 1.0556, + "step": 8604 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991074770038817e-05, + "loss": 1.1763, + "step": 8605 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991072671220844e-05, + "loss": 1.1645, + "step": 8606 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991070572156174e-05, + "loss": 1.0256, + "step": 8607 + }, + { + "epoch": 0.01, + "learning_rate": 4.99910684728448e-05, + "loss": 1.0187, + "step": 8608 + }, + { + "epoch": 0.01, + "learning_rate": 4.999106637328673e-05, + "loss": 1.0299, + "step": 8609 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991064273481966e-05, + "loss": 0.8712, + "step": 8610 + }, + { + "epoch": 0.01, + "learning_rate": 4.999106217343049e-05, + "loss": 1.0586, + "step": 8611 + }, + { + "epoch": 0.01, + "learning_rate": 4.999106007313231e-05, + "loss": 0.9963, + "step": 8612 + }, + { + "epoch": 0.01, + "learning_rate": 4.999105797258744e-05, + "loss": 1.1931, + "step": 8613 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991055871795854e-05, + "loss": 1.1627, + "step": 8614 + }, + { + "epoch": 0.01, + "learning_rate": 4.999105377075758e-05, + "loss": 1.0302, + "step": 8615 + }, + { + "epoch": 0.01, + "learning_rate": 4.99910516694726e-05, + "loss": 1.0384, + "step": 8616 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991049567940914e-05, + "loss": 1.2491, + "step": 8617 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991047466162534e-05, + "loss": 0.9819, + "step": 8618 + }, + { + "epoch": 0.01, + "learning_rate": 4.999104536413746e-05, + "loss": 0.6744, + "step": 8619 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991043261865664e-05, + "loss": 1.2716, + "step": 8620 + }, + { + "epoch": 0.01, + "learning_rate": 4.999104115934719e-05, + "loss": 1.1201, + "step": 8621 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991039056581994e-05, + "loss": 0.8674, + "step": 8622 + }, + { + "epoch": 0.01, + "learning_rate": 4.999103695357011e-05, + "loss": 1.0039, + "step": 8623 + }, + { + "epoch": 0.01, + "learning_rate": 4.999103485031152e-05, + "loss": 1.1552, + "step": 8624 + }, + { + "epoch": 0.01, + "learning_rate": 4.999103274680623e-05, + "loss": 1.206, + "step": 8625 + }, + { + "epoch": 0.01, + "learning_rate": 4.999103064305424e-05, + "loss": 1.3881, + "step": 8626 + }, + { + "epoch": 0.01, + "learning_rate": 4.999102853905555e-05, + "loss": 0.9618, + "step": 8627 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991026434810154e-05, + "loss": 1.0148, + "step": 8628 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991024330318064e-05, + "loss": 0.8362, + "step": 8629 + }, + { + "epoch": 0.01, + "learning_rate": 4.999102222557927e-05, + "loss": 0.9687, + "step": 8630 + }, + { + "epoch": 0.01, + "learning_rate": 4.999102012059378e-05, + "loss": 1.278, + "step": 8631 + }, + { + "epoch": 0.01, + "learning_rate": 4.999101801536158e-05, + "loss": 0.9909, + "step": 8632 + }, + { + "epoch": 0.01, + "learning_rate": 4.999101590988268e-05, + "loss": 0.8792, + "step": 8633 + }, + { + "epoch": 0.01, + "learning_rate": 4.999101380415708e-05, + "loss": 1.0751, + "step": 8634 + }, + { + "epoch": 0.01, + "learning_rate": 4.999101169818479e-05, + "loss": 1.0353, + "step": 8635 + }, + { + "epoch": 0.01, + "learning_rate": 4.999100959196579e-05, + "loss": 1.1233, + "step": 8636 + }, + { + "epoch": 0.01, + "learning_rate": 4.999100748550009e-05, + "loss": 1.1861, + "step": 8637 + }, + { + "epoch": 0.01, + "learning_rate": 4.999100537878769e-05, + "loss": 1.0026, + "step": 8638 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991003271828585e-05, + "loss": 1.0155, + "step": 8639 + }, + { + "epoch": 0.01, + "learning_rate": 4.9991001164622784e-05, + "loss": 1.0189, + "step": 8640 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990999057170286e-05, + "loss": 1.4193, + "step": 8641 + }, + { + "epoch": 0.01, + "learning_rate": 4.999099694947108e-05, + "loss": 0.6558, + "step": 8642 + }, + { + "epoch": 0.01, + "learning_rate": 4.999099484152518e-05, + "loss": 0.3156, + "step": 8643 + }, + { + "epoch": 0.01, + "learning_rate": 4.999099273333258e-05, + "loss": 0.2008, + "step": 8644 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990990624893274e-05, + "loss": 0.4412, + "step": 8645 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990988516207265e-05, + "loss": 1.0611, + "step": 8646 + }, + { + "epoch": 0.01, + "learning_rate": 4.999098640727456e-05, + "loss": 1.0295, + "step": 8647 + }, + { + "epoch": 0.01, + "learning_rate": 4.999098429809516e-05, + "loss": 1.2576, + "step": 8648 + }, + { + "epoch": 0.01, + "learning_rate": 4.999098218866905e-05, + "loss": 1.1036, + "step": 8649 + }, + { + "epoch": 0.01, + "learning_rate": 4.999098007899624e-05, + "loss": 1.1413, + "step": 8650 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990977969076734e-05, + "loss": 1.3274, + "step": 8651 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990975858910526e-05, + "loss": 0.8323, + "step": 8652 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990973748497614e-05, + "loss": 1.0233, + "step": 8653 + }, + { + "epoch": 0.01, + "learning_rate": 4.999097163783801e-05, + "loss": 0.6758, + "step": 8654 + }, + { + "epoch": 0.01, + "learning_rate": 4.99909695269317e-05, + "loss": 1.018, + "step": 8655 + }, + { + "epoch": 0.01, + "learning_rate": 4.999096741577869e-05, + "loss": 1.0897, + "step": 8656 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990965304378986e-05, + "loss": 0.9251, + "step": 8657 + }, + { + "epoch": 0.01, + "learning_rate": 4.999096319273258e-05, + "loss": 0.9358, + "step": 8658 + }, + { + "epoch": 0.01, + "learning_rate": 4.999096108083946e-05, + "loss": 1.3181, + "step": 8659 + }, + { + "epoch": 0.01, + "learning_rate": 4.999095896869965e-05, + "loss": 1.065, + "step": 8660 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990956856313144e-05, + "loss": 1.2166, + "step": 8661 + }, + { + "epoch": 0.01, + "learning_rate": 4.999095474367993e-05, + "loss": 1.2498, + "step": 8662 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990952630800023e-05, + "loss": 1.0486, + "step": 8663 + }, + { + "epoch": 0.01, + "learning_rate": 4.999095051767341e-05, + "loss": 1.0942, + "step": 8664 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990948404300095e-05, + "loss": 1.3692, + "step": 8665 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990946290680086e-05, + "loss": 1.2716, + "step": 8666 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990944176813374e-05, + "loss": 1.1882, + "step": 8667 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990942062699965e-05, + "loss": 1.1674, + "step": 8668 + }, + { + "epoch": 0.01, + "learning_rate": 4.999093994833985e-05, + "loss": 0.8575, + "step": 8669 + }, + { + "epoch": 0.01, + "learning_rate": 4.999093783373304e-05, + "loss": 0.8663, + "step": 8670 + }, + { + "epoch": 0.01, + "learning_rate": 4.999093571887953e-05, + "loss": 1.1732, + "step": 8671 + }, + { + "epoch": 0.01, + "learning_rate": 4.999093360377932e-05, + "loss": 1.4423, + "step": 8672 + }, + { + "epoch": 0.01, + "learning_rate": 4.999093148843241e-05, + "loss": 1.1841, + "step": 8673 + }, + { + "epoch": 0.01, + "learning_rate": 4.99909293728388e-05, + "loss": 1.1292, + "step": 8674 + }, + { + "epoch": 0.01, + "learning_rate": 4.999092725699849e-05, + "loss": 0.7134, + "step": 8675 + }, + { + "epoch": 0.01, + "learning_rate": 4.999092514091148e-05, + "loss": 1.5599, + "step": 8676 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990923024577764e-05, + "loss": 0.8031, + "step": 8677 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990920907997355e-05, + "loss": 1.0661, + "step": 8678 + }, + { + "epoch": 0.01, + "learning_rate": 4.999091879117024e-05, + "loss": 1.6182, + "step": 8679 + }, + { + "epoch": 0.01, + "learning_rate": 4.999091667409643e-05, + "loss": 1.2834, + "step": 8680 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990914556775926e-05, + "loss": 0.929, + "step": 8681 + }, + { + "epoch": 0.01, + "learning_rate": 4.999091243920872e-05, + "loss": 0.9997, + "step": 8682 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990910321394804e-05, + "loss": 0.9971, + "step": 8683 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990908203334194e-05, + "loss": 1.1674, + "step": 8684 + }, + { + "epoch": 0.01, + "learning_rate": 4.999090608502688e-05, + "loss": 1.2116, + "step": 8685 + }, + { + "epoch": 0.01, + "learning_rate": 4.999090396647287e-05, + "loss": 1.0061, + "step": 8686 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990901847672165e-05, + "loss": 1.3171, + "step": 8687 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990899728624755e-05, + "loss": 1.1013, + "step": 8688 + }, + { + "epoch": 0.01, + "learning_rate": 4.999089760933065e-05, + "loss": 1.5107, + "step": 8689 + }, + { + "epoch": 0.01, + "learning_rate": 4.999089548978984e-05, + "loss": 0.9969, + "step": 8690 + }, + { + "epoch": 0.01, + "learning_rate": 4.999089337000233e-05, + "loss": 1.0103, + "step": 8691 + }, + { + "epoch": 0.01, + "learning_rate": 4.999089124996813e-05, + "loss": 1.3219, + "step": 8692 + }, + { + "epoch": 0.01, + "learning_rate": 4.999088912968722e-05, + "loss": 1.202, + "step": 8693 + }, + { + "epoch": 0.01, + "learning_rate": 4.999088700915961e-05, + "loss": 0.8583, + "step": 8694 + }, + { + "epoch": 0.01, + "learning_rate": 4.999088488838531e-05, + "loss": 0.652, + "step": 8695 + }, + { + "epoch": 0.01, + "learning_rate": 4.999088276736431e-05, + "loss": 0.5651, + "step": 8696 + }, + { + "epoch": 0.01, + "learning_rate": 4.999088064609661e-05, + "loss": 0.5008, + "step": 8697 + }, + { + "epoch": 0.01, + "learning_rate": 4.99908785245822e-05, + "loss": 1.1029, + "step": 8698 + }, + { + "epoch": 0.01, + "learning_rate": 4.99908764028211e-05, + "loss": 1.2576, + "step": 8699 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990874280813294e-05, + "loss": 1.2909, + "step": 8700 + }, + { + "epoch": 0.01, + "learning_rate": 4.999087215855879e-05, + "loss": 1.0101, + "step": 8701 + }, + { + "epoch": 0.01, + "learning_rate": 4.999087003605759e-05, + "loss": 1.3033, + "step": 8702 + }, + { + "epoch": 0.01, + "learning_rate": 4.999086791330969e-05, + "loss": 1.1506, + "step": 8703 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990865790315086e-05, + "loss": 1.0781, + "step": 8704 + }, + { + "epoch": 0.01, + "learning_rate": 4.999086366707379e-05, + "loss": 1.2098, + "step": 8705 + }, + { + "epoch": 0.01, + "learning_rate": 4.999086154358579e-05, + "loss": 1.1135, + "step": 8706 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990859419851095e-05, + "loss": 0.985, + "step": 8707 + }, + { + "epoch": 0.01, + "learning_rate": 4.99908572958697e-05, + "loss": 1.0368, + "step": 8708 + }, + { + "epoch": 0.01, + "learning_rate": 4.99908551716416e-05, + "loss": 1.3571, + "step": 8709 + }, + { + "epoch": 0.01, + "learning_rate": 4.999085304716681e-05, + "loss": 1.4024, + "step": 8710 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990850922445306e-05, + "loss": 1.2478, + "step": 8711 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990848797477116e-05, + "loss": 1.0357, + "step": 8712 + }, + { + "epoch": 0.01, + "learning_rate": 4.999084667226223e-05, + "loss": 1.0545, + "step": 8713 + }, + { + "epoch": 0.01, + "learning_rate": 4.999084454680063e-05, + "loss": 0.9262, + "step": 8714 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990842421092344e-05, + "loss": 0.9385, + "step": 8715 + }, + { + "epoch": 0.01, + "learning_rate": 4.999084029513735e-05, + "loss": 1.1249, + "step": 8716 + }, + { + "epoch": 0.01, + "learning_rate": 4.999083816893566e-05, + "loss": 1.2108, + "step": 8717 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990836042487275e-05, + "loss": 0.9183, + "step": 8718 + }, + { + "epoch": 0.01, + "learning_rate": 4.999083391579219e-05, + "loss": 1.0582, + "step": 8719 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990831788850404e-05, + "loss": 1.0237, + "step": 8720 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990829661661916e-05, + "loss": 1.0782, + "step": 8721 + }, + { + "epoch": 0.01, + "learning_rate": 4.999082753422673e-05, + "loss": 1.018, + "step": 8722 + }, + { + "epoch": 0.01, + "learning_rate": 4.999082540654485e-05, + "loss": 1.084, + "step": 8723 + }, + { + "epoch": 0.01, + "learning_rate": 4.999082327861627e-05, + "loss": 1.151, + "step": 8724 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990821150440986e-05, + "loss": 0.7537, + "step": 8725 + }, + { + "epoch": 0.01, + "learning_rate": 4.999081902201901e-05, + "loss": 1.154, + "step": 8726 + }, + { + "epoch": 0.01, + "learning_rate": 4.999081689335032e-05, + "loss": 1.5061, + "step": 8727 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990814764434944e-05, + "loss": 1.2121, + "step": 8728 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990812635272876e-05, + "loss": 0.9162, + "step": 8729 + }, + { + "epoch": 0.01, + "learning_rate": 4.99908105058641e-05, + "loss": 1.1134, + "step": 8730 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990808376208624e-05, + "loss": 1.0876, + "step": 8731 + }, + { + "epoch": 0.01, + "learning_rate": 4.999080624630645e-05, + "loss": 0.8893, + "step": 8732 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990804116157586e-05, + "loss": 1.0382, + "step": 8733 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990801985762014e-05, + "loss": 1.1064, + "step": 8734 + }, + { + "epoch": 0.01, + "learning_rate": 4.999079985511974e-05, + "loss": 1.2253, + "step": 8735 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990797724230776e-05, + "loss": 1.4358, + "step": 8736 + }, + { + "epoch": 0.01, + "learning_rate": 4.999079559309511e-05, + "loss": 1.6728, + "step": 8737 + }, + { + "epoch": 0.01, + "learning_rate": 4.999079346171275e-05, + "loss": 1.3647, + "step": 8738 + }, + { + "epoch": 0.01, + "learning_rate": 4.999079133008369e-05, + "loss": 1.1633, + "step": 8739 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990789198207925e-05, + "loss": 1.621, + "step": 8740 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990787066085463e-05, + "loss": 1.548, + "step": 8741 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990784933716306e-05, + "loss": 1.3156, + "step": 8742 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990782801100444e-05, + "loss": 1.0951, + "step": 8743 + }, + { + "epoch": 0.01, + "learning_rate": 4.999078066823789e-05, + "loss": 1.5253, + "step": 8744 + }, + { + "epoch": 0.01, + "learning_rate": 4.999077853512863e-05, + "loss": 0.2305, + "step": 8745 + }, + { + "epoch": 0.01, + "learning_rate": 4.999077640177269e-05, + "loss": 0.3264, + "step": 8746 + }, + { + "epoch": 0.01, + "learning_rate": 4.999077426817004e-05, + "loss": 0.3777, + "step": 8747 + }, + { + "epoch": 0.01, + "learning_rate": 4.999077213432068e-05, + "loss": 0.0945, + "step": 8748 + }, + { + "epoch": 0.01, + "learning_rate": 4.999077000022464e-05, + "loss": 0.0586, + "step": 8749 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990767865881896e-05, + "loss": 0.0599, + "step": 8750 + }, + { + "epoch": 0.01, + "learning_rate": 4.999076573129245e-05, + "loss": 1.2068, + "step": 8751 + }, + { + "epoch": 0.01, + "learning_rate": 4.99907635964563e-05, + "loss": 1.1719, + "step": 8752 + }, + { + "epoch": 0.01, + "learning_rate": 4.999076146137347e-05, + "loss": 1.2801, + "step": 8753 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990759326043924e-05, + "loss": 1.1722, + "step": 8754 + }, + { + "epoch": 0.01, + "learning_rate": 4.999075719046769e-05, + "loss": 1.0336, + "step": 8755 + }, + { + "epoch": 0.01, + "learning_rate": 4.999075505464476e-05, + "loss": 1.0087, + "step": 8756 + }, + { + "epoch": 0.01, + "learning_rate": 4.999075291857512e-05, + "loss": 1.2859, + "step": 8757 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990750782258785e-05, + "loss": 1.125, + "step": 8758 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990748645695764e-05, + "loss": 1.0779, + "step": 8759 + }, + { + "epoch": 0.01, + "learning_rate": 4.999074650888603e-05, + "loss": 1.3599, + "step": 8760 + }, + { + "epoch": 0.01, + "learning_rate": 4.99907443718296e-05, + "loss": 1.9964, + "step": 8761 + }, + { + "epoch": 0.01, + "learning_rate": 4.999074223452648e-05, + "loss": 3.0764, + "step": 8762 + }, + { + "epoch": 0.01, + "learning_rate": 4.999074009697666e-05, + "loss": 0.4825, + "step": 8763 + }, + { + "epoch": 0.01, + "learning_rate": 4.999073795918013e-05, + "loss": 0.9809, + "step": 8764 + }, + { + "epoch": 0.01, + "learning_rate": 4.999073582113692e-05, + "loss": 1.3737, + "step": 8765 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990733682847e-05, + "loss": 1.3429, + "step": 8766 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990731544310386e-05, + "loss": 1.1932, + "step": 8767 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990729405527074e-05, + "loss": 1.2749, + "step": 8768 + }, + { + "epoch": 0.01, + "learning_rate": 4.999072726649706e-05, + "loss": 1.0659, + "step": 8769 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990725127220355e-05, + "loss": 1.0079, + "step": 8770 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990722987696947e-05, + "loss": 1.2343, + "step": 8771 + }, + { + "epoch": 0.01, + "learning_rate": 4.999072084792684e-05, + "loss": 0.8325, + "step": 8772 + }, + { + "epoch": 0.01, + "learning_rate": 4.999071870791005e-05, + "loss": 1.0094, + "step": 8773 + }, + { + "epoch": 0.01, + "learning_rate": 4.999071656764654e-05, + "loss": 1.2184, + "step": 8774 + }, + { + "epoch": 0.01, + "learning_rate": 4.999071442713634e-05, + "loss": 0.8084, + "step": 8775 + }, + { + "epoch": 0.01, + "learning_rate": 4.999071228637945e-05, + "loss": 0.5433, + "step": 8776 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990710145375854e-05, + "loss": 0.7831, + "step": 8777 + }, + { + "epoch": 0.01, + "learning_rate": 4.999070800412556e-05, + "loss": 1.2789, + "step": 8778 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990705862628575e-05, + "loss": 1.3226, + "step": 8779 + }, + { + "epoch": 0.01, + "learning_rate": 4.999070372088489e-05, + "loss": 0.8119, + "step": 8780 + }, + { + "epoch": 0.01, + "learning_rate": 4.999070157889451e-05, + "loss": 1.311, + "step": 8781 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990699436657424e-05, + "loss": 1.1202, + "step": 8782 + }, + { + "epoch": 0.01, + "learning_rate": 4.999069729417364e-05, + "loss": 1.1229, + "step": 8783 + }, + { + "epoch": 0.01, + "learning_rate": 4.999069515144317e-05, + "loss": 1.1352, + "step": 8784 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990693008465996e-05, + "loss": 0.9946, + "step": 8785 + }, + { + "epoch": 0.01, + "learning_rate": 4.999069086524212e-05, + "loss": 1.2596, + "step": 8786 + }, + { + "epoch": 0.01, + "learning_rate": 4.999068872177155e-05, + "loss": 1.1081, + "step": 8787 + }, + { + "epoch": 0.01, + "learning_rate": 4.999068657805428e-05, + "loss": 0.9151, + "step": 8788 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990684434090316e-05, + "loss": 0.9901, + "step": 8789 + }, + { + "epoch": 0.01, + "learning_rate": 4.999068228987965e-05, + "loss": 1.2045, + "step": 8790 + }, + { + "epoch": 0.01, + "learning_rate": 4.99906801454223e-05, + "loss": 0.8862, + "step": 8791 + }, + { + "epoch": 0.01, + "learning_rate": 4.999067800071824e-05, + "loss": 1.1898, + "step": 8792 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990675855767484e-05, + "loss": 0.9776, + "step": 8793 + }, + { + "epoch": 0.01, + "learning_rate": 4.999067371057003e-05, + "loss": 1.017, + "step": 8794 + }, + { + "epoch": 0.01, + "learning_rate": 4.999067156512588e-05, + "loss": 1.2088, + "step": 8795 + }, + { + "epoch": 0.01, + "learning_rate": 4.999066941943503e-05, + "loss": 1.3442, + "step": 8796 + }, + { + "epoch": 0.01, + "learning_rate": 4.999066727349749e-05, + "loss": 1.1206, + "step": 8797 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990665127313246e-05, + "loss": 1.4715, + "step": 8798 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990662980882305e-05, + "loss": 1.6074, + "step": 8799 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990660834204673e-05, + "loss": 1.2598, + "step": 8800 + }, + { + "epoch": 0.01, + "learning_rate": 4.999065868728033e-05, + "loss": 1.1772, + "step": 8801 + }, + { + "epoch": 0.01, + "learning_rate": 4.999065654010931e-05, + "loss": 1.1079, + "step": 8802 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990654392691586e-05, + "loss": 0.9349, + "step": 8803 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990652245027154e-05, + "loss": 0.7483, + "step": 8804 + }, + { + "epoch": 0.01, + "learning_rate": 4.999065009711603e-05, + "loss": 0.6774, + "step": 8805 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990647948958215e-05, + "loss": 0.6102, + "step": 8806 + }, + { + "epoch": 0.01, + "learning_rate": 4.999064580055369e-05, + "loss": 0.4901, + "step": 8807 + }, + { + "epoch": 0.01, + "learning_rate": 4.999064365190248e-05, + "loss": 0.6469, + "step": 8808 + }, + { + "epoch": 0.01, + "learning_rate": 4.999064150300457e-05, + "loss": 0.7031, + "step": 8809 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990639353859956e-05, + "loss": 1.2631, + "step": 8810 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990637204468654e-05, + "loss": 0.9765, + "step": 8811 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990635054830656e-05, + "loss": 1.6765, + "step": 8812 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990632904945955e-05, + "loss": 1.2318, + "step": 8813 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990630754814564e-05, + "loss": 1.4459, + "step": 8814 + }, + { + "epoch": 0.01, + "learning_rate": 4.999062860443646e-05, + "loss": 1.3727, + "step": 8815 + }, + { + "epoch": 0.01, + "learning_rate": 4.999062645381167e-05, + "loss": 1.4865, + "step": 8816 + }, + { + "epoch": 0.01, + "learning_rate": 4.999062430294019e-05, + "loss": 1.0549, + "step": 8817 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990622151822005e-05, + "loss": 1.2053, + "step": 8818 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990620000457123e-05, + "loss": 1.2681, + "step": 8819 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990617848845545e-05, + "loss": 1.2183, + "step": 8820 + }, + { + "epoch": 0.01, + "learning_rate": 4.999061569698727e-05, + "loss": 1.1343, + "step": 8821 + }, + { + "epoch": 0.01, + "learning_rate": 4.99906135448823e-05, + "loss": 1.234, + "step": 8822 + }, + { + "epoch": 0.01, + "learning_rate": 4.999061139253063e-05, + "loss": 1.0511, + "step": 8823 + }, + { + "epoch": 0.01, + "learning_rate": 4.999060923993226e-05, + "loss": 1.2056, + "step": 8824 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990607087087206e-05, + "loss": 1.6887, + "step": 8825 + }, + { + "epoch": 0.01, + "learning_rate": 4.999060493399544e-05, + "loss": 1.3319, + "step": 8826 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990602780656993e-05, + "loss": 1.0182, + "step": 8827 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990600627071835e-05, + "loss": 1.0378, + "step": 8828 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990598473239994e-05, + "loss": 1.4682, + "step": 8829 + }, + { + "epoch": 0.01, + "learning_rate": 4.999059631916144e-05, + "loss": 1.3639, + "step": 8830 + }, + { + "epoch": 0.01, + "learning_rate": 4.99905941648362e-05, + "loss": 1.1836, + "step": 8831 + }, + { + "epoch": 0.01, + "learning_rate": 4.999059201026426e-05, + "loss": 1.3155, + "step": 8832 + }, + { + "epoch": 0.01, + "learning_rate": 4.999058985544562e-05, + "loss": 1.128, + "step": 8833 + }, + { + "epoch": 0.01, + "learning_rate": 4.99905877003803e-05, + "loss": 1.2607, + "step": 8834 + }, + { + "epoch": 0.01, + "learning_rate": 4.999058554506827e-05, + "loss": 1.1759, + "step": 8835 + }, + { + "epoch": 0.01, + "learning_rate": 4.999058338950954e-05, + "loss": 0.9747, + "step": 8836 + }, + { + "epoch": 0.01, + "learning_rate": 4.999058123370412e-05, + "loss": 1.1861, + "step": 8837 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990579077652e-05, + "loss": 0.6354, + "step": 8838 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990576921353187e-05, + "loss": 0.6298, + "step": 8839 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990574764807675e-05, + "loss": 0.5622, + "step": 8840 + }, + { + "epoch": 0.01, + "learning_rate": 4.999057260801547e-05, + "loss": 0.3542, + "step": 8841 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990570450976564e-05, + "loss": 0.4076, + "step": 8842 + }, + { + "epoch": 0.01, + "learning_rate": 4.999056829369096e-05, + "loss": 0.5068, + "step": 8843 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990566136158665e-05, + "loss": 0.6871, + "step": 8844 + }, + { + "epoch": 0.01, + "learning_rate": 4.999056397837968e-05, + "loss": 1.1034, + "step": 8845 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990561820353986e-05, + "loss": 1.3705, + "step": 8846 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990559662081606e-05, + "loss": 1.0481, + "step": 8847 + }, + { + "epoch": 0.01, + "learning_rate": 4.999055750356252e-05, + "loss": 1.5051, + "step": 8848 + }, + { + "epoch": 0.01, + "learning_rate": 4.999055534479674e-05, + "loss": 1.2912, + "step": 8849 + }, + { + "epoch": 0.01, + "learning_rate": 4.999055318578427e-05, + "loss": 1.1822, + "step": 8850 + }, + { + "epoch": 0.01, + "learning_rate": 4.99905510265251e-05, + "loss": 1.1203, + "step": 8851 + }, + { + "epoch": 0.01, + "learning_rate": 4.999054886701923e-05, + "loss": 0.8883, + "step": 8852 + }, + { + "epoch": 0.01, + "learning_rate": 4.999054670726667e-05, + "loss": 1.2353, + "step": 8853 + }, + { + "epoch": 0.01, + "learning_rate": 4.999054454726741e-05, + "loss": 0.9927, + "step": 8854 + }, + { + "epoch": 0.01, + "learning_rate": 4.999054238702145e-05, + "loss": 0.6175, + "step": 8855 + }, + { + "epoch": 0.01, + "learning_rate": 4.99905402265288e-05, + "loss": 1.0129, + "step": 8856 + }, + { + "epoch": 0.01, + "learning_rate": 4.999053806578946e-05, + "loss": 1.0805, + "step": 8857 + }, + { + "epoch": 0.01, + "learning_rate": 4.999053590480342e-05, + "loss": 1.1087, + "step": 8858 + }, + { + "epoch": 0.01, + "learning_rate": 4.999053374357068e-05, + "loss": 1.2239, + "step": 8859 + }, + { + "epoch": 0.01, + "learning_rate": 4.999053158209124e-05, + "loss": 1.129, + "step": 8860 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990529420365106e-05, + "loss": 0.9646, + "step": 8861 + }, + { + "epoch": 0.01, + "learning_rate": 4.999052725839228e-05, + "loss": 1.0584, + "step": 8862 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990525096172755e-05, + "loss": 1.085, + "step": 8863 + }, + { + "epoch": 0.01, + "learning_rate": 4.999052293370654e-05, + "loss": 1.0483, + "step": 8864 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990520770993624e-05, + "loss": 1.0508, + "step": 8865 + }, + { + "epoch": 0.01, + "learning_rate": 4.999051860803401e-05, + "loss": 1.2326, + "step": 8866 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990516444827706e-05, + "loss": 1.1117, + "step": 8867 + }, + { + "epoch": 0.01, + "learning_rate": 4.99905142813747e-05, + "loss": 0.9983, + "step": 8868 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990512117675e-05, + "loss": 0.9904, + "step": 8869 + }, + { + "epoch": 0.01, + "learning_rate": 4.999050995372861e-05, + "loss": 1.2398, + "step": 8870 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990507789535525e-05, + "loss": 1.4326, + "step": 8871 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990505625095735e-05, + "loss": 1.0302, + "step": 8872 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990503460409255e-05, + "loss": 1.2675, + "step": 8873 + }, + { + "epoch": 0.01, + "learning_rate": 4.999050129547608e-05, + "loss": 0.8434, + "step": 8874 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990499130296204e-05, + "loss": 0.3367, + "step": 8875 + }, + { + "epoch": 0.01, + "learning_rate": 4.999049696486964e-05, + "loss": 1.0727, + "step": 8876 + }, + { + "epoch": 0.01, + "learning_rate": 4.999049479919637e-05, + "loss": 1.4354, + "step": 8877 + }, + { + "epoch": 0.01, + "learning_rate": 4.999049263327641e-05, + "loss": 1.0382, + "step": 8878 + }, + { + "epoch": 0.01, + "learning_rate": 4.999049046710975e-05, + "loss": 0.8141, + "step": 8879 + }, + { + "epoch": 0.01, + "learning_rate": 4.99904883006964e-05, + "loss": 0.7363, + "step": 8880 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990486134036354e-05, + "loss": 1.6423, + "step": 8881 + }, + { + "epoch": 0.01, + "learning_rate": 4.999048396712962e-05, + "loss": 2.6529, + "step": 8882 + }, + { + "epoch": 0.01, + "learning_rate": 4.999048179997618e-05, + "loss": 1.0003, + "step": 8883 + }, + { + "epoch": 0.01, + "learning_rate": 4.999047963257604e-05, + "loss": 0.9851, + "step": 8884 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990477464929214e-05, + "loss": 0.9985, + "step": 8885 + }, + { + "epoch": 0.01, + "learning_rate": 4.999047529703569e-05, + "loss": 1.1115, + "step": 8886 + }, + { + "epoch": 0.01, + "learning_rate": 4.999047312889547e-05, + "loss": 1.3173, + "step": 8887 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990470960508555e-05, + "loss": 0.7853, + "step": 8888 + }, + { + "epoch": 0.01, + "learning_rate": 4.999046879187494e-05, + "loss": 0.6068, + "step": 8889 + }, + { + "epoch": 0.01, + "learning_rate": 4.999046662299464e-05, + "loss": 0.5536, + "step": 8890 + }, + { + "epoch": 0.01, + "learning_rate": 4.999046445386763e-05, + "loss": 0.5109, + "step": 8891 + }, + { + "epoch": 0.01, + "learning_rate": 4.999046228449394e-05, + "loss": 0.4583, + "step": 8892 + }, + { + "epoch": 0.01, + "learning_rate": 4.999046011487355e-05, + "loss": 0.4123, + "step": 8893 + }, + { + "epoch": 0.01, + "learning_rate": 4.999045794500646e-05, + "loss": 0.4004, + "step": 8894 + }, + { + "epoch": 0.01, + "learning_rate": 4.999045577489268e-05, + "loss": 0.3739, + "step": 8895 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990453604532205e-05, + "loss": 0.3816, + "step": 8896 + }, + { + "epoch": 0.01, + "learning_rate": 4.999045143392503e-05, + "loss": 0.4391, + "step": 8897 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990449263071156e-05, + "loss": 1.1637, + "step": 8898 + }, + { + "epoch": 0.01, + "learning_rate": 4.99904470919706e-05, + "loss": 1.235, + "step": 8899 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990444920623334e-05, + "loss": 0.955, + "step": 8900 + }, + { + "epoch": 0.01, + "learning_rate": 4.999044274902939e-05, + "loss": 0.7153, + "step": 8901 + }, + { + "epoch": 0.01, + "learning_rate": 4.999044057718873e-05, + "loss": 0.9503, + "step": 8902 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990438405101394e-05, + "loss": 1.3083, + "step": 8903 + }, + { + "epoch": 0.01, + "learning_rate": 4.999043623276736e-05, + "loss": 0.7842, + "step": 8904 + }, + { + "epoch": 0.01, + "learning_rate": 4.999043406018662e-05, + "loss": 0.6211, + "step": 8905 + }, + { + "epoch": 0.01, + "learning_rate": 4.999043188735919e-05, + "loss": 0.996, + "step": 8906 + }, + { + "epoch": 0.01, + "learning_rate": 4.999042971428507e-05, + "loss": 0.991, + "step": 8907 + }, + { + "epoch": 0.01, + "learning_rate": 4.999042754096425e-05, + "loss": 1.1821, + "step": 8908 + }, + { + "epoch": 0.01, + "learning_rate": 4.999042536739673e-05, + "loss": 1.0599, + "step": 8909 + }, + { + "epoch": 0.01, + "learning_rate": 4.999042319358253e-05, + "loss": 1.1233, + "step": 8910 + }, + { + "epoch": 0.01, + "learning_rate": 4.999042101952162e-05, + "loss": 1.0006, + "step": 8911 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990418845214026e-05, + "loss": 1.2382, + "step": 8912 + }, + { + "epoch": 0.01, + "learning_rate": 4.999041667065973e-05, + "loss": 1.105, + "step": 8913 + }, + { + "epoch": 0.01, + "learning_rate": 4.999041449585874e-05, + "loss": 1.1125, + "step": 8914 + }, + { + "epoch": 0.01, + "learning_rate": 4.999041232081106e-05, + "loss": 1.1714, + "step": 8915 + }, + { + "epoch": 0.01, + "learning_rate": 4.999041014551668e-05, + "loss": 1.079, + "step": 8916 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990407969975606e-05, + "loss": 0.4372, + "step": 8917 + }, + { + "epoch": 0.01, + "learning_rate": 4.999040579418784e-05, + "loss": 0.5173, + "step": 8918 + }, + { + "epoch": 0.01, + "learning_rate": 4.999040361815338e-05, + "loss": 1.5945, + "step": 8919 + }, + { + "epoch": 0.01, + "learning_rate": 4.999040144187222e-05, + "loss": 1.1579, + "step": 8920 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990399265344366e-05, + "loss": 0.8848, + "step": 8921 + }, + { + "epoch": 0.01, + "learning_rate": 4.999039708856982e-05, + "loss": 1.0272, + "step": 8922 + }, + { + "epoch": 0.01, + "learning_rate": 4.999039491154858e-05, + "loss": 1.1456, + "step": 8923 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990392734280644e-05, + "loss": 0.9839, + "step": 8924 + }, + { + "epoch": 0.01, + "learning_rate": 4.999039055676601e-05, + "loss": 0.9463, + "step": 8925 + }, + { + "epoch": 0.01, + "learning_rate": 4.999038837900469e-05, + "loss": 1.1473, + "step": 8926 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990386200996666e-05, + "loss": 1.6757, + "step": 8927 + }, + { + "epoch": 0.01, + "learning_rate": 4.999038402274195e-05, + "loss": 2.2957, + "step": 8928 + }, + { + "epoch": 0.01, + "learning_rate": 4.999038184424054e-05, + "loss": 0.9495, + "step": 8929 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990379665492446e-05, + "loss": 1.2136, + "step": 8930 + }, + { + "epoch": 0.01, + "learning_rate": 4.999037748649764e-05, + "loss": 1.3899, + "step": 8931 + }, + { + "epoch": 0.01, + "learning_rate": 4.999037530725615e-05, + "loss": 1.2546, + "step": 8932 + }, + { + "epoch": 0.01, + "learning_rate": 4.999037312776796e-05, + "loss": 1.3167, + "step": 8933 + }, + { + "epoch": 0.01, + "learning_rate": 4.999037094803308e-05, + "loss": 1.0479, + "step": 8934 + }, + { + "epoch": 0.01, + "learning_rate": 4.999036876805151e-05, + "loss": 0.8051, + "step": 8935 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990366587823236e-05, + "loss": 0.6917, + "step": 8936 + }, + { + "epoch": 0.01, + "learning_rate": 4.999036440734827e-05, + "loss": 0.6909, + "step": 8937 + }, + { + "epoch": 0.01, + "learning_rate": 4.999036222662661e-05, + "loss": 0.7093, + "step": 8938 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990360045658266e-05, + "loss": 1.1164, + "step": 8939 + }, + { + "epoch": 0.01, + "learning_rate": 4.999035786444322e-05, + "loss": 1.2123, + "step": 8940 + }, + { + "epoch": 0.01, + "learning_rate": 4.999035568298147e-05, + "loss": 1.0801, + "step": 8941 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990353501273034e-05, + "loss": 1.0261, + "step": 8942 + }, + { + "epoch": 0.01, + "learning_rate": 4.999035131931791e-05, + "loss": 0.8396, + "step": 8943 + }, + { + "epoch": 0.01, + "learning_rate": 4.999034913711608e-05, + "loss": 1.0039, + "step": 8944 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990346954667566e-05, + "loss": 1.102, + "step": 8945 + }, + { + "epoch": 0.01, + "learning_rate": 4.999034477197235e-05, + "loss": 0.9566, + "step": 8946 + }, + { + "epoch": 0.01, + "learning_rate": 4.999034258903045e-05, + "loss": 1.3018, + "step": 8947 + }, + { + "epoch": 0.01, + "learning_rate": 4.999034040584184e-05, + "loss": 1.2828, + "step": 8948 + }, + { + "epoch": 0.01, + "learning_rate": 4.999033822240655e-05, + "loss": 1.1951, + "step": 8949 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990336038724564e-05, + "loss": 1.3283, + "step": 8950 + }, + { + "epoch": 0.01, + "learning_rate": 4.999033385479588e-05, + "loss": 1.3253, + "step": 8951 + }, + { + "epoch": 0.01, + "learning_rate": 4.99903316706205e-05, + "loss": 1.4602, + "step": 8952 + }, + { + "epoch": 0.01, + "learning_rate": 4.999032948619843e-05, + "loss": 0.837, + "step": 8953 + }, + { + "epoch": 0.01, + "learning_rate": 4.999032730152966e-05, + "loss": 1.0146, + "step": 8954 + }, + { + "epoch": 0.01, + "learning_rate": 4.99903251166142e-05, + "loss": 1.0879, + "step": 8955 + }, + { + "epoch": 0.01, + "learning_rate": 4.999032293145205e-05, + "loss": 1.4743, + "step": 8956 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990320746043204e-05, + "loss": 1.0642, + "step": 8957 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990318560387664e-05, + "loss": 1.0941, + "step": 8958 + }, + { + "epoch": 0.01, + "learning_rate": 4.999031637448542e-05, + "loss": 0.8432, + "step": 8959 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990314188336495e-05, + "loss": 1.264, + "step": 8960 + }, + { + "epoch": 0.01, + "learning_rate": 4.999031200194088e-05, + "loss": 0.8801, + "step": 8961 + }, + { + "epoch": 0.01, + "learning_rate": 4.999030981529856e-05, + "loss": 0.9694, + "step": 8962 + }, + { + "epoch": 0.01, + "learning_rate": 4.999030762840955e-05, + "loss": 1.2519, + "step": 8963 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990305441273844e-05, + "loss": 1.1655, + "step": 8964 + }, + { + "epoch": 0.01, + "learning_rate": 4.999030325389145e-05, + "loss": 1.7452, + "step": 8965 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990301066262355e-05, + "loss": 1.1885, + "step": 8966 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990298878386566e-05, + "loss": 1.1091, + "step": 8967 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990296690264086e-05, + "loss": 1.0145, + "step": 8968 + }, + { + "epoch": 0.01, + "learning_rate": 4.999029450189492e-05, + "loss": 1.1637, + "step": 8969 + }, + { + "epoch": 0.01, + "learning_rate": 4.999029231327905e-05, + "loss": 0.8908, + "step": 8970 + }, + { + "epoch": 0.01, + "learning_rate": 4.999029012441649e-05, + "loss": 1.0207, + "step": 8971 + }, + { + "epoch": 0.01, + "learning_rate": 4.999028793530724e-05, + "loss": 1.0373, + "step": 8972 + }, + { + "epoch": 0.01, + "learning_rate": 4.999028574595129e-05, + "loss": 1.2048, + "step": 8973 + }, + { + "epoch": 0.01, + "learning_rate": 4.999028355634865e-05, + "loss": 1.6187, + "step": 8974 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990281366499314e-05, + "loss": 1.1053, + "step": 8975 + }, + { + "epoch": 0.01, + "learning_rate": 4.999027917640329e-05, + "loss": 1.4927, + "step": 8976 + }, + { + "epoch": 0.01, + "learning_rate": 4.999027698606056e-05, + "loss": 1.1898, + "step": 8977 + }, + { + "epoch": 0.01, + "learning_rate": 4.999027479547115e-05, + "loss": 1.4062, + "step": 8978 + }, + { + "epoch": 0.01, + "learning_rate": 4.999027260463503e-05, + "loss": 0.9272, + "step": 8979 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990270413552235e-05, + "loss": 0.9686, + "step": 8980 + }, + { + "epoch": 0.01, + "learning_rate": 4.999026822222274e-05, + "loss": 1.0166, + "step": 8981 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990266030646555e-05, + "loss": 1.2849, + "step": 8982 + }, + { + "epoch": 0.01, + "learning_rate": 4.999026383882367e-05, + "loss": 0.7406, + "step": 8983 + }, + { + "epoch": 0.01, + "learning_rate": 4.99902616467541e-05, + "loss": 0.7619, + "step": 8984 + }, + { + "epoch": 0.01, + "learning_rate": 4.999025945443783e-05, + "loss": 1.2153, + "step": 8985 + }, + { + "epoch": 0.01, + "learning_rate": 4.999025726187486e-05, + "loss": 0.8999, + "step": 8986 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990255069065215e-05, + "loss": 0.9631, + "step": 8987 + }, + { + "epoch": 0.01, + "learning_rate": 4.999025287600886e-05, + "loss": 0.8158, + "step": 8988 + }, + { + "epoch": 0.01, + "learning_rate": 4.999025068270582e-05, + "loss": 1.0232, + "step": 8989 + }, + { + "epoch": 0.01, + "learning_rate": 4.999024848915609e-05, + "loss": 1.1381, + "step": 8990 + }, + { + "epoch": 0.01, + "learning_rate": 4.999024629535966e-05, + "loss": 0.9671, + "step": 8991 + }, + { + "epoch": 0.01, + "learning_rate": 4.999024410131653e-05, + "loss": 1.0486, + "step": 8992 + }, + { + "epoch": 0.01, + "learning_rate": 4.999024190702672e-05, + "loss": 1.0093, + "step": 8993 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990239712490215e-05, + "loss": 0.9589, + "step": 8994 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990237517707014e-05, + "loss": 0.9503, + "step": 8995 + }, + { + "epoch": 0.01, + "learning_rate": 4.999023532267712e-05, + "loss": 1.0971, + "step": 8996 + }, + { + "epoch": 0.01, + "learning_rate": 4.999023312740053e-05, + "loss": 1.1159, + "step": 8997 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990230931877246e-05, + "loss": 0.8094, + "step": 8998 + }, + { + "epoch": 0.01, + "learning_rate": 4.999022873610728e-05, + "loss": 1.1743, + "step": 8999 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990226540090615e-05, + "loss": 1.1199, + "step": 9000 + }, + { + "epoch": 0.01, + "eval_loss": 1.0580137968063354, + "eval_runtime": 83.5773, + "eval_samples_per_second": 16.571, + "eval_steps_per_second": 4.152, + "step": 9000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990224343827255e-05, + "loss": 0.9495, + "step": 9001 + }, + { + "epoch": 0.01, + "learning_rate": 4.99902221473172e-05, + "loss": 1.0997, + "step": 9002 + }, + { + "epoch": 0.01, + "learning_rate": 4.999021995056046e-05, + "loss": 1.0835, + "step": 9003 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990217753557015e-05, + "loss": 1.1702, + "step": 9004 + }, + { + "epoch": 0.01, + "learning_rate": 4.999021555630689e-05, + "loss": 1.4746, + "step": 9005 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990213358810065e-05, + "loss": 4.1693, + "step": 9006 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990211161066545e-05, + "loss": 1.4417, + "step": 9007 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990208963076335e-05, + "loss": 1.0626, + "step": 9008 + }, + { + "epoch": 0.01, + "learning_rate": 4.999020676483943e-05, + "loss": 1.1296, + "step": 9009 + }, + { + "epoch": 0.01, + "learning_rate": 4.999020456635583e-05, + "loss": 1.0832, + "step": 9010 + }, + { + "epoch": 0.01, + "learning_rate": 4.999020236762555e-05, + "loss": 1.0376, + "step": 9011 + }, + { + "epoch": 0.01, + "learning_rate": 4.999020016864857e-05, + "loss": 1.1048, + "step": 9012 + }, + { + "epoch": 0.01, + "learning_rate": 4.99901979694249e-05, + "loss": 1.0899, + "step": 9013 + }, + { + "epoch": 0.01, + "learning_rate": 4.999019576995453e-05, + "loss": 1.1138, + "step": 9014 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990193570237476e-05, + "loss": 1.126, + "step": 9015 + }, + { + "epoch": 0.01, + "learning_rate": 4.999019137027372e-05, + "loss": 1.1415, + "step": 9016 + }, + { + "epoch": 0.01, + "learning_rate": 4.999018917006328e-05, + "loss": 1.0015, + "step": 9017 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990186969606146e-05, + "loss": 0.7915, + "step": 9018 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990184768902314e-05, + "loss": 0.869, + "step": 9019 + }, + { + "epoch": 0.01, + "learning_rate": 4.999018256795179e-05, + "loss": 0.9655, + "step": 9020 + }, + { + "epoch": 0.01, + "learning_rate": 4.999018036675458e-05, + "loss": 1.0987, + "step": 9021 + }, + { + "epoch": 0.01, + "learning_rate": 4.999017816531067e-05, + "loss": 1.1674, + "step": 9022 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990175963620066e-05, + "loss": 1.2031, + "step": 9023 + }, + { + "epoch": 0.01, + "learning_rate": 4.999017376168278e-05, + "loss": 0.9145, + "step": 9024 + }, + { + "epoch": 0.01, + "learning_rate": 4.999017155949879e-05, + "loss": 0.8055, + "step": 9025 + }, + { + "epoch": 0.01, + "learning_rate": 4.999016935706812e-05, + "loss": 0.8942, + "step": 9026 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990167154390746e-05, + "loss": 1.1422, + "step": 9027 + }, + { + "epoch": 0.01, + "learning_rate": 4.999016495146669e-05, + "loss": 1.146, + "step": 9028 + }, + { + "epoch": 0.01, + "learning_rate": 4.999016274829593e-05, + "loss": 1.1712, + "step": 9029 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990160544878485e-05, + "loss": 1.0368, + "step": 9030 + }, + { + "epoch": 0.01, + "learning_rate": 4.999015834121435e-05, + "loss": 0.8829, + "step": 9031 + }, + { + "epoch": 0.01, + "learning_rate": 4.999015613730352e-05, + "loss": 1.1787, + "step": 9032 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990153933145995e-05, + "loss": 1.5084, + "step": 9033 + }, + { + "epoch": 0.01, + "learning_rate": 4.999015172874178e-05, + "loss": 0.6153, + "step": 9034 + }, + { + "epoch": 0.01, + "learning_rate": 4.999014952409087e-05, + "loss": 1.1142, + "step": 9035 + }, + { + "epoch": 0.01, + "learning_rate": 4.999014731919327e-05, + "loss": 1.2513, + "step": 9036 + }, + { + "epoch": 0.01, + "learning_rate": 4.999014511404898e-05, + "loss": 1.0711, + "step": 9037 + }, + { + "epoch": 0.01, + "learning_rate": 4.999014290865799e-05, + "loss": 0.9331, + "step": 9038 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990140703020315e-05, + "loss": 1.4594, + "step": 9039 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990138497135954e-05, + "loss": 1.106, + "step": 9040 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990136291004884e-05, + "loss": 1.03, + "step": 9041 + }, + { + "epoch": 0.01, + "learning_rate": 4.999013408462714e-05, + "loss": 1.2, + "step": 9042 + }, + { + "epoch": 0.01, + "learning_rate": 4.999013187800269e-05, + "loss": 0.9152, + "step": 9043 + }, + { + "epoch": 0.01, + "learning_rate": 4.999012967113156e-05, + "loss": 0.9454, + "step": 9044 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990127464013723e-05, + "loss": 1.101, + "step": 9045 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990125256649204e-05, + "loss": 1.1816, + "step": 9046 + }, + { + "epoch": 0.01, + "learning_rate": 4.999012304903799e-05, + "loss": 1.1778, + "step": 9047 + }, + { + "epoch": 0.01, + "learning_rate": 4.999012084118009e-05, + "loss": 1.3422, + "step": 9048 + }, + { + "epoch": 0.01, + "learning_rate": 4.999011863307549e-05, + "loss": 1.1377, + "step": 9049 + }, + { + "epoch": 0.01, + "learning_rate": 4.99901164247242e-05, + "loss": 1.1026, + "step": 9050 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990114216126216e-05, + "loss": 1.2882, + "step": 9051 + }, + { + "epoch": 0.01, + "learning_rate": 4.999011200728155e-05, + "loss": 1.2365, + "step": 9052 + }, + { + "epoch": 0.01, + "learning_rate": 4.999010979819018e-05, + "loss": 1.1337, + "step": 9053 + }, + { + "epoch": 0.01, + "learning_rate": 4.999010758885213e-05, + "loss": 0.9899, + "step": 9054 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990105379267386e-05, + "loss": 1.2565, + "step": 9055 + }, + { + "epoch": 0.01, + "learning_rate": 4.999010316943594e-05, + "loss": 1.5078, + "step": 9056 + }, + { + "epoch": 0.01, + "learning_rate": 4.999010095935781e-05, + "loss": 1.4685, + "step": 9057 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990098749032986e-05, + "loss": 1.4006, + "step": 9058 + }, + { + "epoch": 0.01, + "learning_rate": 4.999009653846147e-05, + "loss": 1.1974, + "step": 9059 + }, + { + "epoch": 0.01, + "learning_rate": 4.999009432764326e-05, + "loss": 1.3823, + "step": 9060 + }, + { + "epoch": 0.01, + "learning_rate": 4.999009211657836e-05, + "loss": 0.6458, + "step": 9061 + }, + { + "epoch": 0.01, + "learning_rate": 4.999008990526677e-05, + "loss": 1.0128, + "step": 9062 + }, + { + "epoch": 0.01, + "learning_rate": 4.999008769370849e-05, + "loss": 1.1074, + "step": 9063 + }, + { + "epoch": 0.01, + "learning_rate": 4.999008548190351e-05, + "loss": 1.2405, + "step": 9064 + }, + { + "epoch": 0.01, + "learning_rate": 4.999008326985185e-05, + "loss": 1.6292, + "step": 9065 + }, + { + "epoch": 0.01, + "learning_rate": 4.999008105755349e-05, + "loss": 1.1772, + "step": 9066 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990078845008444e-05, + "loss": 1.118, + "step": 9067 + }, + { + "epoch": 0.01, + "learning_rate": 4.999007663221671e-05, + "loss": 1.3651, + "step": 9068 + }, + { + "epoch": 0.01, + "learning_rate": 4.999007441917828e-05, + "loss": 2.1085, + "step": 9069 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990072205893155e-05, + "loss": 1.1137, + "step": 9070 + }, + { + "epoch": 0.01, + "learning_rate": 4.999006999236134e-05, + "loss": 1.2094, + "step": 9071 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990067778582837e-05, + "loss": 0.8978, + "step": 9072 + }, + { + "epoch": 0.01, + "learning_rate": 4.999006556455764e-05, + "loss": 0.7341, + "step": 9073 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990063350285745e-05, + "loss": 1.0128, + "step": 9074 + }, + { + "epoch": 0.01, + "learning_rate": 4.999006113576717e-05, + "loss": 1.4113, + "step": 9075 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990058921001895e-05, + "loss": 1.2249, + "step": 9076 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990056705989934e-05, + "loss": 1.3765, + "step": 9077 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990054490731284e-05, + "loss": 1.348, + "step": 9078 + }, + { + "epoch": 0.01, + "learning_rate": 4.999005227522594e-05, + "loss": 1.5085, + "step": 9079 + }, + { + "epoch": 0.01, + "learning_rate": 4.99900500594739e-05, + "loss": 1.3146, + "step": 9080 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990047843475174e-05, + "loss": 1.1631, + "step": 9081 + }, + { + "epoch": 0.01, + "learning_rate": 4.999004562722975e-05, + "loss": 1.3994, + "step": 9082 + }, + { + "epoch": 0.01, + "learning_rate": 4.999004341073764e-05, + "loss": 1.1258, + "step": 9083 + }, + { + "epoch": 0.01, + "learning_rate": 4.999004119399884e-05, + "loss": 1.2075, + "step": 9084 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990038977013356e-05, + "loss": 1.3821, + "step": 9085 + }, + { + "epoch": 0.01, + "learning_rate": 4.999003675978117e-05, + "loss": 1.0025, + "step": 9086 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990034542302294e-05, + "loss": 1.1581, + "step": 9087 + }, + { + "epoch": 0.01, + "learning_rate": 4.999003232457673e-05, + "loss": 1.0726, + "step": 9088 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990030106604466e-05, + "loss": 1.0915, + "step": 9089 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990027888385524e-05, + "loss": 1.2369, + "step": 9090 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990025669919885e-05, + "loss": 1.0006, + "step": 9091 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990023451207557e-05, + "loss": 1.1857, + "step": 9092 + }, + { + "epoch": 0.01, + "learning_rate": 4.999002123224854e-05, + "loss": 1.0311, + "step": 9093 + }, + { + "epoch": 0.01, + "learning_rate": 4.999001901304282e-05, + "loss": 1.1941, + "step": 9094 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990016793590425e-05, + "loss": 1.2601, + "step": 9095 + }, + { + "epoch": 0.01, + "learning_rate": 4.999001457389133e-05, + "loss": 1.0129, + "step": 9096 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990012353945545e-05, + "loss": 1.2379, + "step": 9097 + }, + { + "epoch": 0.01, + "learning_rate": 4.999001013375307e-05, + "loss": 1.4153, + "step": 9098 + }, + { + "epoch": 0.01, + "learning_rate": 4.99900079133139e-05, + "loss": 1.368, + "step": 9099 + }, + { + "epoch": 0.01, + "learning_rate": 4.9990005692628044e-05, + "loss": 1.4062, + "step": 9100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999000347169549e-05, + "loss": 0.7764, + "step": 9101 + }, + { + "epoch": 0.01, + "learning_rate": 4.999000125051626e-05, + "loss": 0.9343, + "step": 9102 + }, + { + "epoch": 0.01, + "learning_rate": 4.998999902909033e-05, + "loss": 1.0394, + "step": 9103 + }, + { + "epoch": 0.01, + "learning_rate": 4.998999680741771e-05, + "loss": 0.9598, + "step": 9104 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989994585498404e-05, + "loss": 1.0693, + "step": 9105 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989992363332396e-05, + "loss": 0.4744, + "step": 9106 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989990140919706e-05, + "loss": 0.3535, + "step": 9107 + }, + { + "epoch": 0.01, + "learning_rate": 4.998998791826032e-05, + "loss": 0.3838, + "step": 9108 + }, + { + "epoch": 0.01, + "learning_rate": 4.998998569535425e-05, + "loss": 0.2679, + "step": 9109 + }, + { + "epoch": 0.01, + "learning_rate": 4.998998347220148e-05, + "loss": 0.2629, + "step": 9110 + }, + { + "epoch": 0.01, + "learning_rate": 4.998998124880203e-05, + "loss": 0.2073, + "step": 9111 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989979025155886e-05, + "loss": 0.7131, + "step": 9112 + }, + { + "epoch": 0.01, + "learning_rate": 4.998997680126305e-05, + "loss": 0.7333, + "step": 9113 + }, + { + "epoch": 0.01, + "learning_rate": 4.998997457712352e-05, + "loss": 1.3434, + "step": 9114 + }, + { + "epoch": 0.01, + "learning_rate": 4.99899723527373e-05, + "loss": 1.5403, + "step": 9115 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989970128104394e-05, + "loss": 1.2184, + "step": 9116 + }, + { + "epoch": 0.01, + "learning_rate": 4.99899679032248e-05, + "loss": 1.1485, + "step": 9117 + }, + { + "epoch": 0.01, + "learning_rate": 4.998996567809851e-05, + "loss": 1.2869, + "step": 9118 + }, + { + "epoch": 0.01, + "learning_rate": 4.998996345272553e-05, + "loss": 1.7247, + "step": 9119 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989961227105865e-05, + "loss": 1.1431, + "step": 9120 + }, + { + "epoch": 0.01, + "learning_rate": 4.99899590012395e-05, + "loss": 1.1697, + "step": 9121 + }, + { + "epoch": 0.01, + "learning_rate": 4.998995677512646e-05, + "loss": 1.182, + "step": 9122 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989954548766715e-05, + "loss": 1.1294, + "step": 9123 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989952322160284e-05, + "loss": 1.0839, + "step": 9124 + }, + { + "epoch": 0.01, + "learning_rate": 4.998995009530716e-05, + "loss": 0.7609, + "step": 9125 + }, + { + "epoch": 0.01, + "learning_rate": 4.998994786820736e-05, + "loss": 1.1856, + "step": 9126 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989945640860856e-05, + "loss": 1.1002, + "step": 9127 + }, + { + "epoch": 0.01, + "learning_rate": 4.998994341326766e-05, + "loss": 0.9823, + "step": 9128 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989941185427777e-05, + "loss": 1.3698, + "step": 9129 + }, + { + "epoch": 0.01, + "learning_rate": 4.998993895734121e-05, + "loss": 0.9, + "step": 9130 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989936729007945e-05, + "loss": 1.2307, + "step": 9131 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989934500427994e-05, + "loss": 1.1759, + "step": 9132 + }, + { + "epoch": 0.01, + "learning_rate": 4.998993227160135e-05, + "loss": 0.8295, + "step": 9133 + }, + { + "epoch": 0.01, + "learning_rate": 4.998993004252802e-05, + "loss": 1.1786, + "step": 9134 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989927813207995e-05, + "loss": 1.3618, + "step": 9135 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989925583641285e-05, + "loss": 1.3421, + "step": 9136 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989923353827885e-05, + "loss": 1.1763, + "step": 9137 + }, + { + "epoch": 0.01, + "learning_rate": 4.998992112376779e-05, + "loss": 1.0261, + "step": 9138 + }, + { + "epoch": 0.01, + "learning_rate": 4.998991889346101e-05, + "loss": 1.1373, + "step": 9139 + }, + { + "epoch": 0.01, + "learning_rate": 4.998991666290754e-05, + "loss": 1.1484, + "step": 9140 + }, + { + "epoch": 0.01, + "learning_rate": 4.998991443210738e-05, + "loss": 1.1353, + "step": 9141 + }, + { + "epoch": 0.01, + "learning_rate": 4.998991220106052e-05, + "loss": 1.166, + "step": 9142 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989909969766976e-05, + "loss": 1.2454, + "step": 9143 + }, + { + "epoch": 0.01, + "learning_rate": 4.998990773822675e-05, + "loss": 1.2827, + "step": 9144 + }, + { + "epoch": 0.01, + "learning_rate": 4.998990550643983e-05, + "loss": 1.0495, + "step": 9145 + }, + { + "epoch": 0.01, + "learning_rate": 4.998990327440621e-05, + "loss": 1.1319, + "step": 9146 + }, + { + "epoch": 0.01, + "learning_rate": 4.998990104212591e-05, + "loss": 1.2523, + "step": 9147 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989898809598924e-05, + "loss": 1.0989, + "step": 9148 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989896576825246e-05, + "loss": 1.2735, + "step": 9149 + }, + { + "epoch": 0.01, + "learning_rate": 4.998989434380487e-05, + "loss": 1.3394, + "step": 9150 + }, + { + "epoch": 0.01, + "learning_rate": 4.998989211053782e-05, + "loss": 1.1166, + "step": 9151 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989889877024064e-05, + "loss": 1.2377, + "step": 9152 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989887643263625e-05, + "loss": 1.1499, + "step": 9153 + }, + { + "epoch": 0.01, + "learning_rate": 4.99898854092565e-05, + "loss": 1.1686, + "step": 9154 + }, + { + "epoch": 0.01, + "learning_rate": 4.998988317500267e-05, + "loss": 1.0717, + "step": 9155 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989880940502165e-05, + "loss": 1.1107, + "step": 9156 + }, + { + "epoch": 0.01, + "learning_rate": 4.998987870575497e-05, + "loss": 1.4478, + "step": 9157 + }, + { + "epoch": 0.01, + "learning_rate": 4.998987647076108e-05, + "loss": 1.3044, + "step": 9158 + }, + { + "epoch": 0.01, + "learning_rate": 4.998987423552051e-05, + "loss": 1.1052, + "step": 9159 + }, + { + "epoch": 0.01, + "learning_rate": 4.998987200003324e-05, + "loss": 0.9653, + "step": 9160 + }, + { + "epoch": 0.01, + "learning_rate": 4.998986976429928e-05, + "loss": 1.0159, + "step": 9161 + }, + { + "epoch": 0.01, + "learning_rate": 4.998986752831864e-05, + "loss": 1.042, + "step": 9162 + }, + { + "epoch": 0.01, + "learning_rate": 4.99898652920913e-05, + "loss": 1.109, + "step": 9163 + }, + { + "epoch": 0.01, + "learning_rate": 4.998986305561728e-05, + "loss": 1.3906, + "step": 9164 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989860818896564e-05, + "loss": 1.334, + "step": 9165 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989858581929164e-05, + "loss": 1.0409, + "step": 9166 + }, + { + "epoch": 0.01, + "learning_rate": 4.998985634471507e-05, + "loss": 0.8628, + "step": 9167 + }, + { + "epoch": 0.01, + "learning_rate": 4.998985410725429e-05, + "loss": 0.5957, + "step": 9168 + }, + { + "epoch": 0.01, + "learning_rate": 4.998985186954682e-05, + "loss": 1.0405, + "step": 9169 + }, + { + "epoch": 0.01, + "learning_rate": 4.998984963159265e-05, + "loss": 1.1866, + "step": 9170 + }, + { + "epoch": 0.01, + "learning_rate": 4.998984739339181e-05, + "loss": 1.0571, + "step": 9171 + }, + { + "epoch": 0.01, + "learning_rate": 4.998984515494427e-05, + "loss": 1.0295, + "step": 9172 + }, + { + "epoch": 0.01, + "learning_rate": 4.998984291625004e-05, + "loss": 1.2393, + "step": 9173 + }, + { + "epoch": 0.01, + "learning_rate": 4.998984067730912e-05, + "loss": 0.9602, + "step": 9174 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989838438121515e-05, + "loss": 1.2002, + "step": 9175 + }, + { + "epoch": 0.01, + "learning_rate": 4.998983619868722e-05, + "loss": 1.4526, + "step": 9176 + }, + { + "epoch": 0.01, + "learning_rate": 4.998983395900624e-05, + "loss": 1.1412, + "step": 9177 + }, + { + "epoch": 0.01, + "learning_rate": 4.998983171907856e-05, + "loss": 1.3989, + "step": 9178 + }, + { + "epoch": 0.01, + "learning_rate": 4.99898294789042e-05, + "loss": 1.0866, + "step": 9179 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989827238483144e-05, + "loss": 1.22, + "step": 9180 + }, + { + "epoch": 0.01, + "learning_rate": 4.99898249978154e-05, + "loss": 1.0868, + "step": 9181 + }, + { + "epoch": 0.01, + "learning_rate": 4.998982275690097e-05, + "loss": 0.6667, + "step": 9182 + }, + { + "epoch": 0.01, + "learning_rate": 4.998982051573986e-05, + "loss": 0.258, + "step": 9183 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989818274332047e-05, + "loss": 0.5565, + "step": 9184 + }, + { + "epoch": 0.01, + "learning_rate": 4.998981603267755e-05, + "loss": 1.094, + "step": 9185 + }, + { + "epoch": 0.01, + "learning_rate": 4.998981379077636e-05, + "loss": 1.0107, + "step": 9186 + }, + { + "epoch": 0.01, + "learning_rate": 4.998981154862849e-05, + "loss": 0.566, + "step": 9187 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989809306233925e-05, + "loss": 1.1785, + "step": 9188 + }, + { + "epoch": 0.01, + "learning_rate": 4.998980706359268e-05, + "loss": 0.5826, + "step": 9189 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989804820704735e-05, + "loss": 0.9039, + "step": 9190 + }, + { + "epoch": 0.01, + "learning_rate": 4.99898025775701e-05, + "loss": 1.2496, + "step": 9191 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989800334188786e-05, + "loss": 1.0865, + "step": 9192 + }, + { + "epoch": 0.01, + "learning_rate": 4.998979809056078e-05, + "loss": 0.7945, + "step": 9193 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989795846686084e-05, + "loss": 0.9995, + "step": 9194 + }, + { + "epoch": 0.01, + "learning_rate": 4.998979360256469e-05, + "loss": 1.2338, + "step": 9195 + }, + { + "epoch": 0.01, + "learning_rate": 4.998979135819662e-05, + "loss": 1.2236, + "step": 9196 + }, + { + "epoch": 0.01, + "learning_rate": 4.998978911358186e-05, + "loss": 1.3668, + "step": 9197 + }, + { + "epoch": 0.01, + "learning_rate": 4.998978686872041e-05, + "loss": 1.1045, + "step": 9198 + }, + { + "epoch": 0.01, + "learning_rate": 4.998978462361227e-05, + "loss": 1.1917, + "step": 9199 + }, + { + "epoch": 0.01, + "learning_rate": 4.998978237825744e-05, + "loss": 1.1984, + "step": 9200 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989780132655916e-05, + "loss": 1.2166, + "step": 9201 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989777886807714e-05, + "loss": 1.0184, + "step": 9202 + }, + { + "epoch": 0.01, + "learning_rate": 4.998977564071282e-05, + "loss": 1.1177, + "step": 9203 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989773394371235e-05, + "loss": 1.1258, + "step": 9204 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989771147782964e-05, + "loss": 1.5137, + "step": 9205 + }, + { + "epoch": 0.01, + "learning_rate": 4.998976890094801e-05, + "loss": 1.0125, + "step": 9206 + }, + { + "epoch": 0.01, + "learning_rate": 4.998976665386635e-05, + "loss": 1.0696, + "step": 9207 + }, + { + "epoch": 0.01, + "learning_rate": 4.998976440653802e-05, + "loss": 1.1876, + "step": 9208 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989762158962995e-05, + "loss": 1.1406, + "step": 9209 + }, + { + "epoch": 0.01, + "learning_rate": 4.998975991114128e-05, + "loss": 0.9748, + "step": 9210 + }, + { + "epoch": 0.01, + "learning_rate": 4.998975766307288e-05, + "loss": 0.9761, + "step": 9211 + }, + { + "epoch": 0.01, + "learning_rate": 4.998975541475779e-05, + "loss": 1.0726, + "step": 9212 + }, + { + "epoch": 0.01, + "learning_rate": 4.998975316619601e-05, + "loss": 0.8541, + "step": 9213 + }, + { + "epoch": 0.01, + "learning_rate": 4.998975091738755e-05, + "loss": 1.1607, + "step": 9214 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989748668332385e-05, + "loss": 1.1494, + "step": 9215 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989746419030546e-05, + "loss": 1.4115, + "step": 9216 + }, + { + "epoch": 0.01, + "learning_rate": 4.998974416948201e-05, + "loss": 1.1737, + "step": 9217 + }, + { + "epoch": 0.01, + "learning_rate": 4.99897419196868e-05, + "loss": 1.1807, + "step": 9218 + }, + { + "epoch": 0.01, + "learning_rate": 4.998973966964489e-05, + "loss": 1.0111, + "step": 9219 + }, + { + "epoch": 0.01, + "learning_rate": 4.998973741935628e-05, + "loss": 0.8739, + "step": 9220 + }, + { + "epoch": 0.01, + "learning_rate": 4.998973516882101e-05, + "loss": 0.6346, + "step": 9221 + }, + { + "epoch": 0.01, + "learning_rate": 4.998973291803903e-05, + "loss": 1.0811, + "step": 9222 + }, + { + "epoch": 0.01, + "learning_rate": 4.998973066701037e-05, + "loss": 0.8113, + "step": 9223 + }, + { + "epoch": 0.01, + "learning_rate": 4.998972841573502e-05, + "loss": 1.1547, + "step": 9224 + }, + { + "epoch": 0.01, + "learning_rate": 4.998972616421298e-05, + "loss": 1.2342, + "step": 9225 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989723912444257e-05, + "loss": 1.4094, + "step": 9226 + }, + { + "epoch": 0.01, + "learning_rate": 4.998972166042884e-05, + "loss": 1.1467, + "step": 9227 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989719408166745e-05, + "loss": 0.7494, + "step": 9228 + }, + { + "epoch": 0.01, + "learning_rate": 4.998971715565796e-05, + "loss": 1.2098, + "step": 9229 + }, + { + "epoch": 0.01, + "learning_rate": 4.998971490290247e-05, + "loss": 1.2357, + "step": 9230 + }, + { + "epoch": 0.01, + "learning_rate": 4.998971264990031e-05, + "loss": 1.1795, + "step": 9231 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989710396651456e-05, + "loss": 0.8123, + "step": 9232 + }, + { + "epoch": 0.01, + "learning_rate": 4.998970814315592e-05, + "loss": 0.8651, + "step": 9233 + }, + { + "epoch": 0.01, + "learning_rate": 4.998970588941369e-05, + "loss": 1.2199, + "step": 9234 + }, + { + "epoch": 0.01, + "learning_rate": 4.998970363542477e-05, + "loss": 0.5233, + "step": 9235 + }, + { + "epoch": 0.01, + "learning_rate": 4.998970138118917e-05, + "loss": 1.163, + "step": 9236 + }, + { + "epoch": 0.01, + "learning_rate": 4.998969912670688e-05, + "loss": 1.7315, + "step": 9237 + }, + { + "epoch": 0.01, + "learning_rate": 4.99896968719779e-05, + "loss": 1.5246, + "step": 9238 + }, + { + "epoch": 0.01, + "learning_rate": 4.998969461700223e-05, + "loss": 1.2901, + "step": 9239 + }, + { + "epoch": 0.01, + "learning_rate": 4.998969236177987e-05, + "loss": 1.0149, + "step": 9240 + }, + { + "epoch": 0.01, + "learning_rate": 4.998969010631083e-05, + "loss": 0.9027, + "step": 9241 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989687850595105e-05, + "loss": 1.275, + "step": 9242 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989685594632686e-05, + "loss": 1.1306, + "step": 9243 + }, + { + "epoch": 0.01, + "learning_rate": 4.998968333842358e-05, + "loss": 1.2844, + "step": 9244 + }, + { + "epoch": 0.01, + "learning_rate": 4.998968108196778e-05, + "loss": 1.2132, + "step": 9245 + }, + { + "epoch": 0.01, + "learning_rate": 4.99896788252653e-05, + "loss": 1.1061, + "step": 9246 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989676568316134e-05, + "loss": 0.9193, + "step": 9247 + }, + { + "epoch": 0.01, + "learning_rate": 4.998967431112027e-05, + "loss": 1.0906, + "step": 9248 + }, + { + "epoch": 0.01, + "learning_rate": 4.998967205367773e-05, + "loss": 0.9511, + "step": 9249 + }, + { + "epoch": 0.01, + "learning_rate": 4.99896697959885e-05, + "loss": 1.1308, + "step": 9250 + }, + { + "epoch": 0.01, + "learning_rate": 4.998966753805258e-05, + "loss": 0.7206, + "step": 9251 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989665279869977e-05, + "loss": 1.072, + "step": 9252 + }, + { + "epoch": 0.01, + "learning_rate": 4.998966302144068e-05, + "loss": 1.1586, + "step": 9253 + }, + { + "epoch": 0.01, + "learning_rate": 4.99896607627647e-05, + "loss": 1.266, + "step": 9254 + }, + { + "epoch": 0.01, + "learning_rate": 4.998965850384203e-05, + "loss": 1.3535, + "step": 9255 + }, + { + "epoch": 0.01, + "learning_rate": 4.998965624467267e-05, + "loss": 1.1896, + "step": 9256 + }, + { + "epoch": 0.01, + "learning_rate": 4.998965398525663e-05, + "loss": 0.433, + "step": 9257 + }, + { + "epoch": 0.01, + "learning_rate": 4.99896517255939e-05, + "loss": 0.373, + "step": 9258 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989649465684485e-05, + "loss": 0.321, + "step": 9259 + }, + { + "epoch": 0.01, + "learning_rate": 4.998964720552838e-05, + "loss": 0.4569, + "step": 9260 + }, + { + "epoch": 0.01, + "learning_rate": 4.998964494512558e-05, + "loss": 0.3219, + "step": 9261 + }, + { + "epoch": 0.01, + "learning_rate": 4.99896426844761e-05, + "loss": 0.9249, + "step": 9262 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989640423579936e-05, + "loss": 0.8442, + "step": 9263 + }, + { + "epoch": 0.01, + "learning_rate": 4.998963816243708e-05, + "loss": 1.4251, + "step": 9264 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989635901047536e-05, + "loss": 1.3881, + "step": 9265 + }, + { + "epoch": 0.01, + "learning_rate": 4.998963363941131e-05, + "loss": 1.1606, + "step": 9266 + }, + { + "epoch": 0.01, + "learning_rate": 4.99896313775284e-05, + "loss": 1.3775, + "step": 9267 + }, + { + "epoch": 0.01, + "learning_rate": 4.998962911539879e-05, + "loss": 1.3913, + "step": 9268 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989626853022505e-05, + "loss": 1.0168, + "step": 9269 + }, + { + "epoch": 0.01, + "learning_rate": 4.998962459039953e-05, + "loss": 1.1377, + "step": 9270 + }, + { + "epoch": 0.01, + "learning_rate": 4.998962232752986e-05, + "loss": 1.2716, + "step": 9271 + }, + { + "epoch": 0.01, + "learning_rate": 4.998962006441351e-05, + "loss": 1.3371, + "step": 9272 + }, + { + "epoch": 0.01, + "learning_rate": 4.998961780105047e-05, + "loss": 1.0409, + "step": 9273 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989615537440746e-05, + "loss": 1.2988, + "step": 9274 + }, + { + "epoch": 0.01, + "learning_rate": 4.998961327358433e-05, + "loss": 0.8929, + "step": 9275 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989611009481236e-05, + "loss": 1.1388, + "step": 9276 + }, + { + "epoch": 0.01, + "learning_rate": 4.998960874513145e-05, + "loss": 0.7059, + "step": 9277 + }, + { + "epoch": 0.01, + "learning_rate": 4.998960648053498e-05, + "loss": 0.6566, + "step": 9278 + }, + { + "epoch": 0.01, + "learning_rate": 4.998960421569182e-05, + "loss": 0.6692, + "step": 9279 + }, + { + "epoch": 0.01, + "learning_rate": 4.998960195060197e-05, + "loss": 1.1011, + "step": 9280 + }, + { + "epoch": 0.01, + "learning_rate": 4.998959968526544e-05, + "loss": 0.6837, + "step": 9281 + }, + { + "epoch": 0.01, + "learning_rate": 4.998959741968222e-05, + "loss": 1.2752, + "step": 9282 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989595153852306e-05, + "loss": 1.2245, + "step": 9283 + }, + { + "epoch": 0.01, + "learning_rate": 4.998959288777572e-05, + "loss": 1.2041, + "step": 9284 + }, + { + "epoch": 0.01, + "learning_rate": 4.998959062145244e-05, + "loss": 1.539, + "step": 9285 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989588354882465e-05, + "loss": 0.8351, + "step": 9286 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989586088065814e-05, + "loss": 1.0497, + "step": 9287 + }, + { + "epoch": 0.01, + "learning_rate": 4.998958382100247e-05, + "loss": 1.0906, + "step": 9288 + }, + { + "epoch": 0.01, + "learning_rate": 4.998958155369244e-05, + "loss": 0.7888, + "step": 9289 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989579286135735e-05, + "loss": 0.9287, + "step": 9290 + }, + { + "epoch": 0.01, + "learning_rate": 4.998957701833233e-05, + "loss": 1.2553, + "step": 9291 + }, + { + "epoch": 0.01, + "learning_rate": 4.998957475028225e-05, + "loss": 1.2213, + "step": 9292 + }, + { + "epoch": 0.01, + "learning_rate": 4.998957248198547e-05, + "loss": 1.154, + "step": 9293 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989570213442014e-05, + "loss": 0.9525, + "step": 9294 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989567944651864e-05, + "loss": 1.2354, + "step": 9295 + }, + { + "epoch": 0.01, + "learning_rate": 4.998956567561504e-05, + "loss": 1.3407, + "step": 9296 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989563406331516e-05, + "loss": 1.2901, + "step": 9297 + }, + { + "epoch": 0.01, + "learning_rate": 4.998956113680131e-05, + "loss": 1.5461, + "step": 9298 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989558867024414e-05, + "loss": 0.7708, + "step": 9299 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989556597000835e-05, + "loss": 1.1066, + "step": 9300 + }, + { + "epoch": 0.01, + "learning_rate": 4.998955432673057e-05, + "loss": 1.3445, + "step": 9301 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989552056213615e-05, + "loss": 1.076, + "step": 9302 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989549785449986e-05, + "loss": 1.2831, + "step": 9303 + }, + { + "epoch": 0.01, + "learning_rate": 4.998954751443966e-05, + "loss": 1.116, + "step": 9304 + }, + { + "epoch": 0.01, + "learning_rate": 4.998954524318265e-05, + "loss": 0.6092, + "step": 9305 + }, + { + "epoch": 0.01, + "learning_rate": 4.998954297167895e-05, + "loss": 1.1854, + "step": 9306 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989540699928575e-05, + "loss": 1.0404, + "step": 9307 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989538427931504e-05, + "loss": 1.1735, + "step": 9308 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989536155687744e-05, + "loss": 1.2108, + "step": 9309 + }, + { + "epoch": 0.01, + "learning_rate": 4.998953388319731e-05, + "loss": 1.1294, + "step": 9310 + }, + { + "epoch": 0.01, + "learning_rate": 4.998953161046017e-05, + "loss": 1.0185, + "step": 9311 + }, + { + "epoch": 0.01, + "learning_rate": 4.998952933747636e-05, + "loss": 0.8502, + "step": 9312 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989527064245864e-05, + "loss": 0.98, + "step": 9313 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989524790768674e-05, + "loss": 1.2549, + "step": 9314 + }, + { + "epoch": 0.01, + "learning_rate": 4.998952251704481e-05, + "loss": 1.2172, + "step": 9315 + }, + { + "epoch": 0.01, + "learning_rate": 4.998952024307425e-05, + "loss": 0.9761, + "step": 9316 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989517968857e-05, + "loss": 1.0534, + "step": 9317 + }, + { + "epoch": 0.01, + "learning_rate": 4.998951569439308e-05, + "loss": 1.0157, + "step": 9318 + }, + { + "epoch": 0.01, + "learning_rate": 4.998951341968245e-05, + "loss": 4.9269, + "step": 9319 + }, + { + "epoch": 0.01, + "learning_rate": 4.998951114472516e-05, + "loss": 4.9151, + "step": 9320 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989508869521165e-05, + "loss": 1.274, + "step": 9321 + }, + { + "epoch": 0.01, + "learning_rate": 4.998950659407049e-05, + "loss": 0.8477, + "step": 9322 + }, + { + "epoch": 0.01, + "learning_rate": 4.998950431837314e-05, + "loss": 0.8863, + "step": 9323 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989502042429085e-05, + "loss": 1.1951, + "step": 9324 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989499766238354e-05, + "loss": 1.0261, + "step": 9325 + }, + { + "epoch": 0.01, + "learning_rate": 4.998949748980094e-05, + "loss": 1.0344, + "step": 9326 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989495213116836e-05, + "loss": 1.8301, + "step": 9327 + }, + { + "epoch": 0.01, + "learning_rate": 4.998949293618605e-05, + "loss": 1.6782, + "step": 9328 + }, + { + "epoch": 0.01, + "learning_rate": 4.998949065900857e-05, + "loss": 1.6676, + "step": 9329 + }, + { + "epoch": 0.01, + "learning_rate": 4.998948838158441e-05, + "loss": 1.5011, + "step": 9330 + }, + { + "epoch": 0.01, + "learning_rate": 4.998948610391357e-05, + "loss": 1.6872, + "step": 9331 + }, + { + "epoch": 0.01, + "learning_rate": 4.998948382599604e-05, + "loss": 1.1545, + "step": 9332 + }, + { + "epoch": 0.01, + "learning_rate": 4.998948154783182e-05, + "loss": 1.0702, + "step": 9333 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989479269420915e-05, + "loss": 1.3011, + "step": 9334 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989476990763326e-05, + "loss": 0.8697, + "step": 9335 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989474711859054e-05, + "loss": 0.5981, + "step": 9336 + }, + { + "epoch": 0.01, + "learning_rate": 4.998947243270809e-05, + "loss": 0.7164, + "step": 9337 + }, + { + "epoch": 0.01, + "learning_rate": 4.998947015331045e-05, + "loss": 1.2006, + "step": 9338 + }, + { + "epoch": 0.01, + "learning_rate": 4.998946787366612e-05, + "loss": 1.3245, + "step": 9339 + }, + { + "epoch": 0.01, + "learning_rate": 4.99894655937751e-05, + "loss": 1.077, + "step": 9340 + }, + { + "epoch": 0.01, + "learning_rate": 4.99894633136374e-05, + "loss": 1.1189, + "step": 9341 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989461033253016e-05, + "loss": 1.1796, + "step": 9342 + }, + { + "epoch": 0.01, + "learning_rate": 4.998945875262194e-05, + "loss": 1.222, + "step": 9343 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989456471744184e-05, + "loss": 1.2311, + "step": 9344 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989454190619744e-05, + "loss": 1.1154, + "step": 9345 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989451909248614e-05, + "loss": 0.9449, + "step": 9346 + }, + { + "epoch": 0.01, + "learning_rate": 4.99894496276308e-05, + "loss": 1.1974, + "step": 9347 + }, + { + "epoch": 0.01, + "learning_rate": 4.99894473457663e-05, + "loss": 1.0124, + "step": 9348 + }, + { + "epoch": 0.01, + "learning_rate": 4.998944506365512e-05, + "loss": 1.0151, + "step": 9349 + }, + { + "epoch": 0.01, + "learning_rate": 4.998944278129725e-05, + "loss": 0.7458, + "step": 9350 + }, + { + "epoch": 0.01, + "learning_rate": 4.998944049869269e-05, + "loss": 0.9251, + "step": 9351 + }, + { + "epoch": 0.01, + "learning_rate": 4.998943821584145e-05, + "loss": 1.1292, + "step": 9352 + }, + { + "epoch": 0.01, + "learning_rate": 4.998943593274352e-05, + "loss": 1.2313, + "step": 9353 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989433649398913e-05, + "loss": 0.7314, + "step": 9354 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989431365807615e-05, + "loss": 1.3168, + "step": 9355 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989429081969634e-05, + "loss": 0.7289, + "step": 9356 + }, + { + "epoch": 0.01, + "learning_rate": 4.998942679788497e-05, + "loss": 1.2072, + "step": 9357 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989424513553615e-05, + "loss": 1.1549, + "step": 9358 + }, + { + "epoch": 0.01, + "learning_rate": 4.998942222897558e-05, + "loss": 1.0921, + "step": 9359 + }, + { + "epoch": 0.01, + "learning_rate": 4.998941994415086e-05, + "loss": 0.9914, + "step": 9360 + }, + { + "epoch": 0.01, + "learning_rate": 4.998941765907945e-05, + "loss": 1.1754, + "step": 9361 + }, + { + "epoch": 0.01, + "learning_rate": 4.998941537376136e-05, + "loss": 1.2133, + "step": 9362 + }, + { + "epoch": 0.01, + "learning_rate": 4.998941308819659e-05, + "loss": 1.4551, + "step": 9363 + }, + { + "epoch": 0.01, + "learning_rate": 4.998941080238513e-05, + "loss": 1.2509, + "step": 9364 + }, + { + "epoch": 0.01, + "learning_rate": 4.998940851632698e-05, + "loss": 1.1247, + "step": 9365 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989406230022154e-05, + "loss": 1.0529, + "step": 9366 + }, + { + "epoch": 0.01, + "learning_rate": 4.998940394347064e-05, + "loss": 1.0516, + "step": 9367 + }, + { + "epoch": 0.01, + "learning_rate": 4.998940165667243e-05, + "loss": 0.8707, + "step": 9368 + }, + { + "epoch": 0.01, + "learning_rate": 4.998939936962754e-05, + "loss": 1.4619, + "step": 9369 + }, + { + "epoch": 0.01, + "learning_rate": 4.998939708233598e-05, + "loss": 1.1269, + "step": 9370 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989394794797716e-05, + "loss": 1.0765, + "step": 9371 + }, + { + "epoch": 0.01, + "learning_rate": 4.998939250701278e-05, + "loss": 1.1936, + "step": 9372 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989390218981157e-05, + "loss": 1.3124, + "step": 9373 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989387930702845e-05, + "loss": 1.4301, + "step": 9374 + }, + { + "epoch": 0.01, + "learning_rate": 4.998938564217786e-05, + "loss": 0.4058, + "step": 9375 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989383353406174e-05, + "loss": 0.9463, + "step": 9376 + }, + { + "epoch": 0.01, + "learning_rate": 4.998938106438781e-05, + "loss": 1.3724, + "step": 9377 + }, + { + "epoch": 0.01, + "learning_rate": 4.998937877512276e-05, + "loss": 0.7495, + "step": 9378 + }, + { + "epoch": 0.01, + "learning_rate": 4.998937648561103e-05, + "loss": 1.0482, + "step": 9379 + }, + { + "epoch": 0.01, + "learning_rate": 4.998937419585261e-05, + "loss": 1.1149, + "step": 9380 + }, + { + "epoch": 0.01, + "learning_rate": 4.998937190584751e-05, + "loss": 1.0348, + "step": 9381 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989369615595724e-05, + "loss": 0.7258, + "step": 9382 + }, + { + "epoch": 0.01, + "learning_rate": 4.998936732509725e-05, + "loss": 0.2154, + "step": 9383 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989365034352096e-05, + "loss": 0.3616, + "step": 9384 + }, + { + "epoch": 0.01, + "learning_rate": 4.998936274336026e-05, + "loss": 1.0633, + "step": 9385 + }, + { + "epoch": 0.01, + "learning_rate": 4.998936045212173e-05, + "loss": 1.0139, + "step": 9386 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989358160636523e-05, + "loss": 1.0686, + "step": 9387 + }, + { + "epoch": 0.01, + "learning_rate": 4.998935586890463e-05, + "loss": 1.367, + "step": 9388 + }, + { + "epoch": 0.01, + "learning_rate": 4.998935357692605e-05, + "loss": 0.723, + "step": 9389 + }, + { + "epoch": 0.01, + "learning_rate": 4.998935128470079e-05, + "loss": 0.5576, + "step": 9390 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989348992228845e-05, + "loss": 0.7907, + "step": 9391 + }, + { + "epoch": 0.01, + "learning_rate": 4.998934669951021e-05, + "loss": 1.1041, + "step": 9392 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989344406544894e-05, + "loss": 1.063, + "step": 9393 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989342113332894e-05, + "loss": 1.1615, + "step": 9394 + }, + { + "epoch": 0.01, + "learning_rate": 4.998933981987422e-05, + "loss": 1.0552, + "step": 9395 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989337526168844e-05, + "loss": 1.0791, + "step": 9396 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989335232216795e-05, + "loss": 1.0922, + "step": 9397 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989332938018055e-05, + "loss": 1.0177, + "step": 9398 + }, + { + "epoch": 0.01, + "learning_rate": 4.998933064357263e-05, + "loss": 1.3498, + "step": 9399 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989328348880534e-05, + "loss": 1.2368, + "step": 9400 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989326053941745e-05, + "loss": 0.96, + "step": 9401 + }, + { + "epoch": 0.01, + "learning_rate": 4.998932375875627e-05, + "loss": 0.6169, + "step": 9402 + }, + { + "epoch": 0.01, + "learning_rate": 4.998932146332411e-05, + "loss": 0.5365, + "step": 9403 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989319167645274e-05, + "loss": 0.4449, + "step": 9404 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989316871719746e-05, + "loss": 0.9199, + "step": 9405 + }, + { + "epoch": 0.01, + "learning_rate": 4.998931457554754e-05, + "loss": 0.6908, + "step": 9406 + }, + { + "epoch": 0.01, + "learning_rate": 4.998931227912864e-05, + "loss": 0.4082, + "step": 9407 + }, + { + "epoch": 0.01, + "learning_rate": 4.998930998246307e-05, + "loss": 0.6992, + "step": 9408 + }, + { + "epoch": 0.01, + "learning_rate": 4.998930768555081e-05, + "loss": 0.5228, + "step": 9409 + }, + { + "epoch": 0.01, + "learning_rate": 4.998930538839186e-05, + "loss": 1.0521, + "step": 9410 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989303090986234e-05, + "loss": 0.9689, + "step": 9411 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989300793333924e-05, + "loss": 0.9596, + "step": 9412 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989298495434925e-05, + "loss": 0.6542, + "step": 9413 + }, + { + "epoch": 0.01, + "learning_rate": 4.998929619728925e-05, + "loss": 1.0697, + "step": 9414 + }, + { + "epoch": 0.01, + "learning_rate": 4.998929389889688e-05, + "loss": 1.5153, + "step": 9415 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989291600257834e-05, + "loss": 1.2395, + "step": 9416 + }, + { + "epoch": 0.01, + "learning_rate": 4.99892893013721e-05, + "loss": 1.1297, + "step": 9417 + }, + { + "epoch": 0.01, + "learning_rate": 4.998928700223968e-05, + "loss": 1.3725, + "step": 9418 + }, + { + "epoch": 0.01, + "learning_rate": 4.998928470286058e-05, + "loss": 1.166, + "step": 9419 + }, + { + "epoch": 0.01, + "learning_rate": 4.99892824032348e-05, + "loss": 1.2635, + "step": 9420 + }, + { + "epoch": 0.01, + "learning_rate": 4.998928010336234e-05, + "loss": 1.3999, + "step": 9421 + }, + { + "epoch": 0.01, + "learning_rate": 4.998927780324318e-05, + "loss": 1.1487, + "step": 9422 + }, + { + "epoch": 0.01, + "learning_rate": 4.998927550287735e-05, + "loss": 1.0197, + "step": 9423 + }, + { + "epoch": 0.01, + "learning_rate": 4.998927320226483e-05, + "loss": 1.1196, + "step": 9424 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989270901405636e-05, + "loss": 1.0885, + "step": 9425 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989268600299756e-05, + "loss": 1.2092, + "step": 9426 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989266298947187e-05, + "loss": 1.1147, + "step": 9427 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989263997347934e-05, + "loss": 0.9987, + "step": 9428 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989261695502e-05, + "loss": 0.9774, + "step": 9429 + }, + { + "epoch": 0.01, + "learning_rate": 4.998925939340938e-05, + "loss": 1.0896, + "step": 9430 + }, + { + "epoch": 0.01, + "learning_rate": 4.998925709107008e-05, + "loss": 1.327, + "step": 9431 + }, + { + "epoch": 0.01, + "learning_rate": 4.99892547884841e-05, + "loss": 1.1995, + "step": 9432 + }, + { + "epoch": 0.01, + "learning_rate": 4.998925248565143e-05, + "loss": 1.1151, + "step": 9433 + }, + { + "epoch": 0.01, + "learning_rate": 4.998925018257207e-05, + "loss": 0.2965, + "step": 9434 + }, + { + "epoch": 0.01, + "learning_rate": 4.998924787924604e-05, + "loss": 1.1415, + "step": 9435 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989245575673314e-05, + "loss": 1.1174, + "step": 9436 + }, + { + "epoch": 0.01, + "learning_rate": 4.998924327185391e-05, + "loss": 0.9524, + "step": 9437 + }, + { + "epoch": 0.01, + "learning_rate": 4.998924096778783e-05, + "loss": 1.2086, + "step": 9438 + }, + { + "epoch": 0.01, + "learning_rate": 4.998923866347506e-05, + "loss": 1.1131, + "step": 9439 + }, + { + "epoch": 0.01, + "learning_rate": 4.998923635891561e-05, + "loss": 1.2125, + "step": 9440 + }, + { + "epoch": 0.01, + "learning_rate": 4.998923405410948e-05, + "loss": 1.3067, + "step": 9441 + }, + { + "epoch": 0.01, + "learning_rate": 4.998923174905666e-05, + "loss": 1.2938, + "step": 9442 + }, + { + "epoch": 0.01, + "learning_rate": 4.998922944375716e-05, + "loss": 1.057, + "step": 9443 + }, + { + "epoch": 0.01, + "learning_rate": 4.998922713821097e-05, + "loss": 1.1474, + "step": 9444 + }, + { + "epoch": 0.01, + "learning_rate": 4.99892248324181e-05, + "loss": 1.0418, + "step": 9445 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989222526378555e-05, + "loss": 0.9933, + "step": 9446 + }, + { + "epoch": 0.01, + "learning_rate": 4.998922022009232e-05, + "loss": 1.1512, + "step": 9447 + }, + { + "epoch": 0.01, + "learning_rate": 4.998921791355941e-05, + "loss": 0.9912, + "step": 9448 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989215606779804e-05, + "loss": 1.1426, + "step": 9449 + }, + { + "epoch": 0.01, + "learning_rate": 4.998921329975352e-05, + "loss": 1.3306, + "step": 9450 + }, + { + "epoch": 0.01, + "learning_rate": 4.998921099248056e-05, + "loss": 1.353, + "step": 9451 + }, + { + "epoch": 0.01, + "learning_rate": 4.998920868496091e-05, + "loss": 1.0916, + "step": 9452 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989206377194575e-05, + "loss": 1.0523, + "step": 9453 + }, + { + "epoch": 0.01, + "learning_rate": 4.998920406918156e-05, + "loss": 1.2159, + "step": 9454 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989201760921865e-05, + "loss": 1.2316, + "step": 9455 + }, + { + "epoch": 0.01, + "learning_rate": 4.998919945241548e-05, + "loss": 1.054, + "step": 9456 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989197143662416e-05, + "loss": 1.178, + "step": 9457 + }, + { + "epoch": 0.01, + "learning_rate": 4.998919483466267e-05, + "loss": 0.9162, + "step": 9458 + }, + { + "epoch": 0.01, + "learning_rate": 4.998919252541624e-05, + "loss": 1.32, + "step": 9459 + }, + { + "epoch": 0.01, + "learning_rate": 4.998919021592313e-05, + "loss": 0.9844, + "step": 9460 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989187906183335e-05, + "loss": 1.0082, + "step": 9461 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989185596196864e-05, + "loss": 1.0474, + "step": 9462 + }, + { + "epoch": 0.01, + "learning_rate": 4.99891832859637e-05, + "loss": 1.077, + "step": 9463 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989180975483865e-05, + "loss": 1.0237, + "step": 9464 + }, + { + "epoch": 0.01, + "learning_rate": 4.998917866475733e-05, + "loss": 1.1396, + "step": 9465 + }, + { + "epoch": 0.01, + "learning_rate": 4.998917635378413e-05, + "loss": 1.1195, + "step": 9466 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989174042564234e-05, + "loss": 1.1072, + "step": 9467 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989171731097664e-05, + "loss": 1.8758, + "step": 9468 + }, + { + "epoch": 0.01, + "learning_rate": 4.998916941938441e-05, + "loss": 1.7659, + "step": 9469 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989167107424475e-05, + "loss": 1.2814, + "step": 9470 + }, + { + "epoch": 0.01, + "learning_rate": 4.998916479521785e-05, + "loss": 1.4703, + "step": 9471 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989162482764546e-05, + "loss": 1.1845, + "step": 9472 + }, + { + "epoch": 0.01, + "learning_rate": 4.998916017006456e-05, + "loss": 1.1917, + "step": 9473 + }, + { + "epoch": 0.01, + "learning_rate": 4.998915785711789e-05, + "loss": 1.2706, + "step": 9474 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989155543924534e-05, + "loss": 1.229, + "step": 9475 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989153230484506e-05, + "loss": 0.9546, + "step": 9476 + }, + { + "epoch": 0.01, + "learning_rate": 4.998915091679779e-05, + "loss": 0.955, + "step": 9477 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989148602864394e-05, + "loss": 1.3085, + "step": 9478 + }, + { + "epoch": 0.01, + "learning_rate": 4.998914628868432e-05, + "loss": 1.2113, + "step": 9479 + }, + { + "epoch": 0.01, + "learning_rate": 4.998914397425755e-05, + "loss": 0.991, + "step": 9480 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989141659584106e-05, + "loss": 1.1268, + "step": 9481 + }, + { + "epoch": 0.01, + "learning_rate": 4.998913934466398e-05, + "loss": 1.2348, + "step": 9482 + }, + { + "epoch": 0.01, + "learning_rate": 4.998913702949717e-05, + "loss": 1.2291, + "step": 9483 + }, + { + "epoch": 0.01, + "learning_rate": 4.998913471408368e-05, + "loss": 1.0154, + "step": 9484 + }, + { + "epoch": 0.01, + "learning_rate": 4.998913239842351e-05, + "loss": 1.1843, + "step": 9485 + }, + { + "epoch": 0.01, + "learning_rate": 4.998913008251665e-05, + "loss": 1.2066, + "step": 9486 + }, + { + "epoch": 0.01, + "learning_rate": 4.998912776636311e-05, + "loss": 1.1451, + "step": 9487 + }, + { + "epoch": 0.01, + "learning_rate": 4.998912544996289e-05, + "loss": 0.9843, + "step": 9488 + }, + { + "epoch": 0.01, + "learning_rate": 4.998912313331599e-05, + "loss": 0.9165, + "step": 9489 + }, + { + "epoch": 0.01, + "learning_rate": 4.998912081642241e-05, + "loss": 1.1365, + "step": 9490 + }, + { + "epoch": 0.01, + "learning_rate": 4.998911849928214e-05, + "loss": 1.0598, + "step": 9491 + }, + { + "epoch": 0.01, + "learning_rate": 4.998911618189519e-05, + "loss": 1.1168, + "step": 9492 + }, + { + "epoch": 0.01, + "learning_rate": 4.998911386426156e-05, + "loss": 1.021, + "step": 9493 + }, + { + "epoch": 0.01, + "learning_rate": 4.998911154638125e-05, + "loss": 1.054, + "step": 9494 + }, + { + "epoch": 0.01, + "learning_rate": 4.998910922825425e-05, + "loss": 0.9778, + "step": 9495 + }, + { + "epoch": 0.01, + "learning_rate": 4.998910690988058e-05, + "loss": 1.0052, + "step": 9496 + }, + { + "epoch": 0.01, + "learning_rate": 4.998910459126022e-05, + "loss": 1.1659, + "step": 9497 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989102272393176e-05, + "loss": 1.3724, + "step": 9498 + }, + { + "epoch": 0.01, + "learning_rate": 4.998909995327946e-05, + "loss": 2.5844, + "step": 9499 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989097633919055e-05, + "loss": 1.0749, + "step": 9500 + }, + { + "epoch": 0.01, + "eval_loss": 1.0519160032272339, + "eval_runtime": 86.0341, + "eval_samples_per_second": 16.098, + "eval_steps_per_second": 4.033, + "step": 9500 + }, + { + "epoch": 0.01, + "learning_rate": 4.998909531431197e-05, + "loss": 1.1633, + "step": 9501 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989092994458195e-05, + "loss": 0.8895, + "step": 9502 + }, + { + "epoch": 0.01, + "learning_rate": 4.998909067435775e-05, + "loss": 1.1799, + "step": 9503 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989088354010616e-05, + "loss": 0.8809, + "step": 9504 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989086033416806e-05, + "loss": 0.9955, + "step": 9505 + }, + { + "epoch": 0.01, + "learning_rate": 4.998908371257631e-05, + "loss": 1.0974, + "step": 9506 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989081391489135e-05, + "loss": 1.0556, + "step": 9507 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989079070155275e-05, + "loss": 0.9914, + "step": 9508 + }, + { + "epoch": 0.01, + "learning_rate": 4.998907674857474e-05, + "loss": 1.2065, + "step": 9509 + }, + { + "epoch": 0.01, + "learning_rate": 4.998907442674752e-05, + "loss": 1.0535, + "step": 9510 + }, + { + "epoch": 0.01, + "learning_rate": 4.998907210467361e-05, + "loss": 1.067, + "step": 9511 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989069782353024e-05, + "loss": 1.2242, + "step": 9512 + }, + { + "epoch": 0.01, + "learning_rate": 4.998906745978576e-05, + "loss": 1.039, + "step": 9513 + }, + { + "epoch": 0.01, + "learning_rate": 4.998906513697181e-05, + "loss": 0.7001, + "step": 9514 + }, + { + "epoch": 0.01, + "learning_rate": 4.998906281391118e-05, + "loss": 0.5576, + "step": 9515 + }, + { + "epoch": 0.01, + "learning_rate": 4.998906049060387e-05, + "loss": 0.5156, + "step": 9516 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989058167049884e-05, + "loss": 0.303, + "step": 9517 + }, + { + "epoch": 0.01, + "learning_rate": 4.998905584324921e-05, + "loss": 0.194, + "step": 9518 + }, + { + "epoch": 0.01, + "learning_rate": 4.998905351920186e-05, + "loss": 1.6149, + "step": 9519 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989051194907815e-05, + "loss": 5.9522, + "step": 9520 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989048870367096e-05, + "loss": 5.3922, + "step": 9521 + }, + { + "epoch": 0.01, + "learning_rate": 4.99890465455797e-05, + "loss": 5.8952, + "step": 9522 + }, + { + "epoch": 0.01, + "learning_rate": 4.998904422054562e-05, + "loss": 5.8887, + "step": 9523 + }, + { + "epoch": 0.01, + "learning_rate": 4.998904189526486e-05, + "loss": 5.8202, + "step": 9524 + }, + { + "epoch": 0.01, + "learning_rate": 4.998903956973741e-05, + "loss": 5.7676, + "step": 9525 + }, + { + "epoch": 0.01, + "learning_rate": 4.998903724396329e-05, + "loss": 5.8913, + "step": 9526 + }, + { + "epoch": 0.01, + "learning_rate": 4.998903491794248e-05, + "loss": 5.7219, + "step": 9527 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989032591675e-05, + "loss": 5.3908, + "step": 9528 + }, + { + "epoch": 0.01, + "learning_rate": 4.998903026516083e-05, + "loss": 5.8612, + "step": 9529 + }, + { + "epoch": 0.01, + "learning_rate": 4.998902793839998e-05, + "loss": 5.416, + "step": 9530 + }, + { + "epoch": 0.01, + "learning_rate": 4.998902561139245e-05, + "loss": 5.6954, + "step": 9531 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989023284138234e-05, + "loss": 5.797, + "step": 9532 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989020956637344e-05, + "loss": 5.6869, + "step": 9533 + }, + { + "epoch": 0.01, + "learning_rate": 4.998901862888977e-05, + "loss": 5.8472, + "step": 9534 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989016300895514e-05, + "loss": 5.7293, + "step": 9535 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989013972654575e-05, + "loss": 5.6829, + "step": 9536 + }, + { + "epoch": 0.01, + "learning_rate": 4.998901164416696e-05, + "loss": 5.8825, + "step": 9537 + }, + { + "epoch": 0.01, + "learning_rate": 4.998900931543267e-05, + "loss": 5.7073, + "step": 9538 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989006986451685e-05, + "loss": 5.5586, + "step": 9539 + }, + { + "epoch": 0.01, + "learning_rate": 4.998900465722403e-05, + "loss": 5.7517, + "step": 9540 + }, + { + "epoch": 0.01, + "learning_rate": 4.9989002327749686e-05, + "loss": 4.9932, + "step": 9541 + }, + { + "epoch": 0.01, + "learning_rate": 4.998899999802866e-05, + "loss": 5.8724, + "step": 9542 + }, + { + "epoch": 0.01, + "learning_rate": 4.998899766806096e-05, + "loss": 5.8379, + "step": 9543 + }, + { + "epoch": 0.01, + "learning_rate": 4.998899533784658e-05, + "loss": 5.7381, + "step": 9544 + }, + { + "epoch": 0.01, + "learning_rate": 4.998899300738551e-05, + "loss": 5.697, + "step": 9545 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988990676677774e-05, + "loss": 5.7577, + "step": 9546 + }, + { + "epoch": 0.01, + "learning_rate": 4.998898834572334e-05, + "loss": 5.6929, + "step": 9547 + }, + { + "epoch": 0.01, + "learning_rate": 4.998898601452223e-05, + "loss": 5.6557, + "step": 9548 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988983683074445e-05, + "loss": 5.7832, + "step": 9549 + }, + { + "epoch": 0.01, + "learning_rate": 4.998898135137998e-05, + "loss": 5.6743, + "step": 9550 + }, + { + "epoch": 0.01, + "learning_rate": 4.998897901943883e-05, + "loss": 5.754, + "step": 9551 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988976687250996e-05, + "loss": 5.1066, + "step": 9552 + }, + { + "epoch": 0.01, + "learning_rate": 4.998897435481649e-05, + "loss": 1.4271, + "step": 9553 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988972022135304e-05, + "loss": 1.3299, + "step": 9554 + }, + { + "epoch": 0.01, + "learning_rate": 4.998896968920743e-05, + "loss": 0.9807, + "step": 9555 + }, + { + "epoch": 0.01, + "learning_rate": 4.998896735603288e-05, + "loss": 1.043, + "step": 9556 + }, + { + "epoch": 0.01, + "learning_rate": 4.998896502261164e-05, + "loss": 0.8069, + "step": 9557 + }, + { + "epoch": 0.01, + "learning_rate": 4.998896268894373e-05, + "loss": 1.1061, + "step": 9558 + }, + { + "epoch": 0.01, + "learning_rate": 4.998896035502914e-05, + "loss": 1.2951, + "step": 9559 + }, + { + "epoch": 0.01, + "learning_rate": 4.998895802086787e-05, + "loss": 0.7709, + "step": 9560 + }, + { + "epoch": 0.01, + "learning_rate": 4.998895568645992e-05, + "loss": 0.327, + "step": 9561 + }, + { + "epoch": 0.01, + "learning_rate": 4.998895335180528e-05, + "loss": 0.6259, + "step": 9562 + }, + { + "epoch": 0.01, + "learning_rate": 4.998895101690396e-05, + "loss": 1.0665, + "step": 9563 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988948681755965e-05, + "loss": 1.0852, + "step": 9564 + }, + { + "epoch": 0.01, + "learning_rate": 4.998894634636129e-05, + "loss": 0.8451, + "step": 9565 + }, + { + "epoch": 0.01, + "learning_rate": 4.998894401071993e-05, + "loss": 1.0385, + "step": 9566 + }, + { + "epoch": 0.01, + "learning_rate": 4.99889416748319e-05, + "loss": 1.1364, + "step": 9567 + }, + { + "epoch": 0.01, + "learning_rate": 4.998893933869718e-05, + "loss": 0.9963, + "step": 9568 + }, + { + "epoch": 0.01, + "learning_rate": 4.998893700231578e-05, + "loss": 1.2045, + "step": 9569 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988934665687704e-05, + "loss": 1.1008, + "step": 9570 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988932328812945e-05, + "loss": 1.0951, + "step": 9571 + }, + { + "epoch": 0.01, + "learning_rate": 4.998892999169151e-05, + "loss": 1.003, + "step": 9572 + }, + { + "epoch": 0.01, + "learning_rate": 4.998892765432339e-05, + "loss": 1.1747, + "step": 9573 + }, + { + "epoch": 0.01, + "learning_rate": 4.998892531670859e-05, + "loss": 1.0342, + "step": 9574 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988922978847115e-05, + "loss": 0.7845, + "step": 9575 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988920640738954e-05, + "loss": 1.0553, + "step": 9576 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988918302384116e-05, + "loss": 0.9495, + "step": 9577 + }, + { + "epoch": 0.01, + "learning_rate": 4.99889159637826e-05, + "loss": 1.1584, + "step": 9578 + }, + { + "epoch": 0.01, + "learning_rate": 4.99889136249344e-05, + "loss": 0.8282, + "step": 9579 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988911285839523e-05, + "loss": 1.1773, + "step": 9580 + }, + { + "epoch": 0.01, + "learning_rate": 4.998890894649796e-05, + "loss": 1.0359, + "step": 9581 + }, + { + "epoch": 0.01, + "learning_rate": 4.998890660690973e-05, + "loss": 1.1669, + "step": 9582 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988904267074804e-05, + "loss": 0.765, + "step": 9583 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988901926993204e-05, + "loss": 1.1385, + "step": 9584 + }, + { + "epoch": 0.01, + "learning_rate": 4.998889958666493e-05, + "loss": 1.0772, + "step": 9585 + }, + { + "epoch": 0.01, + "learning_rate": 4.998889724608997e-05, + "loss": 1.0362, + "step": 9586 + }, + { + "epoch": 0.01, + "learning_rate": 4.998889490526833e-05, + "loss": 1.168, + "step": 9587 + }, + { + "epoch": 0.01, + "learning_rate": 4.998889256420001e-05, + "loss": 1.2161, + "step": 9588 + }, + { + "epoch": 0.01, + "learning_rate": 4.998889022288501e-05, + "loss": 1.0458, + "step": 9589 + }, + { + "epoch": 0.01, + "learning_rate": 4.998888788132333e-05, + "loss": 1.1914, + "step": 9590 + }, + { + "epoch": 0.01, + "learning_rate": 4.998888553951498e-05, + "loss": 1.0189, + "step": 9591 + }, + { + "epoch": 0.01, + "learning_rate": 4.998888319745994e-05, + "loss": 0.2751, + "step": 9592 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988880855158226e-05, + "loss": 0.9459, + "step": 9593 + }, + { + "epoch": 0.01, + "learning_rate": 4.998887851260983e-05, + "loss": 1.2398, + "step": 9594 + }, + { + "epoch": 0.01, + "learning_rate": 4.998887616981475e-05, + "loss": 1.1093, + "step": 9595 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988873826772994e-05, + "loss": 1.1416, + "step": 9596 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988871483484555e-05, + "loss": 0.9976, + "step": 9597 + }, + { + "epoch": 0.01, + "learning_rate": 4.998886913994945e-05, + "loss": 0.247, + "step": 9598 + }, + { + "epoch": 0.01, + "learning_rate": 4.998886679616765e-05, + "loss": 0.4209, + "step": 9599 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988864452139174e-05, + "loss": 1.2544, + "step": 9600 + }, + { + "epoch": 0.01, + "learning_rate": 4.998886210786402e-05, + "loss": 0.9261, + "step": 9601 + }, + { + "epoch": 0.01, + "learning_rate": 4.998885976334219e-05, + "loss": 1.3198, + "step": 9602 + }, + { + "epoch": 0.01, + "learning_rate": 4.998885741857368e-05, + "loss": 1.3817, + "step": 9603 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988855073558484e-05, + "loss": 0.9208, + "step": 9604 + }, + { + "epoch": 0.01, + "learning_rate": 4.998885272829661e-05, + "loss": 1.2108, + "step": 9605 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988850382788055e-05, + "loss": 1.4153, + "step": 9606 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988848037032825e-05, + "loss": 0.9594, + "step": 9607 + }, + { + "epoch": 0.01, + "learning_rate": 4.998884569103092e-05, + "loss": 1.1309, + "step": 9608 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988843344782324e-05, + "loss": 1.1749, + "step": 9609 + }, + { + "epoch": 0.01, + "learning_rate": 4.998884099828706e-05, + "loss": 0.9763, + "step": 9610 + }, + { + "epoch": 0.01, + "learning_rate": 4.998883865154511e-05, + "loss": 1.2261, + "step": 9611 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988836304556487e-05, + "loss": 1.0687, + "step": 9612 + }, + { + "epoch": 0.01, + "learning_rate": 4.998883395732118e-05, + "loss": 1.0324, + "step": 9613 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988831609839195e-05, + "loss": 1.3793, + "step": 9614 + }, + { + "epoch": 0.01, + "learning_rate": 4.998882926211053e-05, + "loss": 0.9648, + "step": 9615 + }, + { + "epoch": 0.01, + "learning_rate": 4.998882691413518e-05, + "loss": 1.2078, + "step": 9616 + }, + { + "epoch": 0.01, + "learning_rate": 4.998882456591316e-05, + "loss": 1.2542, + "step": 9617 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988822217444455e-05, + "loss": 1.3269, + "step": 9618 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988819868729076e-05, + "loss": 1.1625, + "step": 9619 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988817519767014e-05, + "loss": 1.1604, + "step": 9620 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988815170558276e-05, + "loss": 1.0048, + "step": 9621 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988812821102854e-05, + "loss": 0.9177, + "step": 9622 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988810471400756e-05, + "loss": 0.7633, + "step": 9623 + }, + { + "epoch": 0.01, + "learning_rate": 4.998880812145198e-05, + "loss": 0.6968, + "step": 9624 + }, + { + "epoch": 0.01, + "learning_rate": 4.998880577125652e-05, + "loss": 1.1713, + "step": 9625 + }, + { + "epoch": 0.01, + "learning_rate": 4.998880342081439e-05, + "loss": 1.2567, + "step": 9626 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988801070125574e-05, + "loss": 0.8987, + "step": 9627 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988798719190074e-05, + "loss": 1.107, + "step": 9628 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988796368007905e-05, + "loss": 1.1169, + "step": 9629 + }, + { + "epoch": 0.01, + "learning_rate": 4.998879401657905e-05, + "loss": 1.0902, + "step": 9630 + }, + { + "epoch": 0.01, + "learning_rate": 4.998879166490353e-05, + "loss": 1.0164, + "step": 9631 + }, + { + "epoch": 0.01, + "learning_rate": 4.998878931298132e-05, + "loss": 0.9741, + "step": 9632 + }, + { + "epoch": 0.01, + "learning_rate": 4.998878696081244e-05, + "loss": 0.9065, + "step": 9633 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988784608396865e-05, + "loss": 1.2073, + "step": 9634 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988782255734624e-05, + "loss": 1.1442, + "step": 9635 + }, + { + "epoch": 0.01, + "learning_rate": 4.99887799028257e-05, + "loss": 1.2982, + "step": 9636 + }, + { + "epoch": 0.01, + "learning_rate": 4.998877754967009e-05, + "loss": 1.0833, + "step": 9637 + }, + { + "epoch": 0.01, + "learning_rate": 4.998877519626782e-05, + "loss": 1.2827, + "step": 9638 + }, + { + "epoch": 0.01, + "learning_rate": 4.998877284261886e-05, + "loss": 1.3037, + "step": 9639 + }, + { + "epoch": 0.01, + "learning_rate": 4.998877048872322e-05, + "loss": 1.1347, + "step": 9640 + }, + { + "epoch": 0.01, + "learning_rate": 4.99887681345809e-05, + "loss": 1.0885, + "step": 9641 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988765780191906e-05, + "loss": 1.1442, + "step": 9642 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988763425556234e-05, + "loss": 0.748, + "step": 9643 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988761070673886e-05, + "loss": 1.2061, + "step": 9644 + }, + { + "epoch": 0.01, + "learning_rate": 4.998875871554485e-05, + "loss": 1.2798, + "step": 9645 + }, + { + "epoch": 0.01, + "learning_rate": 4.998875636016914e-05, + "loss": 1.0119, + "step": 9646 + }, + { + "epoch": 0.01, + "learning_rate": 4.998875400454676e-05, + "loss": 1.0513, + "step": 9647 + }, + { + "epoch": 0.01, + "learning_rate": 4.998875164867769e-05, + "loss": 1.0932, + "step": 9648 + }, + { + "epoch": 0.01, + "learning_rate": 4.998874929256194e-05, + "loss": 1.0462, + "step": 9649 + }, + { + "epoch": 0.01, + "learning_rate": 4.998874693619952e-05, + "loss": 0.8771, + "step": 9650 + }, + { + "epoch": 0.01, + "learning_rate": 4.998874457959042e-05, + "loss": 0.9754, + "step": 9651 + }, + { + "epoch": 0.01, + "learning_rate": 4.998874222273464e-05, + "loss": 1.2265, + "step": 9652 + }, + { + "epoch": 0.01, + "learning_rate": 4.998873986563218e-05, + "loss": 1.1009, + "step": 9653 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988737508283044e-05, + "loss": 1.1044, + "step": 9654 + }, + { + "epoch": 0.01, + "learning_rate": 4.998873515068723e-05, + "loss": 1.0872, + "step": 9655 + }, + { + "epoch": 0.01, + "learning_rate": 4.998873279284474e-05, + "loss": 1.3058, + "step": 9656 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988730434755564e-05, + "loss": 0.9225, + "step": 9657 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988728076419714e-05, + "loss": 0.9627, + "step": 9658 + }, + { + "epoch": 0.01, + "learning_rate": 4.998872571783719e-05, + "loss": 1.1607, + "step": 9659 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988723359007983e-05, + "loss": 1.1587, + "step": 9660 + }, + { + "epoch": 0.01, + "learning_rate": 4.99887209999321e-05, + "loss": 1.2425, + "step": 9661 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988718640609535e-05, + "loss": 1.0909, + "step": 9662 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988716281040296e-05, + "loss": 1.0877, + "step": 9663 + }, + { + "epoch": 0.01, + "learning_rate": 4.998871392122438e-05, + "loss": 1.1519, + "step": 9664 + }, + { + "epoch": 0.01, + "learning_rate": 4.998871156116178e-05, + "loss": 1.5058, + "step": 9665 + }, + { + "epoch": 0.01, + "learning_rate": 4.998870920085251e-05, + "loss": 1.6072, + "step": 9666 + }, + { + "epoch": 0.01, + "learning_rate": 4.998870684029655e-05, + "loss": 1.698, + "step": 9667 + }, + { + "epoch": 0.01, + "learning_rate": 4.998870447949392e-05, + "loss": 1.301, + "step": 9668 + }, + { + "epoch": 0.01, + "learning_rate": 4.998870211844462e-05, + "loss": 1.3828, + "step": 9669 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988699757148625e-05, + "loss": 1.5221, + "step": 9670 + }, + { + "epoch": 0.01, + "learning_rate": 4.998869739560597e-05, + "loss": 1.0533, + "step": 9671 + }, + { + "epoch": 0.01, + "learning_rate": 4.998869503381662e-05, + "loss": 0.8912, + "step": 9672 + }, + { + "epoch": 0.01, + "learning_rate": 4.99886926717806e-05, + "loss": 0.7509, + "step": 9673 + }, + { + "epoch": 0.01, + "learning_rate": 4.99886903094979e-05, + "loss": 0.8431, + "step": 9674 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988687946968526e-05, + "loss": 0.188, + "step": 9675 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988685584192475e-05, + "loss": 0.6945, + "step": 9676 + }, + { + "epoch": 0.01, + "learning_rate": 4.998868322116975e-05, + "loss": 1.049, + "step": 9677 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988680857900336e-05, + "loss": 1.0678, + "step": 9678 + }, + { + "epoch": 0.01, + "learning_rate": 4.998867849438425e-05, + "loss": 1.2841, + "step": 9679 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988676130621485e-05, + "loss": 1.0755, + "step": 9680 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988673766612045e-05, + "loss": 0.8511, + "step": 9681 + }, + { + "epoch": 0.01, + "learning_rate": 4.998867140235592e-05, + "loss": 1.0256, + "step": 9682 + }, + { + "epoch": 0.01, + "learning_rate": 4.998866903785312e-05, + "loss": 1.1308, + "step": 9683 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988666673103654e-05, + "loss": 0.9747, + "step": 9684 + }, + { + "epoch": 0.01, + "learning_rate": 4.99886643081075e-05, + "loss": 1.1631, + "step": 9685 + }, + { + "epoch": 0.01, + "learning_rate": 4.998866194286467e-05, + "loss": 1.09, + "step": 9686 + }, + { + "epoch": 0.01, + "learning_rate": 4.998865957737516e-05, + "loss": 1.8164, + "step": 9687 + }, + { + "epoch": 0.01, + "learning_rate": 4.998865721163898e-05, + "loss": 1.7847, + "step": 9688 + }, + { + "epoch": 0.01, + "learning_rate": 4.998865484565611e-05, + "loss": 1.7774, + "step": 9689 + }, + { + "epoch": 0.01, + "learning_rate": 4.998865247942657e-05, + "loss": 1.3281, + "step": 9690 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988650112950354e-05, + "loss": 1.3932, + "step": 9691 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988647746227454e-05, + "loss": 1.4631, + "step": 9692 + }, + { + "epoch": 0.01, + "learning_rate": 4.998864537925789e-05, + "loss": 1.2774, + "step": 9693 + }, + { + "epoch": 0.01, + "learning_rate": 4.998864301204164e-05, + "loss": 1.2605, + "step": 9694 + }, + { + "epoch": 0.01, + "learning_rate": 4.998864064457871e-05, + "loss": 0.9692, + "step": 9695 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988638276869106e-05, + "loss": 1.1142, + "step": 9696 + }, + { + "epoch": 0.01, + "learning_rate": 4.998863590891283e-05, + "loss": 1.2291, + "step": 9697 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988633540709867e-05, + "loss": 1.0415, + "step": 9698 + }, + { + "epoch": 0.01, + "learning_rate": 4.998863117226023e-05, + "loss": 1.2165, + "step": 9699 + }, + { + "epoch": 0.01, + "learning_rate": 4.998862880356392e-05, + "loss": 1.092, + "step": 9700 + }, + { + "epoch": 0.01, + "learning_rate": 4.998862643462092e-05, + "loss": 1.5197, + "step": 9701 + }, + { + "epoch": 0.01, + "learning_rate": 4.998862406543125e-05, + "loss": 1.032, + "step": 9702 + }, + { + "epoch": 0.01, + "learning_rate": 4.998862169599491e-05, + "loss": 1.2112, + "step": 9703 + }, + { + "epoch": 0.01, + "learning_rate": 4.998861932631189e-05, + "loss": 1.0574, + "step": 9704 + }, + { + "epoch": 0.01, + "learning_rate": 4.998861695638219e-05, + "loss": 1.1226, + "step": 9705 + }, + { + "epoch": 0.01, + "learning_rate": 4.998861458620582e-05, + "loss": 1.1803, + "step": 9706 + }, + { + "epoch": 0.01, + "learning_rate": 4.998861221578276e-05, + "loss": 1.3568, + "step": 9707 + }, + { + "epoch": 0.01, + "learning_rate": 4.998860984511303e-05, + "loss": 1.6537, + "step": 9708 + }, + { + "epoch": 0.01, + "learning_rate": 4.998860747419663e-05, + "loss": 1.0766, + "step": 9709 + }, + { + "epoch": 0.01, + "learning_rate": 4.998860510303354e-05, + "loss": 1.119, + "step": 9710 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988602731623776e-05, + "loss": 1.1105, + "step": 9711 + }, + { + "epoch": 0.01, + "learning_rate": 4.998860035996734e-05, + "loss": 0.682, + "step": 9712 + }, + { + "epoch": 0.01, + "learning_rate": 4.998859798806422e-05, + "loss": 1.0323, + "step": 9713 + }, + { + "epoch": 0.01, + "learning_rate": 4.998859561591443e-05, + "loss": 0.7753, + "step": 9714 + }, + { + "epoch": 0.01, + "learning_rate": 4.998859324351796e-05, + "loss": 1.0854, + "step": 9715 + }, + { + "epoch": 0.01, + "learning_rate": 4.998859087087482e-05, + "loss": 0.9504, + "step": 9716 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988588497984986e-05, + "loss": 1.2355, + "step": 9717 + }, + { + "epoch": 0.01, + "learning_rate": 4.998858612484849e-05, + "loss": 1.2058, + "step": 9718 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988583751465314e-05, + "loss": 1.6141, + "step": 9719 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988581377835464e-05, + "loss": 1.2754, + "step": 9720 + }, + { + "epoch": 0.01, + "learning_rate": 4.998857900395894e-05, + "loss": 0.976, + "step": 9721 + }, + { + "epoch": 0.01, + "learning_rate": 4.998857662983573e-05, + "loss": 1.2198, + "step": 9722 + }, + { + "epoch": 0.01, + "learning_rate": 4.998857425546584e-05, + "loss": 0.9265, + "step": 9723 + }, + { + "epoch": 0.01, + "learning_rate": 4.998857188084928e-05, + "loss": 1.2309, + "step": 9724 + }, + { + "epoch": 0.01, + "learning_rate": 4.998856950598604e-05, + "loss": 0.8348, + "step": 9725 + }, + { + "epoch": 0.01, + "learning_rate": 4.998856713087613e-05, + "loss": 1.1515, + "step": 9726 + }, + { + "epoch": 0.01, + "learning_rate": 4.998856475551954e-05, + "loss": 1.0123, + "step": 9727 + }, + { + "epoch": 0.01, + "learning_rate": 4.998856237991627e-05, + "loss": 1.0243, + "step": 9728 + }, + { + "epoch": 0.01, + "learning_rate": 4.998856000406633e-05, + "loss": 1.0206, + "step": 9729 + }, + { + "epoch": 0.01, + "learning_rate": 4.998855762796971e-05, + "loss": 1.1601, + "step": 9730 + }, + { + "epoch": 0.01, + "learning_rate": 4.998855525162641e-05, + "loss": 0.9926, + "step": 9731 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988552875036446e-05, + "loss": 0.7021, + "step": 9732 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988550498199796e-05, + "loss": 0.964, + "step": 9733 + }, + { + "epoch": 0.01, + "learning_rate": 4.998854812111647e-05, + "loss": 0.972, + "step": 9734 + }, + { + "epoch": 0.01, + "learning_rate": 4.998854574378646e-05, + "loss": 1.144, + "step": 9735 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988543366209784e-05, + "loss": 1.2402, + "step": 9736 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988540988386436e-05, + "loss": 1.0957, + "step": 9737 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988538610316405e-05, + "loss": 1.081, + "step": 9738 + }, + { + "epoch": 0.01, + "learning_rate": 4.998853623199969e-05, + "loss": 1.106, + "step": 9739 + }, + { + "epoch": 0.01, + "learning_rate": 4.998853385343631e-05, + "loss": 0.6211, + "step": 9740 + }, + { + "epoch": 0.01, + "learning_rate": 4.998853147462625e-05, + "loss": 1.0139, + "step": 9741 + }, + { + "epoch": 0.01, + "learning_rate": 4.998852909556951e-05, + "loss": 1.1727, + "step": 9742 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988526716266106e-05, + "loss": 0.9099, + "step": 9743 + }, + { + "epoch": 0.01, + "learning_rate": 4.998852433671602e-05, + "loss": 0.9932, + "step": 9744 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988521956919245e-05, + "loss": 0.7721, + "step": 9745 + }, + { + "epoch": 0.01, + "learning_rate": 4.99885195768758e-05, + "loss": 1.2619, + "step": 9746 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988517196585685e-05, + "loss": 1.2188, + "step": 9747 + }, + { + "epoch": 0.01, + "learning_rate": 4.998851481604889e-05, + "loss": 1.2085, + "step": 9748 + }, + { + "epoch": 0.01, + "learning_rate": 4.998851243526542e-05, + "loss": 0.9917, + "step": 9749 + }, + { + "epoch": 0.01, + "learning_rate": 4.998851005423528e-05, + "loss": 1.3981, + "step": 9750 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988507672958464e-05, + "loss": 1.2426, + "step": 9751 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988505291434965e-05, + "loss": 1.0428, + "step": 9752 + }, + { + "epoch": 0.01, + "learning_rate": 4.998850290966479e-05, + "loss": 1.1453, + "step": 9753 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988500527647944e-05, + "loss": 1.0549, + "step": 9754 + }, + { + "epoch": 0.01, + "learning_rate": 4.998849814538441e-05, + "loss": 1.0695, + "step": 9755 + }, + { + "epoch": 0.01, + "learning_rate": 4.998849576287421e-05, + "loss": 1.1746, + "step": 9756 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988493380117336e-05, + "loss": 1.2758, + "step": 9757 + }, + { + "epoch": 0.01, + "learning_rate": 4.998849099711378e-05, + "loss": 1.1487, + "step": 9758 + }, + { + "epoch": 0.01, + "learning_rate": 4.998848861386355e-05, + "loss": 1.1116, + "step": 9759 + }, + { + "epoch": 0.01, + "learning_rate": 4.998848623036665e-05, + "loss": 1.1157, + "step": 9760 + }, + { + "epoch": 0.01, + "learning_rate": 4.998848384662307e-05, + "loss": 1.0451, + "step": 9761 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988481462632806e-05, + "loss": 0.8861, + "step": 9762 + }, + { + "epoch": 0.01, + "learning_rate": 4.998847907839588e-05, + "loss": 1.2327, + "step": 9763 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988476693912266e-05, + "loss": 1.5421, + "step": 9764 + }, + { + "epoch": 0.01, + "learning_rate": 4.998847430918199e-05, + "loss": 0.8174, + "step": 9765 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988471924205026e-05, + "loss": 0.9378, + "step": 9766 + }, + { + "epoch": 0.01, + "learning_rate": 4.998846953898139e-05, + "loss": 1.1805, + "step": 9767 + }, + { + "epoch": 0.01, + "learning_rate": 4.998846715351108e-05, + "loss": 1.1691, + "step": 9768 + }, + { + "epoch": 0.01, + "learning_rate": 4.99884647677941e-05, + "loss": 1.0429, + "step": 9769 + }, + { + "epoch": 0.01, + "learning_rate": 4.998846238183044e-05, + "loss": 1.0347, + "step": 9770 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988459995620096e-05, + "loss": 1.0842, + "step": 9771 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988457609163083e-05, + "loss": 1.4174, + "step": 9772 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988455222459395e-05, + "loss": 1.3438, + "step": 9773 + }, + { + "epoch": 0.01, + "learning_rate": 4.998845283550902e-05, + "loss": 1.3013, + "step": 9774 + }, + { + "epoch": 0.01, + "learning_rate": 4.998845044831199e-05, + "loss": 0.4824, + "step": 9775 + }, + { + "epoch": 0.01, + "learning_rate": 4.998844806086828e-05, + "loss": 0.2471, + "step": 9776 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988445673177884e-05, + "loss": 0.2471, + "step": 9777 + }, + { + "epoch": 0.01, + "learning_rate": 4.998844328524081e-05, + "loss": 0.1561, + "step": 9778 + }, + { + "epoch": 0.01, + "learning_rate": 4.998844089705707e-05, + "loss": 0.1945, + "step": 9779 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988438508626664e-05, + "loss": 0.1443, + "step": 9780 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988436119949565e-05, + "loss": 0.1177, + "step": 9781 + }, + { + "epoch": 0.01, + "learning_rate": 4.99884337310258e-05, + "loss": 0.1291, + "step": 9782 + }, + { + "epoch": 0.01, + "learning_rate": 4.998843134185536e-05, + "loss": 0.2359, + "step": 9783 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988428952438236e-05, + "loss": 0.3172, + "step": 9784 + }, + { + "epoch": 0.01, + "learning_rate": 4.998842656277445e-05, + "loss": 0.2112, + "step": 9785 + }, + { + "epoch": 0.01, + "learning_rate": 4.998842417286398e-05, + "loss": 0.8715, + "step": 9786 + }, + { + "epoch": 0.01, + "learning_rate": 4.998842178270684e-05, + "loss": 1.3531, + "step": 9787 + }, + { + "epoch": 0.01, + "learning_rate": 4.998841939230302e-05, + "loss": 1.3317, + "step": 9788 + }, + { + "epoch": 0.01, + "learning_rate": 4.998841700165252e-05, + "loss": 1.3058, + "step": 9789 + }, + { + "epoch": 0.01, + "learning_rate": 4.998841461075535e-05, + "loss": 1.2345, + "step": 9790 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988412219611516e-05, + "loss": 0.7695, + "step": 9791 + }, + { + "epoch": 0.01, + "learning_rate": 4.998840982822099e-05, + "loss": 0.8632, + "step": 9792 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988407436583796e-05, + "loss": 1.1626, + "step": 9793 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988405044699925e-05, + "loss": 1.0613, + "step": 9794 + }, + { + "epoch": 0.01, + "learning_rate": 4.998840265256939e-05, + "loss": 1.042, + "step": 9795 + }, + { + "epoch": 0.01, + "learning_rate": 4.998840026019217e-05, + "loss": 0.3696, + "step": 9796 + }, + { + "epoch": 0.01, + "learning_rate": 4.998839786756827e-05, + "loss": 0.77, + "step": 9797 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988395474697705e-05, + "loss": 1.2609, + "step": 9798 + }, + { + "epoch": 0.01, + "learning_rate": 4.998839308158046e-05, + "loss": 1.0351, + "step": 9799 + }, + { + "epoch": 0.01, + "learning_rate": 4.998839068821654e-05, + "loss": 2.8369, + "step": 9800 + }, + { + "epoch": 0.01, + "learning_rate": 4.998838829460595e-05, + "loss": 1.3971, + "step": 9801 + }, + { + "epoch": 0.01, + "learning_rate": 4.998838590074868e-05, + "loss": 1.3847, + "step": 9802 + }, + { + "epoch": 0.01, + "learning_rate": 4.998838350664474e-05, + "loss": 1.1103, + "step": 9803 + }, + { + "epoch": 0.01, + "learning_rate": 4.998838111229411e-05, + "loss": 1.404, + "step": 9804 + }, + { + "epoch": 0.01, + "learning_rate": 4.998837871769683e-05, + "loss": 1.1215, + "step": 9805 + }, + { + "epoch": 0.01, + "learning_rate": 4.998837632285286e-05, + "loss": 0.905, + "step": 9806 + }, + { + "epoch": 0.01, + "learning_rate": 4.998837392776221e-05, + "loss": 1.1019, + "step": 9807 + }, + { + "epoch": 0.01, + "learning_rate": 4.99883715324249e-05, + "loss": 0.7928, + "step": 9808 + }, + { + "epoch": 0.01, + "learning_rate": 4.998836913684091e-05, + "loss": 0.9298, + "step": 9809 + }, + { + "epoch": 0.01, + "learning_rate": 4.998836674101024e-05, + "loss": 1.22, + "step": 9810 + }, + { + "epoch": 0.01, + "learning_rate": 4.99883643449329e-05, + "loss": 1.2782, + "step": 9811 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988361948608885e-05, + "loss": 1.0752, + "step": 9812 + }, + { + "epoch": 0.01, + "learning_rate": 4.99883595520382e-05, + "loss": 1.2075, + "step": 9813 + }, + { + "epoch": 0.01, + "learning_rate": 4.998835715522083e-05, + "loss": 1.3142, + "step": 9814 + }, + { + "epoch": 0.01, + "learning_rate": 4.998835475815679e-05, + "loss": 1.3853, + "step": 9815 + }, + { + "epoch": 0.01, + "learning_rate": 4.998835236084608e-05, + "loss": 1.2796, + "step": 9816 + }, + { + "epoch": 0.01, + "learning_rate": 4.998834996328869e-05, + "loss": 1.1469, + "step": 9817 + }, + { + "epoch": 0.01, + "learning_rate": 4.998834756548463e-05, + "loss": 0.8047, + "step": 9818 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988345167433884e-05, + "loss": 1.2134, + "step": 9819 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988342769136476e-05, + "loss": 1.0523, + "step": 9820 + }, + { + "epoch": 0.01, + "learning_rate": 4.998834037059239e-05, + "loss": 1.109, + "step": 9821 + }, + { + "epoch": 0.01, + "learning_rate": 4.998833797180163e-05, + "loss": 1.2216, + "step": 9822 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988335572764193e-05, + "loss": 1.0843, + "step": 9823 + }, + { + "epoch": 0.01, + "learning_rate": 4.998833317348009e-05, + "loss": 1.0799, + "step": 9824 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988330773949304e-05, + "loss": 1.2196, + "step": 9825 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988328374171845e-05, + "loss": 1.1059, + "step": 9826 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988325974147716e-05, + "loss": 1.1167, + "step": 9827 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988323573876904e-05, + "loss": 1.1676, + "step": 9828 + }, + { + "epoch": 0.01, + "learning_rate": 4.998832117335943e-05, + "loss": 0.9388, + "step": 9829 + }, + { + "epoch": 0.01, + "learning_rate": 4.998831877259528e-05, + "loss": 0.8102, + "step": 9830 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988316371584444e-05, + "loss": 0.9359, + "step": 9831 + }, + { + "epoch": 0.01, + "learning_rate": 4.998831397032694e-05, + "loss": 1.1201, + "step": 9832 + }, + { + "epoch": 0.01, + "learning_rate": 4.998831156882277e-05, + "loss": 1.09, + "step": 9833 + }, + { + "epoch": 0.01, + "learning_rate": 4.998830916707191e-05, + "loss": 0.981, + "step": 9834 + }, + { + "epoch": 0.01, + "learning_rate": 4.998830676507439e-05, + "loss": 0.9748, + "step": 9835 + }, + { + "epoch": 0.01, + "learning_rate": 4.998830436283019e-05, + "loss": 1.1448, + "step": 9836 + }, + { + "epoch": 0.01, + "learning_rate": 4.998830196033932e-05, + "loss": 0.9496, + "step": 9837 + }, + { + "epoch": 0.01, + "learning_rate": 4.998829955760177e-05, + "loss": 1.2107, + "step": 9838 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988297154617546e-05, + "loss": 0.8966, + "step": 9839 + }, + { + "epoch": 0.01, + "learning_rate": 4.998829475138666e-05, + "loss": 0.731, + "step": 9840 + }, + { + "epoch": 0.01, + "learning_rate": 4.998829234790908e-05, + "loss": 1.031, + "step": 9841 + }, + { + "epoch": 0.01, + "learning_rate": 4.998828994418484e-05, + "loss": 0.3472, + "step": 9842 + }, + { + "epoch": 0.01, + "learning_rate": 4.998828754021393e-05, + "loss": 0.3417, + "step": 9843 + }, + { + "epoch": 0.01, + "learning_rate": 4.998828513599634e-05, + "loss": 0.3128, + "step": 9844 + }, + { + "epoch": 0.01, + "learning_rate": 4.998828273153208e-05, + "loss": 0.3333, + "step": 9845 + }, + { + "epoch": 0.01, + "learning_rate": 4.998828032682113e-05, + "loss": 0.5605, + "step": 9846 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988277921863525e-05, + "loss": 0.2648, + "step": 9847 + }, + { + "epoch": 0.01, + "learning_rate": 4.998827551665924e-05, + "loss": 0.2204, + "step": 9848 + }, + { + "epoch": 0.01, + "learning_rate": 4.998827311120828e-05, + "loss": 0.2649, + "step": 9849 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988270705510644e-05, + "loss": 1.7215, + "step": 9850 + }, + { + "epoch": 0.01, + "learning_rate": 4.998826829956634e-05, + "loss": 1.6377, + "step": 9851 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988265893375354e-05, + "loss": 1.4358, + "step": 9852 + }, + { + "epoch": 0.01, + "learning_rate": 4.99882634869377e-05, + "loss": 1.1824, + "step": 9853 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988261080253374e-05, + "loss": 1.102, + "step": 9854 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988258673322376e-05, + "loss": 1.1893, + "step": 9855 + }, + { + "epoch": 0.01, + "learning_rate": 4.99882562661447e-05, + "loss": 0.9691, + "step": 9856 + }, + { + "epoch": 0.01, + "learning_rate": 4.998825385872035e-05, + "loss": 0.7626, + "step": 9857 + }, + { + "epoch": 0.01, + "learning_rate": 4.998825145104933e-05, + "loss": 0.5945, + "step": 9858 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988249043131634e-05, + "loss": 1.1725, + "step": 9859 + }, + { + "epoch": 0.01, + "learning_rate": 4.998824663496727e-05, + "loss": 1.2295, + "step": 9860 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988244226556225e-05, + "loss": 1.0714, + "step": 9861 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988241817898506e-05, + "loss": 0.6421, + "step": 9862 + }, + { + "epoch": 0.01, + "learning_rate": 4.998823940899412e-05, + "loss": 1.0365, + "step": 9863 + }, + { + "epoch": 0.01, + "learning_rate": 4.998823699984306e-05, + "loss": 1.0579, + "step": 9864 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988234590445326e-05, + "loss": 0.9681, + "step": 9865 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988232180800915e-05, + "loss": 1.1143, + "step": 9866 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988229770909835e-05, + "loss": 1.2837, + "step": 9867 + }, + { + "epoch": 0.01, + "learning_rate": 4.998822736077208e-05, + "loss": 1.1725, + "step": 9868 + }, + { + "epoch": 0.01, + "learning_rate": 4.998822495038765e-05, + "loss": 1.0829, + "step": 9869 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988222539756544e-05, + "loss": 1.0975, + "step": 9870 + }, + { + "epoch": 0.01, + "learning_rate": 4.998822012887877e-05, + "loss": 1.1761, + "step": 9871 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988217717754324e-05, + "loss": 1.1875, + "step": 9872 + }, + { + "epoch": 0.01, + "learning_rate": 4.99882153063832e-05, + "loss": 1.2365, + "step": 9873 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988212894765405e-05, + "loss": 1.3565, + "step": 9874 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988210482900935e-05, + "loss": 1.0789, + "step": 9875 + }, + { + "epoch": 0.01, + "learning_rate": 4.99882080707898e-05, + "loss": 1.159, + "step": 9876 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988205658431985e-05, + "loss": 1.3166, + "step": 9877 + }, + { + "epoch": 0.01, + "learning_rate": 4.99882032458275e-05, + "loss": 0.9486, + "step": 9878 + }, + { + "epoch": 0.01, + "learning_rate": 4.998820083297634e-05, + "loss": 0.9855, + "step": 9879 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988198419878506e-05, + "loss": 0.9638, + "step": 9880 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988196006534e-05, + "loss": 1.309, + "step": 9881 + }, + { + "epoch": 0.01, + "learning_rate": 4.998819359294282e-05, + "loss": 1.0043, + "step": 9882 + }, + { + "epoch": 0.01, + "learning_rate": 4.998819117910497e-05, + "loss": 1.1235, + "step": 9883 + }, + { + "epoch": 0.01, + "learning_rate": 4.998818876502044e-05, + "loss": 1.2024, + "step": 9884 + }, + { + "epoch": 0.01, + "learning_rate": 4.998818635068925e-05, + "loss": 0.9608, + "step": 9885 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988183936111374e-05, + "loss": 0.9681, + "step": 9886 + }, + { + "epoch": 0.01, + "learning_rate": 4.998818152128683e-05, + "loss": 1.2117, + "step": 9887 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988179106215614e-05, + "loss": 1.1707, + "step": 9888 + }, + { + "epoch": 0.01, + "learning_rate": 4.998817669089773e-05, + "loss": 0.6553, + "step": 9889 + }, + { + "epoch": 0.01, + "learning_rate": 4.998817427533317e-05, + "loss": 1.047, + "step": 9890 + }, + { + "epoch": 0.01, + "learning_rate": 4.998817185952193e-05, + "loss": 0.9099, + "step": 9891 + }, + { + "epoch": 0.01, + "learning_rate": 4.998816944346403e-05, + "loss": 1.2208, + "step": 9892 + }, + { + "epoch": 0.01, + "learning_rate": 4.998816702715944e-05, + "loss": 1.1677, + "step": 9893 + }, + { + "epoch": 0.01, + "learning_rate": 4.998816461060819e-05, + "loss": 1.0616, + "step": 9894 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988162193810266e-05, + "loss": 1.1913, + "step": 9895 + }, + { + "epoch": 0.01, + "learning_rate": 4.998815977676567e-05, + "loss": 1.8925, + "step": 9896 + }, + { + "epoch": 0.01, + "learning_rate": 4.99881573594744e-05, + "loss": 1.3356, + "step": 9897 + }, + { + "epoch": 0.01, + "learning_rate": 4.998815494193646e-05, + "loss": 1.1028, + "step": 9898 + }, + { + "epoch": 0.01, + "learning_rate": 4.998815252415184e-05, + "loss": 1.2409, + "step": 9899 + }, + { + "epoch": 0.01, + "learning_rate": 4.998815010612056e-05, + "loss": 1.1552, + "step": 9900 + }, + { + "epoch": 0.01, + "learning_rate": 4.998814768784259e-05, + "loss": 1.0364, + "step": 9901 + }, + { + "epoch": 0.01, + "learning_rate": 4.998814526931796e-05, + "loss": 0.6646, + "step": 9902 + }, + { + "epoch": 0.01, + "learning_rate": 4.998814285054666e-05, + "loss": 0.9307, + "step": 9903 + }, + { + "epoch": 0.01, + "learning_rate": 4.998814043152868e-05, + "loss": 0.902, + "step": 9904 + }, + { + "epoch": 0.01, + "learning_rate": 4.998813801226403e-05, + "loss": 1.3225, + "step": 9905 + }, + { + "epoch": 0.01, + "learning_rate": 4.998813559275271e-05, + "loss": 1.0705, + "step": 9906 + }, + { + "epoch": 0.01, + "learning_rate": 4.998813317299471e-05, + "loss": 1.1976, + "step": 9907 + }, + { + "epoch": 0.01, + "learning_rate": 4.998813075299005e-05, + "loss": 0.8782, + "step": 9908 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988128332738706e-05, + "loss": 1.282, + "step": 9909 + }, + { + "epoch": 0.01, + "learning_rate": 4.998812591224069e-05, + "loss": 1.3072, + "step": 9910 + }, + { + "epoch": 0.01, + "learning_rate": 4.998812349149601e-05, + "loss": 1.2509, + "step": 9911 + }, + { + "epoch": 0.01, + "learning_rate": 4.998812107050466e-05, + "loss": 1.2104, + "step": 9912 + }, + { + "epoch": 0.01, + "learning_rate": 4.998811864926662e-05, + "loss": 1.1429, + "step": 9913 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988116227781925e-05, + "loss": 1.34, + "step": 9914 + }, + { + "epoch": 0.01, + "learning_rate": 4.998811380605055e-05, + "loss": 0.9676, + "step": 9915 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988111384072514e-05, + "loss": 0.9763, + "step": 9916 + }, + { + "epoch": 0.01, + "learning_rate": 4.998810896184779e-05, + "loss": 1.2044, + "step": 9917 + }, + { + "epoch": 0.01, + "learning_rate": 4.99881065393764e-05, + "loss": 1.0246, + "step": 9918 + }, + { + "epoch": 0.01, + "learning_rate": 4.998810411665834e-05, + "loss": 1.0786, + "step": 9919 + }, + { + "epoch": 0.01, + "learning_rate": 4.998810169369361e-05, + "loss": 1.0917, + "step": 9920 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988099270482205e-05, + "loss": 1.1904, + "step": 9921 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988096847024124e-05, + "loss": 1.2247, + "step": 9922 + }, + { + "epoch": 0.01, + "learning_rate": 4.998809442331937e-05, + "loss": 1.1823, + "step": 9923 + }, + { + "epoch": 0.01, + "learning_rate": 4.998809199936796e-05, + "loss": 1.0782, + "step": 9924 + }, + { + "epoch": 0.01, + "learning_rate": 4.998808957516986e-05, + "loss": 0.9249, + "step": 9925 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988087150725096e-05, + "loss": 1.1004, + "step": 9926 + }, + { + "epoch": 0.01, + "learning_rate": 4.998808472603366e-05, + "loss": 1.1934, + "step": 9927 + }, + { + "epoch": 0.01, + "learning_rate": 4.998808230109555e-05, + "loss": 1.3222, + "step": 9928 + }, + { + "epoch": 0.01, + "learning_rate": 4.998807987591077e-05, + "loss": 1.2111, + "step": 9929 + }, + { + "epoch": 0.01, + "learning_rate": 4.998807745047932e-05, + "loss": 1.0947, + "step": 9930 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988075024801194e-05, + "loss": 1.2274, + "step": 9931 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988072598876404e-05, + "loss": 1.1896, + "step": 9932 + }, + { + "epoch": 0.01, + "learning_rate": 4.998807017270494e-05, + "loss": 1.0705, + "step": 9933 + }, + { + "epoch": 0.01, + "learning_rate": 4.99880677462868e-05, + "loss": 1.2374, + "step": 9934 + }, + { + "epoch": 0.01, + "learning_rate": 4.998806531962199e-05, + "loss": 1.466, + "step": 9935 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988062892710506e-05, + "loss": 0.999, + "step": 9936 + }, + { + "epoch": 0.01, + "learning_rate": 4.998806046555235e-05, + "loss": 1.0264, + "step": 9937 + }, + { + "epoch": 0.01, + "learning_rate": 4.998805803814752e-05, + "loss": 1.4109, + "step": 9938 + }, + { + "epoch": 0.01, + "learning_rate": 4.998805561049603e-05, + "loss": 1.1204, + "step": 9939 + }, + { + "epoch": 0.01, + "learning_rate": 4.998805318259786e-05, + "loss": 1.11, + "step": 9940 + }, + { + "epoch": 0.01, + "learning_rate": 4.998805075445302e-05, + "loss": 0.5099, + "step": 9941 + }, + { + "epoch": 0.01, + "learning_rate": 4.998804832606151e-05, + "loss": 0.4688, + "step": 9942 + }, + { + "epoch": 0.01, + "learning_rate": 4.998804589742332e-05, + "loss": 0.3469, + "step": 9943 + }, + { + "epoch": 0.01, + "learning_rate": 4.998804346853847e-05, + "loss": 0.1816, + "step": 9944 + }, + { + "epoch": 0.01, + "learning_rate": 4.998804103940694e-05, + "loss": 0.1119, + "step": 9945 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988038610028745e-05, + "loss": 0.1391, + "step": 9946 + }, + { + "epoch": 0.01, + "learning_rate": 4.998803618040388e-05, + "loss": 0.7113, + "step": 9947 + }, + { + "epoch": 0.01, + "learning_rate": 4.998803375053234e-05, + "loss": 1.4228, + "step": 9948 + }, + { + "epoch": 0.01, + "learning_rate": 4.998803132041413e-05, + "loss": 1.482, + "step": 9949 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988028890049244e-05, + "loss": 1.5651, + "step": 9950 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988026459437685e-05, + "loss": 1.1674, + "step": 9951 + }, + { + "epoch": 0.01, + "learning_rate": 4.998802402857946e-05, + "loss": 1.149, + "step": 9952 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988021597474565e-05, + "loss": 1.245, + "step": 9953 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988019166123004e-05, + "loss": 1.153, + "step": 9954 + }, + { + "epoch": 0.01, + "learning_rate": 4.998801673452476e-05, + "loss": 0.9997, + "step": 9955 + }, + { + "epoch": 0.01, + "learning_rate": 4.998801430267985e-05, + "loss": 1.0358, + "step": 9956 + }, + { + "epoch": 0.01, + "learning_rate": 4.998801187058827e-05, + "loss": 1.0735, + "step": 9957 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988009438250017e-05, + "loss": 1.0173, + "step": 9958 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988007005665094e-05, + "loss": 1.2286, + "step": 9959 + }, + { + "epoch": 0.01, + "learning_rate": 4.9988004572833496e-05, + "loss": 1.1883, + "step": 9960 + }, + { + "epoch": 0.01, + "learning_rate": 4.998800213975523e-05, + "loss": 1.0479, + "step": 9961 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987999706430296e-05, + "loss": 1.4511, + "step": 9962 + }, + { + "epoch": 0.01, + "learning_rate": 4.998799727285869e-05, + "loss": 1.6647, + "step": 9963 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987994839040405e-05, + "loss": 1.3232, + "step": 9964 + }, + { + "epoch": 0.01, + "learning_rate": 4.998799240497546e-05, + "loss": 1.1381, + "step": 9965 + }, + { + "epoch": 0.01, + "learning_rate": 4.998798997066384e-05, + "loss": 0.5195, + "step": 9966 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987987536105544e-05, + "loss": 1.1474, + "step": 9967 + }, + { + "epoch": 0.01, + "learning_rate": 4.998798510130058e-05, + "loss": 0.6412, + "step": 9968 + }, + { + "epoch": 0.01, + "learning_rate": 4.998798266624895e-05, + "loss": 0.6699, + "step": 9969 + }, + { + "epoch": 0.01, + "learning_rate": 4.998798023095065e-05, + "loss": 1.0617, + "step": 9970 + }, + { + "epoch": 0.01, + "learning_rate": 4.998797779540567e-05, + "loss": 1.1165, + "step": 9971 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987975359614026e-05, + "loss": 1.2524, + "step": 9972 + }, + { + "epoch": 0.01, + "learning_rate": 4.998797292357571e-05, + "loss": 1.064, + "step": 9973 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987970487290724e-05, + "loss": 1.1155, + "step": 9974 + }, + { + "epoch": 0.01, + "learning_rate": 4.998796805075907e-05, + "loss": 1.4537, + "step": 9975 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987965613980737e-05, + "loss": 1.5731, + "step": 9976 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987963176955735e-05, + "loss": 1.3807, + "step": 9977 + }, + { + "epoch": 0.01, + "learning_rate": 4.998796073968407e-05, + "loss": 1.4099, + "step": 9978 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987958302165724e-05, + "loss": 1.3611, + "step": 9979 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987955864400714e-05, + "loss": 1.2911, + "step": 9980 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987953426389035e-05, + "loss": 1.2902, + "step": 9981 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987950988130686e-05, + "loss": 1.2613, + "step": 9982 + }, + { + "epoch": 0.01, + "learning_rate": 4.998794854962566e-05, + "loss": 1.1833, + "step": 9983 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987946110873965e-05, + "loss": 1.0884, + "step": 9984 + }, + { + "epoch": 0.01, + "learning_rate": 4.99879436718756e-05, + "loss": 1.4898, + "step": 9985 + }, + { + "epoch": 0.01, + "learning_rate": 4.998794123263057e-05, + "loss": 1.5612, + "step": 9986 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987938793138863e-05, + "loss": 1.1664, + "step": 9987 + }, + { + "epoch": 0.01, + "learning_rate": 4.998793635340049e-05, + "loss": 1.3252, + "step": 9988 + }, + { + "epoch": 0.01, + "learning_rate": 4.998793391341544e-05, + "loss": 1.0936, + "step": 9989 + }, + { + "epoch": 0.01, + "learning_rate": 4.998793147318372e-05, + "loss": 1.1245, + "step": 9990 + }, + { + "epoch": 0.01, + "learning_rate": 4.998792903270534e-05, + "loss": 1.123, + "step": 9991 + }, + { + "epoch": 0.01, + "learning_rate": 4.998792659198028e-05, + "loss": 1.253, + "step": 9992 + }, + { + "epoch": 0.01, + "learning_rate": 4.998792415100856e-05, + "loss": 1.044, + "step": 9993 + }, + { + "epoch": 0.01, + "learning_rate": 4.998792170979016e-05, + "loss": 1.0668, + "step": 9994 + }, + { + "epoch": 0.01, + "learning_rate": 4.998791926832509e-05, + "loss": 1.3681, + "step": 9995 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987916826613355e-05, + "loss": 1.0164, + "step": 9996 + }, + { + "epoch": 0.01, + "learning_rate": 4.998791438465494e-05, + "loss": 1.3209, + "step": 9997 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987911942449874e-05, + "loss": 0.9155, + "step": 9998 + }, + { + "epoch": 0.01, + "learning_rate": 4.998790949999812e-05, + "loss": 1.1548, + "step": 9999 + }, + { + "epoch": 0.01, + "learning_rate": 4.998790705729971e-05, + "loss": 1.1474, + "step": 10000 + }, + { + "epoch": 0.01, + "eval_loss": 1.0545191764831543, + "eval_runtime": 85.9243, + "eval_samples_per_second": 16.119, + "eval_steps_per_second": 4.038, + "step": 10000 + }, + { + "epoch": 0.01, + "learning_rate": 4.998790461435462e-05, + "loss": 0.9587, + "step": 10001 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987902171162856e-05, + "loss": 1.1316, + "step": 10002 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987899727724427e-05, + "loss": 1.2127, + "step": 10003 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987897284039334e-05, + "loss": 1.0664, + "step": 10004 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987894840107565e-05, + "loss": 1.0854, + "step": 10005 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987892395929127e-05, + "loss": 1.1176, + "step": 10006 + }, + { + "epoch": 0.01, + "learning_rate": 4.998788995150402e-05, + "loss": 1.1545, + "step": 10007 + }, + { + "epoch": 0.01, + "learning_rate": 4.998788750683224e-05, + "loss": 1.1105, + "step": 10008 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987885061913794e-05, + "loss": 1.8349, + "step": 10009 + }, + { + "epoch": 0.01, + "learning_rate": 4.998788261674867e-05, + "loss": 1.2364, + "step": 10010 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987880171336884e-05, + "loss": 1.3148, + "step": 10011 + }, + { + "epoch": 0.01, + "learning_rate": 4.998787772567843e-05, + "loss": 0.9995, + "step": 10012 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987875279773296e-05, + "loss": 1.2341, + "step": 10013 + }, + { + "epoch": 0.01, + "learning_rate": 4.998787283362151e-05, + "loss": 1.3008, + "step": 10014 + }, + { + "epoch": 0.01, + "learning_rate": 4.998787038722304e-05, + "loss": 0.7189, + "step": 10015 + }, + { + "epoch": 0.01, + "learning_rate": 4.99878679405779e-05, + "loss": 1.0264, + "step": 10016 + }, + { + "epoch": 0.01, + "learning_rate": 4.998786549368609e-05, + "loss": 1.1821, + "step": 10017 + }, + { + "epoch": 0.01, + "learning_rate": 4.998786304654762e-05, + "loss": 0.9963, + "step": 10018 + }, + { + "epoch": 0.01, + "learning_rate": 4.998786059916247e-05, + "loss": 1.0399, + "step": 10019 + }, + { + "epoch": 0.01, + "learning_rate": 4.998785815153066e-05, + "loss": 1.0536, + "step": 10020 + }, + { + "epoch": 0.01, + "learning_rate": 4.998785570365218e-05, + "loss": 1.2832, + "step": 10021 + }, + { + "epoch": 0.01, + "learning_rate": 4.998785325552702e-05, + "loss": 1.0504, + "step": 10022 + }, + { + "epoch": 0.01, + "learning_rate": 4.99878508071552e-05, + "loss": 0.9803, + "step": 10023 + }, + { + "epoch": 0.01, + "learning_rate": 4.99878483585367e-05, + "loss": 0.9373, + "step": 10024 + }, + { + "epoch": 0.01, + "learning_rate": 4.998784590967154e-05, + "loss": 1.2109, + "step": 10025 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987843460559705e-05, + "loss": 1.3152, + "step": 10026 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987841011201206e-05, + "loss": 1.4545, + "step": 10027 + }, + { + "epoch": 0.01, + "learning_rate": 4.998783856159604e-05, + "loss": 1.1669, + "step": 10028 + }, + { + "epoch": 0.01, + "learning_rate": 4.998783611174419e-05, + "loss": 1.2884, + "step": 10029 + }, + { + "epoch": 0.01, + "learning_rate": 4.998783366164568e-05, + "loss": 2.1015, + "step": 10030 + }, + { + "epoch": 0.01, + "learning_rate": 4.99878312113005e-05, + "loss": 1.3097, + "step": 10031 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987828760708654e-05, + "loss": 0.4655, + "step": 10032 + }, + { + "epoch": 0.01, + "learning_rate": 4.998782630987013e-05, + "loss": 0.5985, + "step": 10033 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987823858784946e-05, + "loss": 1.4386, + "step": 10034 + }, + { + "epoch": 0.01, + "learning_rate": 4.998782140745309e-05, + "loss": 1.3263, + "step": 10035 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987818955874565e-05, + "loss": 1.1649, + "step": 10036 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987816504049364e-05, + "loss": 1.2357, + "step": 10037 + }, + { + "epoch": 0.01, + "learning_rate": 4.998781405197751e-05, + "loss": 1.011, + "step": 10038 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987811599658973e-05, + "loss": 1.3485, + "step": 10039 + }, + { + "epoch": 0.01, + "learning_rate": 4.998780914709377e-05, + "loss": 1.03, + "step": 10040 + }, + { + "epoch": 0.01, + "learning_rate": 4.99878066942819e-05, + "loss": 0.361, + "step": 10041 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987804241223356e-05, + "loss": 1.4018, + "step": 10042 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987801787918144e-05, + "loss": 1.2243, + "step": 10043 + }, + { + "epoch": 0.01, + "learning_rate": 4.998779933436627e-05, + "loss": 0.8755, + "step": 10044 + }, + { + "epoch": 0.01, + "learning_rate": 4.998779688056772e-05, + "loss": 1.2429, + "step": 10045 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987794426522505e-05, + "loss": 1.2466, + "step": 10046 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987791972230615e-05, + "loss": 1.1774, + "step": 10047 + }, + { + "epoch": 0.01, + "learning_rate": 4.998778951769206e-05, + "loss": 1.3135, + "step": 10048 + }, + { + "epoch": 0.01, + "learning_rate": 4.998778706290684e-05, + "loss": 1.2678, + "step": 10049 + }, + { + "epoch": 0.01, + "learning_rate": 4.998778460787494e-05, + "loss": 1.1939, + "step": 10050 + }, + { + "epoch": 0.01, + "learning_rate": 4.998778215259639e-05, + "loss": 1.1722, + "step": 10051 + }, + { + "epoch": 0.01, + "learning_rate": 4.998777969707115e-05, + "loss": 1.1416, + "step": 10052 + }, + { + "epoch": 0.01, + "learning_rate": 4.998777724129925e-05, + "loss": 1.092, + "step": 10053 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987774785280686e-05, + "loss": 1.1459, + "step": 10054 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987772329015447e-05, + "loss": 1.047, + "step": 10055 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987769872503544e-05, + "loss": 1.2231, + "step": 10056 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987767415744966e-05, + "loss": 1.2234, + "step": 10057 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987764958739724e-05, + "loss": 0.9417, + "step": 10058 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987762501487813e-05, + "loss": 0.9673, + "step": 10059 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987760043989226e-05, + "loss": 1.2353, + "step": 10060 + }, + { + "epoch": 0.01, + "learning_rate": 4.998775758624398e-05, + "loss": 1.1185, + "step": 10061 + }, + { + "epoch": 0.01, + "learning_rate": 4.998775512825207e-05, + "loss": 1.0758, + "step": 10062 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987752670013474e-05, + "loss": 1.0797, + "step": 10063 + }, + { + "epoch": 0.01, + "learning_rate": 4.998775021152822e-05, + "loss": 1.1433, + "step": 10064 + }, + { + "epoch": 0.01, + "learning_rate": 4.99877477527963e-05, + "loss": 0.9668, + "step": 10065 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987745293817703e-05, + "loss": 0.7362, + "step": 10066 + }, + { + "epoch": 0.01, + "learning_rate": 4.998774283459244e-05, + "loss": 0.916, + "step": 10067 + }, + { + "epoch": 0.01, + "learning_rate": 4.998774037512052e-05, + "loss": 1.0283, + "step": 10068 + }, + { + "epoch": 0.01, + "learning_rate": 4.998773791540192e-05, + "loss": 1.2069, + "step": 10069 + }, + { + "epoch": 0.01, + "learning_rate": 4.998773545543666e-05, + "loss": 0.7493, + "step": 10070 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987732995224726e-05, + "loss": 0.9055, + "step": 10071 + }, + { + "epoch": 0.01, + "learning_rate": 4.998773053476612e-05, + "loss": 0.9794, + "step": 10072 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987728074060847e-05, + "loss": 1.4686, + "step": 10073 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987725613108906e-05, + "loss": 1.1496, + "step": 10074 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987723151910296e-05, + "loss": 1.0109, + "step": 10075 + }, + { + "epoch": 0.01, + "learning_rate": 4.998772069046502e-05, + "loss": 0.953, + "step": 10076 + }, + { + "epoch": 0.01, + "learning_rate": 4.998771822877308e-05, + "loss": 1.1296, + "step": 10077 + }, + { + "epoch": 0.01, + "learning_rate": 4.998771576683446e-05, + "loss": 0.6741, + "step": 10078 + }, + { + "epoch": 0.01, + "learning_rate": 4.998771330464919e-05, + "loss": 0.9119, + "step": 10079 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987710842217236e-05, + "loss": 0.5591, + "step": 10080 + }, + { + "epoch": 0.01, + "learning_rate": 4.998770837953862e-05, + "loss": 0.4912, + "step": 10081 + }, + { + "epoch": 0.01, + "learning_rate": 4.998770591661334e-05, + "loss": 0.3447, + "step": 10082 + }, + { + "epoch": 0.01, + "learning_rate": 4.998770345344138e-05, + "loss": 0.2822, + "step": 10083 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987700990022764e-05, + "loss": 0.1761, + "step": 10084 + }, + { + "epoch": 0.01, + "learning_rate": 4.998769852635747e-05, + "loss": 0.1722, + "step": 10085 + }, + { + "epoch": 0.01, + "learning_rate": 4.998769606244552e-05, + "loss": 0.4746, + "step": 10086 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987693598286886e-05, + "loss": 1.0607, + "step": 10087 + }, + { + "epoch": 0.01, + "learning_rate": 4.99876911338816e-05, + "loss": 1.2912, + "step": 10088 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987688669229635e-05, + "loss": 1.1969, + "step": 10089 + }, + { + "epoch": 0.01, + "learning_rate": 4.998768620433101e-05, + "loss": 1.169, + "step": 10090 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987683739185706e-05, + "loss": 1.293, + "step": 10091 + }, + { + "epoch": 0.01, + "learning_rate": 4.998768127379374e-05, + "loss": 1.08, + "step": 10092 + }, + { + "epoch": 0.01, + "learning_rate": 4.998767880815511e-05, + "loss": 0.8878, + "step": 10093 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987676342269816e-05, + "loss": 1.5998, + "step": 10094 + }, + { + "epoch": 0.01, + "learning_rate": 4.998767387613784e-05, + "loss": 1.1856, + "step": 10095 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987671409759205e-05, + "loss": 1.3133, + "step": 10096 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987668943133905e-05, + "loss": 1.43, + "step": 10097 + }, + { + "epoch": 0.01, + "learning_rate": 4.998766647626193e-05, + "loss": 0.6464, + "step": 10098 + }, + { + "epoch": 0.01, + "learning_rate": 4.998766400914329e-05, + "loss": 1.3154, + "step": 10099 + }, + { + "epoch": 0.01, + "learning_rate": 4.998766154177798e-05, + "loss": 1.3452, + "step": 10100 + }, + { + "epoch": 0.01, + "learning_rate": 4.998765907416601e-05, + "loss": 0.9236, + "step": 10101 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987656606307365e-05, + "loss": 0.8636, + "step": 10102 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987654138202055e-05, + "loss": 1.3781, + "step": 10103 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987651669850075e-05, + "loss": 1.2948, + "step": 10104 + }, + { + "epoch": 0.01, + "learning_rate": 4.998764920125143e-05, + "loss": 1.1604, + "step": 10105 + }, + { + "epoch": 0.01, + "learning_rate": 4.998764673240612e-05, + "loss": 1.1219, + "step": 10106 + }, + { + "epoch": 0.01, + "learning_rate": 4.998764426331414e-05, + "loss": 1.3069, + "step": 10107 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987641793975496e-05, + "loss": 1.5225, + "step": 10108 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987639324390175e-05, + "loss": 1.0722, + "step": 10109 + }, + { + "epoch": 0.01, + "learning_rate": 4.998763685455819e-05, + "loss": 0.8805, + "step": 10110 + }, + { + "epoch": 0.01, + "learning_rate": 4.998763438447954e-05, + "loss": 1.0155, + "step": 10111 + }, + { + "epoch": 0.01, + "learning_rate": 4.998763191415423e-05, + "loss": 1.0353, + "step": 10112 + }, + { + "epoch": 0.01, + "learning_rate": 4.998762944358224e-05, + "loss": 1.3936, + "step": 10113 + }, + { + "epoch": 0.01, + "learning_rate": 4.998762697276359e-05, + "loss": 1.2141, + "step": 10114 + }, + { + "epoch": 0.01, + "learning_rate": 4.998762450169827e-05, + "loss": 1.3697, + "step": 10115 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987622030386286e-05, + "loss": 1.1035, + "step": 10116 + }, + { + "epoch": 0.01, + "learning_rate": 4.998761955882763e-05, + "loss": 1.2115, + "step": 10117 + }, + { + "epoch": 0.01, + "learning_rate": 4.998761708702231e-05, + "loss": 0.8148, + "step": 10118 + }, + { + "epoch": 0.01, + "learning_rate": 4.998761461497032e-05, + "loss": 0.94, + "step": 10119 + }, + { + "epoch": 0.01, + "learning_rate": 4.998761214267166e-05, + "loss": 1.0566, + "step": 10120 + }, + { + "epoch": 0.01, + "learning_rate": 4.998760967012634e-05, + "loss": 1.2239, + "step": 10121 + }, + { + "epoch": 0.01, + "learning_rate": 4.998760719733435e-05, + "loss": 0.9975, + "step": 10122 + }, + { + "epoch": 0.01, + "learning_rate": 4.99876047242957e-05, + "loss": 1.152, + "step": 10123 + }, + { + "epoch": 0.01, + "learning_rate": 4.998760225101037e-05, + "loss": 1.0156, + "step": 10124 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987599777478376e-05, + "loss": 1.3767, + "step": 10125 + }, + { + "epoch": 0.01, + "learning_rate": 4.998759730369972e-05, + "loss": 0.971, + "step": 10126 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987594829674395e-05, + "loss": 1.2178, + "step": 10127 + }, + { + "epoch": 0.01, + "learning_rate": 4.99875923554024e-05, + "loss": 1.136, + "step": 10128 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987589880883744e-05, + "loss": 1.0516, + "step": 10129 + }, + { + "epoch": 0.01, + "learning_rate": 4.998758740611842e-05, + "loss": 1.1713, + "step": 10130 + }, + { + "epoch": 0.01, + "learning_rate": 4.998758493110642e-05, + "loss": 1.0194, + "step": 10131 + }, + { + "epoch": 0.01, + "learning_rate": 4.998758245584776e-05, + "loss": 1.0846, + "step": 10132 + }, + { + "epoch": 0.01, + "learning_rate": 4.998757998034243e-05, + "loss": 0.9837, + "step": 10133 + }, + { + "epoch": 0.01, + "learning_rate": 4.998757750459044e-05, + "loss": 0.9976, + "step": 10134 + }, + { + "epoch": 0.01, + "learning_rate": 4.998757502859178e-05, + "loss": 1.0837, + "step": 10135 + }, + { + "epoch": 0.01, + "learning_rate": 4.998757255234645e-05, + "loss": 1.0913, + "step": 10136 + }, + { + "epoch": 0.01, + "learning_rate": 4.998757007585446e-05, + "loss": 0.5574, + "step": 10137 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987567599115794e-05, + "loss": 0.5928, + "step": 10138 + }, + { + "epoch": 0.01, + "learning_rate": 4.998756512213046e-05, + "loss": 0.7991, + "step": 10139 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987562644898473e-05, + "loss": 1.1505, + "step": 10140 + }, + { + "epoch": 0.01, + "learning_rate": 4.998756016741981e-05, + "loss": 1.046, + "step": 10141 + }, + { + "epoch": 0.01, + "learning_rate": 4.998755768969449e-05, + "loss": 1.133, + "step": 10142 + }, + { + "epoch": 0.01, + "learning_rate": 4.998755521172249e-05, + "loss": 1.1574, + "step": 10143 + }, + { + "epoch": 0.01, + "learning_rate": 4.998755273350383e-05, + "loss": 1.1993, + "step": 10144 + }, + { + "epoch": 0.01, + "learning_rate": 4.99875502550385e-05, + "loss": 1.1066, + "step": 10145 + }, + { + "epoch": 0.01, + "learning_rate": 4.99875477763265e-05, + "loss": 1.1578, + "step": 10146 + }, + { + "epoch": 0.01, + "learning_rate": 4.998754529736784e-05, + "loss": 0.9662, + "step": 10147 + }, + { + "epoch": 0.01, + "learning_rate": 4.998754281816251e-05, + "loss": 1.1373, + "step": 10148 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987540338710523e-05, + "loss": 1.1941, + "step": 10149 + }, + { + "epoch": 0.01, + "learning_rate": 4.998753785901186e-05, + "loss": 0.9031, + "step": 10150 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987535379066534e-05, + "loss": 0.9422, + "step": 10151 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987532898874545e-05, + "loss": 1.0914, + "step": 10152 + }, + { + "epoch": 0.01, + "learning_rate": 4.998753041843588e-05, + "loss": 1.0855, + "step": 10153 + }, + { + "epoch": 0.01, + "learning_rate": 4.998752793775056e-05, + "loss": 1.1082, + "step": 10154 + }, + { + "epoch": 0.01, + "learning_rate": 4.998752545681856e-05, + "loss": 1.0152, + "step": 10155 + }, + { + "epoch": 0.01, + "learning_rate": 4.998752297563991e-05, + "loss": 1.1159, + "step": 10156 + }, + { + "epoch": 0.01, + "learning_rate": 4.998752049421458e-05, + "loss": 1.4043, + "step": 10157 + }, + { + "epoch": 0.01, + "learning_rate": 4.998751801254259e-05, + "loss": 1.177, + "step": 10158 + }, + { + "epoch": 0.01, + "learning_rate": 4.998751553062393e-05, + "loss": 1.1901, + "step": 10159 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987513048458603e-05, + "loss": 0.8716, + "step": 10160 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987510566046616e-05, + "loss": 0.9739, + "step": 10161 + }, + { + "epoch": 0.01, + "learning_rate": 4.998750808338796e-05, + "loss": 0.9237, + "step": 10162 + }, + { + "epoch": 0.01, + "learning_rate": 4.998750560048264e-05, + "loss": 0.8929, + "step": 10163 + }, + { + "epoch": 0.01, + "learning_rate": 4.998750311733065e-05, + "loss": 0.9751, + "step": 10164 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987500633931996e-05, + "loss": 1.2216, + "step": 10165 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987498150286674e-05, + "loss": 1.19, + "step": 10166 + }, + { + "epoch": 0.01, + "learning_rate": 4.998749566639469e-05, + "loss": 1.2974, + "step": 10167 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987493182256035e-05, + "loss": 1.2664, + "step": 10168 + }, + { + "epoch": 0.01, + "learning_rate": 4.998749069787072e-05, + "loss": 0.8888, + "step": 10169 + }, + { + "epoch": 0.01, + "learning_rate": 4.998748821323873e-05, + "loss": 0.9302, + "step": 10170 + }, + { + "epoch": 0.01, + "learning_rate": 4.998748572836008e-05, + "loss": 1.3394, + "step": 10171 + }, + { + "epoch": 0.01, + "learning_rate": 4.998748324323477e-05, + "loss": 0.7946, + "step": 10172 + }, + { + "epoch": 0.01, + "learning_rate": 4.998748075786278e-05, + "loss": 0.5738, + "step": 10173 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987478272244136e-05, + "loss": 0.8554, + "step": 10174 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987475786378816e-05, + "loss": 1.3842, + "step": 10175 + }, + { + "epoch": 0.01, + "learning_rate": 4.998747330026684e-05, + "loss": 1.4482, + "step": 10176 + }, + { + "epoch": 0.01, + "learning_rate": 4.998747081390819e-05, + "loss": 1.4754, + "step": 10177 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987468327302876e-05, + "loss": 1.29, + "step": 10178 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987465840450905e-05, + "loss": 1.1728, + "step": 10179 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987463353352256e-05, + "loss": 1.3689, + "step": 10180 + }, + { + "epoch": 0.01, + "learning_rate": 4.998746086600695e-05, + "loss": 1.229, + "step": 10181 + }, + { + "epoch": 0.01, + "learning_rate": 4.998745837841498e-05, + "loss": 1.9158, + "step": 10182 + }, + { + "epoch": 0.01, + "learning_rate": 4.998745589057634e-05, + "loss": 1.06, + "step": 10183 + }, + { + "epoch": 0.01, + "learning_rate": 4.998745340249103e-05, + "loss": 1.0289, + "step": 10184 + }, + { + "epoch": 0.01, + "learning_rate": 4.998745091415906e-05, + "loss": 1.1272, + "step": 10185 + }, + { + "epoch": 0.01, + "learning_rate": 4.998744842558042e-05, + "loss": 1.09, + "step": 10186 + }, + { + "epoch": 0.01, + "learning_rate": 4.998744593675512e-05, + "loss": 1.127, + "step": 10187 + }, + { + "epoch": 0.01, + "learning_rate": 4.998744344768315e-05, + "loss": 1.1904, + "step": 10188 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987440958364516e-05, + "loss": 0.9387, + "step": 10189 + }, + { + "epoch": 0.01, + "learning_rate": 4.998743846879922e-05, + "loss": 0.9157, + "step": 10190 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987435978987254e-05, + "loss": 1.0315, + "step": 10191 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987433488928626e-05, + "loss": 1.5297, + "step": 10192 + }, + { + "epoch": 0.01, + "learning_rate": 4.998743099862333e-05, + "loss": 1.3291, + "step": 10193 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987428508071367e-05, + "loss": 1.2469, + "step": 10194 + }, + { + "epoch": 0.01, + "learning_rate": 4.998742601727274e-05, + "loss": 0.5735, + "step": 10195 + }, + { + "epoch": 0.01, + "learning_rate": 4.998742352622745e-05, + "loss": 1.1063, + "step": 10196 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987421034935494e-05, + "loss": 1.3326, + "step": 10197 + }, + { + "epoch": 0.01, + "learning_rate": 4.998741854339687e-05, + "loss": 1.0274, + "step": 10198 + }, + { + "epoch": 0.01, + "learning_rate": 4.998741605161158e-05, + "loss": 1.5276, + "step": 10199 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987413559579636e-05, + "loss": 1.4601, + "step": 10200 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987411067301015e-05, + "loss": 0.9156, + "step": 10201 + }, + { + "epoch": 0.01, + "learning_rate": 4.998740857477573e-05, + "loss": 1.0569, + "step": 10202 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987406082003785e-05, + "loss": 0.9294, + "step": 10203 + }, + { + "epoch": 0.01, + "learning_rate": 4.998740358898517e-05, + "loss": 1.1716, + "step": 10204 + }, + { + "epoch": 0.01, + "learning_rate": 4.998740109571989e-05, + "loss": 0.6784, + "step": 10205 + }, + { + "epoch": 0.01, + "learning_rate": 4.998739860220795e-05, + "loss": 0.723, + "step": 10206 + }, + { + "epoch": 0.01, + "learning_rate": 4.998739610844934e-05, + "loss": 0.7166, + "step": 10207 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987393614444066e-05, + "loss": 0.6935, + "step": 10208 + }, + { + "epoch": 0.01, + "learning_rate": 4.998739112019213e-05, + "loss": 1.0554, + "step": 10209 + }, + { + "epoch": 0.01, + "learning_rate": 4.998738862569352e-05, + "loss": 1.0829, + "step": 10210 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987386130948255e-05, + "loss": 0.9208, + "step": 10211 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987383635956317e-05, + "loss": 1.1552, + "step": 10212 + }, + { + "epoch": 0.01, + "learning_rate": 4.998738114071772e-05, + "loss": 1.3641, + "step": 10213 + }, + { + "epoch": 0.01, + "learning_rate": 4.998737864523246e-05, + "loss": 0.9493, + "step": 10214 + }, + { + "epoch": 0.01, + "learning_rate": 4.998737614950053e-05, + "loss": 1.0372, + "step": 10215 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987373653521936e-05, + "loss": 1.0349, + "step": 10216 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987371157296684e-05, + "loss": 1.3293, + "step": 10217 + }, + { + "epoch": 0.01, + "learning_rate": 4.998736866082475e-05, + "loss": 1.1373, + "step": 10218 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987366164106164e-05, + "loss": 1.2073, + "step": 10219 + }, + { + "epoch": 0.01, + "learning_rate": 4.998736366714091e-05, + "loss": 1.0581, + "step": 10220 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987361169929e-05, + "loss": 1.7155, + "step": 10221 + }, + { + "epoch": 0.01, + "learning_rate": 4.998735867247042e-05, + "loss": 1.326, + "step": 10222 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987356174765165e-05, + "loss": 1.0172, + "step": 10223 + }, + { + "epoch": 0.01, + "learning_rate": 4.998735367681326e-05, + "loss": 1.2729, + "step": 10224 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987351178614686e-05, + "loss": 0.8779, + "step": 10225 + }, + { + "epoch": 0.01, + "learning_rate": 4.998734868016944e-05, + "loss": 1.1188, + "step": 10226 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987346181477535e-05, + "loss": 1.2608, + "step": 10227 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987343682538965e-05, + "loss": 1.058, + "step": 10228 + }, + { + "epoch": 0.01, + "learning_rate": 4.998734118335373e-05, + "loss": 1.0546, + "step": 10229 + }, + { + "epoch": 0.01, + "learning_rate": 4.998733868392184e-05, + "loss": 1.0555, + "step": 10230 + }, + { + "epoch": 0.01, + "learning_rate": 4.998733618424327e-05, + "loss": 1.0756, + "step": 10231 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987333684318045e-05, + "loss": 2.1973, + "step": 10232 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987331184146155e-05, + "loss": 1.1023, + "step": 10233 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987328683727595e-05, + "loss": 1.2884, + "step": 10234 + }, + { + "epoch": 0.01, + "learning_rate": 4.998732618306238e-05, + "loss": 0.9701, + "step": 10235 + }, + { + "epoch": 0.01, + "learning_rate": 4.998732368215049e-05, + "loss": 1.4618, + "step": 10236 + }, + { + "epoch": 0.01, + "learning_rate": 4.998732118099194e-05, + "loss": 1.9832, + "step": 10237 + }, + { + "epoch": 0.01, + "learning_rate": 4.998731867958673e-05, + "loss": 1.0656, + "step": 10238 + }, + { + "epoch": 0.01, + "learning_rate": 4.998731617793485e-05, + "loss": 0.9133, + "step": 10239 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987313676036305e-05, + "loss": 0.7601, + "step": 10240 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987311173891105e-05, + "loss": 0.8458, + "step": 10241 + }, + { + "epoch": 0.01, + "learning_rate": 4.998730867149924e-05, + "loss": 1.1284, + "step": 10242 + }, + { + "epoch": 0.01, + "learning_rate": 4.998730616886069e-05, + "loss": 0.9589, + "step": 10243 + }, + { + "epoch": 0.01, + "learning_rate": 4.998730366597549e-05, + "loss": 0.9388, + "step": 10244 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987301162843634e-05, + "loss": 0.2874, + "step": 10245 + }, + { + "epoch": 0.01, + "learning_rate": 4.998729865946511e-05, + "loss": 0.2173, + "step": 10246 + }, + { + "epoch": 0.01, + "learning_rate": 4.998729615583991e-05, + "loss": 0.2438, + "step": 10247 + }, + { + "epoch": 0.01, + "learning_rate": 4.998729365196806e-05, + "loss": 0.173, + "step": 10248 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987291147849544e-05, + "loss": 0.2369, + "step": 10249 + }, + { + "epoch": 0.01, + "learning_rate": 4.998728864348435e-05, + "loss": 0.2054, + "step": 10250 + }, + { + "epoch": 0.01, + "learning_rate": 4.998728613887251e-05, + "loss": 0.205, + "step": 10251 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987283634013995e-05, + "loss": 0.5671, + "step": 10252 + }, + { + "epoch": 0.01, + "learning_rate": 4.998728112890882e-05, + "loss": 0.6497, + "step": 10253 + }, + { + "epoch": 0.01, + "learning_rate": 4.998727862355698e-05, + "loss": 0.658, + "step": 10254 + }, + { + "epoch": 0.01, + "learning_rate": 4.998727611795848e-05, + "loss": 0.645, + "step": 10255 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987273612113314e-05, + "loss": 0.2322, + "step": 10256 + }, + { + "epoch": 0.01, + "learning_rate": 4.998727110602148e-05, + "loss": 0.1367, + "step": 10257 + }, + { + "epoch": 0.01, + "learning_rate": 4.998726859968299e-05, + "loss": 0.352, + "step": 10258 + }, + { + "epoch": 0.01, + "learning_rate": 4.998726609309783e-05, + "loss": 1.0021, + "step": 10259 + }, + { + "epoch": 0.01, + "learning_rate": 4.998726358626601e-05, + "loss": 1.5276, + "step": 10260 + }, + { + "epoch": 0.01, + "learning_rate": 4.998726107918752e-05, + "loss": 1.2101, + "step": 10261 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987258571862375e-05, + "loss": 1.2582, + "step": 10262 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987256064290554e-05, + "loss": 1.0663, + "step": 10263 + }, + { + "epoch": 0.01, + "learning_rate": 4.998725355647208e-05, + "loss": 1.2152, + "step": 10264 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987251048406945e-05, + "loss": 1.0573, + "step": 10265 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987248540095135e-05, + "loss": 1.2546, + "step": 10266 + }, + { + "epoch": 0.01, + "learning_rate": 4.998724603153667e-05, + "loss": 1.1061, + "step": 10267 + }, + { + "epoch": 0.01, + "learning_rate": 4.998724352273154e-05, + "loss": 1.0008, + "step": 10268 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987241013679745e-05, + "loss": 1.1719, + "step": 10269 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987238504381285e-05, + "loss": 0.9274, + "step": 10270 + }, + { + "epoch": 0.01, + "learning_rate": 4.998723599483616e-05, + "loss": 1.1606, + "step": 10271 + }, + { + "epoch": 0.01, + "learning_rate": 4.998723348504438e-05, + "loss": 1.1728, + "step": 10272 + }, + { + "epoch": 0.01, + "learning_rate": 4.998723097500593e-05, + "loss": 1.9997, + "step": 10273 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987228464720816e-05, + "loss": 1.7191, + "step": 10274 + }, + { + "epoch": 0.01, + "learning_rate": 4.998722595418904e-05, + "loss": 1.1818, + "step": 10275 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987223443410605e-05, + "loss": 1.0861, + "step": 10276 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987220932385506e-05, + "loss": 0.8447, + "step": 10277 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987218421113737e-05, + "loss": 0.4378, + "step": 10278 + }, + { + "epoch": 0.01, + "learning_rate": 4.998721590959531e-05, + "loss": 0.3998, + "step": 10279 + }, + { + "epoch": 0.01, + "learning_rate": 4.998721339783021e-05, + "loss": 0.6704, + "step": 10280 + }, + { + "epoch": 0.01, + "learning_rate": 4.998721088581846e-05, + "loss": 1.2137, + "step": 10281 + }, + { + "epoch": 0.01, + "learning_rate": 4.998720837356004e-05, + "loss": 0.7144, + "step": 10282 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987205861054956e-05, + "loss": 1.0599, + "step": 10283 + }, + { + "epoch": 0.01, + "learning_rate": 4.998720334830321e-05, + "loss": 0.772, + "step": 10284 + }, + { + "epoch": 0.01, + "learning_rate": 4.99872008353048e-05, + "loss": 0.9947, + "step": 10285 + }, + { + "epoch": 0.01, + "learning_rate": 4.998719832205974e-05, + "loss": 1.2363, + "step": 10286 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987195808567996e-05, + "loss": 0.9215, + "step": 10287 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987193294829606e-05, + "loss": 1.3128, + "step": 10288 + }, + { + "epoch": 0.01, + "learning_rate": 4.998719078084454e-05, + "loss": 1.0097, + "step": 10289 + }, + { + "epoch": 0.01, + "learning_rate": 4.998718826661282e-05, + "loss": 1.1504, + "step": 10290 + }, + { + "epoch": 0.01, + "learning_rate": 4.998718575213443e-05, + "loss": 1.2552, + "step": 10291 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987183237409384e-05, + "loss": 1.0095, + "step": 10292 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987180722437674e-05, + "loss": 1.0155, + "step": 10293 + }, + { + "epoch": 0.01, + "learning_rate": 4.99871782072193e-05, + "loss": 1.1331, + "step": 10294 + }, + { + "epoch": 0.01, + "learning_rate": 4.998717569175426e-05, + "loss": 1.3524, + "step": 10295 + }, + { + "epoch": 0.01, + "learning_rate": 4.998717317604256e-05, + "loss": 1.3273, + "step": 10296 + }, + { + "epoch": 0.01, + "learning_rate": 4.998717066008419e-05, + "loss": 1.216, + "step": 10297 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987168143879165e-05, + "loss": 1.551, + "step": 10298 + }, + { + "epoch": 0.01, + "learning_rate": 4.998716562742748e-05, + "loss": 0.6469, + "step": 10299 + }, + { + "epoch": 0.01, + "learning_rate": 4.998716311072912e-05, + "loss": 0.8778, + "step": 10300 + }, + { + "epoch": 0.01, + "learning_rate": 4.998716059378411e-05, + "loss": 1.2854, + "step": 10301 + }, + { + "epoch": 0.01, + "learning_rate": 4.998715807659243e-05, + "loss": 1.2548, + "step": 10302 + }, + { + "epoch": 0.01, + "learning_rate": 4.998715555915409e-05, + "loss": 0.8318, + "step": 10303 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987153041469086e-05, + "loss": 0.7398, + "step": 10304 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987150523537415e-05, + "loss": 1.1692, + "step": 10305 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987148005359095e-05, + "loss": 0.6863, + "step": 10306 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987145486934106e-05, + "loss": 1.3325, + "step": 10307 + }, + { + "epoch": 0.01, + "learning_rate": 4.998714296826245e-05, + "loss": 1.3931, + "step": 10308 + }, + { + "epoch": 0.01, + "learning_rate": 4.998714044934413e-05, + "loss": 1.1937, + "step": 10309 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987137930179154e-05, + "loss": 1.1837, + "step": 10310 + }, + { + "epoch": 0.01, + "learning_rate": 4.998713541076751e-05, + "loss": 1.5333, + "step": 10311 + }, + { + "epoch": 0.01, + "learning_rate": 4.998713289110921e-05, + "loss": 0.6901, + "step": 10312 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987130371204244e-05, + "loss": 1.7999, + "step": 10313 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987127851052615e-05, + "loss": 1.1222, + "step": 10314 + }, + { + "epoch": 0.01, + "learning_rate": 4.998712533065433e-05, + "loss": 1.2129, + "step": 10315 + }, + { + "epoch": 0.01, + "learning_rate": 4.998712281000937e-05, + "loss": 1.3231, + "step": 10316 + }, + { + "epoch": 0.01, + "learning_rate": 4.998712028911776e-05, + "loss": 1.2286, + "step": 10317 + }, + { + "epoch": 0.01, + "learning_rate": 4.998711776797947e-05, + "loss": 1.1108, + "step": 10318 + }, + { + "epoch": 0.01, + "learning_rate": 4.998711524659454e-05, + "loss": 1.0662, + "step": 10319 + }, + { + "epoch": 0.01, + "learning_rate": 4.998711272496294e-05, + "loss": 0.9464, + "step": 10320 + }, + { + "epoch": 0.01, + "learning_rate": 4.998711020308467e-05, + "loss": 0.9488, + "step": 10321 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987107680959745e-05, + "loss": 1.0501, + "step": 10322 + }, + { + "epoch": 0.01, + "learning_rate": 4.998710515858816e-05, + "loss": 1.2066, + "step": 10323 + }, + { + "epoch": 0.01, + "learning_rate": 4.998710263596991e-05, + "loss": 1.1524, + "step": 10324 + }, + { + "epoch": 0.01, + "learning_rate": 4.998710011310499e-05, + "loss": 1.0147, + "step": 10325 + }, + { + "epoch": 0.01, + "learning_rate": 4.998709758999341e-05, + "loss": 1.0825, + "step": 10326 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987095066635175e-05, + "loss": 1.1649, + "step": 10327 + }, + { + "epoch": 0.01, + "learning_rate": 4.998709254303028e-05, + "loss": 1.1194, + "step": 10328 + }, + { + "epoch": 0.01, + "learning_rate": 4.998709001917872e-05, + "loss": 1.1093, + "step": 10329 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987087495080484e-05, + "loss": 1.2593, + "step": 10330 + }, + { + "epoch": 0.01, + "learning_rate": 4.998708497073561e-05, + "loss": 1.1496, + "step": 10331 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987082446144056e-05, + "loss": 1.1416, + "step": 10332 + }, + { + "epoch": 0.01, + "learning_rate": 4.998707992130585e-05, + "loss": 1.196, + "step": 10333 + }, + { + "epoch": 0.01, + "learning_rate": 4.998707739622098e-05, + "loss": 0.9892, + "step": 10334 + }, + { + "epoch": 0.01, + "learning_rate": 4.998707487088944e-05, + "loss": 1.2026, + "step": 10335 + }, + { + "epoch": 0.01, + "learning_rate": 4.998707234531125e-05, + "loss": 0.8014, + "step": 10336 + }, + { + "epoch": 0.01, + "learning_rate": 4.998706981948639e-05, + "loss": 2.2308, + "step": 10337 + }, + { + "epoch": 0.01, + "learning_rate": 4.998706729341487e-05, + "loss": 1.0938, + "step": 10338 + }, + { + "epoch": 0.01, + "learning_rate": 4.998706476709669e-05, + "loss": 0.7971, + "step": 10339 + }, + { + "epoch": 0.01, + "learning_rate": 4.998706224053185e-05, + "loss": 1.2027, + "step": 10340 + }, + { + "epoch": 0.01, + "learning_rate": 4.998705971372034e-05, + "loss": 1.139, + "step": 10341 + }, + { + "epoch": 0.01, + "learning_rate": 4.998705718666218e-05, + "loss": 0.6991, + "step": 10342 + }, + { + "epoch": 0.01, + "learning_rate": 4.998705465935734e-05, + "loss": 1.3724, + "step": 10343 + }, + { + "epoch": 0.01, + "learning_rate": 4.998705213180586e-05, + "loss": 1.1193, + "step": 10344 + }, + { + "epoch": 0.01, + "learning_rate": 4.998704960400771e-05, + "loss": 1.005, + "step": 10345 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987047075962893e-05, + "loss": 1.098, + "step": 10346 + }, + { + "epoch": 0.01, + "learning_rate": 4.998704454767143e-05, + "loss": 1.116, + "step": 10347 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987042019133284e-05, + "loss": 1.0914, + "step": 10348 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987039490348485e-05, + "loss": 1.8293, + "step": 10349 + }, + { + "epoch": 0.01, + "learning_rate": 4.998703696131703e-05, + "loss": 1.2251, + "step": 10350 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987034432038906e-05, + "loss": 1.2625, + "step": 10351 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987031902514126e-05, + "loss": 1.3011, + "step": 10352 + }, + { + "epoch": 0.01, + "learning_rate": 4.998702937274268e-05, + "loss": 0.5868, + "step": 10353 + }, + { + "epoch": 0.01, + "learning_rate": 4.998702684272458e-05, + "loss": 0.522, + "step": 10354 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987024312459815e-05, + "loss": 0.915, + "step": 10355 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987021781948384e-05, + "loss": 1.1415, + "step": 10356 + }, + { + "epoch": 0.01, + "learning_rate": 4.998701925119029e-05, + "loss": 1.3289, + "step": 10357 + }, + { + "epoch": 0.01, + "learning_rate": 4.998701672018554e-05, + "loss": 1.1288, + "step": 10358 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987014188934134e-05, + "loss": 1.0588, + "step": 10359 + }, + { + "epoch": 0.01, + "learning_rate": 4.998701165743606e-05, + "loss": 1.0426, + "step": 10360 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987009125691326e-05, + "loss": 1.0733, + "step": 10361 + }, + { + "epoch": 0.01, + "learning_rate": 4.998700659369993e-05, + "loss": 0.9528, + "step": 10362 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987004061461875e-05, + "loss": 1.1307, + "step": 10363 + }, + { + "epoch": 0.01, + "learning_rate": 4.9987001528977155e-05, + "loss": 1.1637, + "step": 10364 + }, + { + "epoch": 0.01, + "learning_rate": 4.998699899624578e-05, + "loss": 1.2464, + "step": 10365 + }, + { + "epoch": 0.01, + "learning_rate": 4.998699646326774e-05, + "loss": 0.776, + "step": 10366 + }, + { + "epoch": 0.01, + "learning_rate": 4.998699393004304e-05, + "loss": 0.906, + "step": 10367 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986991396571674e-05, + "loss": 0.9508, + "step": 10368 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986988862853654e-05, + "loss": 1.0688, + "step": 10369 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986986328888964e-05, + "loss": 1.1141, + "step": 10370 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986983794677625e-05, + "loss": 0.9474, + "step": 10371 + }, + { + "epoch": 0.01, + "learning_rate": 4.998698126021961e-05, + "loss": 0.6913, + "step": 10372 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986978725514945e-05, + "loss": 0.4359, + "step": 10373 + }, + { + "epoch": 0.01, + "learning_rate": 4.998697619056362e-05, + "loss": 1.1969, + "step": 10374 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986973655365634e-05, + "loss": 1.2375, + "step": 10375 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986971119920975e-05, + "loss": 1.0497, + "step": 10376 + }, + { + "epoch": 0.01, + "learning_rate": 4.998696858422967e-05, + "loss": 0.9796, + "step": 10377 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986966048291694e-05, + "loss": 1.0029, + "step": 10378 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986963512107067e-05, + "loss": 1.1816, + "step": 10379 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986960975675776e-05, + "loss": 1.1951, + "step": 10380 + }, + { + "epoch": 0.01, + "learning_rate": 4.998695843899782e-05, + "loss": 1.143, + "step": 10381 + }, + { + "epoch": 0.01, + "learning_rate": 4.998695590207321e-05, + "loss": 1.0644, + "step": 10382 + }, + { + "epoch": 0.01, + "learning_rate": 4.998695336490193e-05, + "loss": 1.1941, + "step": 10383 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986950827483994e-05, + "loss": 1.1132, + "step": 10384 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986948289819396e-05, + "loss": 0.549, + "step": 10385 + }, + { + "epoch": 0.01, + "learning_rate": 4.998694575190814e-05, + "loss": 1.0158, + "step": 10386 + }, + { + "epoch": 0.01, + "learning_rate": 4.998694321375022e-05, + "loss": 1.3903, + "step": 10387 + }, + { + "epoch": 0.01, + "learning_rate": 4.998694067534564e-05, + "loss": 1.4583, + "step": 10388 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986938136694405e-05, + "loss": 1.1652, + "step": 10389 + }, + { + "epoch": 0.01, + "learning_rate": 4.99869355977965e-05, + "loss": 1.2458, + "step": 10390 + }, + { + "epoch": 0.01, + "learning_rate": 4.998693305865194e-05, + "loss": 1.2432, + "step": 10391 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986930519260724e-05, + "loss": 1.3848, + "step": 10392 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986927979622844e-05, + "loss": 1.0191, + "step": 10393 + }, + { + "epoch": 0.01, + "learning_rate": 4.99869254397383e-05, + "loss": 0.9536, + "step": 10394 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986922899607104e-05, + "loss": 1.1373, + "step": 10395 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986920359229237e-05, + "loss": 1.2518, + "step": 10396 + }, + { + "epoch": 0.01, + "learning_rate": 4.998691781860472e-05, + "loss": 1.3977, + "step": 10397 + }, + { + "epoch": 0.01, + "learning_rate": 4.998691527773353e-05, + "loss": 1.0759, + "step": 10398 + }, + { + "epoch": 0.01, + "learning_rate": 4.998691273661569e-05, + "loss": 1.6515, + "step": 10399 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986910195251186e-05, + "loss": 1.5402, + "step": 10400 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986907653640025e-05, + "loss": 1.5662, + "step": 10401 + }, + { + "epoch": 0.01, + "learning_rate": 4.99869051117822e-05, + "loss": 1.4326, + "step": 10402 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986902569677714e-05, + "loss": 1.3334, + "step": 10403 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986900027326564e-05, + "loss": 1.2097, + "step": 10404 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986897484728766e-05, + "loss": 1.2671, + "step": 10405 + }, + { + "epoch": 0.01, + "learning_rate": 4.99868949418843e-05, + "loss": 1.1214, + "step": 10406 + }, + { + "epoch": 0.01, + "learning_rate": 4.998689239879317e-05, + "loss": 1.349, + "step": 10407 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986889855455386e-05, + "loss": 1.3452, + "step": 10408 + }, + { + "epoch": 0.01, + "learning_rate": 4.998688731187094e-05, + "loss": 1.3616, + "step": 10409 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986884768039844e-05, + "loss": 1.3294, + "step": 10410 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986882223962076e-05, + "loss": 1.2834, + "step": 10411 + }, + { + "epoch": 0.01, + "learning_rate": 4.998687967963766e-05, + "loss": 1.1846, + "step": 10412 + }, + { + "epoch": 0.01, + "learning_rate": 4.998687713506657e-05, + "loss": 1.2795, + "step": 10413 + }, + { + "epoch": 0.01, + "learning_rate": 4.998687459024882e-05, + "loss": 1.1784, + "step": 10414 + }, + { + "epoch": 0.01, + "learning_rate": 4.998687204518442e-05, + "loss": 1.3204, + "step": 10415 + }, + { + "epoch": 0.01, + "learning_rate": 4.998686949987336e-05, + "loss": 1.1975, + "step": 10416 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986866954315634e-05, + "loss": 1.1086, + "step": 10417 + }, + { + "epoch": 0.01, + "learning_rate": 4.998686440851125e-05, + "loss": 1.285, + "step": 10418 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986861862460196e-05, + "loss": 1.3251, + "step": 10419 + }, + { + "epoch": 0.01, + "learning_rate": 4.99868593161625e-05, + "loss": 0.9713, + "step": 10420 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986856769618134e-05, + "loss": 1.2093, + "step": 10421 + }, + { + "epoch": 0.01, + "learning_rate": 4.998685422282711e-05, + "loss": 0.9838, + "step": 10422 + }, + { + "epoch": 0.01, + "learning_rate": 4.998685167578943e-05, + "loss": 1.0117, + "step": 10423 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986849128505084e-05, + "loss": 1.0491, + "step": 10424 + }, + { + "epoch": 0.01, + "learning_rate": 4.998684658097409e-05, + "loss": 1.4273, + "step": 10425 + }, + { + "epoch": 0.01, + "learning_rate": 4.998684403319642e-05, + "loss": 1.3291, + "step": 10426 + }, + { + "epoch": 0.01, + "learning_rate": 4.99868414851721e-05, + "loss": 1.0863, + "step": 10427 + }, + { + "epoch": 0.01, + "learning_rate": 4.998683893690112e-05, + "loss": 0.884, + "step": 10428 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986836388383484e-05, + "loss": 1.0212, + "step": 10429 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986833839619184e-05, + "loss": 1.1829, + "step": 10430 + }, + { + "epoch": 0.01, + "learning_rate": 4.998683129060822e-05, + "loss": 0.7684, + "step": 10431 + }, + { + "epoch": 0.01, + "learning_rate": 4.99868287413506e-05, + "loss": 1.2292, + "step": 10432 + }, + { + "epoch": 0.01, + "learning_rate": 4.998682619184633e-05, + "loss": 1.5942, + "step": 10433 + }, + { + "epoch": 0.01, + "learning_rate": 4.998682364209539e-05, + "loss": 3.0241, + "step": 10434 + }, + { + "epoch": 0.01, + "learning_rate": 4.998682109209779e-05, + "loss": 1.0854, + "step": 10435 + }, + { + "epoch": 0.01, + "learning_rate": 4.998681854185353e-05, + "loss": 1.2607, + "step": 10436 + }, + { + "epoch": 0.01, + "learning_rate": 4.998681599136262e-05, + "loss": 1.3714, + "step": 10437 + }, + { + "epoch": 0.01, + "learning_rate": 4.998681344062504e-05, + "loss": 1.0285, + "step": 10438 + }, + { + "epoch": 0.01, + "learning_rate": 4.998681088964081e-05, + "loss": 1.0756, + "step": 10439 + }, + { + "epoch": 0.01, + "learning_rate": 4.998680833840991e-05, + "loss": 1.2608, + "step": 10440 + }, + { + "epoch": 0.01, + "learning_rate": 4.998680578693236e-05, + "loss": 1.3557, + "step": 10441 + }, + { + "epoch": 0.01, + "learning_rate": 4.998680323520814e-05, + "loss": 1.3043, + "step": 10442 + }, + { + "epoch": 0.01, + "learning_rate": 4.998680068323728e-05, + "loss": 0.7495, + "step": 10443 + }, + { + "epoch": 0.01, + "learning_rate": 4.998679813101975e-05, + "loss": 1.3086, + "step": 10444 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986795578555555e-05, + "loss": 1.6532, + "step": 10445 + }, + { + "epoch": 0.01, + "learning_rate": 4.998679302584471e-05, + "loss": 1.1917, + "step": 10446 + }, + { + "epoch": 0.01, + "learning_rate": 4.99867904728872e-05, + "loss": 1.2263, + "step": 10447 + }, + { + "epoch": 0.01, + "learning_rate": 4.998678791968303e-05, + "loss": 1.052, + "step": 10448 + }, + { + "epoch": 0.01, + "learning_rate": 4.998678536623221e-05, + "loss": 1.1604, + "step": 10449 + }, + { + "epoch": 0.01, + "learning_rate": 4.998678281253472e-05, + "loss": 1.1795, + "step": 10450 + }, + { + "epoch": 0.01, + "learning_rate": 4.998678025859058e-05, + "loss": 1.3203, + "step": 10451 + }, + { + "epoch": 0.01, + "learning_rate": 4.998677770439978e-05, + "loss": 1.0137, + "step": 10452 + }, + { + "epoch": 0.01, + "learning_rate": 4.998677514996232e-05, + "loss": 1.2603, + "step": 10453 + }, + { + "epoch": 0.01, + "learning_rate": 4.998677259527819e-05, + "loss": 1.1167, + "step": 10454 + }, + { + "epoch": 0.01, + "learning_rate": 4.998677004034742e-05, + "loss": 1.2954, + "step": 10455 + }, + { + "epoch": 0.01, + "learning_rate": 4.998676748516997e-05, + "loss": 1.1379, + "step": 10456 + }, + { + "epoch": 0.01, + "learning_rate": 4.998676492974588e-05, + "loss": 1.1313, + "step": 10457 + }, + { + "epoch": 0.01, + "learning_rate": 4.998676237407512e-05, + "loss": 1.1246, + "step": 10458 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986759818157704e-05, + "loss": 0.5331, + "step": 10459 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986757261993636e-05, + "loss": 0.9279, + "step": 10460 + }, + { + "epoch": 0.01, + "learning_rate": 4.99867547055829e-05, + "loss": 1.4327, + "step": 10461 + }, + { + "epoch": 0.01, + "learning_rate": 4.998675214892551e-05, + "loss": 1.1658, + "step": 10462 + }, + { + "epoch": 0.01, + "learning_rate": 4.998674959202146e-05, + "loss": 0.872, + "step": 10463 + }, + { + "epoch": 0.01, + "learning_rate": 4.998674703487075e-05, + "loss": 2.5678, + "step": 10464 + }, + { + "epoch": 0.01, + "learning_rate": 4.998674447747338e-05, + "loss": 1.6675, + "step": 10465 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986741919829363e-05, + "loss": 1.2018, + "step": 10466 + }, + { + "epoch": 0.01, + "learning_rate": 4.998673936193867e-05, + "loss": 1.1596, + "step": 10467 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986736803801326e-05, + "loss": 0.9724, + "step": 10468 + }, + { + "epoch": 0.01, + "learning_rate": 4.998673424541733e-05, + "loss": 1.0377, + "step": 10469 + }, + { + "epoch": 0.01, + "learning_rate": 4.998673168678667e-05, + "loss": 0.9609, + "step": 10470 + }, + { + "epoch": 0.01, + "learning_rate": 4.998672912790935e-05, + "loss": 1.1886, + "step": 10471 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986726568785367e-05, + "loss": 1.1853, + "step": 10472 + }, + { + "epoch": 0.01, + "learning_rate": 4.998672400941474e-05, + "loss": 0.8812, + "step": 10473 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986721449797444e-05, + "loss": 0.8519, + "step": 10474 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986718889933495e-05, + "loss": 1.2709, + "step": 10475 + }, + { + "epoch": 0.01, + "learning_rate": 4.998671632982288e-05, + "loss": 1.3796, + "step": 10476 + }, + { + "epoch": 0.01, + "learning_rate": 4.998671376946561e-05, + "loss": 1.5551, + "step": 10477 + }, + { + "epoch": 0.01, + "learning_rate": 4.998671120886168e-05, + "loss": 1.0944, + "step": 10478 + }, + { + "epoch": 0.01, + "learning_rate": 4.99867086480111e-05, + "loss": 1.1696, + "step": 10479 + }, + { + "epoch": 0.01, + "learning_rate": 4.998670608691385e-05, + "loss": 1.1202, + "step": 10480 + }, + { + "epoch": 0.01, + "learning_rate": 4.998670352556995e-05, + "loss": 1.2514, + "step": 10481 + }, + { + "epoch": 0.01, + "learning_rate": 4.998670096397939e-05, + "loss": 0.9168, + "step": 10482 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986698402142175e-05, + "loss": 1.1231, + "step": 10483 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986695840058295e-05, + "loss": 0.9077, + "step": 10484 + }, + { + "epoch": 0.01, + "learning_rate": 4.998669327772776e-05, + "loss": 1.162, + "step": 10485 + }, + { + "epoch": 0.01, + "learning_rate": 4.998669071515057e-05, + "loss": 1.3504, + "step": 10486 + }, + { + "epoch": 0.01, + "learning_rate": 4.998668815232672e-05, + "loss": 1.1128, + "step": 10487 + }, + { + "epoch": 0.01, + "learning_rate": 4.998668558925621e-05, + "loss": 0.9506, + "step": 10488 + }, + { + "epoch": 0.01, + "learning_rate": 4.998668302593904e-05, + "loss": 0.9646, + "step": 10489 + }, + { + "epoch": 0.01, + "learning_rate": 4.998668046237522e-05, + "loss": 0.9416, + "step": 10490 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986677898564735e-05, + "loss": 0.8455, + "step": 10491 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986675334507594e-05, + "loss": 0.8593, + "step": 10492 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986672770203797e-05, + "loss": 0.7396, + "step": 10493 + }, + { + "epoch": 0.01, + "learning_rate": 4.998667020565334e-05, + "loss": 0.7632, + "step": 10494 + }, + { + "epoch": 0.01, + "learning_rate": 4.998666764085622e-05, + "loss": 1.1413, + "step": 10495 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986665075812455e-05, + "loss": 0.995, + "step": 10496 + }, + { + "epoch": 0.01, + "learning_rate": 4.998666251052202e-05, + "loss": 1.4334, + "step": 10497 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986659944984937e-05, + "loss": 0.8874, + "step": 10498 + }, + { + "epoch": 0.01, + "learning_rate": 4.998665737920118e-05, + "loss": 1.0983, + "step": 10499 + }, + { + "epoch": 0.01, + "learning_rate": 4.998665481317079e-05, + "loss": 1.3465, + "step": 10500 + }, + { + "epoch": 0.01, + "eval_loss": 1.0590955018997192, + "eval_runtime": 83.8028, + "eval_samples_per_second": 16.527, + "eval_steps_per_second": 4.141, + "step": 10500 + }, + { + "epoch": 0.01, + "learning_rate": 4.998665224689372e-05, + "loss": 0.8434, + "step": 10501 + }, + { + "epoch": 0.01, + "learning_rate": 4.998664968037e-05, + "loss": 0.758, + "step": 10502 + }, + { + "epoch": 0.01, + "learning_rate": 4.998664711359963e-05, + "loss": 0.8072, + "step": 10503 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986644546582596e-05, + "loss": 1.0505, + "step": 10504 + }, + { + "epoch": 0.01, + "learning_rate": 4.99866419793189e-05, + "loss": 1.1189, + "step": 10505 + }, + { + "epoch": 0.01, + "learning_rate": 4.998663941180855e-05, + "loss": 1.2439, + "step": 10506 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986636844051546e-05, + "loss": 1.1577, + "step": 10507 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986634276047875e-05, + "loss": 1.0675, + "step": 10508 + }, + { + "epoch": 0.01, + "learning_rate": 4.998663170779756e-05, + "loss": 1.2374, + "step": 10509 + }, + { + "epoch": 0.01, + "learning_rate": 4.998662913930058e-05, + "loss": 1.16, + "step": 10510 + }, + { + "epoch": 0.01, + "learning_rate": 4.998662657055694e-05, + "loss": 1.1783, + "step": 10511 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986624001566644e-05, + "loss": 1.1148, + "step": 10512 + }, + { + "epoch": 0.01, + "learning_rate": 4.998662143232969e-05, + "loss": 1.207, + "step": 10513 + }, + { + "epoch": 0.01, + "learning_rate": 4.998661886284608e-05, + "loss": 1.1054, + "step": 10514 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986616293115816e-05, + "loss": 1.2763, + "step": 10515 + }, + { + "epoch": 0.01, + "learning_rate": 4.998661372313889e-05, + "loss": 0.8169, + "step": 10516 + }, + { + "epoch": 0.01, + "learning_rate": 4.998661115291531e-05, + "loss": 1.0664, + "step": 10517 + }, + { + "epoch": 0.01, + "learning_rate": 4.998660858244507e-05, + "loss": 1.1742, + "step": 10518 + }, + { + "epoch": 0.01, + "learning_rate": 4.998660601172818e-05, + "loss": 0.9612, + "step": 10519 + }, + { + "epoch": 0.01, + "learning_rate": 4.998660344076462e-05, + "loss": 1.2785, + "step": 10520 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986600869554415e-05, + "loss": 1.2946, + "step": 10521 + }, + { + "epoch": 0.01, + "learning_rate": 4.998659829809754e-05, + "loss": 1.2146, + "step": 10522 + }, + { + "epoch": 0.01, + "learning_rate": 4.998659572639402e-05, + "loss": 0.9512, + "step": 10523 + }, + { + "epoch": 0.01, + "learning_rate": 4.998659315444384e-05, + "loss": 1.1009, + "step": 10524 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986590582247e-05, + "loss": 1.1699, + "step": 10525 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986588009803506e-05, + "loss": 1.2749, + "step": 10526 + }, + { + "epoch": 0.01, + "learning_rate": 4.998658543711335e-05, + "loss": 1.187, + "step": 10527 + }, + { + "epoch": 0.01, + "learning_rate": 4.998658286417653e-05, + "loss": 1.0756, + "step": 10528 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986580290993065e-05, + "loss": 1.1111, + "step": 10529 + }, + { + "epoch": 0.01, + "learning_rate": 4.998657771756294e-05, + "loss": 1.0458, + "step": 10530 + }, + { + "epoch": 0.01, + "learning_rate": 4.998657514388616e-05, + "loss": 0.9649, + "step": 10531 + }, + { + "epoch": 0.01, + "learning_rate": 4.998657256996273e-05, + "loss": 0.9339, + "step": 10532 + }, + { + "epoch": 0.01, + "learning_rate": 4.998656999579263e-05, + "loss": 0.6619, + "step": 10533 + }, + { + "epoch": 0.01, + "learning_rate": 4.998656742137588e-05, + "loss": 0.5385, + "step": 10534 + }, + { + "epoch": 0.01, + "learning_rate": 4.998656484671247e-05, + "loss": 1.0374, + "step": 10535 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986562271802406e-05, + "loss": 0.5005, + "step": 10536 + }, + { + "epoch": 0.01, + "learning_rate": 4.998655969664568e-05, + "loss": 0.3531, + "step": 10537 + }, + { + "epoch": 0.01, + "learning_rate": 4.99865571212423e-05, + "loss": 0.5739, + "step": 10538 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986554545592266e-05, + "loss": 0.689, + "step": 10539 + }, + { + "epoch": 0.01, + "learning_rate": 4.998655196969557e-05, + "loss": 1.1641, + "step": 10540 + }, + { + "epoch": 0.01, + "learning_rate": 4.998654939355222e-05, + "loss": 0.9358, + "step": 10541 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986546817162214e-05, + "loss": 1.1991, + "step": 10542 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986544240525556e-05, + "loss": 1.6141, + "step": 10543 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986541663642236e-05, + "loss": 1.8724, + "step": 10544 + }, + { + "epoch": 0.01, + "learning_rate": 4.998653908651226e-05, + "loss": 1.3541, + "step": 10545 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986536509135626e-05, + "loss": 1.0405, + "step": 10546 + }, + { + "epoch": 0.01, + "learning_rate": 4.998653393151234e-05, + "loss": 0.9787, + "step": 10547 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986531353642386e-05, + "loss": 0.9666, + "step": 10548 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986528775525785e-05, + "loss": 0.9589, + "step": 10549 + }, + { + "epoch": 0.01, + "learning_rate": 4.998652619716253e-05, + "loss": 1.0897, + "step": 10550 + }, + { + "epoch": 0.01, + "learning_rate": 4.998652361855261e-05, + "loss": 1.1917, + "step": 10551 + }, + { + "epoch": 0.01, + "learning_rate": 4.998652103969604e-05, + "loss": 1.1123, + "step": 10552 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986518460592816e-05, + "loss": 0.8103, + "step": 10553 + }, + { + "epoch": 0.01, + "learning_rate": 4.998651588124292e-05, + "loss": 1.1595, + "step": 10554 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986513301646385e-05, + "loss": 1.1804, + "step": 10555 + }, + { + "epoch": 0.01, + "learning_rate": 4.998651072180319e-05, + "loss": 1.0873, + "step": 10556 + }, + { + "epoch": 0.01, + "learning_rate": 4.998650814171334e-05, + "loss": 1.1571, + "step": 10557 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986505561376826e-05, + "loss": 1.1775, + "step": 10558 + }, + { + "epoch": 0.01, + "learning_rate": 4.998650298079366e-05, + "loss": 1.4187, + "step": 10559 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986500399963835e-05, + "loss": 1.3483, + "step": 10560 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986497818887355e-05, + "loss": 1.0625, + "step": 10561 + }, + { + "epoch": 0.01, + "learning_rate": 4.998649523756423e-05, + "loss": 1.1701, + "step": 10562 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986492655994435e-05, + "loss": 1.0178, + "step": 10563 + }, + { + "epoch": 0.01, + "learning_rate": 4.998649007417798e-05, + "loss": 1.0857, + "step": 10564 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986487492114885e-05, + "loss": 0.7984, + "step": 10565 + }, + { + "epoch": 0.01, + "learning_rate": 4.998648490980512e-05, + "loss": 1.3078, + "step": 10566 + }, + { + "epoch": 0.01, + "learning_rate": 4.998648232724871e-05, + "loss": 1.1336, + "step": 10567 + }, + { + "epoch": 0.01, + "learning_rate": 4.998647974444564e-05, + "loss": 0.9312, + "step": 10568 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986477161395904e-05, + "loss": 1.1834, + "step": 10569 + }, + { + "epoch": 0.01, + "learning_rate": 4.998647457809953e-05, + "loss": 0.9114, + "step": 10570 + }, + { + "epoch": 0.01, + "learning_rate": 4.998647199455649e-05, + "loss": 1.1802, + "step": 10571 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986469410766786e-05, + "loss": 1.0653, + "step": 10572 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986466826730435e-05, + "loss": 0.5615, + "step": 10573 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986464242447435e-05, + "loss": 0.6149, + "step": 10574 + }, + { + "epoch": 0.01, + "learning_rate": 4.998646165791777e-05, + "loss": 0.8447, + "step": 10575 + }, + { + "epoch": 0.01, + "learning_rate": 4.998645907314146e-05, + "loss": 0.4378, + "step": 10576 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986456488118483e-05, + "loss": 1.224, + "step": 10577 + }, + { + "epoch": 0.01, + "learning_rate": 4.998645390284885e-05, + "loss": 1.3924, + "step": 10578 + }, + { + "epoch": 0.01, + "learning_rate": 4.998645131733256e-05, + "loss": 1.2941, + "step": 10579 + }, + { + "epoch": 0.01, + "learning_rate": 4.998644873156962e-05, + "loss": 1.149, + "step": 10580 + }, + { + "epoch": 0.01, + "learning_rate": 4.998644614556002e-05, + "loss": 1.003, + "step": 10581 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986443559303774e-05, + "loss": 1.0581, + "step": 10582 + }, + { + "epoch": 0.01, + "learning_rate": 4.998644097280086e-05, + "loss": 0.9061, + "step": 10583 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986438386051295e-05, + "loss": 1.0999, + "step": 10584 + }, + { + "epoch": 0.01, + "learning_rate": 4.998643579905508e-05, + "loss": 0.9629, + "step": 10585 + }, + { + "epoch": 0.01, + "learning_rate": 4.99864332118122e-05, + "loss": 1.1814, + "step": 10586 + }, + { + "epoch": 0.01, + "learning_rate": 4.998643062432267e-05, + "loss": 1.1246, + "step": 10587 + }, + { + "epoch": 0.01, + "learning_rate": 4.998642803658649e-05, + "loss": 1.0821, + "step": 10588 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986425448603646e-05, + "loss": 1.1333, + "step": 10589 + }, + { + "epoch": 0.01, + "learning_rate": 4.998642286037415e-05, + "loss": 0.9511, + "step": 10590 + }, + { + "epoch": 0.01, + "learning_rate": 4.998642027189799e-05, + "loss": 1.1535, + "step": 10591 + }, + { + "epoch": 0.01, + "learning_rate": 4.998641768317518e-05, + "loss": 1.1484, + "step": 10592 + }, + { + "epoch": 0.01, + "learning_rate": 4.998641509420572e-05, + "loss": 0.636, + "step": 10593 + }, + { + "epoch": 0.01, + "learning_rate": 4.99864125049896e-05, + "loss": 1.128, + "step": 10594 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986409915526834e-05, + "loss": 0.9554, + "step": 10595 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986407325817395e-05, + "loss": 0.5884, + "step": 10596 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986404735861314e-05, + "loss": 0.849, + "step": 10597 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986402145658576e-05, + "loss": 1.3073, + "step": 10598 + }, + { + "epoch": 0.01, + "learning_rate": 4.998639955520918e-05, + "loss": 1.1573, + "step": 10599 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986396964513126e-05, + "loss": 1.026, + "step": 10600 + }, + { + "epoch": 0.01, + "learning_rate": 4.998639437357042e-05, + "loss": 1.1495, + "step": 10601 + }, + { + "epoch": 0.01, + "learning_rate": 4.998639178238106e-05, + "loss": 0.9721, + "step": 10602 + }, + { + "epoch": 0.01, + "learning_rate": 4.998638919094505e-05, + "loss": 1.2164, + "step": 10603 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986386599262374e-05, + "loss": 1.0202, + "step": 10604 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986384007333045e-05, + "loss": 0.9831, + "step": 10605 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986381415157066e-05, + "loss": 1.0955, + "step": 10606 + }, + { + "epoch": 0.01, + "learning_rate": 4.998637882273443e-05, + "loss": 1.1857, + "step": 10607 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986376230065134e-05, + "loss": 1.3291, + "step": 10608 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986373637149193e-05, + "loss": 1.1299, + "step": 10609 + }, + { + "epoch": 0.01, + "learning_rate": 4.998637104398659e-05, + "loss": 1.1586, + "step": 10610 + }, + { + "epoch": 0.01, + "learning_rate": 4.998636845057734e-05, + "loss": 1.114, + "step": 10611 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986365856921424e-05, + "loss": 1.36, + "step": 10612 + }, + { + "epoch": 0.01, + "learning_rate": 4.998636326301886e-05, + "loss": 1.0706, + "step": 10613 + }, + { + "epoch": 0.01, + "learning_rate": 4.998636066886963e-05, + "loss": 1.2458, + "step": 10614 + }, + { + "epoch": 0.01, + "learning_rate": 4.998635807447376e-05, + "loss": 1.0786, + "step": 10615 + }, + { + "epoch": 0.01, + "learning_rate": 4.998635547983123e-05, + "loss": 1.108, + "step": 10616 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986352884942044e-05, + "loss": 1.1663, + "step": 10617 + }, + { + "epoch": 0.01, + "learning_rate": 4.998635028980621e-05, + "loss": 1.181, + "step": 10618 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986347694423714e-05, + "loss": 0.9882, + "step": 10619 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986345098794565e-05, + "loss": 1.0248, + "step": 10620 + }, + { + "epoch": 0.01, + "learning_rate": 4.998634250291876e-05, + "loss": 0.8304, + "step": 10621 + }, + { + "epoch": 0.01, + "learning_rate": 4.99863399067963e-05, + "loss": 1.1176, + "step": 10622 + }, + { + "epoch": 0.01, + "learning_rate": 4.998633731042719e-05, + "loss": 1.0107, + "step": 10623 + }, + { + "epoch": 0.01, + "learning_rate": 4.998633471381142e-05, + "loss": 1.4044, + "step": 10624 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986332116949e-05, + "loss": 4.4344, + "step": 10625 + }, + { + "epoch": 0.01, + "learning_rate": 4.998632951983992e-05, + "loss": 1.1785, + "step": 10626 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986326922484186e-05, + "loss": 1.0471, + "step": 10627 + }, + { + "epoch": 0.01, + "learning_rate": 4.99863243248818e-05, + "loss": 1.2066, + "step": 10628 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986321727032756e-05, + "loss": 0.9483, + "step": 10629 + }, + { + "epoch": 0.01, + "learning_rate": 4.998631912893707e-05, + "loss": 1.0307, + "step": 10630 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986316530594716e-05, + "loss": 1.0138, + "step": 10631 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986313932005715e-05, + "loss": 0.9078, + "step": 10632 + }, + { + "epoch": 0.01, + "learning_rate": 4.998631133317005e-05, + "loss": 1.1706, + "step": 10633 + }, + { + "epoch": 0.01, + "learning_rate": 4.998630873408774e-05, + "loss": 1.1575, + "step": 10634 + }, + { + "epoch": 0.01, + "learning_rate": 4.998630613475878e-05, + "loss": 1.1129, + "step": 10635 + }, + { + "epoch": 0.01, + "learning_rate": 4.998630353518315e-05, + "loss": 1.542, + "step": 10636 + }, + { + "epoch": 0.01, + "learning_rate": 4.998630093536088e-05, + "loss": 0.982, + "step": 10637 + }, + { + "epoch": 0.01, + "learning_rate": 4.998629833529195e-05, + "loss": 1.1706, + "step": 10638 + }, + { + "epoch": 0.01, + "learning_rate": 4.998629573497636e-05, + "loss": 0.9877, + "step": 10639 + }, + { + "epoch": 0.01, + "learning_rate": 4.998629313441413e-05, + "loss": 1.0951, + "step": 10640 + }, + { + "epoch": 0.01, + "learning_rate": 4.998629053360524e-05, + "loss": 1.1293, + "step": 10641 + }, + { + "epoch": 0.01, + "learning_rate": 4.998628793254969e-05, + "loss": 1.1675, + "step": 10642 + }, + { + "epoch": 0.01, + "learning_rate": 4.998628533124749e-05, + "loss": 1.1676, + "step": 10643 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986282729698633e-05, + "loss": 0.8128, + "step": 10644 + }, + { + "epoch": 0.01, + "learning_rate": 4.998628012790313e-05, + "loss": 1.0571, + "step": 10645 + }, + { + "epoch": 0.01, + "learning_rate": 4.998627752586096e-05, + "loss": 1.369, + "step": 10646 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986274923572154e-05, + "loss": 1.4563, + "step": 10647 + }, + { + "epoch": 0.01, + "learning_rate": 4.998627232103668e-05, + "loss": 1.2891, + "step": 10648 + }, + { + "epoch": 0.01, + "learning_rate": 4.998626971825455e-05, + "loss": 0.8835, + "step": 10649 + }, + { + "epoch": 0.01, + "learning_rate": 4.998626711522577e-05, + "loss": 0.9248, + "step": 10650 + }, + { + "epoch": 0.01, + "learning_rate": 4.998626451195034e-05, + "loss": 0.9569, + "step": 10651 + }, + { + "epoch": 0.01, + "learning_rate": 4.998626190842825e-05, + "loss": 1.1694, + "step": 10652 + }, + { + "epoch": 0.01, + "learning_rate": 4.998625930465952e-05, + "loss": 1.1396, + "step": 10653 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986256700644125e-05, + "loss": 1.131, + "step": 10654 + }, + { + "epoch": 0.01, + "learning_rate": 4.998625409638208e-05, + "loss": 1.0525, + "step": 10655 + }, + { + "epoch": 0.01, + "learning_rate": 4.998625149187337e-05, + "loss": 1.4169, + "step": 10656 + }, + { + "epoch": 0.01, + "learning_rate": 4.998624888711802e-05, + "loss": 1.29, + "step": 10657 + }, + { + "epoch": 0.01, + "learning_rate": 4.998624628211601e-05, + "loss": 0.8785, + "step": 10658 + }, + { + "epoch": 0.01, + "learning_rate": 4.998624367686735e-05, + "loss": 1.1464, + "step": 10659 + }, + { + "epoch": 0.01, + "learning_rate": 4.998624107137203e-05, + "loss": 1.2562, + "step": 10660 + }, + { + "epoch": 0.01, + "learning_rate": 4.998623846563007e-05, + "loss": 1.3613, + "step": 10661 + }, + { + "epoch": 0.01, + "learning_rate": 4.998623585964144e-05, + "loss": 1.4084, + "step": 10662 + }, + { + "epoch": 0.01, + "learning_rate": 4.998623325340616e-05, + "loss": 0.99, + "step": 10663 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986230646924234e-05, + "loss": 1.432, + "step": 10664 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986228040195646e-05, + "loss": 1.4405, + "step": 10665 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986225433220416e-05, + "loss": 1.2277, + "step": 10666 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986222825998517e-05, + "loss": 1.0819, + "step": 10667 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986220218529974e-05, + "loss": 1.0059, + "step": 10668 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986217610814776e-05, + "loss": 0.8576, + "step": 10669 + }, + { + "epoch": 0.01, + "learning_rate": 4.998621500285292e-05, + "loss": 0.6349, + "step": 10670 + }, + { + "epoch": 0.01, + "learning_rate": 4.998621239464442e-05, + "loss": 0.9552, + "step": 10671 + }, + { + "epoch": 0.01, + "learning_rate": 4.998620978618927e-05, + "loss": 1.2087, + "step": 10672 + }, + { + "epoch": 0.01, + "learning_rate": 4.998620717748745e-05, + "loss": 5.6711, + "step": 10673 + }, + { + "epoch": 0.01, + "learning_rate": 4.998620456853899e-05, + "loss": 3.5529, + "step": 10674 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986201959343873e-05, + "loss": 0.8959, + "step": 10675 + }, + { + "epoch": 0.01, + "learning_rate": 4.99861993499021e-05, + "loss": 1.1657, + "step": 10676 + }, + { + "epoch": 0.01, + "learning_rate": 4.998619674021367e-05, + "loss": 1.0985, + "step": 10677 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986194130278597e-05, + "loss": 1.3274, + "step": 10678 + }, + { + "epoch": 0.01, + "learning_rate": 4.998619152009686e-05, + "loss": 1.1938, + "step": 10679 + }, + { + "epoch": 0.01, + "learning_rate": 4.998618890966847e-05, + "loss": 1.4198, + "step": 10680 + }, + { + "epoch": 0.01, + "learning_rate": 4.998618629899344e-05, + "loss": 1.1039, + "step": 10681 + }, + { + "epoch": 0.01, + "learning_rate": 4.998618368807175e-05, + "loss": 1.1376, + "step": 10682 + }, + { + "epoch": 0.01, + "learning_rate": 4.99861810769034e-05, + "loss": 1.0047, + "step": 10683 + }, + { + "epoch": 0.01, + "learning_rate": 4.998617846548841e-05, + "loss": 5.4369, + "step": 10684 + }, + { + "epoch": 0.01, + "learning_rate": 4.998617585382676e-05, + "loss": 4.1474, + "step": 10685 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986173241918456e-05, + "loss": 1.1917, + "step": 10686 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986170629763505e-05, + "loss": 1.261, + "step": 10687 + }, + { + "epoch": 0.01, + "learning_rate": 4.99861680173619e-05, + "loss": 1.0599, + "step": 10688 + }, + { + "epoch": 0.01, + "learning_rate": 4.998616540471363e-05, + "loss": 1.2908, + "step": 10689 + }, + { + "epoch": 0.01, + "learning_rate": 4.998616279181872e-05, + "loss": 0.9679, + "step": 10690 + }, + { + "epoch": 0.01, + "learning_rate": 4.998616017867716e-05, + "loss": 1.0874, + "step": 10691 + }, + { + "epoch": 0.01, + "learning_rate": 4.998615756528893e-05, + "loss": 1.1285, + "step": 10692 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986154951654066e-05, + "loss": 0.9024, + "step": 10693 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986152337772536e-05, + "loss": 0.9533, + "step": 10694 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986149723644357e-05, + "loss": 1.5688, + "step": 10695 + }, + { + "epoch": 0.01, + "learning_rate": 4.998614710926952e-05, + "loss": 1.0835, + "step": 10696 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986144494648043e-05, + "loss": 1.0618, + "step": 10697 + }, + { + "epoch": 0.01, + "learning_rate": 4.99861418797799e-05, + "loss": 0.9249, + "step": 10698 + }, + { + "epoch": 0.01, + "learning_rate": 4.998613926466511e-05, + "loss": 0.9606, + "step": 10699 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986136649303674e-05, + "loss": 1.0427, + "step": 10700 + }, + { + "epoch": 0.01, + "learning_rate": 4.998613403369557e-05, + "loss": 0.5878, + "step": 10701 + }, + { + "epoch": 0.01, + "learning_rate": 4.998613141784083e-05, + "loss": 0.7082, + "step": 10702 + }, + { + "epoch": 0.01, + "learning_rate": 4.998612880173943e-05, + "loss": 0.9104, + "step": 10703 + }, + { + "epoch": 0.01, + "learning_rate": 4.998612618539138e-05, + "loss": 0.8221, + "step": 10704 + }, + { + "epoch": 0.01, + "learning_rate": 4.998612356879667e-05, + "loss": 0.7588, + "step": 10705 + }, + { + "epoch": 0.01, + "learning_rate": 4.998612095195532e-05, + "loss": 0.9248, + "step": 10706 + }, + { + "epoch": 0.01, + "learning_rate": 4.99861183348673e-05, + "loss": 0.8445, + "step": 10707 + }, + { + "epoch": 0.01, + "learning_rate": 4.998611571753264e-05, + "loss": 0.8719, + "step": 10708 + }, + { + "epoch": 0.01, + "learning_rate": 4.998611309995133e-05, + "loss": 0.6463, + "step": 10709 + }, + { + "epoch": 0.01, + "learning_rate": 4.998611048212337e-05, + "loss": 0.558, + "step": 10710 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986107864048746e-05, + "loss": 0.5944, + "step": 10711 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986105245727476e-05, + "loss": 1.0485, + "step": 10712 + }, + { + "epoch": 0.01, + "learning_rate": 4.998610262715955e-05, + "loss": 1.4276, + "step": 10713 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986100008344974e-05, + "loss": 1.1766, + "step": 10714 + }, + { + "epoch": 0.01, + "learning_rate": 4.998609738928375e-05, + "loss": 1.0757, + "step": 10715 + }, + { + "epoch": 0.01, + "learning_rate": 4.998609476997587e-05, + "loss": 1.0755, + "step": 10716 + }, + { + "epoch": 0.01, + "learning_rate": 4.998609215042134e-05, + "loss": 1.106, + "step": 10717 + }, + { + "epoch": 0.01, + "learning_rate": 4.998608953062015e-05, + "loss": 0.9057, + "step": 10718 + }, + { + "epoch": 0.01, + "learning_rate": 4.998608691057232e-05, + "loss": 0.8137, + "step": 10719 + }, + { + "epoch": 0.01, + "learning_rate": 4.998608429027783e-05, + "loss": 1.2992, + "step": 10720 + }, + { + "epoch": 0.01, + "learning_rate": 4.998608166973669e-05, + "loss": 1.1847, + "step": 10721 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986079048948884e-05, + "loss": 1.3237, + "step": 10722 + }, + { + "epoch": 0.01, + "learning_rate": 4.998607642791444e-05, + "loss": 1.5411, + "step": 10723 + }, + { + "epoch": 0.01, + "learning_rate": 4.998607380663335e-05, + "loss": 1.2056, + "step": 10724 + }, + { + "epoch": 0.01, + "learning_rate": 4.99860711851056e-05, + "loss": 1.0274, + "step": 10725 + }, + { + "epoch": 0.01, + "learning_rate": 4.998606856333119e-05, + "loss": 1.3021, + "step": 10726 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986065941310144e-05, + "loss": 1.1686, + "step": 10727 + }, + { + "epoch": 0.01, + "learning_rate": 4.998606331904244e-05, + "loss": 1.3316, + "step": 10728 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986060696528084e-05, + "loss": 1.3269, + "step": 10729 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986058073767074e-05, + "loss": 0.9971, + "step": 10730 + }, + { + "epoch": 0.01, + "learning_rate": 4.998605545075941e-05, + "loss": 0.9264, + "step": 10731 + }, + { + "epoch": 0.01, + "learning_rate": 4.99860528275051e-05, + "loss": 0.9454, + "step": 10732 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986050204004134e-05, + "loss": 1.2553, + "step": 10733 + }, + { + "epoch": 0.01, + "learning_rate": 4.998604758025652e-05, + "loss": 1.1812, + "step": 10734 + }, + { + "epoch": 0.01, + "learning_rate": 4.998604495626224e-05, + "loss": 1.0018, + "step": 10735 + }, + { + "epoch": 0.01, + "learning_rate": 4.998604233202133e-05, + "loss": 0.8275, + "step": 10736 + }, + { + "epoch": 0.01, + "learning_rate": 4.998603970753376e-05, + "loss": 1.2006, + "step": 10737 + }, + { + "epoch": 0.01, + "learning_rate": 4.998603708279954e-05, + "loss": 1.1241, + "step": 10738 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986034457818664e-05, + "loss": 1.2169, + "step": 10739 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986031832591134e-05, + "loss": 1.0624, + "step": 10740 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986029207116955e-05, + "loss": 1.0672, + "step": 10741 + }, + { + "epoch": 0.01, + "learning_rate": 4.998602658139613e-05, + "loss": 0.9789, + "step": 10742 + }, + { + "epoch": 0.01, + "learning_rate": 4.998602395542864e-05, + "loss": 1.0947, + "step": 10743 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986021329214516e-05, + "loss": 1.75, + "step": 10744 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986018702753734e-05, + "loss": 1.4034, + "step": 10745 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986016076046295e-05, + "loss": 0.8919, + "step": 10746 + }, + { + "epoch": 0.01, + "learning_rate": 4.998601344909221e-05, + "loss": 1.254, + "step": 10747 + }, + { + "epoch": 0.01, + "learning_rate": 4.998601082189147e-05, + "loss": 1.0796, + "step": 10748 + }, + { + "epoch": 0.01, + "learning_rate": 4.998600819444408e-05, + "loss": 1.2729, + "step": 10749 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986005566750036e-05, + "loss": 1.1744, + "step": 10750 + }, + { + "epoch": 0.01, + "learning_rate": 4.998600293880935e-05, + "loss": 1.2834, + "step": 10751 + }, + { + "epoch": 0.01, + "learning_rate": 4.9986000310622004e-05, + "loss": 1.1745, + "step": 10752 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985997682188014e-05, + "loss": 1.195, + "step": 10753 + }, + { + "epoch": 0.01, + "learning_rate": 4.998599505350736e-05, + "loss": 1.1958, + "step": 10754 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985992424580066e-05, + "loss": 1.4716, + "step": 10755 + }, + { + "epoch": 0.01, + "learning_rate": 4.998598979540612e-05, + "loss": 1.2591, + "step": 10756 + }, + { + "epoch": 0.01, + "learning_rate": 4.998598716598552e-05, + "loss": 1.2078, + "step": 10757 + }, + { + "epoch": 0.01, + "learning_rate": 4.998598453631827e-05, + "loss": 1.0542, + "step": 10758 + }, + { + "epoch": 0.01, + "learning_rate": 4.998598190640437e-05, + "loss": 1.0827, + "step": 10759 + }, + { + "epoch": 0.01, + "learning_rate": 4.998597927624382e-05, + "loss": 1.4116, + "step": 10760 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985976645836615e-05, + "loss": 1.0502, + "step": 10761 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985974015182755e-05, + "loss": 0.6948, + "step": 10762 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985971384282246e-05, + "loss": 0.5402, + "step": 10763 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985968753135094e-05, + "loss": 0.5075, + "step": 10764 + }, + { + "epoch": 0.01, + "learning_rate": 4.998596612174129e-05, + "loss": 0.4851, + "step": 10765 + }, + { + "epoch": 0.01, + "learning_rate": 4.998596349010083e-05, + "loss": 0.3154, + "step": 10766 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985960858213724e-05, + "loss": 0.3732, + "step": 10767 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985958226079955e-05, + "loss": 0.402, + "step": 10768 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985955593699544e-05, + "loss": 0.3886, + "step": 10769 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985952961072484e-05, + "loss": 1.1539, + "step": 10770 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985950328198774e-05, + "loss": 1.3236, + "step": 10771 + }, + { + "epoch": 0.01, + "learning_rate": 4.998594769507841e-05, + "loss": 1.0445, + "step": 10772 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985945061711393e-05, + "loss": 0.8028, + "step": 10773 + }, + { + "epoch": 0.01, + "learning_rate": 4.998594242809773e-05, + "loss": 0.4059, + "step": 10774 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985939794237416e-05, + "loss": 1.045, + "step": 10775 + }, + { + "epoch": 0.01, + "learning_rate": 4.998593716013045e-05, + "loss": 1.4564, + "step": 10776 + }, + { + "epoch": 0.01, + "learning_rate": 4.998593452577683e-05, + "loss": 1.2097, + "step": 10777 + }, + { + "epoch": 0.01, + "learning_rate": 4.998593189117656e-05, + "loss": 1.0419, + "step": 10778 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985929256329644e-05, + "loss": 1.1569, + "step": 10779 + }, + { + "epoch": 0.01, + "learning_rate": 4.998592662123607e-05, + "loss": 1.1554, + "step": 10780 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985923985895855e-05, + "loss": 1.0237, + "step": 10781 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985921350308984e-05, + "loss": 0.9649, + "step": 10782 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985918714475463e-05, + "loss": 1.0963, + "step": 10783 + }, + { + "epoch": 0.01, + "learning_rate": 4.998591607839529e-05, + "loss": 1.0937, + "step": 10784 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985913442068475e-05, + "loss": 1.3142, + "step": 10785 + }, + { + "epoch": 0.01, + "learning_rate": 4.998591080549499e-05, + "loss": 0.7393, + "step": 10786 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985908168674875e-05, + "loss": 1.1603, + "step": 10787 + }, + { + "epoch": 0.01, + "learning_rate": 4.99859055316081e-05, + "loss": 1.3147, + "step": 10788 + }, + { + "epoch": 0.01, + "learning_rate": 4.998590289429468e-05, + "loss": 1.0646, + "step": 10789 + }, + { + "epoch": 0.01, + "learning_rate": 4.998590025673461e-05, + "loss": 0.9826, + "step": 10790 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985897618927886e-05, + "loss": 0.9658, + "step": 10791 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985894980874516e-05, + "loss": 1.1047, + "step": 10792 + }, + { + "epoch": 0.01, + "learning_rate": 4.998589234257448e-05, + "loss": 0.9545, + "step": 10793 + }, + { + "epoch": 0.01, + "learning_rate": 4.998588970402781e-05, + "loss": 1.2241, + "step": 10794 + }, + { + "epoch": 0.01, + "learning_rate": 4.998588706523449e-05, + "loss": 0.6729, + "step": 10795 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985884426194516e-05, + "loss": 0.9699, + "step": 10796 + }, + { + "epoch": 0.01, + "learning_rate": 4.998588178690789e-05, + "loss": 0.7535, + "step": 10797 + }, + { + "epoch": 0.01, + "learning_rate": 4.998587914737461e-05, + "loss": 1.4064, + "step": 10798 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985876507594685e-05, + "loss": 1.1086, + "step": 10799 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985873867568114e-05, + "loss": 1.1563, + "step": 10800 + }, + { + "epoch": 0.01, + "learning_rate": 4.998587122729489e-05, + "loss": 1.0799, + "step": 10801 + }, + { + "epoch": 0.01, + "learning_rate": 4.998586858677501e-05, + "loss": 0.8134, + "step": 10802 + }, + { + "epoch": 0.01, + "learning_rate": 4.998586594600849e-05, + "loss": 1.1417, + "step": 10803 + }, + { + "epoch": 0.01, + "learning_rate": 4.998586330499532e-05, + "loss": 1.1602, + "step": 10804 + }, + { + "epoch": 0.01, + "learning_rate": 4.998586066373549e-05, + "loss": 1.1683, + "step": 10805 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985858022229015e-05, + "loss": 1.2593, + "step": 10806 + }, + { + "epoch": 0.01, + "learning_rate": 4.998585538047589e-05, + "loss": 1.0802, + "step": 10807 + }, + { + "epoch": 0.01, + "learning_rate": 4.998585273847612e-05, + "loss": 0.6904, + "step": 10808 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985850096229694e-05, + "loss": 0.9784, + "step": 10809 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985847453736624e-05, + "loss": 1.2396, + "step": 10810 + }, + { + "epoch": 0.01, + "learning_rate": 4.99858448109969e-05, + "loss": 1.1279, + "step": 10811 + }, + { + "epoch": 0.01, + "learning_rate": 4.998584216801052e-05, + "loss": 1.1777, + "step": 10812 + }, + { + "epoch": 0.01, + "learning_rate": 4.99858395247775e-05, + "loss": 1.4183, + "step": 10813 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985836881297826e-05, + "loss": 1.5293, + "step": 10814 + }, + { + "epoch": 0.01, + "learning_rate": 4.99858342375715e-05, + "loss": 1.2572, + "step": 10815 + }, + { + "epoch": 0.01, + "learning_rate": 4.998583159359853e-05, + "loss": 1.444, + "step": 10816 + }, + { + "epoch": 0.01, + "learning_rate": 4.998582894937891e-05, + "loss": 1.1602, + "step": 10817 + }, + { + "epoch": 0.01, + "learning_rate": 4.998582630491264e-05, + "loss": 0.947, + "step": 10818 + }, + { + "epoch": 0.01, + "learning_rate": 4.998582366019972e-05, + "loss": 1.0774, + "step": 10819 + }, + { + "epoch": 0.01, + "learning_rate": 4.998582101524015e-05, + "loss": 1.0768, + "step": 10820 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985818370033933e-05, + "loss": 1.3893, + "step": 10821 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985815724581066e-05, + "loss": 1.2026, + "step": 10822 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985813078881536e-05, + "loss": 1.0682, + "step": 10823 + }, + { + "epoch": 0.01, + "learning_rate": 4.998581043293538e-05, + "loss": 1.1521, + "step": 10824 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985807786742556e-05, + "loss": 1.0433, + "step": 10825 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985805140303085e-05, + "loss": 0.8613, + "step": 10826 + }, + { + "epoch": 0.01, + "learning_rate": 4.998580249361697e-05, + "loss": 1.178, + "step": 10827 + }, + { + "epoch": 0.01, + "learning_rate": 4.998579984668421e-05, + "loss": 0.9097, + "step": 10828 + }, + { + "epoch": 0.01, + "learning_rate": 4.998579719950479e-05, + "loss": 1.1461, + "step": 10829 + }, + { + "epoch": 0.01, + "learning_rate": 4.998579455207873e-05, + "loss": 1.397, + "step": 10830 + }, + { + "epoch": 0.01, + "learning_rate": 4.998579190440601e-05, + "loss": 1.2818, + "step": 10831 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985789256486654e-05, + "loss": 1.2913, + "step": 10832 + }, + { + "epoch": 0.01, + "learning_rate": 4.998578660832064e-05, + "loss": 1.5145, + "step": 10833 + }, + { + "epoch": 0.01, + "learning_rate": 4.998578395990798e-05, + "loss": 1.4816, + "step": 10834 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985781311248667e-05, + "loss": 0.6521, + "step": 10835 + }, + { + "epoch": 0.01, + "learning_rate": 4.998577866234271e-05, + "loss": 0.9915, + "step": 10836 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985776013190105e-05, + "loss": 1.2507, + "step": 10837 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985773363790843e-05, + "loss": 1.0145, + "step": 10838 + }, + { + "epoch": 0.01, + "learning_rate": 4.998577071414494e-05, + "loss": 1.146, + "step": 10839 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985768064252386e-05, + "loss": 0.9321, + "step": 10840 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985765414113184e-05, + "loss": 1.0879, + "step": 10841 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985762763727326e-05, + "loss": 1.134, + "step": 10842 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985760113094825e-05, + "loss": 0.8009, + "step": 10843 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985757462215675e-05, + "loss": 1.0375, + "step": 10844 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985754811089876e-05, + "loss": 1.517, + "step": 10845 + }, + { + "epoch": 0.01, + "learning_rate": 4.998575215971743e-05, + "loss": 1.6569, + "step": 10846 + }, + { + "epoch": 0.01, + "learning_rate": 4.998574950809833e-05, + "loss": 1.3168, + "step": 10847 + }, + { + "epoch": 0.01, + "learning_rate": 4.998574685623258e-05, + "loss": 1.1746, + "step": 10848 + }, + { + "epoch": 0.01, + "learning_rate": 4.998574420412019e-05, + "loss": 1.3589, + "step": 10849 + }, + { + "epoch": 0.01, + "learning_rate": 4.998574155176114e-05, + "loss": 1.181, + "step": 10850 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985738899155454e-05, + "loss": 1.0479, + "step": 10851 + }, + { + "epoch": 0.01, + "learning_rate": 4.998573624630311e-05, + "loss": 1.0858, + "step": 10852 + }, + { + "epoch": 0.01, + "learning_rate": 4.998573359320412e-05, + "loss": 1.0836, + "step": 10853 + }, + { + "epoch": 0.01, + "learning_rate": 4.998573093985848e-05, + "loss": 1.3111, + "step": 10854 + }, + { + "epoch": 0.01, + "learning_rate": 4.99857282862662e-05, + "loss": 1.0305, + "step": 10855 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985725632427263e-05, + "loss": 1.1478, + "step": 10856 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985722978341674e-05, + "loss": 0.5696, + "step": 10857 + }, + { + "epoch": 0.01, + "learning_rate": 4.998572032400944e-05, + "loss": 0.5121, + "step": 10858 + }, + { + "epoch": 0.01, + "learning_rate": 4.998571766943057e-05, + "loss": 0.7915, + "step": 10859 + }, + { + "epoch": 0.01, + "learning_rate": 4.998571501460503e-05, + "loss": 1.0314, + "step": 10860 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985712359532856e-05, + "loss": 0.9899, + "step": 10861 + }, + { + "epoch": 0.01, + "learning_rate": 4.998570970421402e-05, + "loss": 1.0597, + "step": 10862 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985707048648555e-05, + "loss": 1.1329, + "step": 10863 + }, + { + "epoch": 0.01, + "learning_rate": 4.998570439283643e-05, + "loss": 0.8242, + "step": 10864 + }, + { + "epoch": 0.01, + "learning_rate": 4.998570173677766e-05, + "loss": 1.0128, + "step": 10865 + }, + { + "epoch": 0.01, + "learning_rate": 4.998569908047224e-05, + "loss": 1.2129, + "step": 10866 + }, + { + "epoch": 0.01, + "learning_rate": 4.998569642392017e-05, + "loss": 1.1209, + "step": 10867 + }, + { + "epoch": 0.01, + "learning_rate": 4.998569376712145e-05, + "loss": 0.6446, + "step": 10868 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985691110076086e-05, + "loss": 0.5986, + "step": 10869 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985688452784076e-05, + "loss": 0.905, + "step": 10870 + }, + { + "epoch": 0.01, + "learning_rate": 4.998568579524541e-05, + "loss": 1.1289, + "step": 10871 + }, + { + "epoch": 0.01, + "learning_rate": 4.99856831374601e-05, + "loss": 0.9742, + "step": 10872 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985680479428146e-05, + "loss": 1.108, + "step": 10873 + }, + { + "epoch": 0.01, + "learning_rate": 4.998567782114954e-05, + "loss": 1.1507, + "step": 10874 + }, + { + "epoch": 0.01, + "learning_rate": 4.998567516262429e-05, + "loss": 1.1444, + "step": 10875 + }, + { + "epoch": 0.01, + "learning_rate": 4.998567250385239e-05, + "loss": 1.1622, + "step": 10876 + }, + { + "epoch": 0.01, + "learning_rate": 4.998566984483384e-05, + "loss": 1.0635, + "step": 10877 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985667185568643e-05, + "loss": 1.2776, + "step": 10878 + }, + { + "epoch": 0.01, + "learning_rate": 4.998566452605679e-05, + "loss": 1.2305, + "step": 10879 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985661866298296e-05, + "loss": 1.2408, + "step": 10880 + }, + { + "epoch": 0.01, + "learning_rate": 4.998565920629316e-05, + "loss": 1.2548, + "step": 10881 + }, + { + "epoch": 0.01, + "learning_rate": 4.998565654604137e-05, + "loss": 1.0354, + "step": 10882 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985653885542924e-05, + "loss": 1.0019, + "step": 10883 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985651224797846e-05, + "loss": 0.9741, + "step": 10884 + }, + { + "epoch": 0.01, + "learning_rate": 4.998564856380611e-05, + "loss": 1.2451, + "step": 10885 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985645902567735e-05, + "loss": 1.3416, + "step": 10886 + }, + { + "epoch": 0.01, + "learning_rate": 4.99856432410827e-05, + "loss": 1.1328, + "step": 10887 + }, + { + "epoch": 0.01, + "learning_rate": 4.998564057935102e-05, + "loss": 1.2498, + "step": 10888 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985637917372704e-05, + "loss": 1.1604, + "step": 10889 + }, + { + "epoch": 0.01, + "learning_rate": 4.998563525514773e-05, + "loss": 1.0942, + "step": 10890 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985632592676115e-05, + "loss": 1.0734, + "step": 10891 + }, + { + "epoch": 0.01, + "learning_rate": 4.998562992995784e-05, + "loss": 1.1091, + "step": 10892 + }, + { + "epoch": 0.01, + "learning_rate": 4.998562726699293e-05, + "loss": 1.1214, + "step": 10893 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985624603781365e-05, + "loss": 0.9871, + "step": 10894 + }, + { + "epoch": 0.01, + "learning_rate": 4.998562194032316e-05, + "loss": 1.0969, + "step": 10895 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985619276618304e-05, + "loss": 1.3956, + "step": 10896 + }, + { + "epoch": 0.01, + "learning_rate": 4.99856166126668e-05, + "loss": 0.9659, + "step": 10897 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985613948468646e-05, + "loss": 1.0178, + "step": 10898 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985611284023843e-05, + "loss": 1.0859, + "step": 10899 + }, + { + "epoch": 0.01, + "learning_rate": 4.998560861933239e-05, + "loss": 1.0746, + "step": 10900 + }, + { + "epoch": 0.01, + "learning_rate": 4.99856059543943e-05, + "loss": 1.1855, + "step": 10901 + }, + { + "epoch": 0.01, + "learning_rate": 4.998560328920956e-05, + "loss": 1.2518, + "step": 10902 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985600623778174e-05, + "loss": 1.0322, + "step": 10903 + }, + { + "epoch": 0.01, + "learning_rate": 4.998559795810013e-05, + "loss": 1.1024, + "step": 10904 + }, + { + "epoch": 0.01, + "learning_rate": 4.998559529217545e-05, + "loss": 1.1613, + "step": 10905 + }, + { + "epoch": 0.01, + "learning_rate": 4.998559262600412e-05, + "loss": 1.0327, + "step": 10906 + }, + { + "epoch": 0.01, + "learning_rate": 4.998558995958614e-05, + "loss": 1.1118, + "step": 10907 + }, + { + "epoch": 0.01, + "learning_rate": 4.998558729292151e-05, + "loss": 0.9098, + "step": 10908 + }, + { + "epoch": 0.01, + "learning_rate": 4.998558462601024e-05, + "loss": 1.2527, + "step": 10909 + }, + { + "epoch": 0.01, + "learning_rate": 4.998558195885232e-05, + "loss": 1.0318, + "step": 10910 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985579291447754e-05, + "loss": 1.4081, + "step": 10911 + }, + { + "epoch": 0.01, + "learning_rate": 4.998557662379654e-05, + "loss": 1.3483, + "step": 10912 + }, + { + "epoch": 0.01, + "learning_rate": 4.998557395589868e-05, + "loss": 1.1597, + "step": 10913 + }, + { + "epoch": 0.01, + "learning_rate": 4.998557128775417e-05, + "loss": 1.2172, + "step": 10914 + }, + { + "epoch": 0.01, + "learning_rate": 4.998556861936301e-05, + "loss": 1.0755, + "step": 10915 + }, + { + "epoch": 0.01, + "learning_rate": 4.998556595072521e-05, + "loss": 0.8813, + "step": 10916 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985563281840764e-05, + "loss": 1.2554, + "step": 10917 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985560612709667e-05, + "loss": 0.6499, + "step": 10918 + }, + { + "epoch": 0.01, + "learning_rate": 4.998555794333192e-05, + "loss": 0.3899, + "step": 10919 + }, + { + "epoch": 0.01, + "learning_rate": 4.998555527370753e-05, + "loss": 0.3813, + "step": 10920 + }, + { + "epoch": 0.01, + "learning_rate": 4.998555260383649e-05, + "loss": 0.4494, + "step": 10921 + }, + { + "epoch": 0.01, + "learning_rate": 4.998554993371881e-05, + "loss": 0.447, + "step": 10922 + }, + { + "epoch": 0.01, + "learning_rate": 4.998554726335448e-05, + "loss": 1.1859, + "step": 10923 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985544592743504e-05, + "loss": 0.9167, + "step": 10924 + }, + { + "epoch": 0.01, + "learning_rate": 4.998554192188588e-05, + "loss": 0.8268, + "step": 10925 + }, + { + "epoch": 0.01, + "learning_rate": 4.998553925078161e-05, + "loss": 0.9506, + "step": 10926 + }, + { + "epoch": 0.01, + "learning_rate": 4.998553657943069e-05, + "loss": 1.0431, + "step": 10927 + }, + { + "epoch": 0.01, + "learning_rate": 4.998553390783313e-05, + "loss": 0.9283, + "step": 10928 + }, + { + "epoch": 0.01, + "learning_rate": 4.998553123598891e-05, + "loss": 1.0289, + "step": 10929 + }, + { + "epoch": 0.01, + "learning_rate": 4.998552856389806e-05, + "loss": 1.1909, + "step": 10930 + }, + { + "epoch": 0.01, + "learning_rate": 4.998552589156055e-05, + "loss": 1.4488, + "step": 10931 + }, + { + "epoch": 0.01, + "learning_rate": 4.99855232189764e-05, + "loss": 1.6222, + "step": 10932 + }, + { + "epoch": 0.01, + "learning_rate": 4.998552054614561e-05, + "loss": 1.1969, + "step": 10933 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985517873068156e-05, + "loss": 0.8274, + "step": 10934 + }, + { + "epoch": 0.01, + "learning_rate": 4.998551519974407e-05, + "loss": 1.3114, + "step": 10935 + }, + { + "epoch": 0.01, + "learning_rate": 4.998551252617333e-05, + "loss": 0.9761, + "step": 10936 + }, + { + "epoch": 0.01, + "learning_rate": 4.998550985235595e-05, + "loss": 1.1793, + "step": 10937 + }, + { + "epoch": 0.01, + "learning_rate": 4.998550717829192e-05, + "loss": 0.4267, + "step": 10938 + }, + { + "epoch": 0.01, + "learning_rate": 4.998550450398124e-05, + "loss": 0.2122, + "step": 10939 + }, + { + "epoch": 0.01, + "learning_rate": 4.998550182942392e-05, + "loss": 0.2233, + "step": 10940 + }, + { + "epoch": 0.01, + "learning_rate": 4.998549915461995e-05, + "loss": 0.1119, + "step": 10941 + }, + { + "epoch": 0.01, + "learning_rate": 4.998549647956934e-05, + "loss": 0.0817, + "step": 10942 + }, + { + "epoch": 0.01, + "learning_rate": 4.998549380427208e-05, + "loss": 0.1277, + "step": 10943 + }, + { + "epoch": 0.01, + "learning_rate": 4.998549112872817e-05, + "loss": 0.0484, + "step": 10944 + }, + { + "epoch": 0.01, + "learning_rate": 4.998548845293761e-05, + "loss": 0.0294, + "step": 10945 + }, + { + "epoch": 0.01, + "learning_rate": 4.998548577690041e-05, + "loss": 0.2159, + "step": 10946 + }, + { + "epoch": 0.01, + "learning_rate": 4.998548310061656e-05, + "loss": 1.1548, + "step": 10947 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985480424086076e-05, + "loss": 1.1134, + "step": 10948 + }, + { + "epoch": 0.01, + "learning_rate": 4.998547774730893e-05, + "loss": 0.8281, + "step": 10949 + }, + { + "epoch": 0.01, + "learning_rate": 4.998547507028515e-05, + "loss": 1.2531, + "step": 10950 + }, + { + "epoch": 0.01, + "learning_rate": 4.998547239301472e-05, + "loss": 0.9875, + "step": 10951 + }, + { + "epoch": 0.01, + "learning_rate": 4.998546971549764e-05, + "loss": 0.9606, + "step": 10952 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985467037733916e-05, + "loss": 0.5359, + "step": 10953 + }, + { + "epoch": 0.01, + "learning_rate": 4.998546435972354e-05, + "loss": 0.8507, + "step": 10954 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985461681466525e-05, + "loss": 1.152, + "step": 10955 + }, + { + "epoch": 0.01, + "learning_rate": 4.998545900296287e-05, + "loss": 1.2994, + "step": 10956 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985456324212564e-05, + "loss": 1.015, + "step": 10957 + }, + { + "epoch": 0.01, + "learning_rate": 4.998545364521561e-05, + "loss": 1.0323, + "step": 10958 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985450965972014e-05, + "loss": 1.0276, + "step": 10959 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985448286481764e-05, + "loss": 0.9741, + "step": 10960 + }, + { + "epoch": 0.01, + "learning_rate": 4.998544560674487e-05, + "loss": 0.5701, + "step": 10961 + }, + { + "epoch": 0.01, + "learning_rate": 4.998544292676134e-05, + "loss": 1.1095, + "step": 10962 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985440246531155e-05, + "loss": 1.0364, + "step": 10963 + }, + { + "epoch": 0.01, + "learning_rate": 4.998543756605433e-05, + "loss": 1.1118, + "step": 10964 + }, + { + "epoch": 0.01, + "learning_rate": 4.998543488533086e-05, + "loss": 1.0536, + "step": 10965 + }, + { + "epoch": 0.01, + "learning_rate": 4.998543220436074e-05, + "loss": 1.1477, + "step": 10966 + }, + { + "epoch": 0.01, + "learning_rate": 4.998542952314398e-05, + "loss": 1.2684, + "step": 10967 + }, + { + "epoch": 0.01, + "learning_rate": 4.998542684168056e-05, + "loss": 1.2744, + "step": 10968 + }, + { + "epoch": 0.01, + "learning_rate": 4.99854241599705e-05, + "loss": 1.2041, + "step": 10969 + }, + { + "epoch": 0.01, + "learning_rate": 4.99854214780138e-05, + "loss": 1.0253, + "step": 10970 + }, + { + "epoch": 0.01, + "learning_rate": 4.998541879581046e-05, + "loss": 0.9462, + "step": 10971 + }, + { + "epoch": 0.01, + "learning_rate": 4.998541611336047e-05, + "loss": 1.1174, + "step": 10972 + }, + { + "epoch": 0.01, + "learning_rate": 4.998541343066383e-05, + "loss": 0.7509, + "step": 10973 + }, + { + "epoch": 0.01, + "learning_rate": 4.998541074772055e-05, + "loss": 1.1098, + "step": 10974 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985408064530615e-05, + "loss": 1.0753, + "step": 10975 + }, + { + "epoch": 0.01, + "learning_rate": 4.998540538109404e-05, + "loss": 0.9133, + "step": 10976 + }, + { + "epoch": 0.01, + "learning_rate": 4.998540269741082e-05, + "loss": 1.1574, + "step": 10977 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985400013480954e-05, + "loss": 1.0544, + "step": 10978 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985397329304444e-05, + "loss": 0.5128, + "step": 10979 + }, + { + "epoch": 0.01, + "learning_rate": 4.998539464488129e-05, + "loss": 0.4893, + "step": 10980 + }, + { + "epoch": 0.01, + "learning_rate": 4.998539196021149e-05, + "loss": 1.188, + "step": 10981 + }, + { + "epoch": 0.01, + "learning_rate": 4.998538927529504e-05, + "loss": 1.3035, + "step": 10982 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985386590131946e-05, + "loss": 1.0466, + "step": 10983 + }, + { + "epoch": 0.01, + "learning_rate": 4.998538390472222e-05, + "loss": 1.345, + "step": 10984 + }, + { + "epoch": 0.01, + "learning_rate": 4.998538121906583e-05, + "loss": 0.963, + "step": 10985 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985378533162806e-05, + "loss": 1.0332, + "step": 10986 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985375847013136e-05, + "loss": 0.6341, + "step": 10987 + }, + { + "epoch": 0.01, + "learning_rate": 4.998537316061681e-05, + "loss": 1.2144, + "step": 10988 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985370473973856e-05, + "loss": 1.1795, + "step": 10989 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985367787084245e-05, + "loss": 0.9995, + "step": 10990 + }, + { + "epoch": 0.01, + "learning_rate": 4.998536509994799e-05, + "loss": 1.219, + "step": 10991 + }, + { + "epoch": 0.01, + "learning_rate": 4.99853624125651e-05, + "loss": 1.206, + "step": 10992 + }, + { + "epoch": 0.01, + "learning_rate": 4.998535972493555e-05, + "loss": 1.2541, + "step": 10993 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985357037059365e-05, + "loss": 2.1262, + "step": 10994 + }, + { + "epoch": 0.01, + "learning_rate": 4.998535434893653e-05, + "loss": 1.2823, + "step": 10995 + }, + { + "epoch": 0.01, + "learning_rate": 4.998535166056706e-05, + "loss": 0.9986, + "step": 10996 + }, + { + "epoch": 0.01, + "learning_rate": 4.998534897195093e-05, + "loss": 1.1551, + "step": 10997 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985346283088165e-05, + "loss": 1.1561, + "step": 10998 + }, + { + "epoch": 0.01, + "learning_rate": 4.998534359397875e-05, + "loss": 1.1731, + "step": 10999 + }, + { + "epoch": 0.01, + "learning_rate": 4.99853409046227e-05, + "loss": 1.0143, + "step": 11000 + }, + { + "epoch": 0.01, + "eval_loss": 1.0610970258712769, + "eval_runtime": 86.5342, + "eval_samples_per_second": 16.005, + "eval_steps_per_second": 4.01, + "step": 11000 + }, + { + "epoch": 0.01, + "learning_rate": 4.998533821501999e-05, + "loss": 1.0851, + "step": 11001 + }, + { + "epoch": 0.01, + "learning_rate": 4.998533552517065e-05, + "loss": 1.0072, + "step": 11002 + }, + { + "epoch": 0.01, + "learning_rate": 4.998533283507466e-05, + "loss": 1.0418, + "step": 11003 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985330144732024e-05, + "loss": 0.8995, + "step": 11004 + }, + { + "epoch": 0.01, + "learning_rate": 4.998532745414274e-05, + "loss": 0.4762, + "step": 11005 + }, + { + "epoch": 0.01, + "learning_rate": 4.998532476330682e-05, + "loss": 1.2354, + "step": 11006 + }, + { + "epoch": 0.01, + "learning_rate": 4.998532207222425e-05, + "loss": 1.5269, + "step": 11007 + }, + { + "epoch": 0.01, + "learning_rate": 4.998531938089503e-05, + "loss": 1.3113, + "step": 11008 + }, + { + "epoch": 0.01, + "learning_rate": 4.998531668931918e-05, + "loss": 1.17, + "step": 11009 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985313997496674e-05, + "loss": 1.1982, + "step": 11010 + }, + { + "epoch": 0.01, + "learning_rate": 4.998531130542753e-05, + "loss": 0.9628, + "step": 11011 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985308613111733e-05, + "loss": 1.1221, + "step": 11012 + }, + { + "epoch": 0.01, + "learning_rate": 4.99853059205493e-05, + "loss": 1.0852, + "step": 11013 + }, + { + "epoch": 0.01, + "learning_rate": 4.998530322774022e-05, + "loss": 1.0638, + "step": 11014 + }, + { + "epoch": 0.01, + "learning_rate": 4.998530053468449e-05, + "loss": 1.4711, + "step": 11015 + }, + { + "epoch": 0.01, + "learning_rate": 4.998529784138212e-05, + "loss": 1.0756, + "step": 11016 + }, + { + "epoch": 0.01, + "learning_rate": 4.998529514783311e-05, + "loss": 1.2878, + "step": 11017 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985292454037445e-05, + "loss": 1.5491, + "step": 11018 + }, + { + "epoch": 0.01, + "learning_rate": 4.998528975999515e-05, + "loss": 1.0756, + "step": 11019 + }, + { + "epoch": 0.01, + "learning_rate": 4.99852870657062e-05, + "loss": 1.0738, + "step": 11020 + }, + { + "epoch": 0.01, + "learning_rate": 4.998528437117061e-05, + "loss": 0.9079, + "step": 11021 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985281676388374e-05, + "loss": 1.3341, + "step": 11022 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985278981359495e-05, + "loss": 1.7209, + "step": 11023 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985276286083974e-05, + "loss": 1.4991, + "step": 11024 + }, + { + "epoch": 0.01, + "learning_rate": 4.99852735905618e-05, + "loss": 0.9747, + "step": 11025 + }, + { + "epoch": 0.01, + "learning_rate": 4.998527089479299e-05, + "loss": 1.0822, + "step": 11026 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985268198777535e-05, + "loss": 1.0989, + "step": 11027 + }, + { + "epoch": 0.01, + "learning_rate": 4.998526550251544e-05, + "loss": 0.8828, + "step": 11028 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985262806006696e-05, + "loss": 0.5451, + "step": 11029 + }, + { + "epoch": 0.01, + "learning_rate": 4.998526010925131e-05, + "loss": 1.0593, + "step": 11030 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985257412249275e-05, + "loss": 1.2572, + "step": 11031 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985254715000594e-05, + "loss": 1.1468, + "step": 11032 + }, + { + "epoch": 0.01, + "learning_rate": 4.998525201750528e-05, + "loss": 1.3098, + "step": 11033 + }, + { + "epoch": 0.01, + "learning_rate": 4.998524931976332e-05, + "loss": 1.1122, + "step": 11034 + }, + { + "epoch": 0.01, + "learning_rate": 4.998524662177471e-05, + "loss": 0.8548, + "step": 11035 + }, + { + "epoch": 0.01, + "learning_rate": 4.998524392353946e-05, + "loss": 0.9925, + "step": 11036 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985241225057565e-05, + "loss": 1.0917, + "step": 11037 + }, + { + "epoch": 0.01, + "learning_rate": 4.998523852632903e-05, + "loss": 1.0632, + "step": 11038 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985235827353844e-05, + "loss": 1.0606, + "step": 11039 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985233128132016e-05, + "loss": 0.9986, + "step": 11040 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985230428663546e-05, + "loss": 1.0103, + "step": 11041 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985227728948434e-05, + "loss": 0.67, + "step": 11042 + }, + { + "epoch": 0.01, + "learning_rate": 4.998522502898668e-05, + "loss": 1.176, + "step": 11043 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985222328778275e-05, + "loss": 1.2349, + "step": 11044 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985219628323235e-05, + "loss": 1.3533, + "step": 11045 + }, + { + "epoch": 0.01, + "learning_rate": 4.998521692762154e-05, + "loss": 1.4718, + "step": 11046 + }, + { + "epoch": 0.01, + "learning_rate": 4.998521422667321e-05, + "loss": 0.7743, + "step": 11047 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985211525478235e-05, + "loss": 1.2536, + "step": 11048 + }, + { + "epoch": 0.01, + "learning_rate": 4.998520882403662e-05, + "loss": 1.1083, + "step": 11049 + }, + { + "epoch": 0.01, + "learning_rate": 4.998520612234835e-05, + "loss": 1.0355, + "step": 11050 + }, + { + "epoch": 0.01, + "learning_rate": 4.998520342041345e-05, + "loss": 1.0358, + "step": 11051 + }, + { + "epoch": 0.01, + "learning_rate": 4.998520071823191e-05, + "loss": 0.9751, + "step": 11052 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985198015803715e-05, + "loss": 0.9187, + "step": 11053 + }, + { + "epoch": 0.01, + "learning_rate": 4.998519531312887e-05, + "loss": 1.0517, + "step": 11054 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985192610207396e-05, + "loss": 1.2088, + "step": 11055 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985189907039276e-05, + "loss": 1.1475, + "step": 11056 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985187203624506e-05, + "loss": 0.7752, + "step": 11057 + }, + { + "epoch": 0.01, + "learning_rate": 4.99851844999631e-05, + "loss": 0.8341, + "step": 11058 + }, + { + "epoch": 0.01, + "learning_rate": 4.998518179605505e-05, + "loss": 0.8604, + "step": 11059 + }, + { + "epoch": 0.01, + "learning_rate": 4.998517909190035e-05, + "loss": 0.9871, + "step": 11060 + }, + { + "epoch": 0.01, + "learning_rate": 4.998517638749901e-05, + "loss": 0.514, + "step": 11061 + }, + { + "epoch": 0.01, + "learning_rate": 4.998517368285103e-05, + "loss": 1.3165, + "step": 11062 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985170977956406e-05, + "loss": 1.1155, + "step": 11063 + }, + { + "epoch": 0.01, + "learning_rate": 4.998516827281514e-05, + "loss": 0.7402, + "step": 11064 + }, + { + "epoch": 0.01, + "learning_rate": 4.998516556742723e-05, + "loss": 0.7353, + "step": 11065 + }, + { + "epoch": 0.01, + "learning_rate": 4.998516286179268e-05, + "loss": 0.6377, + "step": 11066 + }, + { + "epoch": 0.01, + "learning_rate": 4.998516015591148e-05, + "loss": 0.6585, + "step": 11067 + }, + { + "epoch": 0.01, + "learning_rate": 4.998515744978364e-05, + "loss": 0.6662, + "step": 11068 + }, + { + "epoch": 0.01, + "learning_rate": 4.998515474340916e-05, + "loss": 0.6446, + "step": 11069 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985152036788036e-05, + "loss": 0.6433, + "step": 11070 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985149329920266e-05, + "loss": 0.6352, + "step": 11071 + }, + { + "epoch": 0.01, + "learning_rate": 4.998514662280585e-05, + "loss": 0.6851, + "step": 11072 + }, + { + "epoch": 0.01, + "learning_rate": 4.99851439154448e-05, + "loss": 0.6676, + "step": 11073 + }, + { + "epoch": 0.01, + "learning_rate": 4.998514120783711e-05, + "loss": 0.6724, + "step": 11074 + }, + { + "epoch": 0.01, + "learning_rate": 4.998513849998277e-05, + "loss": 0.6671, + "step": 11075 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985135791881784e-05, + "loss": 0.7058, + "step": 11076 + }, + { + "epoch": 0.01, + "learning_rate": 4.998513308353416e-05, + "loss": 0.7303, + "step": 11077 + }, + { + "epoch": 0.01, + "learning_rate": 4.998513037493989e-05, + "loss": 0.7453, + "step": 11078 + }, + { + "epoch": 0.01, + "learning_rate": 4.998512766609898e-05, + "loss": 0.8886, + "step": 11079 + }, + { + "epoch": 0.01, + "learning_rate": 4.998512495701142e-05, + "loss": 1.077, + "step": 11080 + }, + { + "epoch": 0.01, + "learning_rate": 4.998512224767723e-05, + "loss": 1.3381, + "step": 11081 + }, + { + "epoch": 0.01, + "learning_rate": 4.998511953809639e-05, + "loss": 1.021, + "step": 11082 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985116828268905e-05, + "loss": 1.1992, + "step": 11083 + }, + { + "epoch": 0.01, + "learning_rate": 4.998511411819479e-05, + "loss": 1.2632, + "step": 11084 + }, + { + "epoch": 0.01, + "learning_rate": 4.998511140787402e-05, + "loss": 0.8558, + "step": 11085 + }, + { + "epoch": 0.01, + "learning_rate": 4.998510869730662e-05, + "loss": 0.8319, + "step": 11086 + }, + { + "epoch": 0.01, + "learning_rate": 4.998510598649256e-05, + "loss": 0.8487, + "step": 11087 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985103275431863e-05, + "loss": 1.1844, + "step": 11088 + }, + { + "epoch": 0.01, + "learning_rate": 4.998510056412453e-05, + "loss": 1.1179, + "step": 11089 + }, + { + "epoch": 0.01, + "learning_rate": 4.998509785257056e-05, + "loss": 1.1867, + "step": 11090 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985095140769933e-05, + "loss": 1.2414, + "step": 11091 + }, + { + "epoch": 0.01, + "learning_rate": 4.998509242872267e-05, + "loss": 1.1789, + "step": 11092 + }, + { + "epoch": 0.01, + "learning_rate": 4.998508971642877e-05, + "loss": 1.3805, + "step": 11093 + }, + { + "epoch": 0.01, + "learning_rate": 4.998508700388822e-05, + "loss": 0.9683, + "step": 11094 + }, + { + "epoch": 0.01, + "learning_rate": 4.998508429110103e-05, + "loss": 1.2478, + "step": 11095 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985081578067194e-05, + "loss": 1.7467, + "step": 11096 + }, + { + "epoch": 0.01, + "learning_rate": 4.998507886478673e-05, + "loss": 1.1114, + "step": 11097 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985076151259604e-05, + "loss": 0.9845, + "step": 11098 + }, + { + "epoch": 0.01, + "learning_rate": 4.998507343748585e-05, + "loss": 0.4566, + "step": 11099 + }, + { + "epoch": 0.01, + "learning_rate": 4.998507072346545e-05, + "loss": 0.8858, + "step": 11100 + }, + { + "epoch": 0.01, + "learning_rate": 4.998506800919841e-05, + "loss": 1.0575, + "step": 11101 + }, + { + "epoch": 0.01, + "learning_rate": 4.998506529468472e-05, + "loss": 1.1572, + "step": 11102 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985062579924393e-05, + "loss": 1.1657, + "step": 11103 + }, + { + "epoch": 0.01, + "learning_rate": 4.998505986491743e-05, + "loss": 0.8033, + "step": 11104 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985057149663816e-05, + "loss": 0.7907, + "step": 11105 + }, + { + "epoch": 0.01, + "learning_rate": 4.998505443416357e-05, + "loss": 1.0515, + "step": 11106 + }, + { + "epoch": 0.01, + "learning_rate": 4.998505171841667e-05, + "loss": 1.2635, + "step": 11107 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985049002423135e-05, + "loss": 1.2101, + "step": 11108 + }, + { + "epoch": 0.01, + "learning_rate": 4.998504628618296e-05, + "loss": 1.2433, + "step": 11109 + }, + { + "epoch": 0.01, + "learning_rate": 4.998504356969613e-05, + "loss": 1.0422, + "step": 11110 + }, + { + "epoch": 0.01, + "learning_rate": 4.998504085296267e-05, + "loss": 1.072, + "step": 11111 + }, + { + "epoch": 0.01, + "learning_rate": 4.998503813598256e-05, + "loss": 1.0027, + "step": 11112 + }, + { + "epoch": 0.01, + "learning_rate": 4.998503541875582e-05, + "loss": 1.4205, + "step": 11113 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985032701282434e-05, + "loss": 1.4059, + "step": 11114 + }, + { + "epoch": 0.01, + "learning_rate": 4.99850299835624e-05, + "loss": 1.1584, + "step": 11115 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985027265595736e-05, + "loss": 1.2866, + "step": 11116 + }, + { + "epoch": 0.01, + "learning_rate": 4.998502454738242e-05, + "loss": 1.1668, + "step": 11117 + }, + { + "epoch": 0.01, + "learning_rate": 4.998502182892247e-05, + "loss": 1.2536, + "step": 11118 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985019110215875e-05, + "loss": 1.6954, + "step": 11119 + }, + { + "epoch": 0.01, + "learning_rate": 4.998501639126264e-05, + "loss": 1.1413, + "step": 11120 + }, + { + "epoch": 0.01, + "learning_rate": 4.998501367206276e-05, + "loss": 0.9921, + "step": 11121 + }, + { + "epoch": 0.01, + "learning_rate": 4.998501095261624e-05, + "loss": 0.9011, + "step": 11122 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985008232923075e-05, + "loss": 1.3716, + "step": 11123 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985005512983275e-05, + "loss": 1.0429, + "step": 11124 + }, + { + "epoch": 0.01, + "learning_rate": 4.9985002792796834e-05, + "loss": 3.123, + "step": 11125 + }, + { + "epoch": 0.01, + "learning_rate": 4.998500007236374e-05, + "loss": 2.4726, + "step": 11126 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984997351684016e-05, + "loss": 0.9741, + "step": 11127 + }, + { + "epoch": 0.01, + "learning_rate": 4.998499463075764e-05, + "loss": 0.3929, + "step": 11128 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984991909584635e-05, + "loss": 1.1812, + "step": 11129 + }, + { + "epoch": 0.01, + "learning_rate": 4.998498918816499e-05, + "loss": 1.3151, + "step": 11130 + }, + { + "epoch": 0.01, + "learning_rate": 4.998498646649869e-05, + "loss": 1.2314, + "step": 11131 + }, + { + "epoch": 0.01, + "learning_rate": 4.998498374458576e-05, + "loss": 1.1442, + "step": 11132 + }, + { + "epoch": 0.01, + "learning_rate": 4.998498102242618e-05, + "loss": 1.1083, + "step": 11133 + }, + { + "epoch": 0.01, + "learning_rate": 4.998497830001997e-05, + "loss": 0.9796, + "step": 11134 + }, + { + "epoch": 0.01, + "learning_rate": 4.998497557736711e-05, + "loss": 1.0991, + "step": 11135 + }, + { + "epoch": 0.01, + "learning_rate": 4.998497285446761e-05, + "loss": 1.0666, + "step": 11136 + }, + { + "epoch": 0.01, + "learning_rate": 4.998497013132147e-05, + "loss": 1.2087, + "step": 11137 + }, + { + "epoch": 0.01, + "learning_rate": 4.998496740792868e-05, + "loss": 1.1696, + "step": 11138 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984964684289266e-05, + "loss": 1.259, + "step": 11139 + }, + { + "epoch": 0.01, + "learning_rate": 4.99849619604032e-05, + "loss": 1.1692, + "step": 11140 + }, + { + "epoch": 0.01, + "learning_rate": 4.99849592362705e-05, + "loss": 1.1599, + "step": 11141 + }, + { + "epoch": 0.01, + "learning_rate": 4.998495651189116e-05, + "loss": 1.1001, + "step": 11142 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984953787265166e-05, + "loss": 1.228, + "step": 11143 + }, + { + "epoch": 0.01, + "learning_rate": 4.998495106239254e-05, + "loss": 0.9668, + "step": 11144 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984948337273274e-05, + "loss": 0.8809, + "step": 11145 + }, + { + "epoch": 0.01, + "learning_rate": 4.998494561190736e-05, + "loss": 1.2722, + "step": 11146 + }, + { + "epoch": 0.01, + "learning_rate": 4.998494288629482e-05, + "loss": 1.1455, + "step": 11147 + }, + { + "epoch": 0.01, + "learning_rate": 4.998494016043562e-05, + "loss": 0.994, + "step": 11148 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984937434329796e-05, + "loss": 1.2156, + "step": 11149 + }, + { + "epoch": 0.01, + "learning_rate": 4.998493470797731e-05, + "loss": 1.0701, + "step": 11150 + }, + { + "epoch": 0.01, + "learning_rate": 4.99849319813782e-05, + "loss": 1.0988, + "step": 11151 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984929254532454e-05, + "loss": 0.8265, + "step": 11152 + }, + { + "epoch": 0.01, + "learning_rate": 4.998492652744006e-05, + "loss": 1.4351, + "step": 11153 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984923800101026e-05, + "loss": 0.8209, + "step": 11154 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984921072515345e-05, + "loss": 1.0785, + "step": 11155 + }, + { + "epoch": 0.01, + "learning_rate": 4.998491834468303e-05, + "loss": 1.0431, + "step": 11156 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984915616604076e-05, + "loss": 1.0766, + "step": 11157 + }, + { + "epoch": 0.01, + "learning_rate": 4.998491288827848e-05, + "loss": 1.1995, + "step": 11158 + }, + { + "epoch": 0.01, + "learning_rate": 4.998491015970624e-05, + "loss": 0.9651, + "step": 11159 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984907430887363e-05, + "loss": 0.7261, + "step": 11160 + }, + { + "epoch": 0.01, + "learning_rate": 4.998490470182184e-05, + "loss": 1.0632, + "step": 11161 + }, + { + "epoch": 0.01, + "learning_rate": 4.998490197250968e-05, + "loss": 1.1548, + "step": 11162 + }, + { + "epoch": 0.01, + "learning_rate": 4.998489924295088e-05, + "loss": 1.144, + "step": 11163 + }, + { + "epoch": 0.01, + "learning_rate": 4.998489651314544e-05, + "loss": 1.1877, + "step": 11164 + }, + { + "epoch": 0.01, + "learning_rate": 4.998489378309337e-05, + "loss": 0.7849, + "step": 11165 + }, + { + "epoch": 0.01, + "learning_rate": 4.998489105279464e-05, + "loss": 1.0305, + "step": 11166 + }, + { + "epoch": 0.01, + "learning_rate": 4.998488832224928e-05, + "loss": 1.0139, + "step": 11167 + }, + { + "epoch": 0.01, + "learning_rate": 4.998488559145728e-05, + "loss": 1.0266, + "step": 11168 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984882860418637e-05, + "loss": 1.2371, + "step": 11169 + }, + { + "epoch": 0.01, + "learning_rate": 4.998488012913336e-05, + "loss": 1.2292, + "step": 11170 + }, + { + "epoch": 0.01, + "learning_rate": 4.998487739760144e-05, + "loss": 1.25, + "step": 11171 + }, + { + "epoch": 0.01, + "learning_rate": 4.998487466582287e-05, + "loss": 1.1316, + "step": 11172 + }, + { + "epoch": 0.01, + "learning_rate": 4.998487193379767e-05, + "loss": 1.1257, + "step": 11173 + }, + { + "epoch": 0.01, + "learning_rate": 4.998486920152583e-05, + "loss": 0.9874, + "step": 11174 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984866469007344e-05, + "loss": 0.9263, + "step": 11175 + }, + { + "epoch": 0.01, + "learning_rate": 4.998486373624222e-05, + "loss": 1.2612, + "step": 11176 + }, + { + "epoch": 0.01, + "learning_rate": 4.998486100323045e-05, + "loss": 1.3142, + "step": 11177 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984858269972055e-05, + "loss": 1.019, + "step": 11178 + }, + { + "epoch": 0.01, + "learning_rate": 4.998485553646701e-05, + "loss": 0.9364, + "step": 11179 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984852802715335e-05, + "loss": 0.7494, + "step": 11180 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984850068717004e-05, + "loss": 0.9415, + "step": 11181 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984847334472045e-05, + "loss": 1.0966, + "step": 11182 + }, + { + "epoch": 0.01, + "learning_rate": 4.998484459998044e-05, + "loss": 1.0315, + "step": 11183 + }, + { + "epoch": 0.01, + "learning_rate": 4.99848418652422e-05, + "loss": 1.1631, + "step": 11184 + }, + { + "epoch": 0.01, + "learning_rate": 4.998483913025732e-05, + "loss": 1.348, + "step": 11185 + }, + { + "epoch": 0.01, + "learning_rate": 4.998483639502579e-05, + "loss": 1.2972, + "step": 11186 + }, + { + "epoch": 0.01, + "learning_rate": 4.998483365954764e-05, + "loss": 0.8458, + "step": 11187 + }, + { + "epoch": 0.01, + "learning_rate": 4.998483092382283e-05, + "loss": 0.9921, + "step": 11188 + }, + { + "epoch": 0.01, + "learning_rate": 4.998482818785139e-05, + "loss": 1.1262, + "step": 11189 + }, + { + "epoch": 0.01, + "learning_rate": 4.998482545163331e-05, + "loss": 0.9357, + "step": 11190 + }, + { + "epoch": 0.01, + "learning_rate": 4.998482271516859e-05, + "loss": 1.1249, + "step": 11191 + }, + { + "epoch": 0.01, + "learning_rate": 4.998481997845723e-05, + "loss": 0.9263, + "step": 11192 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984817241499227e-05, + "loss": 1.1577, + "step": 11193 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984814504294585e-05, + "loss": 1.2202, + "step": 11194 + }, + { + "epoch": 0.01, + "learning_rate": 4.998481176684331e-05, + "loss": 1.1899, + "step": 11195 + }, + { + "epoch": 0.01, + "learning_rate": 4.998480902914539e-05, + "loss": 1.3262, + "step": 11196 + }, + { + "epoch": 0.01, + "learning_rate": 4.998480629120084e-05, + "loss": 1.3787, + "step": 11197 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984803553009635e-05, + "loss": 1.1495, + "step": 11198 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984800814571795e-05, + "loss": 1.1057, + "step": 11199 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984798075887326e-05, + "loss": 1.1455, + "step": 11200 + }, + { + "epoch": 0.01, + "learning_rate": 4.99847953369562e-05, + "loss": 1.1896, + "step": 11201 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984792597778453e-05, + "loss": 1.0, + "step": 11202 + }, + { + "epoch": 0.01, + "learning_rate": 4.998478985835405e-05, + "loss": 1.0723, + "step": 11203 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984787118683025e-05, + "loss": 1.1118, + "step": 11204 + }, + { + "epoch": 0.01, + "learning_rate": 4.998478437876535e-05, + "loss": 1.128, + "step": 11205 + }, + { + "epoch": 0.01, + "learning_rate": 4.998478163860104e-05, + "loss": 1.2559, + "step": 11206 + }, + { + "epoch": 0.01, + "learning_rate": 4.998477889819009e-05, + "loss": 1.1666, + "step": 11207 + }, + { + "epoch": 0.01, + "learning_rate": 4.99847761575325e-05, + "loss": 1.0831, + "step": 11208 + }, + { + "epoch": 0.01, + "learning_rate": 4.998477341662826e-05, + "loss": 1.1202, + "step": 11209 + }, + { + "epoch": 0.01, + "learning_rate": 4.99847706754774e-05, + "loss": 0.7362, + "step": 11210 + }, + { + "epoch": 0.01, + "learning_rate": 4.998476793407989e-05, + "loss": 0.8456, + "step": 11211 + }, + { + "epoch": 0.01, + "learning_rate": 4.998476519243574e-05, + "loss": 0.4883, + "step": 11212 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984762450544955e-05, + "loss": 1.166, + "step": 11213 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984759708407526e-05, + "loss": 1.0985, + "step": 11214 + }, + { + "epoch": 0.01, + "learning_rate": 4.998475696602346e-05, + "loss": 0.7811, + "step": 11215 + }, + { + "epoch": 0.01, + "learning_rate": 4.998475422339276e-05, + "loss": 1.166, + "step": 11216 + }, + { + "epoch": 0.01, + "learning_rate": 4.998475148051542e-05, + "loss": 1.0113, + "step": 11217 + }, + { + "epoch": 0.01, + "learning_rate": 4.998474873739144e-05, + "loss": 1.2124, + "step": 11218 + }, + { + "epoch": 0.01, + "learning_rate": 4.998474599402082e-05, + "loss": 1.0753, + "step": 11219 + }, + { + "epoch": 0.01, + "learning_rate": 4.998474325040355e-05, + "loss": 1.2656, + "step": 11220 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984740506539654e-05, + "loss": 1.1407, + "step": 11221 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984737762429125e-05, + "loss": 0.7786, + "step": 11222 + }, + { + "epoch": 0.01, + "learning_rate": 4.998473501807195e-05, + "loss": 0.963, + "step": 11223 + }, + { + "epoch": 0.01, + "learning_rate": 4.998473227346813e-05, + "loss": 1.0386, + "step": 11224 + }, + { + "epoch": 0.01, + "learning_rate": 4.998472952861768e-05, + "loss": 1.1661, + "step": 11225 + }, + { + "epoch": 0.01, + "learning_rate": 4.998472678352059e-05, + "loss": 1.217, + "step": 11226 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984724038176854e-05, + "loss": 0.7382, + "step": 11227 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984721292586485e-05, + "loss": 1.2549, + "step": 11228 + }, + { + "epoch": 0.01, + "learning_rate": 4.998471854674947e-05, + "loss": 0.9207, + "step": 11229 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984715800665825e-05, + "loss": 1.0814, + "step": 11230 + }, + { + "epoch": 0.01, + "learning_rate": 4.998471305433554e-05, + "loss": 1.1673, + "step": 11231 + }, + { + "epoch": 0.01, + "learning_rate": 4.998471030775862e-05, + "loss": 1.0175, + "step": 11232 + }, + { + "epoch": 0.01, + "learning_rate": 4.998470756093506e-05, + "loss": 1.1739, + "step": 11233 + }, + { + "epoch": 0.01, + "learning_rate": 4.998470481386486e-05, + "loss": 0.8953, + "step": 11234 + }, + { + "epoch": 0.01, + "learning_rate": 4.998470206654802e-05, + "loss": 1.0875, + "step": 11235 + }, + { + "epoch": 0.01, + "learning_rate": 4.998469931898454e-05, + "loss": 1.0619, + "step": 11236 + }, + { + "epoch": 0.01, + "learning_rate": 4.998469657117443e-05, + "loss": 0.7886, + "step": 11237 + }, + { + "epoch": 0.01, + "learning_rate": 4.998469382311767e-05, + "loss": 0.75, + "step": 11238 + }, + { + "epoch": 0.01, + "learning_rate": 4.998469107481428e-05, + "loss": 1.1156, + "step": 11239 + }, + { + "epoch": 0.01, + "learning_rate": 4.998468832626425e-05, + "loss": 1.859, + "step": 11240 + }, + { + "epoch": 0.01, + "learning_rate": 4.998468557746758e-05, + "loss": 1.4037, + "step": 11241 + }, + { + "epoch": 0.01, + "learning_rate": 4.998468282842427e-05, + "loss": 1.1006, + "step": 11242 + }, + { + "epoch": 0.01, + "learning_rate": 4.998468007913432e-05, + "loss": 0.8764, + "step": 11243 + }, + { + "epoch": 0.01, + "learning_rate": 4.998467732959774e-05, + "loss": 1.0423, + "step": 11244 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984674579814515e-05, + "loss": 0.9886, + "step": 11245 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984671829784656e-05, + "loss": 0.8288, + "step": 11246 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984669079508155e-05, + "loss": 1.3285, + "step": 11247 + }, + { + "epoch": 0.01, + "learning_rate": 4.998466632898502e-05, + "loss": 1.2376, + "step": 11248 + }, + { + "epoch": 0.01, + "learning_rate": 4.998466357821524e-05, + "loss": 1.1547, + "step": 11249 + }, + { + "epoch": 0.01, + "learning_rate": 4.998466082719883e-05, + "loss": 1.1883, + "step": 11250 + }, + { + "epoch": 0.01, + "learning_rate": 4.998465807593578e-05, + "loss": 0.918, + "step": 11251 + }, + { + "epoch": 0.01, + "learning_rate": 4.998465532442609e-05, + "loss": 1.1977, + "step": 11252 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984652572669765e-05, + "loss": 1.2453, + "step": 11253 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984649820666794e-05, + "loss": 1.1108, + "step": 11254 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984647068417193e-05, + "loss": 1.2474, + "step": 11255 + }, + { + "epoch": 0.01, + "learning_rate": 4.998464431592095e-05, + "loss": 1.0577, + "step": 11256 + }, + { + "epoch": 0.01, + "learning_rate": 4.998464156317807e-05, + "loss": 1.0733, + "step": 11257 + }, + { + "epoch": 0.01, + "learning_rate": 4.998463881018856e-05, + "loss": 0.9653, + "step": 11258 + }, + { + "epoch": 0.01, + "learning_rate": 4.99846360569524e-05, + "loss": 1.0213, + "step": 11259 + }, + { + "epoch": 0.01, + "learning_rate": 4.998463330346961e-05, + "loss": 0.9565, + "step": 11260 + }, + { + "epoch": 0.01, + "learning_rate": 4.998463054974018e-05, + "loss": 1.2118, + "step": 11261 + }, + { + "epoch": 0.01, + "learning_rate": 4.998462779576412e-05, + "loss": 1.1782, + "step": 11262 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984625041541414e-05, + "loss": 1.181, + "step": 11263 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984622287072066e-05, + "loss": 1.0004, + "step": 11264 + }, + { + "epoch": 0.01, + "learning_rate": 4.998461953235608e-05, + "loss": 1.2491, + "step": 11265 + }, + { + "epoch": 0.01, + "learning_rate": 4.998461677739347e-05, + "loss": 1.4297, + "step": 11266 + }, + { + "epoch": 0.01, + "learning_rate": 4.998461402218421e-05, + "loss": 1.1271, + "step": 11267 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984611266728316e-05, + "loss": 1.1888, + "step": 11268 + }, + { + "epoch": 0.01, + "learning_rate": 4.998460851102578e-05, + "loss": 1.2104, + "step": 11269 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984605755076614e-05, + "loss": 1.0469, + "step": 11270 + }, + { + "epoch": 0.01, + "learning_rate": 4.998460299888081e-05, + "loss": 1.071, + "step": 11271 + }, + { + "epoch": 0.01, + "learning_rate": 4.998460024243836e-05, + "loss": 1.1764, + "step": 11272 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984597485749286e-05, + "loss": 1.5474, + "step": 11273 + }, + { + "epoch": 0.01, + "learning_rate": 4.998459472881356e-05, + "loss": 1.2111, + "step": 11274 + }, + { + "epoch": 0.01, + "learning_rate": 4.998459197163121e-05, + "loss": 1.0373, + "step": 11275 + }, + { + "epoch": 0.01, + "learning_rate": 4.998458921420222e-05, + "loss": 0.9903, + "step": 11276 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984586456526585e-05, + "loss": 0.9765, + "step": 11277 + }, + { + "epoch": 0.01, + "learning_rate": 4.998458369860432e-05, + "loss": 1.1196, + "step": 11278 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984580940435414e-05, + "loss": 1.2614, + "step": 11279 + }, + { + "epoch": 0.01, + "learning_rate": 4.998457818201987e-05, + "loss": 0.6967, + "step": 11280 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984575423357686e-05, + "loss": 0.5994, + "step": 11281 + }, + { + "epoch": 0.01, + "learning_rate": 4.998457266444887e-05, + "loss": 0.6136, + "step": 11282 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984569905293416e-05, + "loss": 0.5445, + "step": 11283 + }, + { + "epoch": 0.01, + "learning_rate": 4.998456714589133e-05, + "loss": 0.7206, + "step": 11284 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984564386242596e-05, + "loss": 0.2923, + "step": 11285 + }, + { + "epoch": 0.01, + "learning_rate": 4.998456162634722e-05, + "loss": 0.2527, + "step": 11286 + }, + { + "epoch": 0.01, + "learning_rate": 4.998455886620523e-05, + "loss": 0.1374, + "step": 11287 + }, + { + "epoch": 0.01, + "learning_rate": 4.998455610581659e-05, + "loss": 0.099, + "step": 11288 + }, + { + "epoch": 0.01, + "learning_rate": 4.998455334518131e-05, + "loss": 0.1067, + "step": 11289 + }, + { + "epoch": 0.01, + "learning_rate": 4.998455058429939e-05, + "loss": 0.0849, + "step": 11290 + }, + { + "epoch": 0.01, + "learning_rate": 4.998454782317085e-05, + "loss": 0.0381, + "step": 11291 + }, + { + "epoch": 0.01, + "learning_rate": 4.998454506179566e-05, + "loss": 0.0622, + "step": 11292 + }, + { + "epoch": 0.01, + "learning_rate": 4.998454230017383e-05, + "loss": 0.0453, + "step": 11293 + }, + { + "epoch": 0.01, + "learning_rate": 4.998453953830538e-05, + "loss": 0.2759, + "step": 11294 + }, + { + "epoch": 0.01, + "learning_rate": 4.998453677619027e-05, + "loss": 0.3704, + "step": 11295 + }, + { + "epoch": 0.01, + "learning_rate": 4.998453401382855e-05, + "loss": 0.3534, + "step": 11296 + }, + { + "epoch": 0.01, + "learning_rate": 4.998453125122017e-05, + "loss": 0.3021, + "step": 11297 + }, + { + "epoch": 0.01, + "learning_rate": 4.998452848836516e-05, + "loss": 0.2176, + "step": 11298 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984525725263515e-05, + "loss": 0.8484, + "step": 11299 + }, + { + "epoch": 0.01, + "learning_rate": 4.998452296191523e-05, + "loss": 0.9705, + "step": 11300 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984520198320314e-05, + "loss": 2.57, + "step": 11301 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984517434478756e-05, + "loss": 4.6057, + "step": 11302 + }, + { + "epoch": 0.01, + "learning_rate": 4.998451467039056e-05, + "loss": 4.4689, + "step": 11303 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984511906055733e-05, + "loss": 4.3654, + "step": 11304 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984509141474276e-05, + "loss": 4.1739, + "step": 11305 + }, + { + "epoch": 0.01, + "learning_rate": 4.998450637664617e-05, + "loss": 3.8854, + "step": 11306 + }, + { + "epoch": 0.01, + "learning_rate": 4.998450361157143e-05, + "loss": 3.6601, + "step": 11307 + }, + { + "epoch": 0.01, + "learning_rate": 4.998450084625006e-05, + "loss": 3.5853, + "step": 11308 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984498080682047e-05, + "loss": 3.4867, + "step": 11309 + }, + { + "epoch": 0.01, + "learning_rate": 4.99844953148674e-05, + "loss": 3.2333, + "step": 11310 + }, + { + "epoch": 0.01, + "learning_rate": 4.998449254880612e-05, + "loss": 3.3127, + "step": 11311 + }, + { + "epoch": 0.01, + "learning_rate": 4.998448978249819e-05, + "loss": 3.4235, + "step": 11312 + }, + { + "epoch": 0.01, + "learning_rate": 4.998448701594364e-05, + "loss": 3.0145, + "step": 11313 + }, + { + "epoch": 0.01, + "learning_rate": 4.998448424914244e-05, + "loss": 3.0155, + "step": 11314 + }, + { + "epoch": 0.01, + "learning_rate": 4.998448148209461e-05, + "loss": 2.968, + "step": 11315 + }, + { + "epoch": 0.01, + "learning_rate": 4.998447871480015e-05, + "loss": 2.445, + "step": 11316 + }, + { + "epoch": 0.01, + "learning_rate": 4.998447594725905e-05, + "loss": 0.4463, + "step": 11317 + }, + { + "epoch": 0.01, + "learning_rate": 4.998447317947131e-05, + "loss": 1.2936, + "step": 11318 + }, + { + "epoch": 0.01, + "learning_rate": 4.998447041143694e-05, + "loss": 1.6815, + "step": 11319 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984467643155924e-05, + "loss": 1.3543, + "step": 11320 + }, + { + "epoch": 0.01, + "learning_rate": 4.998446487462828e-05, + "loss": 0.8619, + "step": 11321 + }, + { + "epoch": 0.01, + "learning_rate": 4.998446210585399e-05, + "loss": 1.2308, + "step": 11322 + }, + { + "epoch": 0.01, + "learning_rate": 4.998445933683308e-05, + "loss": 1.9087, + "step": 11323 + }, + { + "epoch": 0.01, + "learning_rate": 4.998445656756552e-05, + "loss": 1.0822, + "step": 11324 + }, + { + "epoch": 0.01, + "learning_rate": 4.998445379805133e-05, + "loss": 0.9731, + "step": 11325 + }, + { + "epoch": 0.01, + "learning_rate": 4.99844510282905e-05, + "loss": 0.9626, + "step": 11326 + }, + { + "epoch": 0.01, + "learning_rate": 4.998444825828304e-05, + "loss": 1.0638, + "step": 11327 + }, + { + "epoch": 0.01, + "learning_rate": 4.998444548802894e-05, + "loss": 1.3116, + "step": 11328 + }, + { + "epoch": 0.01, + "learning_rate": 4.99844427175282e-05, + "loss": 0.529, + "step": 11329 + }, + { + "epoch": 0.01, + "learning_rate": 4.998443994678084e-05, + "loss": 1.1222, + "step": 11330 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984437175786824e-05, + "loss": 1.2004, + "step": 11331 + }, + { + "epoch": 0.01, + "learning_rate": 4.998443440454619e-05, + "loss": 1.0878, + "step": 11332 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984431633058906e-05, + "loss": 1.2296, + "step": 11333 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984428861324994e-05, + "loss": 1.2536, + "step": 11334 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984426089344446e-05, + "loss": 1.0364, + "step": 11335 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984423317117255e-05, + "loss": 1.6685, + "step": 11336 + }, + { + "epoch": 0.01, + "learning_rate": 4.998442054464344e-05, + "loss": 1.0711, + "step": 11337 + }, + { + "epoch": 0.01, + "learning_rate": 4.998441777192298e-05, + "loss": 0.8518, + "step": 11338 + }, + { + "epoch": 0.01, + "learning_rate": 4.998441499895589e-05, + "loss": 0.7944, + "step": 11339 + }, + { + "epoch": 0.01, + "learning_rate": 4.998441222574216e-05, + "loss": 1.0822, + "step": 11340 + }, + { + "epoch": 0.01, + "learning_rate": 4.99844094522818e-05, + "loss": 0.9353, + "step": 11341 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984406678574796e-05, + "loss": 1.3678, + "step": 11342 + }, + { + "epoch": 0.01, + "learning_rate": 4.998440390462116e-05, + "loss": 1.8005, + "step": 11343 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984401130420886e-05, + "loss": 1.3246, + "step": 11344 + }, + { + "epoch": 0.01, + "learning_rate": 4.998439835597398e-05, + "loss": 1.0563, + "step": 11345 + }, + { + "epoch": 0.01, + "learning_rate": 4.998439558128045e-05, + "loss": 1.4994, + "step": 11346 + }, + { + "epoch": 0.01, + "learning_rate": 4.998439280634027e-05, + "loss": 3.3592, + "step": 11347 + }, + { + "epoch": 0.01, + "learning_rate": 4.998439003115346e-05, + "loss": 5.7064, + "step": 11348 + }, + { + "epoch": 0.01, + "learning_rate": 4.998438725572001e-05, + "loss": 5.6699, + "step": 11349 + }, + { + "epoch": 0.01, + "learning_rate": 4.998438448003992e-05, + "loss": 5.6864, + "step": 11350 + }, + { + "epoch": 0.01, + "learning_rate": 4.998438170411321e-05, + "loss": 5.5852, + "step": 11351 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984378927939856e-05, + "loss": 5.5625, + "step": 11352 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984376151519866e-05, + "loss": 5.5607, + "step": 11353 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984373374853246e-05, + "loss": 5.4607, + "step": 11354 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984370597939985e-05, + "loss": 5.2944, + "step": 11355 + }, + { + "epoch": 0.01, + "learning_rate": 4.998436782078009e-05, + "loss": 5.3528, + "step": 11356 + }, + { + "epoch": 0.01, + "learning_rate": 4.998436504337356e-05, + "loss": 1.5605, + "step": 11357 + }, + { + "epoch": 0.01, + "learning_rate": 4.99843622657204e-05, + "loss": 0.9664, + "step": 11358 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984359487820596e-05, + "loss": 0.8769, + "step": 11359 + }, + { + "epoch": 0.01, + "learning_rate": 4.998435670967416e-05, + "loss": 0.7703, + "step": 11360 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984353931281094e-05, + "loss": 0.6933, + "step": 11361 + }, + { + "epoch": 0.01, + "learning_rate": 4.998435115264138e-05, + "loss": 0.6304, + "step": 11362 + }, + { + "epoch": 0.01, + "learning_rate": 4.998434837375505e-05, + "loss": 0.599, + "step": 11363 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984345594622074e-05, + "loss": 0.5209, + "step": 11364 + }, + { + "epoch": 0.01, + "learning_rate": 4.998434281524246e-05, + "loss": 0.5048, + "step": 11365 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984340035616215e-05, + "loss": 0.45, + "step": 11366 + }, + { + "epoch": 0.01, + "learning_rate": 4.998433725574334e-05, + "loss": 0.3872, + "step": 11367 + }, + { + "epoch": 0.01, + "learning_rate": 4.998433447562383e-05, + "loss": 0.3557, + "step": 11368 + }, + { + "epoch": 0.01, + "learning_rate": 4.998433169525768e-05, + "loss": 0.327, + "step": 11369 + }, + { + "epoch": 0.01, + "learning_rate": 4.99843289146449e-05, + "loss": 0.2999, + "step": 11370 + }, + { + "epoch": 0.01, + "learning_rate": 4.998432613378547e-05, + "loss": 0.2661, + "step": 11371 + }, + { + "epoch": 0.01, + "learning_rate": 4.998432335267942e-05, + "loss": 0.2799, + "step": 11372 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984320571326735e-05, + "loss": 0.2492, + "step": 11373 + }, + { + "epoch": 0.01, + "learning_rate": 4.998431778972741e-05, + "loss": 0.2166, + "step": 11374 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984315007881455e-05, + "loss": 0.2357, + "step": 11375 + }, + { + "epoch": 0.01, + "learning_rate": 4.998431222578886e-05, + "loss": 0.217, + "step": 11376 + }, + { + "epoch": 0.01, + "learning_rate": 4.998430944344963e-05, + "loss": 0.2398, + "step": 11377 + }, + { + "epoch": 0.01, + "learning_rate": 4.998430666086377e-05, + "loss": 0.205, + "step": 11378 + }, + { + "epoch": 0.01, + "learning_rate": 4.998430387803128e-05, + "loss": 0.2642, + "step": 11379 + }, + { + "epoch": 0.01, + "learning_rate": 4.998430109495215e-05, + "loss": 0.2823, + "step": 11380 + }, + { + "epoch": 0.01, + "learning_rate": 4.998429831162638e-05, + "loss": 0.7373, + "step": 11381 + }, + { + "epoch": 0.01, + "learning_rate": 4.998429552805398e-05, + "loss": 1.2914, + "step": 11382 + }, + { + "epoch": 0.01, + "learning_rate": 4.998429274423495e-05, + "loss": 0.6993, + "step": 11383 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984289960169286e-05, + "loss": 1.2087, + "step": 11384 + }, + { + "epoch": 0.01, + "learning_rate": 4.998428717585698e-05, + "loss": 1.1759, + "step": 11385 + }, + { + "epoch": 0.01, + "learning_rate": 4.998428439129804e-05, + "loss": 0.8273, + "step": 11386 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984281606492466e-05, + "loss": 1.3036, + "step": 11387 + }, + { + "epoch": 0.01, + "learning_rate": 4.998427882144027e-05, + "loss": 1.8611, + "step": 11388 + }, + { + "epoch": 0.01, + "learning_rate": 4.998427603614143e-05, + "loss": 1.269, + "step": 11389 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984273250595946e-05, + "loss": 1.1731, + "step": 11390 + }, + { + "epoch": 0.01, + "learning_rate": 4.998427046480384e-05, + "loss": 1.1321, + "step": 11391 + }, + { + "epoch": 0.01, + "learning_rate": 4.99842676787651e-05, + "loss": 1.0621, + "step": 11392 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984264892479724e-05, + "loss": 1.1258, + "step": 11393 + }, + { + "epoch": 0.01, + "learning_rate": 4.998426210594771e-05, + "loss": 1.191, + "step": 11394 + }, + { + "epoch": 0.01, + "learning_rate": 4.998425931916907e-05, + "loss": 1.1869, + "step": 11395 + }, + { + "epoch": 0.01, + "learning_rate": 4.998425653214379e-05, + "loss": 1.1527, + "step": 11396 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984253744871877e-05, + "loss": 0.9867, + "step": 11397 + }, + { + "epoch": 0.01, + "learning_rate": 4.998425095735333e-05, + "loss": 0.5327, + "step": 11398 + }, + { + "epoch": 0.01, + "learning_rate": 4.998424816958815e-05, + "loss": 0.9447, + "step": 11399 + }, + { + "epoch": 0.01, + "learning_rate": 4.998424538157633e-05, + "loss": 1.1777, + "step": 11400 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984242593317885e-05, + "loss": 1.1002, + "step": 11401 + }, + { + "epoch": 0.01, + "learning_rate": 4.99842398048128e-05, + "loss": 1.1605, + "step": 11402 + }, + { + "epoch": 0.01, + "learning_rate": 4.998423701606109e-05, + "loss": 1.0985, + "step": 11403 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984234227062734e-05, + "loss": 0.8734, + "step": 11404 + }, + { + "epoch": 0.01, + "learning_rate": 4.998423143781775e-05, + "loss": 1.2233, + "step": 11405 + }, + { + "epoch": 0.01, + "learning_rate": 4.998422864832614e-05, + "loss": 1.0246, + "step": 11406 + }, + { + "epoch": 0.01, + "learning_rate": 4.998422585858788e-05, + "loss": 1.3118, + "step": 11407 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984223068603e-05, + "loss": 1.359, + "step": 11408 + }, + { + "epoch": 0.01, + "learning_rate": 4.998422027837147e-05, + "loss": 1.284, + "step": 11409 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984217487893324e-05, + "loss": 1.2624, + "step": 11410 + }, + { + "epoch": 0.01, + "learning_rate": 4.998421469716853e-05, + "loss": 0.9914, + "step": 11411 + }, + { + "epoch": 0.01, + "learning_rate": 4.998421190619711e-05, + "loss": 0.9909, + "step": 11412 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984209114979064e-05, + "loss": 1.0387, + "step": 11413 + }, + { + "epoch": 0.01, + "learning_rate": 4.998420632351437e-05, + "loss": 1.1785, + "step": 11414 + }, + { + "epoch": 0.01, + "learning_rate": 4.998420353180305e-05, + "loss": 0.9459, + "step": 11415 + }, + { + "epoch": 0.01, + "learning_rate": 4.99842007398451e-05, + "loss": 1.1308, + "step": 11416 + }, + { + "epoch": 0.01, + "learning_rate": 4.998419794764051e-05, + "loss": 1.2018, + "step": 11417 + }, + { + "epoch": 0.01, + "learning_rate": 4.998419515518929e-05, + "loss": 1.0973, + "step": 11418 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984192362491436e-05, + "loss": 1.1917, + "step": 11419 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984189569546944e-05, + "loss": 0.8545, + "step": 11420 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984186776355824e-05, + "loss": 0.8211, + "step": 11421 + }, + { + "epoch": 0.01, + "learning_rate": 4.998418398291807e-05, + "loss": 1.113, + "step": 11422 + }, + { + "epoch": 0.01, + "learning_rate": 4.998418118923368e-05, + "loss": 1.032, + "step": 11423 + }, + { + "epoch": 0.01, + "learning_rate": 4.998417839530266e-05, + "loss": 1.2295, + "step": 11424 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984175601125005e-05, + "loss": 1.0615, + "step": 11425 + }, + { + "epoch": 0.01, + "learning_rate": 4.998417280670071e-05, + "loss": 0.175, + "step": 11426 + }, + { + "epoch": 0.01, + "learning_rate": 4.998417001202979e-05, + "loss": 0.3851, + "step": 11427 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984167217112235e-05, + "loss": 1.0646, + "step": 11428 + }, + { + "epoch": 0.01, + "learning_rate": 4.998416442194805e-05, + "loss": 1.359, + "step": 11429 + }, + { + "epoch": 0.01, + "learning_rate": 4.998416162653723e-05, + "loss": 1.206, + "step": 11430 + }, + { + "epoch": 0.01, + "learning_rate": 4.998415883087977e-05, + "loss": 1.2987, + "step": 11431 + }, + { + "epoch": 0.01, + "learning_rate": 4.998415603497568e-05, + "loss": 0.8914, + "step": 11432 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984153238824964e-05, + "loss": 1.0199, + "step": 11433 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984150442427614e-05, + "loss": 0.9027, + "step": 11434 + }, + { + "epoch": 0.01, + "learning_rate": 4.998414764578362e-05, + "loss": 0.9879, + "step": 11435 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984144848893006e-05, + "loss": 1.1858, + "step": 11436 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984142051755756e-05, + "loss": 1.6896, + "step": 11437 + }, + { + "epoch": 0.01, + "learning_rate": 4.998413925437186e-05, + "loss": 1.8751, + "step": 11438 + }, + { + "epoch": 0.01, + "learning_rate": 4.998413645674135e-05, + "loss": 1.1724, + "step": 11439 + }, + { + "epoch": 0.01, + "learning_rate": 4.99841336588642e-05, + "loss": 1.2096, + "step": 11440 + }, + { + "epoch": 0.01, + "learning_rate": 4.998413086074042e-05, + "loss": 0.9712, + "step": 11441 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984128062369995e-05, + "loss": 1.1346, + "step": 11442 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984125263752944e-05, + "loss": 1.4867, + "step": 11443 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984122464889264e-05, + "loss": 1.1887, + "step": 11444 + }, + { + "epoch": 0.01, + "learning_rate": 4.998411966577895e-05, + "loss": 1.2453, + "step": 11445 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984116866422004e-05, + "loss": 1.0821, + "step": 11446 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984114066818424e-05, + "loss": 0.858, + "step": 11447 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984111266968215e-05, + "loss": 1.3127, + "step": 11448 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984108466871364e-05, + "loss": 1.443, + "step": 11449 + }, + { + "epoch": 0.01, + "learning_rate": 4.998410566652788e-05, + "loss": 1.2599, + "step": 11450 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984102865937774e-05, + "loss": 0.9952, + "step": 11451 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984100065101036e-05, + "loss": 1.2394, + "step": 11452 + }, + { + "epoch": 0.01, + "learning_rate": 4.998409726401766e-05, + "loss": 1.1049, + "step": 11453 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984094462687646e-05, + "loss": 0.6838, + "step": 11454 + }, + { + "epoch": 0.01, + "learning_rate": 4.998409166111101e-05, + "loss": 1.3454, + "step": 11455 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984088859287734e-05, + "loss": 1.0764, + "step": 11456 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984086057217824e-05, + "loss": 1.18, + "step": 11457 + }, + { + "epoch": 0.01, + "learning_rate": 4.998408325490129e-05, + "loss": 0.9963, + "step": 11458 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984080452338125e-05, + "loss": 1.1228, + "step": 11459 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984077649528315e-05, + "loss": 0.8998, + "step": 11460 + }, + { + "epoch": 0.01, + "learning_rate": 4.998407484647188e-05, + "loss": 0.9379, + "step": 11461 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984072043168815e-05, + "loss": 1.5251, + "step": 11462 + }, + { + "epoch": 0.01, + "learning_rate": 4.998406923961911e-05, + "loss": 1.5672, + "step": 11463 + }, + { + "epoch": 0.01, + "learning_rate": 4.998406643582279e-05, + "loss": 1.1538, + "step": 11464 + }, + { + "epoch": 0.01, + "learning_rate": 4.998406363177982e-05, + "loss": 1.1419, + "step": 11465 + }, + { + "epoch": 0.01, + "learning_rate": 4.998406082749023e-05, + "loss": 1.306, + "step": 11466 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984058022953996e-05, + "loss": 1.0548, + "step": 11467 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984055218171135e-05, + "loss": 1.177, + "step": 11468 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984052413141645e-05, + "loss": 1.223, + "step": 11469 + }, + { + "epoch": 0.01, + "learning_rate": 4.998404960786552e-05, + "loss": 1.4315, + "step": 11470 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984046802342764e-05, + "loss": 1.4485, + "step": 11471 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984043996573374e-05, + "loss": 0.9677, + "step": 11472 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984041190557355e-05, + "loss": 1.0573, + "step": 11473 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984038384294706e-05, + "loss": 1.4296, + "step": 11474 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984035577785416e-05, + "loss": 3.2775, + "step": 11475 + }, + { + "epoch": 0.01, + "learning_rate": 4.99840327710295e-05, + "loss": 1.2259, + "step": 11476 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984029964026954e-05, + "loss": 1.2345, + "step": 11477 + }, + { + "epoch": 0.01, + "learning_rate": 4.998402715677778e-05, + "loss": 1.1811, + "step": 11478 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984024349281964e-05, + "loss": 1.2459, + "step": 11479 + }, + { + "epoch": 0.01, + "learning_rate": 4.998402154153952e-05, + "loss": 1.0402, + "step": 11480 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984018733550445e-05, + "loss": 1.2996, + "step": 11481 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984015925314745e-05, + "loss": 1.4455, + "step": 11482 + }, + { + "epoch": 0.01, + "learning_rate": 4.99840131168324e-05, + "loss": 1.0468, + "step": 11483 + }, + { + "epoch": 0.01, + "learning_rate": 4.998401030810343e-05, + "loss": 0.9074, + "step": 11484 + }, + { + "epoch": 0.01, + "learning_rate": 4.9984007499127825e-05, + "loss": 1.6606, + "step": 11485 + }, + { + "epoch": 0.01, + "learning_rate": 4.99840046899056e-05, + "loss": 1.3244, + "step": 11486 + }, + { + "epoch": 0.01, + "learning_rate": 4.998400188043673e-05, + "loss": 0.9966, + "step": 11487 + }, + { + "epoch": 0.01, + "learning_rate": 4.998399907072123e-05, + "loss": 0.7921, + "step": 11488 + }, + { + "epoch": 0.01, + "learning_rate": 4.998399626075911e-05, + "loss": 0.9289, + "step": 11489 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983993450550345e-05, + "loss": 1.4705, + "step": 11490 + }, + { + "epoch": 0.01, + "learning_rate": 4.998399064009496e-05, + "loss": 1.2707, + "step": 11491 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983987829392936e-05, + "loss": 1.1782, + "step": 11492 + }, + { + "epoch": 0.01, + "learning_rate": 4.998398501844428e-05, + "loss": 1.0509, + "step": 11493 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983982207249e-05, + "loss": 1.2076, + "step": 11494 + }, + { + "epoch": 0.01, + "learning_rate": 4.998397939580708e-05, + "loss": 1.2198, + "step": 11495 + }, + { + "epoch": 0.01, + "learning_rate": 4.998397658411854e-05, + "loss": 1.2317, + "step": 11496 + }, + { + "epoch": 0.01, + "learning_rate": 4.998397377218336e-05, + "loss": 1.0691, + "step": 11497 + }, + { + "epoch": 0.01, + "learning_rate": 4.998397096000155e-05, + "loss": 1.4243, + "step": 11498 + }, + { + "epoch": 0.01, + "learning_rate": 4.99839681475731e-05, + "loss": 1.2581, + "step": 11499 + }, + { + "epoch": 0.01, + "learning_rate": 4.998396533489803e-05, + "loss": 1.1266, + "step": 11500 + }, + { + "epoch": 0.01, + "eval_loss": 1.0503673553466797, + "eval_runtime": 83.5269, + "eval_samples_per_second": 16.581, + "eval_steps_per_second": 4.154, + "step": 11500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983962521976326e-05, + "loss": 0.9645, + "step": 11501 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983959708807994e-05, + "loss": 1.2468, + "step": 11502 + }, + { + "epoch": 0.01, + "learning_rate": 4.998395689539303e-05, + "loss": 1.0432, + "step": 11503 + }, + { + "epoch": 0.01, + "learning_rate": 4.998395408173143e-05, + "loss": 1.2302, + "step": 11504 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983951267823206e-05, + "loss": 1.081, + "step": 11505 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983948453668345e-05, + "loss": 0.9665, + "step": 11506 + }, + { + "epoch": 0.01, + "learning_rate": 4.998394563926685e-05, + "loss": 0.8637, + "step": 11507 + }, + { + "epoch": 0.01, + "learning_rate": 4.998394282461874e-05, + "loss": 1.2088, + "step": 11508 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983940009723976e-05, + "loss": 1.144, + "step": 11509 + }, + { + "epoch": 0.01, + "learning_rate": 4.99839371945826e-05, + "loss": 1.9551, + "step": 11510 + }, + { + "epoch": 0.01, + "learning_rate": 4.998393437919459e-05, + "loss": 1.3644, + "step": 11511 + }, + { + "epoch": 0.01, + "learning_rate": 4.998393156355994e-05, + "loss": 1.1624, + "step": 11512 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983928747678663e-05, + "loss": 1.0378, + "step": 11513 + }, + { + "epoch": 0.01, + "learning_rate": 4.998392593155076e-05, + "loss": 1.1334, + "step": 11514 + }, + { + "epoch": 0.01, + "learning_rate": 4.998392311517622e-05, + "loss": 1.2368, + "step": 11515 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983920298555054e-05, + "loss": 1.1113, + "step": 11516 + }, + { + "epoch": 0.01, + "learning_rate": 4.998391748168725e-05, + "loss": 1.0115, + "step": 11517 + }, + { + "epoch": 0.01, + "learning_rate": 4.998391466457282e-05, + "loss": 1.3807, + "step": 11518 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983911847211764e-05, + "loss": 1.6326, + "step": 11519 + }, + { + "epoch": 0.01, + "learning_rate": 4.998390902960407e-05, + "loss": 1.2276, + "step": 11520 + }, + { + "epoch": 0.01, + "learning_rate": 4.998390621174975e-05, + "loss": 1.0294, + "step": 11521 + }, + { + "epoch": 0.01, + "learning_rate": 4.99839033936488e-05, + "loss": 0.6542, + "step": 11522 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983900575301214e-05, + "loss": 0.7583, + "step": 11523 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983897756707e-05, + "loss": 0.9786, + "step": 11524 + }, + { + "epoch": 0.01, + "learning_rate": 4.998389493786616e-05, + "loss": 1.024, + "step": 11525 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983892118778684e-05, + "loss": 1.0794, + "step": 11526 + }, + { + "epoch": 0.01, + "learning_rate": 4.998388929944458e-05, + "loss": 1.2192, + "step": 11527 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983886479863846e-05, + "loss": 1.3275, + "step": 11528 + }, + { + "epoch": 0.01, + "learning_rate": 4.998388366003648e-05, + "loss": 0.9568, + "step": 11529 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983880839962485e-05, + "loss": 1.1889, + "step": 11530 + }, + { + "epoch": 0.01, + "learning_rate": 4.998387801964186e-05, + "loss": 0.9586, + "step": 11531 + }, + { + "epoch": 0.01, + "learning_rate": 4.99838751990746e-05, + "loss": 1.2669, + "step": 11532 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983872378260714e-05, + "loss": 1.2115, + "step": 11533 + }, + { + "epoch": 0.01, + "learning_rate": 4.99838695572002e-05, + "loss": 1.479, + "step": 11534 + }, + { + "epoch": 0.01, + "learning_rate": 4.998386673589305e-05, + "loss": 1.3306, + "step": 11535 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983863914339276e-05, + "loss": 1.3134, + "step": 11536 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983861092538865e-05, + "loss": 0.8815, + "step": 11537 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983858270491826e-05, + "loss": 1.1442, + "step": 11538 + }, + { + "epoch": 0.01, + "learning_rate": 4.998385544819816e-05, + "loss": 1.404, + "step": 11539 + }, + { + "epoch": 0.01, + "learning_rate": 4.998385262565786e-05, + "loss": 0.7192, + "step": 11540 + }, + { + "epoch": 0.01, + "learning_rate": 4.998384980287093e-05, + "loss": 1.7457, + "step": 11541 + }, + { + "epoch": 0.01, + "learning_rate": 4.998384697983738e-05, + "loss": 1.2463, + "step": 11542 + }, + { + "epoch": 0.01, + "learning_rate": 4.998384415655719e-05, + "loss": 1.0283, + "step": 11543 + }, + { + "epoch": 0.01, + "learning_rate": 4.998384133303037e-05, + "loss": 1.0004, + "step": 11544 + }, + { + "epoch": 0.01, + "learning_rate": 4.998383850925692e-05, + "loss": 0.9757, + "step": 11545 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983835685236844e-05, + "loss": 1.1777, + "step": 11546 + }, + { + "epoch": 0.01, + "learning_rate": 4.998383286097014e-05, + "loss": 0.9577, + "step": 11547 + }, + { + "epoch": 0.01, + "learning_rate": 4.99838300364568e-05, + "loss": 1.2794, + "step": 11548 + }, + { + "epoch": 0.01, + "learning_rate": 4.998382721169683e-05, + "loss": 1.1624, + "step": 11549 + }, + { + "epoch": 0.01, + "learning_rate": 4.998382438669024e-05, + "loss": 1.4319, + "step": 11550 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983821561437003e-05, + "loss": 1.134, + "step": 11551 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983818735937146e-05, + "loss": 1.0575, + "step": 11552 + }, + { + "epoch": 0.01, + "learning_rate": 4.998381591019066e-05, + "loss": 0.9679, + "step": 11553 + }, + { + "epoch": 0.01, + "learning_rate": 4.998381308419755e-05, + "loss": 1.13, + "step": 11554 + }, + { + "epoch": 0.01, + "learning_rate": 4.99838102579578e-05, + "loss": 1.1842, + "step": 11555 + }, + { + "epoch": 0.01, + "learning_rate": 4.998380743147143e-05, + "loss": 1.129, + "step": 11556 + }, + { + "epoch": 0.01, + "learning_rate": 4.998380460473842e-05, + "loss": 0.9742, + "step": 11557 + }, + { + "epoch": 0.01, + "learning_rate": 4.998380177775879e-05, + "loss": 0.9647, + "step": 11558 + }, + { + "epoch": 0.01, + "learning_rate": 4.998379895053252e-05, + "loss": 1.1006, + "step": 11559 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983796123059626e-05, + "loss": 1.0966, + "step": 11560 + }, + { + "epoch": 0.01, + "learning_rate": 4.99837932953401e-05, + "loss": 1.3214, + "step": 11561 + }, + { + "epoch": 0.01, + "learning_rate": 4.998379046737395e-05, + "loss": 0.9388, + "step": 11562 + }, + { + "epoch": 0.01, + "learning_rate": 4.998378763916117e-05, + "loss": 1.0182, + "step": 11563 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983784810701756e-05, + "loss": 1.1197, + "step": 11564 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983781981995717e-05, + "loss": 0.9419, + "step": 11565 + }, + { + "epoch": 0.01, + "learning_rate": 4.998377915304304e-05, + "loss": 0.6806, + "step": 11566 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983776323843744e-05, + "loss": 0.8233, + "step": 11567 + }, + { + "epoch": 0.01, + "learning_rate": 4.998377349439781e-05, + "loss": 0.8634, + "step": 11568 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983770664705255e-05, + "loss": 0.5917, + "step": 11569 + }, + { + "epoch": 0.01, + "learning_rate": 4.998376783476607e-05, + "loss": 1.2584, + "step": 11570 + }, + { + "epoch": 0.01, + "learning_rate": 4.998376500458025e-05, + "loss": 1.2925, + "step": 11571 + }, + { + "epoch": 0.01, + "learning_rate": 4.998376217414781e-05, + "loss": 1.3929, + "step": 11572 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983759343468725e-05, + "loss": 1.0965, + "step": 11573 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983756512543025e-05, + "loss": 1.5753, + "step": 11574 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983753681370696e-05, + "loss": 1.0878, + "step": 11575 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983750849951725e-05, + "loss": 0.8005, + "step": 11576 + }, + { + "epoch": 0.01, + "learning_rate": 4.998374801828614e-05, + "loss": 0.1209, + "step": 11577 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983745186373923e-05, + "loss": 0.0624, + "step": 11578 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983742354215066e-05, + "loss": 0.0626, + "step": 11579 + }, + { + "epoch": 0.01, + "learning_rate": 4.998373952180959e-05, + "loss": 0.1138, + "step": 11580 + }, + { + "epoch": 0.01, + "learning_rate": 4.998373668915748e-05, + "loss": 0.0397, + "step": 11581 + }, + { + "epoch": 0.01, + "learning_rate": 4.998373385625874e-05, + "loss": 0.0496, + "step": 11582 + }, + { + "epoch": 0.01, + "learning_rate": 4.998373102311338e-05, + "loss": 0.0782, + "step": 11583 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983728189721384e-05, + "loss": 0.4105, + "step": 11584 + }, + { + "epoch": 0.01, + "learning_rate": 4.998372535608276e-05, + "loss": 1.2428, + "step": 11585 + }, + { + "epoch": 0.01, + "learning_rate": 4.998372252219751e-05, + "loss": 0.8774, + "step": 11586 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983719688065625e-05, + "loss": 1.1977, + "step": 11587 + }, + { + "epoch": 0.01, + "learning_rate": 4.998371685368712e-05, + "loss": 1.2278, + "step": 11588 + }, + { + "epoch": 0.01, + "learning_rate": 4.998371401906198e-05, + "loss": 0.7271, + "step": 11589 + }, + { + "epoch": 0.01, + "learning_rate": 4.998371118419021e-05, + "loss": 0.562, + "step": 11590 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983708349071814e-05, + "loss": 1.1148, + "step": 11591 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983705513706794e-05, + "loss": 1.038, + "step": 11592 + }, + { + "epoch": 0.01, + "learning_rate": 4.998370267809514e-05, + "loss": 1.3651, + "step": 11593 + }, + { + "epoch": 0.01, + "learning_rate": 4.998369984223686e-05, + "loss": 1.2701, + "step": 11594 + }, + { + "epoch": 0.01, + "learning_rate": 4.998369700613195e-05, + "loss": 0.9801, + "step": 11595 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983694169780406e-05, + "loss": 1.1135, + "step": 11596 + }, + { + "epoch": 0.01, + "learning_rate": 4.998369133318224e-05, + "loss": 1.1544, + "step": 11597 + }, + { + "epoch": 0.01, + "learning_rate": 4.998368849633744e-05, + "loss": 1.1129, + "step": 11598 + }, + { + "epoch": 0.01, + "learning_rate": 4.998368565924603e-05, + "loss": 1.0495, + "step": 11599 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983682821907976e-05, + "loss": 0.9521, + "step": 11600 + }, + { + "epoch": 0.01, + "learning_rate": 4.998367998432329e-05, + "loss": 1.2354, + "step": 11601 + }, + { + "epoch": 0.01, + "learning_rate": 4.998367714649198e-05, + "loss": 0.9108, + "step": 11602 + }, + { + "epoch": 0.01, + "learning_rate": 4.998367430841404e-05, + "loss": 1.4155, + "step": 11603 + }, + { + "epoch": 0.01, + "learning_rate": 4.998367147008948e-05, + "loss": 1.2151, + "step": 11604 + }, + { + "epoch": 0.01, + "learning_rate": 4.998366863151828e-05, + "loss": 1.066, + "step": 11605 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983665792700464e-05, + "loss": 0.9034, + "step": 11606 + }, + { + "epoch": 0.01, + "learning_rate": 4.998366295363601e-05, + "loss": 1.1208, + "step": 11607 + }, + { + "epoch": 0.01, + "learning_rate": 4.998366011432493e-05, + "loss": 1.1024, + "step": 11608 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983657274767225e-05, + "loss": 1.1969, + "step": 11609 + }, + { + "epoch": 0.01, + "learning_rate": 4.998365443496289e-05, + "loss": 1.2588, + "step": 11610 + }, + { + "epoch": 0.01, + "learning_rate": 4.998365159491193e-05, + "loss": 1.196, + "step": 11611 + }, + { + "epoch": 0.01, + "learning_rate": 4.998364875461433e-05, + "loss": 0.9279, + "step": 11612 + }, + { + "epoch": 0.01, + "learning_rate": 4.998364591407011e-05, + "loss": 1.0814, + "step": 11613 + }, + { + "epoch": 0.01, + "learning_rate": 4.998364307327927e-05, + "loss": 1.2077, + "step": 11614 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983640232241794e-05, + "loss": 0.9932, + "step": 11615 + }, + { + "epoch": 0.01, + "learning_rate": 4.998363739095769e-05, + "loss": 1.1417, + "step": 11616 + }, + { + "epoch": 0.01, + "learning_rate": 4.998363454942696e-05, + "loss": 1.229, + "step": 11617 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983631707649595e-05, + "loss": 1.2227, + "step": 11618 + }, + { + "epoch": 0.01, + "learning_rate": 4.998362886562561e-05, + "loss": 1.1244, + "step": 11619 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983626023354994e-05, + "loss": 1.1602, + "step": 11620 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983623180837756e-05, + "loss": 1.2657, + "step": 11621 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983620338073883e-05, + "loss": 1.2006, + "step": 11622 + }, + { + "epoch": 0.01, + "learning_rate": 4.998361749506338e-05, + "loss": 1.1533, + "step": 11623 + }, + { + "epoch": 0.01, + "learning_rate": 4.998361465180626e-05, + "loss": 1.0951, + "step": 11624 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983611808302505e-05, + "loss": 1.0147, + "step": 11625 + }, + { + "epoch": 0.01, + "learning_rate": 4.998360896455212e-05, + "loss": 1.3346, + "step": 11626 + }, + { + "epoch": 0.01, + "learning_rate": 4.998360612055511e-05, + "loss": 1.5097, + "step": 11627 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983603276311473e-05, + "loss": 0.9217, + "step": 11628 + }, + { + "epoch": 0.01, + "learning_rate": 4.998360043182121e-05, + "loss": 1.0167, + "step": 11629 + }, + { + "epoch": 0.01, + "learning_rate": 4.998359758708432e-05, + "loss": 1.0593, + "step": 11630 + }, + { + "epoch": 0.01, + "learning_rate": 4.99835947421008e-05, + "loss": 0.9394, + "step": 11631 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983591896870654e-05, + "loss": 0.9581, + "step": 11632 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983589051393884e-05, + "loss": 1.3864, + "step": 11633 + }, + { + "epoch": 0.01, + "learning_rate": 4.998358620567048e-05, + "loss": 0.5484, + "step": 11634 + }, + { + "epoch": 0.01, + "learning_rate": 4.998358335970045e-05, + "loss": 0.3537, + "step": 11635 + }, + { + "epoch": 0.01, + "learning_rate": 4.99835805134838e-05, + "loss": 1.1215, + "step": 11636 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983577667020507e-05, + "loss": 1.1501, + "step": 11637 + }, + { + "epoch": 0.01, + "learning_rate": 4.99835748203106e-05, + "loss": 1.1637, + "step": 11638 + }, + { + "epoch": 0.01, + "learning_rate": 4.998357197335406e-05, + "loss": 0.8523, + "step": 11639 + }, + { + "epoch": 0.01, + "learning_rate": 4.998356912615089e-05, + "loss": 1.2117, + "step": 11640 + }, + { + "epoch": 0.01, + "learning_rate": 4.99835662787011e-05, + "loss": 1.1864, + "step": 11641 + }, + { + "epoch": 0.01, + "learning_rate": 4.998356343100468e-05, + "loss": 0.9856, + "step": 11642 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983560583061625e-05, + "loss": 1.0273, + "step": 11643 + }, + { + "epoch": 0.01, + "learning_rate": 4.998355773487196e-05, + "loss": 1.1164, + "step": 11644 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983554886435654e-05, + "loss": 1.209, + "step": 11645 + }, + { + "epoch": 0.01, + "learning_rate": 4.998355203775272e-05, + "loss": 1.2607, + "step": 11646 + }, + { + "epoch": 0.01, + "learning_rate": 4.998354918882317e-05, + "loss": 0.9828, + "step": 11647 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983546339646984e-05, + "loss": 1.1073, + "step": 11648 + }, + { + "epoch": 0.01, + "learning_rate": 4.998354349022417e-05, + "loss": 0.8334, + "step": 11649 + }, + { + "epoch": 0.01, + "learning_rate": 4.998354064055474e-05, + "loss": 1.2648, + "step": 11650 + }, + { + "epoch": 0.01, + "learning_rate": 4.998353779063867e-05, + "loss": 1.2619, + "step": 11651 + }, + { + "epoch": 0.01, + "learning_rate": 4.998353494047598e-05, + "loss": 1.405, + "step": 11652 + }, + { + "epoch": 0.01, + "learning_rate": 4.998353209006666e-05, + "loss": 1.4111, + "step": 11653 + }, + { + "epoch": 0.01, + "learning_rate": 4.998352923941072e-05, + "loss": 0.9003, + "step": 11654 + }, + { + "epoch": 0.01, + "learning_rate": 4.998352638850814e-05, + "loss": 1.1074, + "step": 11655 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983523537358945e-05, + "loss": 0.9703, + "step": 11656 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983520685963115e-05, + "loss": 1.0727, + "step": 11657 + }, + { + "epoch": 0.01, + "learning_rate": 4.998351783432066e-05, + "loss": 1.1307, + "step": 11658 + }, + { + "epoch": 0.01, + "learning_rate": 4.998351498243159e-05, + "loss": 0.7871, + "step": 11659 + }, + { + "epoch": 0.01, + "learning_rate": 4.998351213029588e-05, + "loss": 0.7997, + "step": 11660 + }, + { + "epoch": 0.01, + "learning_rate": 4.998350927791355e-05, + "loss": 0.7349, + "step": 11661 + }, + { + "epoch": 0.01, + "learning_rate": 4.998350642528459e-05, + "loss": 0.934, + "step": 11662 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983503572409004e-05, + "loss": 0.9828, + "step": 11663 + }, + { + "epoch": 0.01, + "learning_rate": 4.998350071928679e-05, + "loss": 1.241, + "step": 11664 + }, + { + "epoch": 0.01, + "learning_rate": 4.998349786591795e-05, + "loss": 1.3398, + "step": 11665 + }, + { + "epoch": 0.01, + "learning_rate": 4.998349501230248e-05, + "loss": 1.0326, + "step": 11666 + }, + { + "epoch": 0.01, + "learning_rate": 4.998349215844039e-05, + "loss": 1.2806, + "step": 11667 + }, + { + "epoch": 0.01, + "learning_rate": 4.998348930433167e-05, + "loss": 1.0945, + "step": 11668 + }, + { + "epoch": 0.01, + "learning_rate": 4.998348644997633e-05, + "loss": 1.1345, + "step": 11669 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983483595374356e-05, + "loss": 1.2521, + "step": 11670 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983480740525756e-05, + "loss": 1.3518, + "step": 11671 + }, + { + "epoch": 0.01, + "learning_rate": 4.998347788543053e-05, + "loss": 1.1941, + "step": 11672 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983475030088675e-05, + "loss": 1.2435, + "step": 11673 + }, + { + "epoch": 0.01, + "learning_rate": 4.99834721745002e-05, + "loss": 1.1946, + "step": 11674 + }, + { + "epoch": 0.01, + "learning_rate": 4.99834693186651e-05, + "loss": 0.9514, + "step": 11675 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983466462583375e-05, + "loss": 0.9711, + "step": 11676 + }, + { + "epoch": 0.01, + "learning_rate": 4.998346360625501e-05, + "loss": 1.1204, + "step": 11677 + }, + { + "epoch": 0.01, + "learning_rate": 4.998346074968003e-05, + "loss": 0.6449, + "step": 11678 + }, + { + "epoch": 0.01, + "learning_rate": 4.998345789285842e-05, + "loss": 0.932, + "step": 11679 + }, + { + "epoch": 0.01, + "learning_rate": 4.998345503579018e-05, + "loss": 1.0239, + "step": 11680 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983452178475326e-05, + "loss": 1.2577, + "step": 11681 + }, + { + "epoch": 0.01, + "learning_rate": 4.998344932091384e-05, + "loss": 1.2697, + "step": 11682 + }, + { + "epoch": 0.01, + "learning_rate": 4.998344646310573e-05, + "loss": 1.0062, + "step": 11683 + }, + { + "epoch": 0.01, + "learning_rate": 4.998344360505099e-05, + "loss": 0.96, + "step": 11684 + }, + { + "epoch": 0.01, + "learning_rate": 4.998344074674962e-05, + "loss": 1.4697, + "step": 11685 + }, + { + "epoch": 0.01, + "learning_rate": 4.998343788820163e-05, + "loss": 1.0183, + "step": 11686 + }, + { + "epoch": 0.01, + "learning_rate": 4.998343502940701e-05, + "loss": 0.9672, + "step": 11687 + }, + { + "epoch": 0.01, + "learning_rate": 4.998343217036577e-05, + "loss": 0.8766, + "step": 11688 + }, + { + "epoch": 0.01, + "learning_rate": 4.99834293110779e-05, + "loss": 1.0352, + "step": 11689 + }, + { + "epoch": 0.01, + "learning_rate": 4.99834264515434e-05, + "loss": 1.0626, + "step": 11690 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983423591762285e-05, + "loss": 0.9342, + "step": 11691 + }, + { + "epoch": 0.01, + "learning_rate": 4.998342073173454e-05, + "loss": 0.9113, + "step": 11692 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983417871460164e-05, + "loss": 1.1392, + "step": 11693 + }, + { + "epoch": 0.01, + "learning_rate": 4.998341501093917e-05, + "loss": 1.147, + "step": 11694 + }, + { + "epoch": 0.01, + "learning_rate": 4.998341215017154e-05, + "loss": 1.2468, + "step": 11695 + }, + { + "epoch": 0.01, + "learning_rate": 4.998340928915729e-05, + "loss": 1.2733, + "step": 11696 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983406427896416e-05, + "loss": 0.771, + "step": 11697 + }, + { + "epoch": 0.01, + "learning_rate": 4.998340356638892e-05, + "loss": 1.2064, + "step": 11698 + }, + { + "epoch": 0.01, + "learning_rate": 4.998340070463479e-05, + "loss": 1.2075, + "step": 11699 + }, + { + "epoch": 0.01, + "learning_rate": 4.998339784263404e-05, + "loss": 1.1184, + "step": 11700 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983394980386653e-05, + "loss": 1.0077, + "step": 11701 + }, + { + "epoch": 0.01, + "learning_rate": 4.998339211789266e-05, + "loss": 1.3163, + "step": 11702 + }, + { + "epoch": 0.01, + "learning_rate": 4.998338925515202e-05, + "loss": 1.1615, + "step": 11703 + }, + { + "epoch": 0.01, + "learning_rate": 4.998338639216477e-05, + "loss": 1.0435, + "step": 11704 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983383528930896e-05, + "loss": 1.4037, + "step": 11705 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983380665450386e-05, + "loss": 1.2341, + "step": 11706 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983377801723254e-05, + "loss": 0.9345, + "step": 11707 + }, + { + "epoch": 0.01, + "learning_rate": 4.99833749377495e-05, + "loss": 0.874, + "step": 11708 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983372073529124e-05, + "loss": 1.1688, + "step": 11709 + }, + { + "epoch": 0.01, + "learning_rate": 4.998336920906211e-05, + "loss": 1.1019, + "step": 11710 + }, + { + "epoch": 0.01, + "learning_rate": 4.998336634434848e-05, + "loss": 1.3786, + "step": 11711 + }, + { + "epoch": 0.01, + "learning_rate": 4.998336347938822e-05, + "loss": 1.439, + "step": 11712 + }, + { + "epoch": 0.01, + "learning_rate": 4.998336061418134e-05, + "loss": 1.339, + "step": 11713 + }, + { + "epoch": 0.01, + "learning_rate": 4.998335774872783e-05, + "loss": 1.1371, + "step": 11714 + }, + { + "epoch": 0.01, + "learning_rate": 4.99833548830277e-05, + "loss": 0.8706, + "step": 11715 + }, + { + "epoch": 0.01, + "learning_rate": 4.998335201708094e-05, + "loss": 0.7482, + "step": 11716 + }, + { + "epoch": 0.01, + "learning_rate": 4.998334915088756e-05, + "loss": 0.9907, + "step": 11717 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983346284447554e-05, + "loss": 0.9305, + "step": 11718 + }, + { + "epoch": 0.01, + "learning_rate": 4.998334341776092e-05, + "loss": 1.0183, + "step": 11719 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983340550827656e-05, + "loss": 1.0315, + "step": 11720 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983337683647774e-05, + "loss": 0.9853, + "step": 11721 + }, + { + "epoch": 0.01, + "learning_rate": 4.998333481622127e-05, + "loss": 0.8974, + "step": 11722 + }, + { + "epoch": 0.01, + "learning_rate": 4.998333194854813e-05, + "loss": 0.9707, + "step": 11723 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983329080628374e-05, + "loss": 1.1358, + "step": 11724 + }, + { + "epoch": 0.01, + "learning_rate": 4.998332621246199e-05, + "loss": 1.0645, + "step": 11725 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983323344048984e-05, + "loss": 1.1729, + "step": 11726 + }, + { + "epoch": 0.01, + "learning_rate": 4.998332047538935e-05, + "loss": 0.9998, + "step": 11727 + }, + { + "epoch": 0.01, + "learning_rate": 4.998331760648309e-05, + "loss": 0.8516, + "step": 11728 + }, + { + "epoch": 0.01, + "learning_rate": 4.998331473733021e-05, + "loss": 0.4824, + "step": 11729 + }, + { + "epoch": 0.01, + "learning_rate": 4.99833118679307e-05, + "loss": 0.5791, + "step": 11730 + }, + { + "epoch": 0.01, + "learning_rate": 4.998330899828457e-05, + "loss": 0.4122, + "step": 11731 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983306128391814e-05, + "loss": 0.4782, + "step": 11732 + }, + { + "epoch": 0.01, + "learning_rate": 4.998330325825243e-05, + "loss": 0.4057, + "step": 11733 + }, + { + "epoch": 0.01, + "learning_rate": 4.998330038786643e-05, + "loss": 0.9686, + "step": 11734 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983297517233796e-05, + "loss": 1.0921, + "step": 11735 + }, + { + "epoch": 0.01, + "learning_rate": 4.998329464635454e-05, + "loss": 0.9476, + "step": 11736 + }, + { + "epoch": 0.01, + "learning_rate": 4.998329177522866e-05, + "loss": 1.1128, + "step": 11737 + }, + { + "epoch": 0.01, + "learning_rate": 4.998328890385616e-05, + "loss": 1.2486, + "step": 11738 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983286032237025e-05, + "loss": 1.1701, + "step": 11739 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983283160371275e-05, + "loss": 1.0324, + "step": 11740 + }, + { + "epoch": 0.01, + "learning_rate": 4.99832802882589e-05, + "loss": 1.2524, + "step": 11741 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983277415899896e-05, + "loss": 0.9121, + "step": 11742 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983274543294266e-05, + "loss": 0.7149, + "step": 11743 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983271670442014e-05, + "loss": 0.7126, + "step": 11744 + }, + { + "epoch": 0.01, + "learning_rate": 4.998326879734314e-05, + "loss": 0.7163, + "step": 11745 + }, + { + "epoch": 0.01, + "learning_rate": 4.998326592399764e-05, + "loss": 0.6952, + "step": 11746 + }, + { + "epoch": 0.01, + "learning_rate": 4.998326305040551e-05, + "loss": 0.6841, + "step": 11747 + }, + { + "epoch": 0.01, + "learning_rate": 4.998326017656677e-05, + "loss": 0.6924, + "step": 11748 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983257302481404e-05, + "loss": 0.7164, + "step": 11749 + }, + { + "epoch": 0.01, + "learning_rate": 4.99832544281494e-05, + "loss": 0.6867, + "step": 11750 + }, + { + "epoch": 0.01, + "learning_rate": 4.998325155357078e-05, + "loss": 0.6912, + "step": 11751 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983248678745536e-05, + "loss": 0.6771, + "step": 11752 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983245803673665e-05, + "loss": 0.6655, + "step": 11753 + }, + { + "epoch": 0.01, + "learning_rate": 4.998324292835517e-05, + "loss": 0.534, + "step": 11754 + }, + { + "epoch": 0.01, + "learning_rate": 4.998324005279006e-05, + "loss": 0.4779, + "step": 11755 + }, + { + "epoch": 0.01, + "learning_rate": 4.998323717697831e-05, + "loss": 0.6134, + "step": 11756 + }, + { + "epoch": 0.01, + "learning_rate": 4.998323430091995e-05, + "loss": 0.6505, + "step": 11757 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983231424614964e-05, + "loss": 0.658, + "step": 11758 + }, + { + "epoch": 0.01, + "learning_rate": 4.998322854806335e-05, + "loss": 0.6802, + "step": 11759 + }, + { + "epoch": 0.01, + "learning_rate": 4.998322567126511e-05, + "loss": 0.6745, + "step": 11760 + }, + { + "epoch": 0.01, + "learning_rate": 4.998322279422025e-05, + "loss": 0.6616, + "step": 11761 + }, + { + "epoch": 0.01, + "learning_rate": 4.998321991692877e-05, + "loss": 0.6549, + "step": 11762 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983217039390663e-05, + "loss": 0.6667, + "step": 11763 + }, + { + "epoch": 0.01, + "learning_rate": 4.998321416160593e-05, + "loss": 0.6433, + "step": 11764 + }, + { + "epoch": 0.01, + "learning_rate": 4.998321128357457e-05, + "loss": 0.6033, + "step": 11765 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983208405296586e-05, + "loss": 0.6027, + "step": 11766 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983205526771985e-05, + "loss": 0.5814, + "step": 11767 + }, + { + "epoch": 0.01, + "learning_rate": 4.998320264800076e-05, + "loss": 0.5885, + "step": 11768 + }, + { + "epoch": 0.01, + "learning_rate": 4.998319976898292e-05, + "loss": 0.5924, + "step": 11769 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983196889718436e-05, + "loss": 0.5976, + "step": 11770 + }, + { + "epoch": 0.01, + "learning_rate": 4.998319401020733e-05, + "loss": 0.584, + "step": 11771 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983191130449614e-05, + "loss": 0.567, + "step": 11772 + }, + { + "epoch": 0.01, + "learning_rate": 4.998318825044527e-05, + "loss": 0.5799, + "step": 11773 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983185370194305e-05, + "loss": 0.6011, + "step": 11774 + }, + { + "epoch": 0.01, + "learning_rate": 4.998318248969671e-05, + "loss": 0.6608, + "step": 11775 + }, + { + "epoch": 0.01, + "learning_rate": 4.99831796089525e-05, + "loss": 0.6653, + "step": 11776 + }, + { + "epoch": 0.01, + "learning_rate": 4.998317672796166e-05, + "loss": 0.6688, + "step": 11777 + }, + { + "epoch": 0.01, + "learning_rate": 4.99831738467242e-05, + "loss": 0.6603, + "step": 11778 + }, + { + "epoch": 0.01, + "learning_rate": 4.998317096524011e-05, + "loss": 0.6453, + "step": 11779 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983168083509404e-05, + "loss": 0.6551, + "step": 11780 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983165201532066e-05, + "loss": 0.6575, + "step": 11781 + }, + { + "epoch": 0.01, + "learning_rate": 4.998316231930811e-05, + "loss": 0.6216, + "step": 11782 + }, + { + "epoch": 0.01, + "learning_rate": 4.998315943683754e-05, + "loss": 0.6309, + "step": 11783 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983156554120334e-05, + "loss": 0.6641, + "step": 11784 + }, + { + "epoch": 0.01, + "learning_rate": 4.998315367115651e-05, + "loss": 0.9049, + "step": 11785 + }, + { + "epoch": 0.01, + "learning_rate": 4.998315078794606e-05, + "loss": 1.3519, + "step": 11786 + }, + { + "epoch": 0.01, + "learning_rate": 4.998314790448899e-05, + "loss": 1.1333, + "step": 11787 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983145020785296e-05, + "loss": 0.67, + "step": 11788 + }, + { + "epoch": 0.01, + "learning_rate": 4.998314213683497e-05, + "loss": 1.1663, + "step": 11789 + }, + { + "epoch": 0.01, + "learning_rate": 4.998313925263803e-05, + "loss": 1.0568, + "step": 11790 + }, + { + "epoch": 0.01, + "learning_rate": 4.998313636819447e-05, + "loss": 1.3779, + "step": 11791 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983133483504285e-05, + "loss": 1.1582, + "step": 11792 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983130598567475e-05, + "loss": 0.9026, + "step": 11793 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983127713384036e-05, + "loss": 1.1377, + "step": 11794 + }, + { + "epoch": 0.01, + "learning_rate": 4.998312482795399e-05, + "loss": 1.2038, + "step": 11795 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983121942277306e-05, + "loss": 1.0308, + "step": 11796 + }, + { + "epoch": 0.01, + "learning_rate": 4.998311905635401e-05, + "loss": 0.9214, + "step": 11797 + }, + { + "epoch": 0.01, + "learning_rate": 4.998311617018409e-05, + "loss": 1.11, + "step": 11798 + }, + { + "epoch": 0.01, + "learning_rate": 4.998311328376754e-05, + "loss": 1.3039, + "step": 11799 + }, + { + "epoch": 0.01, + "learning_rate": 4.998311039710437e-05, + "loss": 1.0181, + "step": 11800 + }, + { + "epoch": 0.01, + "learning_rate": 4.998310751019457e-05, + "loss": 0.7254, + "step": 11801 + }, + { + "epoch": 0.01, + "learning_rate": 4.998310462303816e-05, + "loss": 0.9878, + "step": 11802 + }, + { + "epoch": 0.01, + "learning_rate": 4.998310173563512e-05, + "loss": 1.1497, + "step": 11803 + }, + { + "epoch": 0.01, + "learning_rate": 4.998309884798546e-05, + "loss": 1.106, + "step": 11804 + }, + { + "epoch": 0.01, + "learning_rate": 4.998309596008918e-05, + "loss": 0.331, + "step": 11805 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983093071946274e-05, + "loss": 0.6411, + "step": 11806 + }, + { + "epoch": 0.01, + "learning_rate": 4.998309018355674e-05, + "loss": 1.171, + "step": 11807 + }, + { + "epoch": 0.01, + "learning_rate": 4.998308729492059e-05, + "loss": 1.2667, + "step": 11808 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983084406037815e-05, + "loss": 0.9939, + "step": 11809 + }, + { + "epoch": 0.01, + "learning_rate": 4.998308151690842e-05, + "loss": 1.0861, + "step": 11810 + }, + { + "epoch": 0.01, + "learning_rate": 4.99830786275324e-05, + "loss": 1.1063, + "step": 11811 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983075737909765e-05, + "loss": 1.1264, + "step": 11812 + }, + { + "epoch": 0.01, + "learning_rate": 4.99830728480405e-05, + "loss": 0.9674, + "step": 11813 + }, + { + "epoch": 0.01, + "learning_rate": 4.998306995792462e-05, + "loss": 1.1379, + "step": 11814 + }, + { + "epoch": 0.01, + "learning_rate": 4.998306706756211e-05, + "loss": 0.9703, + "step": 11815 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983064176952976e-05, + "loss": 1.3968, + "step": 11816 + }, + { + "epoch": 0.01, + "learning_rate": 4.998306128609722e-05, + "loss": 1.0956, + "step": 11817 + }, + { + "epoch": 0.01, + "learning_rate": 4.998305839499485e-05, + "loss": 2.2113, + "step": 11818 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983055503645854e-05, + "loss": 2.0404, + "step": 11819 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983052612050234e-05, + "loss": 1.96, + "step": 11820 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983049720208e-05, + "loss": 1.9934, + "step": 11821 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983046828119127e-05, + "loss": 1.0033, + "step": 11822 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983043935783647e-05, + "loss": 1.083, + "step": 11823 + }, + { + "epoch": 0.01, + "learning_rate": 4.998304104320154e-05, + "loss": 0.9911, + "step": 11824 + }, + { + "epoch": 0.01, + "learning_rate": 4.998303815037281e-05, + "loss": 1.1688, + "step": 11825 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983035257297454e-05, + "loss": 1.3553, + "step": 11826 + }, + { + "epoch": 0.01, + "learning_rate": 4.998303236397548e-05, + "loss": 0.9616, + "step": 11827 + }, + { + "epoch": 0.01, + "learning_rate": 4.998302947040688e-05, + "loss": 1.381, + "step": 11828 + }, + { + "epoch": 0.01, + "learning_rate": 4.998302657659167e-05, + "loss": 1.2144, + "step": 11829 + }, + { + "epoch": 0.01, + "learning_rate": 4.998302368252983e-05, + "loss": 0.9461, + "step": 11830 + }, + { + "epoch": 0.01, + "learning_rate": 4.998302078822137e-05, + "loss": 0.5215, + "step": 11831 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983017893666284e-05, + "loss": 0.9689, + "step": 11832 + }, + { + "epoch": 0.01, + "learning_rate": 4.998301499886458e-05, + "loss": 1.4347, + "step": 11833 + }, + { + "epoch": 0.01, + "learning_rate": 4.998301210381625e-05, + "loss": 0.9473, + "step": 11834 + }, + { + "epoch": 0.01, + "learning_rate": 4.99830092085213e-05, + "loss": 1.5753, + "step": 11835 + }, + { + "epoch": 0.01, + "learning_rate": 4.998300631297973e-05, + "loss": 1.5506, + "step": 11836 + }, + { + "epoch": 0.01, + "learning_rate": 4.9983003417191544e-05, + "loss": 1.1735, + "step": 11837 + }, + { + "epoch": 0.01, + "learning_rate": 4.998300052115673e-05, + "loss": 1.0264, + "step": 11838 + }, + { + "epoch": 0.01, + "learning_rate": 4.998299762487529e-05, + "loss": 1.2732, + "step": 11839 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982994728347235e-05, + "loss": 1.0026, + "step": 11840 + }, + { + "epoch": 0.01, + "learning_rate": 4.998299183157256e-05, + "loss": 0.9684, + "step": 11841 + }, + { + "epoch": 0.01, + "learning_rate": 4.998298893455126e-05, + "loss": 1.1025, + "step": 11842 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982986037283335e-05, + "loss": 0.8175, + "step": 11843 + }, + { + "epoch": 0.01, + "learning_rate": 4.998298313976879e-05, + "loss": 0.6195, + "step": 11844 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982980242007624e-05, + "loss": 1.047, + "step": 11845 + }, + { + "epoch": 0.01, + "learning_rate": 4.998297734399984e-05, + "loss": 0.8917, + "step": 11846 + }, + { + "epoch": 0.01, + "learning_rate": 4.998297444574543e-05, + "loss": 1.0824, + "step": 11847 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982971547244406e-05, + "loss": 1.0504, + "step": 11848 + }, + { + "epoch": 0.01, + "learning_rate": 4.998296864849675e-05, + "loss": 0.9859, + "step": 11849 + }, + { + "epoch": 0.01, + "learning_rate": 4.998296574950249e-05, + "loss": 1.2751, + "step": 11850 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982962850261595e-05, + "loss": 1.1871, + "step": 11851 + }, + { + "epoch": 0.01, + "learning_rate": 4.998295995077408e-05, + "loss": 1.3634, + "step": 11852 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982957051039944e-05, + "loss": 1.5256, + "step": 11853 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982954151059185e-05, + "loss": 1.6309, + "step": 11854 + }, + { + "epoch": 0.01, + "learning_rate": 4.998295125083181e-05, + "loss": 1.72, + "step": 11855 + }, + { + "epoch": 0.01, + "learning_rate": 4.998294835035781e-05, + "loss": 2.9109, + "step": 11856 + }, + { + "epoch": 0.01, + "learning_rate": 4.998294544963719e-05, + "loss": 0.969, + "step": 11857 + }, + { + "epoch": 0.01, + "learning_rate": 4.998294254866995e-05, + "loss": 0.7834, + "step": 11858 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982939647456087e-05, + "loss": 1.0156, + "step": 11859 + }, + { + "epoch": 0.01, + "learning_rate": 4.99829367459956e-05, + "loss": 1.1561, + "step": 11860 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982933844288495e-05, + "loss": 1.1978, + "step": 11861 + }, + { + "epoch": 0.01, + "learning_rate": 4.998293094233477e-05, + "loss": 1.1596, + "step": 11862 + }, + { + "epoch": 0.01, + "learning_rate": 4.998292804013442e-05, + "loss": 1.118, + "step": 11863 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982925137687456e-05, + "loss": 1.2925, + "step": 11864 + }, + { + "epoch": 0.01, + "learning_rate": 4.998292223499387e-05, + "loss": 1.0552, + "step": 11865 + }, + { + "epoch": 0.01, + "learning_rate": 4.998291933205366e-05, + "loss": 1.5542, + "step": 11866 + }, + { + "epoch": 0.01, + "learning_rate": 4.998291642886683e-05, + "loss": 1.1159, + "step": 11867 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982913525433376e-05, + "loss": 0.9709, + "step": 11868 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982910621753306e-05, + "loss": 0.993, + "step": 11869 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982907717826614e-05, + "loss": 1.3587, + "step": 11870 + }, + { + "epoch": 0.01, + "learning_rate": 4.99829048136533e-05, + "loss": 1.2833, + "step": 11871 + }, + { + "epoch": 0.01, + "learning_rate": 4.998290190923337e-05, + "loss": 1.1247, + "step": 11872 + }, + { + "epoch": 0.01, + "learning_rate": 4.998289900456681e-05, + "loss": 0.836, + "step": 11873 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982896099653644e-05, + "loss": 0.9815, + "step": 11874 + }, + { + "epoch": 0.01, + "learning_rate": 4.998289319449384e-05, + "loss": 0.985, + "step": 11875 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982890289087423e-05, + "loss": 0.7515, + "step": 11876 + }, + { + "epoch": 0.01, + "learning_rate": 4.998288738343439e-05, + "loss": 1.1292, + "step": 11877 + }, + { + "epoch": 0.01, + "learning_rate": 4.998288447753473e-05, + "loss": 1.2825, + "step": 11878 + }, + { + "epoch": 0.01, + "learning_rate": 4.998288157138845e-05, + "loss": 0.9876, + "step": 11879 + }, + { + "epoch": 0.01, + "learning_rate": 4.998287866499556e-05, + "loss": 1.0972, + "step": 11880 + }, + { + "epoch": 0.01, + "learning_rate": 4.998287575835604e-05, + "loss": 1.5533, + "step": 11881 + }, + { + "epoch": 0.01, + "learning_rate": 4.99828728514699e-05, + "loss": 1.0838, + "step": 11882 + }, + { + "epoch": 0.01, + "learning_rate": 4.998286994433714e-05, + "loss": 0.9633, + "step": 11883 + }, + { + "epoch": 0.01, + "learning_rate": 4.998286703695776e-05, + "loss": 1.0257, + "step": 11884 + }, + { + "epoch": 0.01, + "learning_rate": 4.998286412933176e-05, + "loss": 1.0479, + "step": 11885 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982861221459135e-05, + "loss": 1.1619, + "step": 11886 + }, + { + "epoch": 0.01, + "learning_rate": 4.99828583133399e-05, + "loss": 1.0674, + "step": 11887 + }, + { + "epoch": 0.01, + "learning_rate": 4.998285540497403e-05, + "loss": 1.2695, + "step": 11888 + }, + { + "epoch": 0.01, + "learning_rate": 4.998285249636155e-05, + "loss": 1.2531, + "step": 11889 + }, + { + "epoch": 0.01, + "learning_rate": 4.998284958750245e-05, + "loss": 0.8401, + "step": 11890 + }, + { + "epoch": 0.01, + "learning_rate": 4.998284667839673e-05, + "loss": 0.7986, + "step": 11891 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982843769044384e-05, + "loss": 1.2023, + "step": 11892 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982840859445424e-05, + "loss": 0.6585, + "step": 11893 + }, + { + "epoch": 0.01, + "learning_rate": 4.998283794959984e-05, + "loss": 1.1371, + "step": 11894 + }, + { + "epoch": 0.01, + "learning_rate": 4.998283503950764e-05, + "loss": 1.1359, + "step": 11895 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982832129168824e-05, + "loss": 0.848, + "step": 11896 + }, + { + "epoch": 0.01, + "learning_rate": 4.998282921858337e-05, + "loss": 1.4398, + "step": 11897 + }, + { + "epoch": 0.01, + "learning_rate": 4.998282630775131e-05, + "loss": 1.3705, + "step": 11898 + }, + { + "epoch": 0.01, + "learning_rate": 4.998282339667263e-05, + "loss": 1.0344, + "step": 11899 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982820485347324e-05, + "loss": 1.21, + "step": 11900 + }, + { + "epoch": 0.01, + "learning_rate": 4.998281757377541e-05, + "loss": 1.0661, + "step": 11901 + }, + { + "epoch": 0.01, + "learning_rate": 4.998281466195687e-05, + "loss": 0.636, + "step": 11902 + }, + { + "epoch": 0.01, + "learning_rate": 4.99828117498917e-05, + "loss": 0.464, + "step": 11903 + }, + { + "epoch": 0.01, + "learning_rate": 4.998280883757992e-05, + "loss": 0.4931, + "step": 11904 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982805925021526e-05, + "loss": 0.4886, + "step": 11905 + }, + { + "epoch": 0.01, + "learning_rate": 4.99828030122165e-05, + "loss": 0.4493, + "step": 11906 + }, + { + "epoch": 0.01, + "learning_rate": 4.998280009916486e-05, + "loss": 0.4238, + "step": 11907 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982797185866605e-05, + "loss": 0.3304, + "step": 11908 + }, + { + "epoch": 0.01, + "learning_rate": 4.998279427232172e-05, + "loss": 0.2891, + "step": 11909 + }, + { + "epoch": 0.01, + "learning_rate": 4.998279135853022e-05, + "loss": 0.5425, + "step": 11910 + }, + { + "epoch": 0.01, + "learning_rate": 4.99827884444921e-05, + "loss": 1.1136, + "step": 11911 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982785530207366e-05, + "loss": 1.098, + "step": 11912 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982782615676004e-05, + "loss": 1.3658, + "step": 11913 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982779700898034e-05, + "loss": 1.0967, + "step": 11914 + }, + { + "epoch": 0.01, + "learning_rate": 4.998277678587343e-05, + "loss": 0.8413, + "step": 11915 + }, + { + "epoch": 0.01, + "learning_rate": 4.998277387060221e-05, + "loss": 1.1046, + "step": 11916 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982770955084376e-05, + "loss": 0.5204, + "step": 11917 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982768039319924e-05, + "loss": 0.5353, + "step": 11918 + }, + { + "epoch": 0.01, + "learning_rate": 4.998276512330885e-05, + "loss": 0.7756, + "step": 11919 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982762207051154e-05, + "loss": 1.1151, + "step": 11920 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982759290546836e-05, + "loss": 1.0285, + "step": 11921 + }, + { + "epoch": 0.01, + "learning_rate": 4.99827563737959e-05, + "loss": 0.9638, + "step": 11922 + }, + { + "epoch": 0.01, + "learning_rate": 4.998275345679835e-05, + "loss": 1.0292, + "step": 11923 + }, + { + "epoch": 0.01, + "learning_rate": 4.998275053955418e-05, + "loss": 1.0397, + "step": 11924 + }, + { + "epoch": 0.01, + "learning_rate": 4.998274762206339e-05, + "loss": 0.2139, + "step": 11925 + }, + { + "epoch": 0.01, + "learning_rate": 4.998274470432599e-05, + "loss": 0.1712, + "step": 11926 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982741786341956e-05, + "loss": 0.0851, + "step": 11927 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982738868111304e-05, + "loss": 0.0965, + "step": 11928 + }, + { + "epoch": 0.01, + "learning_rate": 4.998273594963404e-05, + "loss": 0.0471, + "step": 11929 + }, + { + "epoch": 0.01, + "learning_rate": 4.998273303091016e-05, + "loss": 0.172, + "step": 11930 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982730111939646e-05, + "loss": 0.3087, + "step": 11931 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982727192722525e-05, + "loss": 1.0428, + "step": 11932 + }, + { + "epoch": 0.01, + "learning_rate": 4.998272427325878e-05, + "loss": 1.0664, + "step": 11933 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982721353548424e-05, + "loss": 1.4479, + "step": 11934 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982718433591444e-05, + "loss": 0.9686, + "step": 11935 + }, + { + "epoch": 0.01, + "learning_rate": 4.998271551338784e-05, + "loss": 0.7969, + "step": 11936 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982712592937616e-05, + "loss": 0.9167, + "step": 11937 + }, + { + "epoch": 0.01, + "learning_rate": 4.998270967224078e-05, + "loss": 1.133, + "step": 11938 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982706751297334e-05, + "loss": 1.3677, + "step": 11939 + }, + { + "epoch": 0.01, + "learning_rate": 4.998270383010726e-05, + "loss": 0.8491, + "step": 11940 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982700908670564e-05, + "loss": 1.178, + "step": 11941 + }, + { + "epoch": 0.01, + "learning_rate": 4.998269798698725e-05, + "loss": 1.2003, + "step": 11942 + }, + { + "epoch": 0.01, + "learning_rate": 4.998269506505732e-05, + "loss": 1.2625, + "step": 11943 + }, + { + "epoch": 0.01, + "learning_rate": 4.998269214288077e-05, + "loss": 1.0666, + "step": 11944 + }, + { + "epoch": 0.01, + "learning_rate": 4.99826892204576e-05, + "loss": 0.9202, + "step": 11945 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982686297787816e-05, + "loss": 1.2332, + "step": 11946 + }, + { + "epoch": 0.01, + "learning_rate": 4.998268337487141e-05, + "loss": 0.8747, + "step": 11947 + }, + { + "epoch": 0.01, + "learning_rate": 4.998268045170838e-05, + "loss": 0.788, + "step": 11948 + }, + { + "epoch": 0.01, + "learning_rate": 4.998267752829874e-05, + "loss": 0.9752, + "step": 11949 + }, + { + "epoch": 0.01, + "learning_rate": 4.998267460464249e-05, + "loss": 0.8442, + "step": 11950 + }, + { + "epoch": 0.01, + "learning_rate": 4.99826716807396e-05, + "loss": 1.1171, + "step": 11951 + }, + { + "epoch": 0.01, + "learning_rate": 4.998266875659011e-05, + "loss": 0.8666, + "step": 11952 + }, + { + "epoch": 0.01, + "learning_rate": 4.998266583219399e-05, + "loss": 1.3084, + "step": 11953 + }, + { + "epoch": 0.01, + "learning_rate": 4.998266290755126e-05, + "loss": 1.5812, + "step": 11954 + }, + { + "epoch": 0.01, + "learning_rate": 4.998265998266191e-05, + "loss": 1.4263, + "step": 11955 + }, + { + "epoch": 0.01, + "learning_rate": 4.998265705752593e-05, + "loss": 1.3035, + "step": 11956 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982654132143346e-05, + "loss": 1.618, + "step": 11957 + }, + { + "epoch": 0.01, + "learning_rate": 4.998265120651414e-05, + "loss": 1.44, + "step": 11958 + }, + { + "epoch": 0.01, + "learning_rate": 4.998264828063831e-05, + "loss": 0.9215, + "step": 11959 + }, + { + "epoch": 0.01, + "learning_rate": 4.998264535451587e-05, + "loss": 0.9074, + "step": 11960 + }, + { + "epoch": 0.01, + "learning_rate": 4.998264242814681e-05, + "loss": 1.1559, + "step": 11961 + }, + { + "epoch": 0.01, + "learning_rate": 4.998263950153113e-05, + "loss": 0.8698, + "step": 11962 + }, + { + "epoch": 0.01, + "learning_rate": 4.998263657466883e-05, + "loss": 1.4071, + "step": 11963 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982633647559915e-05, + "loss": 1.1193, + "step": 11964 + }, + { + "epoch": 0.01, + "learning_rate": 4.998263072020438e-05, + "loss": 0.8669, + "step": 11965 + }, + { + "epoch": 0.01, + "learning_rate": 4.998262779260223e-05, + "loss": 1.0289, + "step": 11966 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982624864753456e-05, + "loss": 1.0742, + "step": 11967 + }, + { + "epoch": 0.01, + "learning_rate": 4.998262193665807e-05, + "loss": 0.9148, + "step": 11968 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982619008316065e-05, + "loss": 1.1374, + "step": 11969 + }, + { + "epoch": 0.01, + "learning_rate": 4.998261607972744e-05, + "loss": 1.2256, + "step": 11970 + }, + { + "epoch": 0.01, + "learning_rate": 4.99826131508922e-05, + "loss": 0.9827, + "step": 11971 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982610221810335e-05, + "loss": 0.8465, + "step": 11972 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982607292481856e-05, + "loss": 0.9577, + "step": 11973 + }, + { + "epoch": 0.01, + "learning_rate": 4.998260436290676e-05, + "loss": 1.0681, + "step": 11974 + }, + { + "epoch": 0.01, + "learning_rate": 4.998260143308505e-05, + "loss": 1.0878, + "step": 11975 + }, + { + "epoch": 0.01, + "learning_rate": 4.998259850301672e-05, + "loss": 1.1793, + "step": 11976 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982595572701776e-05, + "loss": 1.0593, + "step": 11977 + }, + { + "epoch": 0.01, + "learning_rate": 4.998259264214021e-05, + "loss": 1.1046, + "step": 11978 + }, + { + "epoch": 0.01, + "learning_rate": 4.998258971133202e-05, + "loss": 1.1013, + "step": 11979 + }, + { + "epoch": 0.01, + "learning_rate": 4.998258678027722e-05, + "loss": 1.0016, + "step": 11980 + }, + { + "epoch": 0.01, + "learning_rate": 4.998258384897581e-05, + "loss": 1.2534, + "step": 11981 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982580917427765e-05, + "loss": 1.0986, + "step": 11982 + }, + { + "epoch": 0.01, + "learning_rate": 4.998257798563312e-05, + "loss": 1.0771, + "step": 11983 + }, + { + "epoch": 0.01, + "learning_rate": 4.998257505359184e-05, + "loss": 1.2647, + "step": 11984 + }, + { + "epoch": 0.01, + "learning_rate": 4.998257212130395e-05, + "loss": 0.9088, + "step": 11985 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982569188769444e-05, + "loss": 1.4022, + "step": 11986 + }, + { + "epoch": 0.01, + "learning_rate": 4.998256625598833e-05, + "loss": 1.1231, + "step": 11987 + }, + { + "epoch": 0.01, + "learning_rate": 4.998256332296059e-05, + "loss": 1.1074, + "step": 11988 + }, + { + "epoch": 0.01, + "learning_rate": 4.998256038968623e-05, + "loss": 1.1353, + "step": 11989 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982557456165244e-05, + "loss": 1.07, + "step": 11990 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982554522397656e-05, + "loss": 0.887, + "step": 11991 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982551588383445e-05, + "loss": 0.6104, + "step": 11992 + }, + { + "epoch": 0.01, + "learning_rate": 4.998254865412262e-05, + "loss": 1.2055, + "step": 11993 + }, + { + "epoch": 0.01, + "learning_rate": 4.998254571961518e-05, + "loss": 1.4669, + "step": 11994 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982542784861115e-05, + "loss": 1.1528, + "step": 11995 + }, + { + "epoch": 0.01, + "learning_rate": 4.998253984986043e-05, + "loss": 1.1111, + "step": 11996 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982536914613135e-05, + "loss": 1.1834, + "step": 11997 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982533979119226e-05, + "loss": 0.9809, + "step": 11998 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982531043378695e-05, + "loss": 1.3561, + "step": 11999 + }, + { + "epoch": 0.01, + "learning_rate": 4.998252810739155e-05, + "loss": 1.0595, + "step": 12000 + }, + { + "epoch": 0.01, + "eval_loss": 1.0549529790878296, + "eval_runtime": 95.3857, + "eval_samples_per_second": 14.52, + "eval_steps_per_second": 3.638, + "step": 12000 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982525171157786e-05, + "loss": 1.407, + "step": 12001 + }, + { + "epoch": 0.01, + "learning_rate": 4.99825222346774e-05, + "loss": 1.2803, + "step": 12002 + }, + { + "epoch": 0.01, + "learning_rate": 4.99825192979504e-05, + "loss": 1.2961, + "step": 12003 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982516360976794e-05, + "loss": 1.1177, + "step": 12004 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982513423756564e-05, + "loss": 1.2042, + "step": 12005 + }, + { + "epoch": 0.01, + "learning_rate": 4.998251048628971e-05, + "loss": 1.0742, + "step": 12006 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982507548576244e-05, + "loss": 1.3132, + "step": 12007 + }, + { + "epoch": 0.01, + "learning_rate": 4.998250461061617e-05, + "loss": 1.3119, + "step": 12008 + }, + { + "epoch": 0.01, + "learning_rate": 4.998250167240946e-05, + "loss": 1.468, + "step": 12009 + }, + { + "epoch": 0.01, + "learning_rate": 4.998249873395615e-05, + "loss": 1.5029, + "step": 12010 + }, + { + "epoch": 0.01, + "learning_rate": 4.998249579525621e-05, + "loss": 0.4397, + "step": 12011 + }, + { + "epoch": 0.01, + "learning_rate": 4.998249285630967e-05, + "loss": 0.9234, + "step": 12012 + }, + { + "epoch": 0.01, + "learning_rate": 4.99824899171165e-05, + "loss": 1.1719, + "step": 12013 + }, + { + "epoch": 0.01, + "learning_rate": 4.998248697767672e-05, + "loss": 1.0474, + "step": 12014 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982484037990316e-05, + "loss": 1.0646, + "step": 12015 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982481098057304e-05, + "loss": 1.647, + "step": 12016 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982478157877676e-05, + "loss": 1.5274, + "step": 12017 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982475217451426e-05, + "loss": 1.4571, + "step": 12018 + }, + { + "epoch": 0.01, + "learning_rate": 4.998247227677856e-05, + "loss": 1.371, + "step": 12019 + }, + { + "epoch": 0.01, + "learning_rate": 4.998246933585908e-05, + "loss": 0.9796, + "step": 12020 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982466394692984e-05, + "loss": 0.8978, + "step": 12021 + }, + { + "epoch": 0.01, + "learning_rate": 4.998246345328027e-05, + "loss": 1.0147, + "step": 12022 + }, + { + "epoch": 0.01, + "learning_rate": 4.998246051162093e-05, + "loss": 1.2353, + "step": 12023 + }, + { + "epoch": 0.01, + "learning_rate": 4.998245756971499e-05, + "loss": 1.0887, + "step": 12024 + }, + { + "epoch": 0.01, + "learning_rate": 4.998245462756243e-05, + "loss": 0.9905, + "step": 12025 + }, + { + "epoch": 0.01, + "learning_rate": 4.998245168516325e-05, + "loss": 1.1723, + "step": 12026 + }, + { + "epoch": 0.01, + "learning_rate": 4.998244874251745e-05, + "loss": 1.1858, + "step": 12027 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982445799625036e-05, + "loss": 1.1936, + "step": 12028 + }, + { + "epoch": 0.01, + "learning_rate": 4.998244285648601e-05, + "loss": 1.1744, + "step": 12029 + }, + { + "epoch": 0.01, + "learning_rate": 4.998243991310036e-05, + "loss": 1.1702, + "step": 12030 + }, + { + "epoch": 0.01, + "learning_rate": 4.998243696946811e-05, + "loss": 1.2338, + "step": 12031 + }, + { + "epoch": 0.01, + "learning_rate": 4.998243402558923e-05, + "loss": 1.0368, + "step": 12032 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982431081463735e-05, + "loss": 1.0198, + "step": 12033 + }, + { + "epoch": 0.01, + "learning_rate": 4.998242813709163e-05, + "loss": 0.8942, + "step": 12034 + }, + { + "epoch": 0.01, + "learning_rate": 4.99824251924729e-05, + "loss": 1.2525, + "step": 12035 + }, + { + "epoch": 0.01, + "learning_rate": 4.998242224760756e-05, + "loss": 1.1418, + "step": 12036 + }, + { + "epoch": 0.01, + "learning_rate": 4.998241930249561e-05, + "loss": 1.1844, + "step": 12037 + }, + { + "epoch": 0.01, + "learning_rate": 4.998241635713704e-05, + "loss": 1.4223, + "step": 12038 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982413411531845e-05, + "loss": 1.1155, + "step": 12039 + }, + { + "epoch": 0.01, + "learning_rate": 4.998241046568004e-05, + "loss": 1.1506, + "step": 12040 + }, + { + "epoch": 0.01, + "learning_rate": 4.998240751958163e-05, + "loss": 1.1888, + "step": 12041 + }, + { + "epoch": 0.01, + "learning_rate": 4.998240457323658e-05, + "loss": 1.2224, + "step": 12042 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982401626644936e-05, + "loss": 1.2334, + "step": 12043 + }, + { + "epoch": 0.01, + "learning_rate": 4.998239867980667e-05, + "loss": 0.8968, + "step": 12044 + }, + { + "epoch": 0.01, + "learning_rate": 4.998239573272179e-05, + "loss": 1.0604, + "step": 12045 + }, + { + "epoch": 0.01, + "learning_rate": 4.998239278539028e-05, + "loss": 0.7093, + "step": 12046 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982389837812175e-05, + "loss": 1.4507, + "step": 12047 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982386889987444e-05, + "loss": 1.3586, + "step": 12048 + }, + { + "epoch": 0.01, + "learning_rate": 4.99823839419161e-05, + "loss": 0.9533, + "step": 12049 + }, + { + "epoch": 0.01, + "learning_rate": 4.998238099359814e-05, + "loss": 0.9154, + "step": 12050 + }, + { + "epoch": 0.01, + "learning_rate": 4.998237804503356e-05, + "loss": 0.9695, + "step": 12051 + }, + { + "epoch": 0.01, + "learning_rate": 4.998237509622237e-05, + "loss": 1.1887, + "step": 12052 + }, + { + "epoch": 0.01, + "learning_rate": 4.998237214716456e-05, + "loss": 1.1882, + "step": 12053 + }, + { + "epoch": 0.01, + "learning_rate": 4.998236919786014e-05, + "loss": 1.0273, + "step": 12054 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982366248309096e-05, + "loss": 1.229, + "step": 12055 + }, + { + "epoch": 0.01, + "learning_rate": 4.998236329851145e-05, + "loss": 1.2101, + "step": 12056 + }, + { + "epoch": 0.01, + "learning_rate": 4.998236034846718e-05, + "loss": 0.9866, + "step": 12057 + }, + { + "epoch": 0.01, + "learning_rate": 4.998235739817629e-05, + "loss": 1.1917, + "step": 12058 + }, + { + "epoch": 0.01, + "learning_rate": 4.998235444763879e-05, + "loss": 0.853, + "step": 12059 + }, + { + "epoch": 0.01, + "learning_rate": 4.998235149685468e-05, + "loss": 1.0745, + "step": 12060 + }, + { + "epoch": 0.01, + "learning_rate": 4.998234854582395e-05, + "loss": 1.1735, + "step": 12061 + }, + { + "epoch": 0.01, + "learning_rate": 4.99823455945466e-05, + "loss": 1.3232, + "step": 12062 + }, + { + "epoch": 0.01, + "learning_rate": 4.998234264302264e-05, + "loss": 1.137, + "step": 12063 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982339691252066e-05, + "loss": 1.0724, + "step": 12064 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982336739234875e-05, + "loss": 1.1275, + "step": 12065 + }, + { + "epoch": 0.01, + "learning_rate": 4.998233378697107e-05, + "loss": 1.189, + "step": 12066 + }, + { + "epoch": 0.01, + "learning_rate": 4.998233083446065e-05, + "loss": 1.0839, + "step": 12067 + }, + { + "epoch": 0.01, + "learning_rate": 4.998232788170361e-05, + "loss": 1.1865, + "step": 12068 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982324928699964e-05, + "loss": 1.2996, + "step": 12069 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982321975449696e-05, + "loss": 1.0822, + "step": 12070 + }, + { + "epoch": 0.01, + "learning_rate": 4.998231902195282e-05, + "loss": 0.9162, + "step": 12071 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982316068209315e-05, + "loss": 1.1151, + "step": 12072 + }, + { + "epoch": 0.01, + "learning_rate": 4.99823131142192e-05, + "loss": 0.9708, + "step": 12073 + }, + { + "epoch": 0.01, + "learning_rate": 4.998231015998248e-05, + "loss": 0.8525, + "step": 12074 + }, + { + "epoch": 0.01, + "learning_rate": 4.998230720549914e-05, + "loss": 1.1532, + "step": 12075 + }, + { + "epoch": 0.01, + "learning_rate": 4.998230425076919e-05, + "loss": 1.061, + "step": 12076 + }, + { + "epoch": 0.01, + "learning_rate": 4.998230129579261e-05, + "loss": 1.1644, + "step": 12077 + }, + { + "epoch": 0.01, + "learning_rate": 4.998229834056943e-05, + "loss": 1.3484, + "step": 12078 + }, + { + "epoch": 0.01, + "learning_rate": 4.998229538509963e-05, + "loss": 1.3026, + "step": 12079 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982292429383216e-05, + "loss": 1.1669, + "step": 12080 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982289473420186e-05, + "loss": 1.1338, + "step": 12081 + }, + { + "epoch": 0.01, + "learning_rate": 4.998228651721054e-05, + "loss": 0.9417, + "step": 12082 + }, + { + "epoch": 0.01, + "learning_rate": 4.998228356075428e-05, + "loss": 1.1192, + "step": 12083 + }, + { + "epoch": 0.01, + "learning_rate": 4.998228060405141e-05, + "loss": 1.1765, + "step": 12084 + }, + { + "epoch": 0.01, + "learning_rate": 4.998227764710192e-05, + "loss": 1.162, + "step": 12085 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982274689905814e-05, + "loss": 1.1069, + "step": 12086 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982271732463106e-05, + "loss": 1.2318, + "step": 12087 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982268774773776e-05, + "loss": 1.0605, + "step": 12088 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982265816837824e-05, + "loss": 0.5878, + "step": 12089 + }, + { + "epoch": 0.01, + "learning_rate": 4.998226285865527e-05, + "loss": 0.6539, + "step": 12090 + }, + { + "epoch": 0.01, + "learning_rate": 4.998225990022609e-05, + "loss": 0.9472, + "step": 12091 + }, + { + "epoch": 0.01, + "learning_rate": 4.998225694155031e-05, + "loss": 1.1295, + "step": 12092 + }, + { + "epoch": 0.01, + "learning_rate": 4.99822539826279e-05, + "loss": 1.0472, + "step": 12093 + }, + { + "epoch": 0.01, + "learning_rate": 4.998225102345888e-05, + "loss": 1.2961, + "step": 12094 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982248064043255e-05, + "loss": 1.1211, + "step": 12095 + }, + { + "epoch": 0.01, + "learning_rate": 4.998224510438101e-05, + "loss": 0.8796, + "step": 12096 + }, + { + "epoch": 0.01, + "learning_rate": 4.998224214447215e-05, + "loss": 0.8588, + "step": 12097 + }, + { + "epoch": 0.01, + "learning_rate": 4.998223918431668e-05, + "loss": 0.6565, + "step": 12098 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982236223914585e-05, + "loss": 1.061, + "step": 12099 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982233263265884e-05, + "loss": 1.2461, + "step": 12100 + }, + { + "epoch": 0.01, + "learning_rate": 4.998223030237057e-05, + "loss": 1.4045, + "step": 12101 + }, + { + "epoch": 0.01, + "learning_rate": 4.998222734122864e-05, + "loss": 1.216, + "step": 12102 + }, + { + "epoch": 0.01, + "learning_rate": 4.998222437984009e-05, + "loss": 1.0652, + "step": 12103 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982221418204935e-05, + "loss": 1.2943, + "step": 12104 + }, + { + "epoch": 0.01, + "learning_rate": 4.998221845632316e-05, + "loss": 1.3192, + "step": 12105 + }, + { + "epoch": 0.01, + "learning_rate": 4.998221549419477e-05, + "loss": 1.3339, + "step": 12106 + }, + { + "epoch": 0.01, + "learning_rate": 4.998221253181978e-05, + "loss": 1.5881, + "step": 12107 + }, + { + "epoch": 0.01, + "learning_rate": 4.998220956919816e-05, + "loss": 1.3818, + "step": 12108 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982206606329935e-05, + "loss": 1.2337, + "step": 12109 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982203643215094e-05, + "loss": 1.2094, + "step": 12110 + }, + { + "epoch": 0.01, + "learning_rate": 4.998220067985364e-05, + "loss": 1.3412, + "step": 12111 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982197716245574e-05, + "loss": 1.1303, + "step": 12112 + }, + { + "epoch": 0.01, + "learning_rate": 4.998219475239089e-05, + "loss": 0.9769, + "step": 12113 + }, + { + "epoch": 0.01, + "learning_rate": 4.998219178828959e-05, + "loss": 1.0795, + "step": 12114 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982188823941675e-05, + "loss": 1.0715, + "step": 12115 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982185859347156e-05, + "loss": 1.1885, + "step": 12116 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982182894506015e-05, + "loss": 0.9226, + "step": 12117 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982179929418265e-05, + "loss": 0.9401, + "step": 12118 + }, + { + "epoch": 0.01, + "learning_rate": 4.998217696408391e-05, + "loss": 0.7499, + "step": 12119 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982173998502926e-05, + "loss": 0.364, + "step": 12120 + }, + { + "epoch": 0.01, + "learning_rate": 4.998217103267533e-05, + "loss": 1.0029, + "step": 12121 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982168066601126e-05, + "loss": 0.9788, + "step": 12122 + }, + { + "epoch": 0.01, + "learning_rate": 4.998216510028031e-05, + "loss": 0.8128, + "step": 12123 + }, + { + "epoch": 0.01, + "learning_rate": 4.998216213371287e-05, + "loss": 0.8819, + "step": 12124 + }, + { + "epoch": 0.01, + "learning_rate": 4.998215916689883e-05, + "loss": 1.0108, + "step": 12125 + }, + { + "epoch": 0.01, + "learning_rate": 4.998215619983817e-05, + "loss": 0.9376, + "step": 12126 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982153232530895e-05, + "loss": 0.7809, + "step": 12127 + }, + { + "epoch": 0.01, + "learning_rate": 4.998215026497701e-05, + "loss": 0.7568, + "step": 12128 + }, + { + "epoch": 0.01, + "learning_rate": 4.998214729717652e-05, + "loss": 1.0232, + "step": 12129 + }, + { + "epoch": 0.01, + "learning_rate": 4.998214432912941e-05, + "loss": 1.3491, + "step": 12130 + }, + { + "epoch": 0.01, + "learning_rate": 4.998214136083568e-05, + "loss": 1.2105, + "step": 12131 + }, + { + "epoch": 0.01, + "learning_rate": 4.998213839229534e-05, + "loss": 1.145, + "step": 12132 + }, + { + "epoch": 0.01, + "learning_rate": 4.998213542350839e-05, + "loss": 1.1217, + "step": 12133 + }, + { + "epoch": 0.01, + "learning_rate": 4.998213245447483e-05, + "loss": 1.166, + "step": 12134 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982129485194644e-05, + "loss": 1.2469, + "step": 12135 + }, + { + "epoch": 0.01, + "learning_rate": 4.998212651566786e-05, + "loss": 1.2689, + "step": 12136 + }, + { + "epoch": 0.01, + "learning_rate": 4.998212354589445e-05, + "loss": 1.0761, + "step": 12137 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982120575874436e-05, + "loss": 0.984, + "step": 12138 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982117605607805e-05, + "loss": 1.2007, + "step": 12139 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982114635094566e-05, + "loss": 1.1728, + "step": 12140 + }, + { + "epoch": 0.01, + "learning_rate": 4.998211166433471e-05, + "loss": 1.186, + "step": 12141 + }, + { + "epoch": 0.01, + "learning_rate": 4.998210869332824e-05, + "loss": 1.2243, + "step": 12142 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982105722075154e-05, + "loss": 1.4051, + "step": 12143 + }, + { + "epoch": 0.01, + "learning_rate": 4.998210275057547e-05, + "loss": 1.0489, + "step": 12144 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982099778829164e-05, + "loss": 0.8759, + "step": 12145 + }, + { + "epoch": 0.01, + "learning_rate": 4.998209680683624e-05, + "loss": 1.2142, + "step": 12146 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982093834596706e-05, + "loss": 1.3355, + "step": 12147 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982090862110564e-05, + "loss": 0.9924, + "step": 12148 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982087889377806e-05, + "loss": 0.9815, + "step": 12149 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982084916398434e-05, + "loss": 0.6959, + "step": 12150 + }, + { + "epoch": 0.01, + "learning_rate": 4.998208194317245e-05, + "loss": 0.4738, + "step": 12151 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982078969699856e-05, + "loss": 0.3914, + "step": 12152 + }, + { + "epoch": 0.01, + "learning_rate": 4.998207599598065e-05, + "loss": 0.6012, + "step": 12153 + }, + { + "epoch": 0.01, + "learning_rate": 4.998207302201482e-05, + "loss": 0.3795, + "step": 12154 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982070047802394e-05, + "loss": 0.3312, + "step": 12155 + }, + { + "epoch": 0.01, + "learning_rate": 4.998206707334335e-05, + "loss": 0.4454, + "step": 12156 + }, + { + "epoch": 0.01, + "learning_rate": 4.998206409863769e-05, + "loss": 0.9173, + "step": 12157 + }, + { + "epoch": 0.01, + "learning_rate": 4.998206112368542e-05, + "loss": 1.0747, + "step": 12158 + }, + { + "epoch": 0.01, + "learning_rate": 4.998205814848653e-05, + "loss": 1.2573, + "step": 12159 + }, + { + "epoch": 0.01, + "learning_rate": 4.998205517304104e-05, + "loss": 1.3131, + "step": 12160 + }, + { + "epoch": 0.01, + "learning_rate": 4.998205219734893e-05, + "loss": 0.9893, + "step": 12161 + }, + { + "epoch": 0.01, + "learning_rate": 4.998204922141021e-05, + "loss": 0.5699, + "step": 12162 + }, + { + "epoch": 0.01, + "learning_rate": 4.998204624522488e-05, + "loss": 0.7396, + "step": 12163 + }, + { + "epoch": 0.01, + "learning_rate": 4.998204326879294e-05, + "loss": 0.9701, + "step": 12164 + }, + { + "epoch": 0.01, + "learning_rate": 4.998204029211438e-05, + "loss": 1.189, + "step": 12165 + }, + { + "epoch": 0.01, + "learning_rate": 4.998203731518921e-05, + "loss": 1.275, + "step": 12166 + }, + { + "epoch": 0.01, + "learning_rate": 4.998203433801743e-05, + "loss": 1.1439, + "step": 12167 + }, + { + "epoch": 0.01, + "learning_rate": 4.998203136059904e-05, + "loss": 0.8104, + "step": 12168 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982028382934034e-05, + "loss": 1.1535, + "step": 12169 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982025405022416e-05, + "loss": 1.0484, + "step": 12170 + }, + { + "epoch": 0.01, + "learning_rate": 4.998202242686418e-05, + "loss": 0.9965, + "step": 12171 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982019448459346e-05, + "loss": 0.8001, + "step": 12172 + }, + { + "epoch": 0.01, + "learning_rate": 4.998201646980789e-05, + "loss": 0.9192, + "step": 12173 + }, + { + "epoch": 0.01, + "learning_rate": 4.998201349090983e-05, + "loss": 1.187, + "step": 12174 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982010511765154e-05, + "loss": 1.1789, + "step": 12175 + }, + { + "epoch": 0.01, + "learning_rate": 4.9982007532373864e-05, + "loss": 0.832, + "step": 12176 + }, + { + "epoch": 0.01, + "learning_rate": 4.998200455273596e-05, + "loss": 1.1056, + "step": 12177 + }, + { + "epoch": 0.01, + "learning_rate": 4.998200157285145e-05, + "loss": 1.0418, + "step": 12178 + }, + { + "epoch": 0.01, + "learning_rate": 4.998199859272032e-05, + "loss": 0.8702, + "step": 12179 + }, + { + "epoch": 0.01, + "learning_rate": 4.998199561234259e-05, + "loss": 1.1537, + "step": 12180 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981992631718244e-05, + "loss": 1.0839, + "step": 12181 + }, + { + "epoch": 0.01, + "learning_rate": 4.998198965084728e-05, + "loss": 0.9476, + "step": 12182 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981986669729705e-05, + "loss": 0.5703, + "step": 12183 + }, + { + "epoch": 0.01, + "learning_rate": 4.998198368836553e-05, + "loss": 0.6009, + "step": 12184 + }, + { + "epoch": 0.01, + "learning_rate": 4.998198070675473e-05, + "loss": 1.3527, + "step": 12185 + }, + { + "epoch": 0.01, + "learning_rate": 4.998197772489733e-05, + "loss": 1.3717, + "step": 12186 + }, + { + "epoch": 0.01, + "learning_rate": 4.998197474279331e-05, + "loss": 1.1882, + "step": 12187 + }, + { + "epoch": 0.01, + "learning_rate": 4.998197176044268e-05, + "loss": 0.9134, + "step": 12188 + }, + { + "epoch": 0.01, + "learning_rate": 4.998196877784544e-05, + "loss": 0.7254, + "step": 12189 + }, + { + "epoch": 0.01, + "learning_rate": 4.998196579500159e-05, + "loss": 0.9603, + "step": 12190 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981962811911125e-05, + "loss": 1.0766, + "step": 12191 + }, + { + "epoch": 0.01, + "learning_rate": 4.998195982857405e-05, + "loss": 1.2593, + "step": 12192 + }, + { + "epoch": 0.01, + "learning_rate": 4.998195684499036e-05, + "loss": 1.2716, + "step": 12193 + }, + { + "epoch": 0.01, + "learning_rate": 4.998195386116007e-05, + "loss": 1.1075, + "step": 12194 + }, + { + "epoch": 0.01, + "learning_rate": 4.998195087708316e-05, + "loss": 1.1545, + "step": 12195 + }, + { + "epoch": 0.01, + "learning_rate": 4.998194789275964e-05, + "loss": 1.0028, + "step": 12196 + }, + { + "epoch": 0.01, + "learning_rate": 4.99819449081895e-05, + "loss": 1.1678, + "step": 12197 + }, + { + "epoch": 0.01, + "learning_rate": 4.998194192337276e-05, + "loss": 1.1907, + "step": 12198 + }, + { + "epoch": 0.01, + "learning_rate": 4.998193893830941e-05, + "loss": 1.3121, + "step": 12199 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981935952999445e-05, + "loss": 0.7019, + "step": 12200 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981932967442866e-05, + "loss": 1.1035, + "step": 12201 + }, + { + "epoch": 0.01, + "learning_rate": 4.998192998163968e-05, + "loss": 1.6263, + "step": 12202 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981926995589876e-05, + "loss": 0.9957, + "step": 12203 + }, + { + "epoch": 0.01, + "learning_rate": 4.998192400929347e-05, + "loss": 0.9056, + "step": 12204 + }, + { + "epoch": 0.01, + "learning_rate": 4.998192102275045e-05, + "loss": 0.8507, + "step": 12205 + }, + { + "epoch": 0.01, + "learning_rate": 4.998191803596082e-05, + "loss": 0.9602, + "step": 12206 + }, + { + "epoch": 0.01, + "learning_rate": 4.998191504892458e-05, + "loss": 0.8681, + "step": 12207 + }, + { + "epoch": 0.01, + "learning_rate": 4.998191206164172e-05, + "loss": 0.7919, + "step": 12208 + }, + { + "epoch": 0.01, + "learning_rate": 4.998190907411226e-05, + "loss": 0.784, + "step": 12209 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981906086336184e-05, + "loss": 0.739, + "step": 12210 + }, + { + "epoch": 0.01, + "learning_rate": 4.99819030983135e-05, + "loss": 0.7427, + "step": 12211 + }, + { + "epoch": 0.01, + "learning_rate": 4.99819001100442e-05, + "loss": 0.7395, + "step": 12212 + }, + { + "epoch": 0.01, + "learning_rate": 4.99818971215283e-05, + "loss": 0.6996, + "step": 12213 + }, + { + "epoch": 0.01, + "learning_rate": 4.998189413276578e-05, + "loss": 0.7456, + "step": 12214 + }, + { + "epoch": 0.01, + "learning_rate": 4.998189114375665e-05, + "loss": 0.7073, + "step": 12215 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981888154500914e-05, + "loss": 0.7032, + "step": 12216 + }, + { + "epoch": 0.01, + "learning_rate": 4.998188516499855e-05, + "loss": 0.6384, + "step": 12217 + }, + { + "epoch": 0.01, + "learning_rate": 4.998188217524959e-05, + "loss": 0.6803, + "step": 12218 + }, + { + "epoch": 0.01, + "learning_rate": 4.998187918525402e-05, + "loss": 0.6206, + "step": 12219 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981876195011844e-05, + "loss": 0.6704, + "step": 12220 + }, + { + "epoch": 0.01, + "learning_rate": 4.998187320452305e-05, + "loss": 1.6482, + "step": 12221 + }, + { + "epoch": 0.01, + "learning_rate": 4.998187021378765e-05, + "loss": 0.9323, + "step": 12222 + }, + { + "epoch": 0.01, + "learning_rate": 4.998186722280563e-05, + "loss": 1.0517, + "step": 12223 + }, + { + "epoch": 0.01, + "learning_rate": 4.998186423157701e-05, + "loss": 0.9098, + "step": 12224 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981861240101765e-05, + "loss": 1.2377, + "step": 12225 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981858248379925e-05, + "loss": 1.1893, + "step": 12226 + }, + { + "epoch": 0.01, + "learning_rate": 4.998185525641147e-05, + "loss": 0.9471, + "step": 12227 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981852264196405e-05, + "loss": 1.2663, + "step": 12228 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981849271734726e-05, + "loss": 1.4243, + "step": 12229 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981846279026445e-05, + "loss": 0.8721, + "step": 12230 + }, + { + "epoch": 0.01, + "learning_rate": 4.998184328607154e-05, + "loss": 1.2458, + "step": 12231 + }, + { + "epoch": 0.01, + "learning_rate": 4.998184029287004e-05, + "loss": 1.2852, + "step": 12232 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981837299421916e-05, + "loss": 1.1038, + "step": 12233 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981834305727194e-05, + "loss": 1.0695, + "step": 12234 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981831311785856e-05, + "loss": 1.1814, + "step": 12235 + }, + { + "epoch": 0.01, + "learning_rate": 4.998182831759791e-05, + "loss": 1.1959, + "step": 12236 + }, + { + "epoch": 0.01, + "learning_rate": 4.998182532316335e-05, + "loss": 0.9837, + "step": 12237 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981822328482186e-05, + "loss": 1.0547, + "step": 12238 + }, + { + "epoch": 0.01, + "learning_rate": 4.99818193335544e-05, + "loss": 1.3387, + "step": 12239 + }, + { + "epoch": 0.01, + "learning_rate": 4.998181633838001e-05, + "loss": 1.2434, + "step": 12240 + }, + { + "epoch": 0.01, + "learning_rate": 4.998181334295902e-05, + "loss": 1.1832, + "step": 12241 + }, + { + "epoch": 0.01, + "learning_rate": 4.998181034729141e-05, + "loss": 2.241, + "step": 12242 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981807351377194e-05, + "loss": 1.0094, + "step": 12243 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981804355216366e-05, + "loss": 0.8128, + "step": 12244 + }, + { + "epoch": 0.01, + "learning_rate": 4.998180135880893e-05, + "loss": 1.1457, + "step": 12245 + }, + { + "epoch": 0.01, + "learning_rate": 4.998179836215488e-05, + "loss": 1.1867, + "step": 12246 + }, + { + "epoch": 0.01, + "learning_rate": 4.998179536525422e-05, + "loss": 1.2103, + "step": 12247 + }, + { + "epoch": 0.01, + "learning_rate": 4.998179236810696e-05, + "loss": 1.129, + "step": 12248 + }, + { + "epoch": 0.01, + "learning_rate": 4.998178937071308e-05, + "loss": 1.0006, + "step": 12249 + }, + { + "epoch": 0.01, + "learning_rate": 4.998178637307259e-05, + "loss": 1.1473, + "step": 12250 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981783375185495e-05, + "loss": 1.2892, + "step": 12251 + }, + { + "epoch": 0.01, + "learning_rate": 4.998178037705179e-05, + "loss": 1.1412, + "step": 12252 + }, + { + "epoch": 0.01, + "learning_rate": 4.998177737867148e-05, + "loss": 0.8711, + "step": 12253 + }, + { + "epoch": 0.01, + "learning_rate": 4.998177438004455e-05, + "loss": 0.9427, + "step": 12254 + }, + { + "epoch": 0.01, + "learning_rate": 4.998177138117102e-05, + "loss": 1.1894, + "step": 12255 + }, + { + "epoch": 0.01, + "learning_rate": 4.998176838205087e-05, + "loss": 1.3187, + "step": 12256 + }, + { + "epoch": 0.01, + "learning_rate": 4.998176538268412e-05, + "loss": 1.5896, + "step": 12257 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981762383070765e-05, + "loss": 1.7242, + "step": 12258 + }, + { + "epoch": 0.01, + "learning_rate": 4.998175938321079e-05, + "loss": 1.3832, + "step": 12259 + }, + { + "epoch": 0.01, + "learning_rate": 4.99817563831042e-05, + "loss": 1.3111, + "step": 12260 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981753382751015e-05, + "loss": 1.1332, + "step": 12261 + }, + { + "epoch": 0.01, + "learning_rate": 4.998175038215121e-05, + "loss": 0.7401, + "step": 12262 + }, + { + "epoch": 0.01, + "learning_rate": 4.998174738130481e-05, + "loss": 1.1572, + "step": 12263 + }, + { + "epoch": 0.01, + "learning_rate": 4.998174438021179e-05, + "loss": 0.963, + "step": 12264 + }, + { + "epoch": 0.01, + "learning_rate": 4.998174137887216e-05, + "loss": 0.9892, + "step": 12265 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981738377285924e-05, + "loss": 2.0799, + "step": 12266 + }, + { + "epoch": 0.01, + "learning_rate": 4.998173537545308e-05, + "loss": 1.0048, + "step": 12267 + }, + { + "epoch": 0.01, + "learning_rate": 4.998173237337362e-05, + "loss": 1.4656, + "step": 12268 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981729371047555e-05, + "loss": 0.895, + "step": 12269 + }, + { + "epoch": 0.01, + "learning_rate": 4.998172636847488e-05, + "loss": 1.1404, + "step": 12270 + }, + { + "epoch": 0.01, + "learning_rate": 4.99817233656556e-05, + "loss": 1.145, + "step": 12271 + }, + { + "epoch": 0.01, + "learning_rate": 4.998172036258971e-05, + "loss": 1.2568, + "step": 12272 + }, + { + "epoch": 0.01, + "learning_rate": 4.99817173592772e-05, + "loss": 1.4198, + "step": 12273 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981714355718094e-05, + "loss": 1.2216, + "step": 12274 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981711351912374e-05, + "loss": 1.1723, + "step": 12275 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981708347860044e-05, + "loss": 0.9818, + "step": 12276 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981705343561114e-05, + "loss": 1.1023, + "step": 12277 + }, + { + "epoch": 0.01, + "learning_rate": 4.998170233901557e-05, + "loss": 0.602, + "step": 12278 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981699334223406e-05, + "loss": 0.888, + "step": 12279 + }, + { + "epoch": 0.01, + "learning_rate": 4.998169632918465e-05, + "loss": 0.8708, + "step": 12280 + }, + { + "epoch": 0.01, + "learning_rate": 4.998169332389928e-05, + "loss": 1.2637, + "step": 12281 + }, + { + "epoch": 0.01, + "learning_rate": 4.998169031836729e-05, + "loss": 1.0338, + "step": 12282 + }, + { + "epoch": 0.01, + "learning_rate": 4.998168731258871e-05, + "loss": 0.7036, + "step": 12283 + }, + { + "epoch": 0.01, + "learning_rate": 4.99816843065635e-05, + "loss": 0.7508, + "step": 12284 + }, + { + "epoch": 0.01, + "learning_rate": 4.99816813002917e-05, + "loss": 0.584, + "step": 12285 + }, + { + "epoch": 0.01, + "learning_rate": 4.998167829377328e-05, + "loss": 0.4087, + "step": 12286 + }, + { + "epoch": 0.01, + "learning_rate": 4.998167528700826e-05, + "loss": 1.1411, + "step": 12287 + }, + { + "epoch": 0.01, + "learning_rate": 4.998167227999663e-05, + "loss": 1.0434, + "step": 12288 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981669272738386e-05, + "loss": 0.7202, + "step": 12289 + }, + { + "epoch": 0.01, + "learning_rate": 4.998166626523354e-05, + "loss": 0.831, + "step": 12290 + }, + { + "epoch": 0.01, + "learning_rate": 4.998166325748208e-05, + "loss": 0.7361, + "step": 12291 + }, + { + "epoch": 0.01, + "learning_rate": 4.998166024948401e-05, + "loss": 0.954, + "step": 12292 + }, + { + "epoch": 0.01, + "learning_rate": 4.998165724123933e-05, + "loss": 1.6453, + "step": 12293 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981654232748056e-05, + "loss": 1.0373, + "step": 12294 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981651224010164e-05, + "loss": 0.9059, + "step": 12295 + }, + { + "epoch": 0.01, + "learning_rate": 4.998164821502566e-05, + "loss": 1.0726, + "step": 12296 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981645205794555e-05, + "loss": 1.3252, + "step": 12297 + }, + { + "epoch": 0.01, + "learning_rate": 4.998164219631684e-05, + "loss": 1.1061, + "step": 12298 + }, + { + "epoch": 0.01, + "learning_rate": 4.998163918659251e-05, + "loss": 0.9898, + "step": 12299 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981636176621577e-05, + "loss": 0.9904, + "step": 12300 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981633166404033e-05, + "loss": 1.2752, + "step": 12301 + }, + { + "epoch": 0.01, + "learning_rate": 4.998163015593988e-05, + "loss": 0.8749, + "step": 12302 + }, + { + "epoch": 0.01, + "learning_rate": 4.998162714522913e-05, + "loss": 1.0561, + "step": 12303 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981624134271766e-05, + "loss": 1.0469, + "step": 12304 + }, + { + "epoch": 0.01, + "learning_rate": 4.998162112306779e-05, + "loss": 1.0939, + "step": 12305 + }, + { + "epoch": 0.01, + "learning_rate": 4.998161811161721e-05, + "loss": 0.8517, + "step": 12306 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981615099920015e-05, + "loss": 1.1539, + "step": 12307 + }, + { + "epoch": 0.01, + "learning_rate": 4.998161208797621e-05, + "loss": 1.1281, + "step": 12308 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981609075785814e-05, + "loss": 0.976, + "step": 12309 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981606063348794e-05, + "loss": 0.6655, + "step": 12310 + }, + { + "epoch": 0.01, + "learning_rate": 4.998160305066518e-05, + "loss": 1.7917, + "step": 12311 + }, + { + "epoch": 0.01, + "learning_rate": 4.998160003773494e-05, + "loss": 1.8101, + "step": 12312 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981597024558115e-05, + "loss": 1.9419, + "step": 12313 + }, + { + "epoch": 0.01, + "learning_rate": 4.998159401113466e-05, + "loss": 1.9558, + "step": 12314 + }, + { + "epoch": 0.01, + "learning_rate": 4.998159099746461e-05, + "loss": 1.7066, + "step": 12315 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981587983547954e-05, + "loss": 1.8039, + "step": 12316 + }, + { + "epoch": 0.01, + "learning_rate": 4.998158496938468e-05, + "loss": 1.7127, + "step": 12317 + }, + { + "epoch": 0.01, + "learning_rate": 4.998158195497481e-05, + "loss": 1.2072, + "step": 12318 + }, + { + "epoch": 0.01, + "learning_rate": 4.998157894031832e-05, + "loss": 0.3727, + "step": 12319 + }, + { + "epoch": 0.01, + "learning_rate": 4.998157592541523e-05, + "loss": 0.2639, + "step": 12320 + }, + { + "epoch": 0.01, + "learning_rate": 4.998157291026553e-05, + "loss": 0.5425, + "step": 12321 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981569894869226e-05, + "loss": 0.5329, + "step": 12322 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981566879226314e-05, + "loss": 0.7372, + "step": 12323 + }, + { + "epoch": 0.01, + "learning_rate": 4.998156386333679e-05, + "loss": 1.1371, + "step": 12324 + }, + { + "epoch": 0.01, + "learning_rate": 4.998156084720066e-05, + "loss": 1.0917, + "step": 12325 + }, + { + "epoch": 0.01, + "learning_rate": 4.998155783081793e-05, + "loss": 1.0032, + "step": 12326 + }, + { + "epoch": 0.01, + "learning_rate": 4.998155481418858e-05, + "loss": 1.1574, + "step": 12327 + }, + { + "epoch": 0.01, + "learning_rate": 4.998155179731263e-05, + "loss": 1.24, + "step": 12328 + }, + { + "epoch": 0.01, + "learning_rate": 4.998154878019007e-05, + "loss": 1.3952, + "step": 12329 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981545762820903e-05, + "loss": 1.1931, + "step": 12330 + }, + { + "epoch": 0.01, + "learning_rate": 4.998154274520514e-05, + "loss": 0.7442, + "step": 12331 + }, + { + "epoch": 0.01, + "learning_rate": 4.998153972734275e-05, + "loss": 1.1324, + "step": 12332 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981536709233766e-05, + "loss": 0.605, + "step": 12333 + }, + { + "epoch": 0.01, + "learning_rate": 4.998153369087817e-05, + "loss": 0.5132, + "step": 12334 + }, + { + "epoch": 0.01, + "learning_rate": 4.998153067227597e-05, + "loss": 0.4633, + "step": 12335 + }, + { + "epoch": 0.01, + "learning_rate": 4.998152765342716e-05, + "loss": 0.4099, + "step": 12336 + }, + { + "epoch": 0.01, + "learning_rate": 4.998152463433175e-05, + "loss": 0.445, + "step": 12337 + }, + { + "epoch": 0.01, + "learning_rate": 4.998152161498972e-05, + "loss": 0.5687, + "step": 12338 + }, + { + "epoch": 0.01, + "learning_rate": 4.998151859540109e-05, + "loss": 1.0694, + "step": 12339 + }, + { + "epoch": 0.01, + "learning_rate": 4.998151557556585e-05, + "loss": 1.2817, + "step": 12340 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981512555484005e-05, + "loss": 1.1015, + "step": 12341 + }, + { + "epoch": 0.01, + "learning_rate": 4.998150953515556e-05, + "loss": 1.2148, + "step": 12342 + }, + { + "epoch": 0.01, + "learning_rate": 4.99815065145805e-05, + "loss": 1.1055, + "step": 12343 + }, + { + "epoch": 0.01, + "learning_rate": 4.998150349375883e-05, + "loss": 0.8014, + "step": 12344 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981500472690554e-05, + "loss": 1.1865, + "step": 12345 + }, + { + "epoch": 0.01, + "learning_rate": 4.998149745137568e-05, + "loss": 0.8474, + "step": 12346 + }, + { + "epoch": 0.01, + "learning_rate": 4.998149442981419e-05, + "loss": 1.171, + "step": 12347 + }, + { + "epoch": 0.01, + "learning_rate": 4.99814914080061e-05, + "loss": 1.1856, + "step": 12348 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981488385951406e-05, + "loss": 1.1312, + "step": 12349 + }, + { + "epoch": 0.01, + "learning_rate": 4.998148536365009e-05, + "loss": 1.0987, + "step": 12350 + }, + { + "epoch": 0.01, + "learning_rate": 4.998148234110218e-05, + "loss": 1.1335, + "step": 12351 + }, + { + "epoch": 0.01, + "learning_rate": 4.998147931830767e-05, + "loss": 1.0597, + "step": 12352 + }, + { + "epoch": 0.01, + "learning_rate": 4.998147629526654e-05, + "loss": 1.0757, + "step": 12353 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981473271978805e-05, + "loss": 0.9938, + "step": 12354 + }, + { + "epoch": 0.01, + "learning_rate": 4.998147024844446e-05, + "loss": 1.2549, + "step": 12355 + }, + { + "epoch": 0.01, + "learning_rate": 4.998146722466352e-05, + "loss": 1.1512, + "step": 12356 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981464200635966e-05, + "loss": 1.1168, + "step": 12357 + }, + { + "epoch": 0.01, + "learning_rate": 4.998146117636181e-05, + "loss": 0.9795, + "step": 12358 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981458151841045e-05, + "loss": 1.1687, + "step": 12359 + }, + { + "epoch": 0.01, + "learning_rate": 4.998145512707367e-05, + "loss": 1.1541, + "step": 12360 + }, + { + "epoch": 0.01, + "learning_rate": 4.998145210205969e-05, + "loss": 1.0797, + "step": 12361 + }, + { + "epoch": 0.01, + "learning_rate": 4.998144907679911e-05, + "loss": 1.126, + "step": 12362 + }, + { + "epoch": 0.01, + "learning_rate": 4.998144605129191e-05, + "loss": 1.1401, + "step": 12363 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981443025538125e-05, + "loss": 0.9566, + "step": 12364 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981439999537715e-05, + "loss": 1.0134, + "step": 12365 + }, + { + "epoch": 0.01, + "learning_rate": 4.998143697329071e-05, + "loss": 1.2251, + "step": 12366 + }, + { + "epoch": 0.01, + "learning_rate": 4.998143394679709e-05, + "loss": 1.0263, + "step": 12367 + }, + { + "epoch": 0.01, + "learning_rate": 4.998143092005687e-05, + "loss": 1.1975, + "step": 12368 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981427893070045e-05, + "loss": 1.7828, + "step": 12369 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981424865836606e-05, + "loss": 0.8206, + "step": 12370 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981421838356565e-05, + "loss": 0.7618, + "step": 12371 + }, + { + "epoch": 0.01, + "learning_rate": 4.998141881062992e-05, + "loss": 0.5565, + "step": 12372 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981415782656664e-05, + "loss": 0.4305, + "step": 12373 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981412754436805e-05, + "loss": 0.3556, + "step": 12374 + }, + { + "epoch": 0.01, + "learning_rate": 4.998140972597034e-05, + "loss": 0.7321, + "step": 12375 + }, + { + "epoch": 0.01, + "learning_rate": 4.998140669725727e-05, + "loss": 0.7104, + "step": 12376 + }, + { + "epoch": 0.01, + "learning_rate": 4.99814036682976e-05, + "loss": 0.4816, + "step": 12377 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981400639091314e-05, + "loss": 0.211, + "step": 12378 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981397609638426e-05, + "loss": 0.319, + "step": 12379 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981394579938935e-05, + "loss": 0.5376, + "step": 12380 + }, + { + "epoch": 0.01, + "learning_rate": 4.998139154999283e-05, + "loss": 0.5669, + "step": 12381 + }, + { + "epoch": 0.01, + "learning_rate": 4.998138851980013e-05, + "loss": 0.6685, + "step": 12382 + }, + { + "epoch": 0.01, + "learning_rate": 4.998138548936082e-05, + "loss": 0.3601, + "step": 12383 + }, + { + "epoch": 0.01, + "learning_rate": 4.99813824586749e-05, + "loss": 0.6418, + "step": 12384 + }, + { + "epoch": 0.01, + "learning_rate": 4.998137942774237e-05, + "loss": 0.9678, + "step": 12385 + }, + { + "epoch": 0.01, + "learning_rate": 4.998137639656325e-05, + "loss": 0.7382, + "step": 12386 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981373365137515e-05, + "loss": 0.8277, + "step": 12387 + }, + { + "epoch": 0.01, + "learning_rate": 4.998137033346517e-05, + "loss": 0.4406, + "step": 12388 + }, + { + "epoch": 0.01, + "learning_rate": 4.998136730154623e-05, + "loss": 0.6967, + "step": 12389 + }, + { + "epoch": 0.01, + "learning_rate": 4.998136426938068e-05, + "loss": 0.4694, + "step": 12390 + }, + { + "epoch": 0.01, + "learning_rate": 4.998136123696852e-05, + "loss": 0.5338, + "step": 12391 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981358204309755e-05, + "loss": 0.6247, + "step": 12392 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981355171404394e-05, + "loss": 0.3445, + "step": 12393 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981352138252424e-05, + "loss": 0.297, + "step": 12394 + }, + { + "epoch": 0.01, + "learning_rate": 4.998134910485384e-05, + "loss": 0.9048, + "step": 12395 + }, + { + "epoch": 0.01, + "learning_rate": 4.998134607120866e-05, + "loss": 1.2166, + "step": 12396 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981343037316876e-05, + "loss": 1.181, + "step": 12397 + }, + { + "epoch": 0.01, + "learning_rate": 4.998134000317848e-05, + "loss": 1.2389, + "step": 12398 + }, + { + "epoch": 0.01, + "learning_rate": 4.998133696879348e-05, + "loss": 1.0067, + "step": 12399 + }, + { + "epoch": 0.01, + "learning_rate": 4.998133393416187e-05, + "loss": 1.1272, + "step": 12400 + }, + { + "epoch": 0.01, + "learning_rate": 4.998133089928366e-05, + "loss": 1.2163, + "step": 12401 + }, + { + "epoch": 0.01, + "learning_rate": 4.998132786415885e-05, + "loss": 0.8942, + "step": 12402 + }, + { + "epoch": 0.01, + "learning_rate": 4.998132482878743e-05, + "loss": 1.1119, + "step": 12403 + }, + { + "epoch": 0.01, + "learning_rate": 4.99813217931694e-05, + "loss": 1.3623, + "step": 12404 + }, + { + "epoch": 0.01, + "learning_rate": 4.998131875730477e-05, + "loss": 0.9595, + "step": 12405 + }, + { + "epoch": 0.01, + "learning_rate": 4.998131572119354e-05, + "loss": 0.7142, + "step": 12406 + }, + { + "epoch": 0.01, + "learning_rate": 4.99813126848357e-05, + "loss": 1.2325, + "step": 12407 + }, + { + "epoch": 0.01, + "learning_rate": 4.998130964823126e-05, + "loss": 1.4484, + "step": 12408 + }, + { + "epoch": 0.01, + "learning_rate": 4.998130661138021e-05, + "loss": 1.345, + "step": 12409 + }, + { + "epoch": 0.01, + "learning_rate": 4.998130357428255e-05, + "loss": 1.3685, + "step": 12410 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981300536938294e-05, + "loss": 1.1315, + "step": 12411 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981297499347424e-05, + "loss": 0.7519, + "step": 12412 + }, + { + "epoch": 0.01, + "learning_rate": 4.998129446150995e-05, + "loss": 1.1386, + "step": 12413 + }, + { + "epoch": 0.01, + "learning_rate": 4.998129142342588e-05, + "loss": 1.145, + "step": 12414 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981288385095204e-05, + "loss": 1.0816, + "step": 12415 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981285346517914e-05, + "loss": 1.1919, + "step": 12416 + }, + { + "epoch": 0.01, + "learning_rate": 4.998128230769403e-05, + "loss": 1.1038, + "step": 12417 + }, + { + "epoch": 0.01, + "learning_rate": 4.998127926862354e-05, + "loss": 1.1895, + "step": 12418 + }, + { + "epoch": 0.01, + "learning_rate": 4.998127622930644e-05, + "loss": 1.1287, + "step": 12419 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981273189742734e-05, + "loss": 1.1634, + "step": 12420 + }, + { + "epoch": 0.01, + "learning_rate": 4.998127014993242e-05, + "loss": 0.6406, + "step": 12421 + }, + { + "epoch": 0.01, + "learning_rate": 4.998126710987552e-05, + "loss": 0.8229, + "step": 12422 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981264069572e-05, + "loss": 1.1455, + "step": 12423 + }, + { + "epoch": 0.01, + "learning_rate": 4.998126102902188e-05, + "loss": 1.0545, + "step": 12424 + }, + { + "epoch": 0.01, + "learning_rate": 4.998125798822515e-05, + "loss": 1.0934, + "step": 12425 + }, + { + "epoch": 0.01, + "learning_rate": 4.998125494718182e-05, + "loss": 1.0143, + "step": 12426 + }, + { + "epoch": 0.01, + "learning_rate": 4.998125190589189e-05, + "loss": 1.1201, + "step": 12427 + }, + { + "epoch": 0.01, + "learning_rate": 4.998124886435535e-05, + "loss": 0.9573, + "step": 12428 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981245822572206e-05, + "loss": 0.8915, + "step": 12429 + }, + { + "epoch": 0.01, + "learning_rate": 4.998124278054246e-05, + "loss": 1.3156, + "step": 12430 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981239738266106e-05, + "loss": 1.2664, + "step": 12431 + }, + { + "epoch": 0.01, + "learning_rate": 4.998123669574315e-05, + "loss": 1.0817, + "step": 12432 + }, + { + "epoch": 0.01, + "learning_rate": 4.998123365297359e-05, + "loss": 0.702, + "step": 12433 + }, + { + "epoch": 0.01, + "learning_rate": 4.998123060995743e-05, + "loss": 1.0154, + "step": 12434 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981227566694656e-05, + "loss": 1.2546, + "step": 12435 + }, + { + "epoch": 0.01, + "learning_rate": 4.998122452318529e-05, + "loss": 1.1136, + "step": 12436 + }, + { + "epoch": 0.01, + "learning_rate": 4.998122147942931e-05, + "loss": 0.9993, + "step": 12437 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981218435426724e-05, + "loss": 1.0578, + "step": 12438 + }, + { + "epoch": 0.01, + "learning_rate": 4.998121539117754e-05, + "loss": 0.7509, + "step": 12439 + }, + { + "epoch": 0.01, + "learning_rate": 4.998121234668175e-05, + "loss": 1.0293, + "step": 12440 + }, + { + "epoch": 0.01, + "learning_rate": 4.998120930193936e-05, + "loss": 1.2657, + "step": 12441 + }, + { + "epoch": 0.01, + "learning_rate": 4.998120625695036e-05, + "loss": 1.0948, + "step": 12442 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981203211714764e-05, + "loss": 1.2965, + "step": 12443 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981200166232556e-05, + "loss": 1.1268, + "step": 12444 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981197120503746e-05, + "loss": 1.1302, + "step": 12445 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981194074528334e-05, + "loss": 1.464, + "step": 12446 + }, + { + "epoch": 0.01, + "learning_rate": 4.998119102830632e-05, + "loss": 1.2115, + "step": 12447 + }, + { + "epoch": 0.01, + "learning_rate": 4.99811879818377e-05, + "loss": 1.1439, + "step": 12448 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981184935122474e-05, + "loss": 1.3663, + "step": 12449 + }, + { + "epoch": 0.01, + "learning_rate": 4.998118188816064e-05, + "loss": 1.1248, + "step": 12450 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981178840952215e-05, + "loss": 1.0585, + "step": 12451 + }, + { + "epoch": 0.01, + "learning_rate": 4.998117579349718e-05, + "loss": 1.1164, + "step": 12452 + }, + { + "epoch": 0.01, + "learning_rate": 4.998117274579555e-05, + "loss": 1.0238, + "step": 12453 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981169697847294e-05, + "loss": 0.7298, + "step": 12454 + }, + { + "epoch": 0.01, + "learning_rate": 4.998116664965245e-05, + "loss": 1.1957, + "step": 12455 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981163601211e-05, + "loss": 1.012, + "step": 12456 + }, + { + "epoch": 0.01, + "learning_rate": 4.998116055252295e-05, + "loss": 1.1416, + "step": 12457 + }, + { + "epoch": 0.01, + "learning_rate": 4.998115750358829e-05, + "loss": 1.1676, + "step": 12458 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981154454407036e-05, + "loss": 1.1385, + "step": 12459 + }, + { + "epoch": 0.01, + "learning_rate": 4.998115140497918e-05, + "loss": 1.0225, + "step": 12460 + }, + { + "epoch": 0.01, + "learning_rate": 4.998114835530471e-05, + "loss": 0.7041, + "step": 12461 + }, + { + "epoch": 0.01, + "learning_rate": 4.998114530538364e-05, + "loss": 0.8163, + "step": 12462 + }, + { + "epoch": 0.01, + "learning_rate": 4.998114225521596e-05, + "loss": 0.5854, + "step": 12463 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981139204801686e-05, + "loss": 0.4704, + "step": 12464 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981136154140806e-05, + "loss": 0.5542, + "step": 12465 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981133103233325e-05, + "loss": 0.5285, + "step": 12466 + }, + { + "epoch": 0.01, + "learning_rate": 4.998113005207924e-05, + "loss": 0.6402, + "step": 12467 + }, + { + "epoch": 0.01, + "learning_rate": 4.998112700067854e-05, + "loss": 0.7608, + "step": 12468 + }, + { + "epoch": 0.01, + "learning_rate": 4.998112394903125e-05, + "loss": 0.8731, + "step": 12469 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981120897137355e-05, + "loss": 0.7127, + "step": 12470 + }, + { + "epoch": 0.01, + "learning_rate": 4.998111784499686e-05, + "loss": 1.0558, + "step": 12471 + }, + { + "epoch": 0.01, + "learning_rate": 4.998111479260976e-05, + "loss": 1.4552, + "step": 12472 + }, + { + "epoch": 0.01, + "learning_rate": 4.998111173997605e-05, + "loss": 0.5671, + "step": 12473 + }, + { + "epoch": 0.01, + "learning_rate": 4.998110868709574e-05, + "loss": 1.531, + "step": 12474 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981105633968825e-05, + "loss": 0.9289, + "step": 12475 + }, + { + "epoch": 0.01, + "learning_rate": 4.998110258059532e-05, + "loss": 0.9232, + "step": 12476 + }, + { + "epoch": 0.01, + "learning_rate": 4.99810995269752e-05, + "loss": 1.111, + "step": 12477 + }, + { + "epoch": 0.01, + "learning_rate": 4.998109647310848e-05, + "loss": 1.0808, + "step": 12478 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981093418995157e-05, + "loss": 1.274, + "step": 12479 + }, + { + "epoch": 0.01, + "learning_rate": 4.998109036463523e-05, + "loss": 1.0235, + "step": 12480 + }, + { + "epoch": 0.01, + "learning_rate": 4.998108731002871e-05, + "loss": 1.1525, + "step": 12481 + }, + { + "epoch": 0.01, + "learning_rate": 4.998108425517557e-05, + "loss": 1.1978, + "step": 12482 + }, + { + "epoch": 0.01, + "learning_rate": 4.998108120007584e-05, + "loss": 1.1724, + "step": 12483 + }, + { + "epoch": 0.01, + "learning_rate": 4.99810781447295e-05, + "loss": 1.2258, + "step": 12484 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981075089136563e-05, + "loss": 1.1161, + "step": 12485 + }, + { + "epoch": 0.01, + "learning_rate": 4.998107203329702e-05, + "loss": 0.6646, + "step": 12486 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981068977210873e-05, + "loss": 0.4699, + "step": 12487 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981065920878126e-05, + "loss": 0.4123, + "step": 12488 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981062864298776e-05, + "loss": 0.3083, + "step": 12489 + }, + { + "epoch": 0.01, + "learning_rate": 4.998105980747283e-05, + "loss": 0.8927, + "step": 12490 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981056750400265e-05, + "loss": 1.4359, + "step": 12491 + }, + { + "epoch": 0.01, + "learning_rate": 4.998105369308112e-05, + "loss": 0.9881, + "step": 12492 + }, + { + "epoch": 0.01, + "learning_rate": 4.998105063551536e-05, + "loss": 1.3489, + "step": 12493 + }, + { + "epoch": 0.01, + "learning_rate": 4.998104757770299e-05, + "loss": 1.2331, + "step": 12494 + }, + { + "epoch": 0.01, + "learning_rate": 4.998104451964403e-05, + "loss": 0.9847, + "step": 12495 + }, + { + "epoch": 0.01, + "learning_rate": 4.998104146133846e-05, + "loss": 1.0687, + "step": 12496 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981038402786296e-05, + "loss": 1.0977, + "step": 12497 + }, + { + "epoch": 0.01, + "learning_rate": 4.998103534398752e-05, + "loss": 1.1294, + "step": 12498 + }, + { + "epoch": 0.01, + "learning_rate": 4.998103228494215e-05, + "loss": 0.8384, + "step": 12499 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981029225650174e-05, + "loss": 0.8916, + "step": 12500 + }, + { + "epoch": 0.01, + "eval_loss": 1.0588072538375854, + "eval_runtime": 85.3376, + "eval_samples_per_second": 16.23, + "eval_steps_per_second": 4.066, + "step": 12500 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981026166111597e-05, + "loss": 0.9107, + "step": 12501 + }, + { + "epoch": 0.01, + "learning_rate": 4.998102310632641e-05, + "loss": 1.0434, + "step": 12502 + }, + { + "epoch": 0.01, + "learning_rate": 4.998102004629463e-05, + "loss": 0.9114, + "step": 12503 + }, + { + "epoch": 0.01, + "learning_rate": 4.9981016986016246e-05, + "loss": 1.0933, + "step": 12504 + }, + { + "epoch": 0.01, + "learning_rate": 4.998101392549126e-05, + "loss": 1.0656, + "step": 12505 + }, + { + "epoch": 0.01, + "learning_rate": 4.998101086471967e-05, + "loss": 0.8722, + "step": 12506 + }, + { + "epoch": 0.01, + "learning_rate": 4.998100780370148e-05, + "loss": 0.9555, + "step": 12507 + }, + { + "epoch": 0.01, + "learning_rate": 4.998100474243669e-05, + "loss": 1.1079, + "step": 12508 + }, + { + "epoch": 0.01, + "learning_rate": 4.998100168092529e-05, + "loss": 1.1997, + "step": 12509 + }, + { + "epoch": 0.01, + "learning_rate": 4.99809986191673e-05, + "loss": 0.8179, + "step": 12510 + }, + { + "epoch": 0.01, + "learning_rate": 4.99809955571627e-05, + "loss": 0.8281, + "step": 12511 + }, + { + "epoch": 0.01, + "learning_rate": 4.99809924949115e-05, + "loss": 1.0142, + "step": 12512 + }, + { + "epoch": 0.01, + "learning_rate": 4.998098943241369e-05, + "loss": 1.0984, + "step": 12513 + }, + { + "epoch": 0.01, + "learning_rate": 4.998098636966929e-05, + "loss": 1.2993, + "step": 12514 + }, + { + "epoch": 0.01, + "learning_rate": 4.998098330667829e-05, + "loss": 0.802, + "step": 12515 + }, + { + "epoch": 0.01, + "learning_rate": 4.998098024344068e-05, + "loss": 1.1062, + "step": 12516 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980977179956465e-05, + "loss": 1.3741, + "step": 12517 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980974116225657e-05, + "loss": 1.2558, + "step": 12518 + }, + { + "epoch": 0.01, + "learning_rate": 4.998097105224825e-05, + "loss": 0.8686, + "step": 12519 + }, + { + "epoch": 0.01, + "learning_rate": 4.998096798802423e-05, + "loss": 0.5407, + "step": 12520 + }, + { + "epoch": 0.01, + "learning_rate": 4.998096492355362e-05, + "loss": 0.9241, + "step": 12521 + }, + { + "epoch": 0.01, + "learning_rate": 4.99809618588364e-05, + "loss": 1.1364, + "step": 12522 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980958793872577e-05, + "loss": 1.2324, + "step": 12523 + }, + { + "epoch": 0.01, + "learning_rate": 4.998095572866216e-05, + "loss": 0.91, + "step": 12524 + }, + { + "epoch": 0.01, + "learning_rate": 4.998095266320514e-05, + "loss": 1.3726, + "step": 12525 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980949597501515e-05, + "loss": 1.0983, + "step": 12526 + }, + { + "epoch": 0.01, + "learning_rate": 4.998094653155129e-05, + "loss": 1.1695, + "step": 12527 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980943465354465e-05, + "loss": 1.1617, + "step": 12528 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980940398911045e-05, + "loss": 1.1853, + "step": 12529 + }, + { + "epoch": 0.01, + "learning_rate": 4.998093733222101e-05, + "loss": 1.0265, + "step": 12530 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980934265284384e-05, + "loss": 0.9487, + "step": 12531 + }, + { + "epoch": 0.01, + "learning_rate": 4.998093119810115e-05, + "loss": 0.7968, + "step": 12532 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980928130671316e-05, + "loss": 1.2177, + "step": 12533 + }, + { + "epoch": 0.01, + "learning_rate": 4.998092506299489e-05, + "loss": 0.8995, + "step": 12534 + }, + { + "epoch": 0.01, + "learning_rate": 4.998092199507185e-05, + "loss": 1.1374, + "step": 12535 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980918926902215e-05, + "loss": 1.1554, + "step": 12536 + }, + { + "epoch": 0.01, + "learning_rate": 4.998091585848598e-05, + "loss": 1.1725, + "step": 12537 + }, + { + "epoch": 0.01, + "learning_rate": 4.998091278982314e-05, + "loss": 1.2441, + "step": 12538 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980909720913705e-05, + "loss": 0.9656, + "step": 12539 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980906651757664e-05, + "loss": 1.0499, + "step": 12540 + }, + { + "epoch": 0.01, + "learning_rate": 4.998090358235502e-05, + "loss": 1.3264, + "step": 12541 + }, + { + "epoch": 0.01, + "learning_rate": 4.998090051270578e-05, + "loss": 0.9362, + "step": 12542 + }, + { + "epoch": 0.01, + "learning_rate": 4.998089744280994e-05, + "loss": 1.0823, + "step": 12543 + }, + { + "epoch": 0.01, + "learning_rate": 4.998089437266749e-05, + "loss": 1.3716, + "step": 12544 + }, + { + "epoch": 0.01, + "learning_rate": 4.998089130227844e-05, + "loss": 0.5029, + "step": 12545 + }, + { + "epoch": 0.01, + "learning_rate": 4.99808882316428e-05, + "loss": 1.1508, + "step": 12546 + }, + { + "epoch": 0.01, + "learning_rate": 4.998088516076055e-05, + "loss": 1.5744, + "step": 12547 + }, + { + "epoch": 0.01, + "learning_rate": 4.998088208963171e-05, + "loss": 1.0503, + "step": 12548 + }, + { + "epoch": 0.01, + "learning_rate": 4.998087901825626e-05, + "loss": 1.1751, + "step": 12549 + }, + { + "epoch": 0.01, + "learning_rate": 4.998087594663421e-05, + "loss": 1.1362, + "step": 12550 + }, + { + "epoch": 0.01, + "learning_rate": 4.998087287476556e-05, + "loss": 1.1305, + "step": 12551 + }, + { + "epoch": 0.01, + "learning_rate": 4.998086980265031e-05, + "loss": 1.2603, + "step": 12552 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980866730288464e-05, + "loss": 1.2407, + "step": 12553 + }, + { + "epoch": 0.01, + "learning_rate": 4.998086365768001e-05, + "loss": 1.2634, + "step": 12554 + }, + { + "epoch": 0.01, + "learning_rate": 4.998086058482496e-05, + "loss": 0.9507, + "step": 12555 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980857511723303e-05, + "loss": 0.9109, + "step": 12556 + }, + { + "epoch": 0.01, + "learning_rate": 4.998085443837506e-05, + "loss": 0.8525, + "step": 12557 + }, + { + "epoch": 0.01, + "learning_rate": 4.99808513647802e-05, + "loss": 1.0178, + "step": 12558 + }, + { + "epoch": 0.01, + "learning_rate": 4.998084829093875e-05, + "loss": 1.0699, + "step": 12559 + }, + { + "epoch": 0.01, + "learning_rate": 4.998084521685069e-05, + "loss": 1.0353, + "step": 12560 + }, + { + "epoch": 0.01, + "learning_rate": 4.998084214251604e-05, + "loss": 1.2007, + "step": 12561 + }, + { + "epoch": 0.01, + "learning_rate": 4.998083906793478e-05, + "loss": 1.2147, + "step": 12562 + }, + { + "epoch": 0.01, + "learning_rate": 4.998083599310693e-05, + "loss": 1.0938, + "step": 12563 + }, + { + "epoch": 0.01, + "learning_rate": 4.998083291803247e-05, + "loss": 0.8749, + "step": 12564 + }, + { + "epoch": 0.01, + "learning_rate": 4.998082984271142e-05, + "loss": 1.1915, + "step": 12565 + }, + { + "epoch": 0.01, + "learning_rate": 4.998082676714376e-05, + "loss": 1.2303, + "step": 12566 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980823691329505e-05, + "loss": 1.3084, + "step": 12567 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980820615268655e-05, + "loss": 1.021, + "step": 12568 + }, + { + "epoch": 0.01, + "learning_rate": 4.99808175389612e-05, + "loss": 0.8386, + "step": 12569 + }, + { + "epoch": 0.01, + "learning_rate": 4.998081446240714e-05, + "loss": 0.5628, + "step": 12570 + }, + { + "epoch": 0.01, + "learning_rate": 4.998081138560649e-05, + "loss": 0.394, + "step": 12571 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980808308559225e-05, + "loss": 0.3166, + "step": 12572 + }, + { + "epoch": 0.01, + "learning_rate": 4.998080523126537e-05, + "loss": 0.3661, + "step": 12573 + }, + { + "epoch": 0.01, + "learning_rate": 4.998080215372491e-05, + "loss": 0.2504, + "step": 12574 + }, + { + "epoch": 0.01, + "learning_rate": 4.998079907593786e-05, + "loss": 0.1394, + "step": 12575 + }, + { + "epoch": 0.01, + "learning_rate": 4.99807959979042e-05, + "loss": 0.1276, + "step": 12576 + }, + { + "epoch": 0.01, + "learning_rate": 4.998079291962394e-05, + "loss": 0.0873, + "step": 12577 + }, + { + "epoch": 0.01, + "learning_rate": 4.998078984109709e-05, + "loss": 0.1252, + "step": 12578 + }, + { + "epoch": 0.01, + "learning_rate": 4.998078676232363e-05, + "loss": 0.1228, + "step": 12579 + }, + { + "epoch": 0.01, + "learning_rate": 4.998078368330358e-05, + "loss": 0.1049, + "step": 12580 + }, + { + "epoch": 0.01, + "learning_rate": 4.998078060403692e-05, + "loss": 0.2173, + "step": 12581 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980777524523667e-05, + "loss": 0.1332, + "step": 12582 + }, + { + "epoch": 0.01, + "learning_rate": 4.998077444476381e-05, + "loss": 0.0962, + "step": 12583 + }, + { + "epoch": 0.01, + "learning_rate": 4.998077136475736e-05, + "loss": 0.0435, + "step": 12584 + }, + { + "epoch": 0.01, + "learning_rate": 4.99807682845043e-05, + "loss": 0.024, + "step": 12585 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980765204004645e-05, + "loss": 0.0397, + "step": 12586 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980762123258395e-05, + "loss": 0.0451, + "step": 12587 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980759042265544e-05, + "loss": 0.0358, + "step": 12588 + }, + { + "epoch": 0.01, + "learning_rate": 4.998075596102609e-05, + "loss": 0.0299, + "step": 12589 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980752879540035e-05, + "loss": 0.0337, + "step": 12590 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980749797807385e-05, + "loss": 0.0337, + "step": 12591 + }, + { + "epoch": 0.01, + "learning_rate": 4.998074671582813e-05, + "loss": 0.0328, + "step": 12592 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980743633602286e-05, + "loss": 0.0385, + "step": 12593 + }, + { + "epoch": 0.01, + "learning_rate": 4.998074055112984e-05, + "loss": 0.0282, + "step": 12594 + }, + { + "epoch": 0.01, + "learning_rate": 4.998073746841079e-05, + "loss": 0.123, + "step": 12595 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980734385445135e-05, + "loss": 1.1476, + "step": 12596 + }, + { + "epoch": 0.01, + "learning_rate": 4.998073130223289e-05, + "loss": 1.3075, + "step": 12597 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980728218774045e-05, + "loss": 1.1733, + "step": 12598 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980725135068595e-05, + "loss": 1.0228, + "step": 12599 + }, + { + "epoch": 0.01, + "learning_rate": 4.998072205111655e-05, + "loss": 1.1655, + "step": 12600 + }, + { + "epoch": 0.01, + "learning_rate": 4.998071896691791e-05, + "loss": 1.2506, + "step": 12601 + }, + { + "epoch": 0.01, + "learning_rate": 4.998071588247266e-05, + "loss": 1.0348, + "step": 12602 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980712797780814e-05, + "loss": 0.8461, + "step": 12603 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980709712842375e-05, + "loss": 0.7891, + "step": 12604 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980706627657333e-05, + "loss": 0.9184, + "step": 12605 + }, + { + "epoch": 0.01, + "learning_rate": 4.998070354222569e-05, + "loss": 1.1337, + "step": 12606 + }, + { + "epoch": 0.01, + "learning_rate": 4.998070045654746e-05, + "loss": 1.1779, + "step": 12607 + }, + { + "epoch": 0.01, + "learning_rate": 4.998069737062262e-05, + "loss": 1.1481, + "step": 12608 + }, + { + "epoch": 0.01, + "learning_rate": 4.998069428445118e-05, + "loss": 1.2349, + "step": 12609 + }, + { + "epoch": 0.01, + "learning_rate": 4.998069119803315e-05, + "loss": 1.1853, + "step": 12610 + }, + { + "epoch": 0.01, + "learning_rate": 4.998068811136851e-05, + "loss": 1.2069, + "step": 12611 + }, + { + "epoch": 0.01, + "learning_rate": 4.998068502445728e-05, + "loss": 0.9469, + "step": 12612 + }, + { + "epoch": 0.01, + "learning_rate": 4.998068193729945e-05, + "loss": 1.2307, + "step": 12613 + }, + { + "epoch": 0.01, + "learning_rate": 4.998067884989502e-05, + "loss": 0.8482, + "step": 12614 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980675762243986e-05, + "loss": 1.2056, + "step": 12615 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980672674346366e-05, + "loss": 0.9474, + "step": 12616 + }, + { + "epoch": 0.01, + "learning_rate": 4.998066958620213e-05, + "loss": 1.1598, + "step": 12617 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980666497811307e-05, + "loss": 1.2106, + "step": 12618 + }, + { + "epoch": 0.01, + "learning_rate": 4.998066340917388e-05, + "loss": 1.0984, + "step": 12619 + }, + { + "epoch": 0.01, + "learning_rate": 4.998066032028986e-05, + "loss": 1.2588, + "step": 12620 + }, + { + "epoch": 0.01, + "learning_rate": 4.998065723115924e-05, + "loss": 1.129, + "step": 12621 + }, + { + "epoch": 0.01, + "learning_rate": 4.998065414178202e-05, + "loss": 0.9005, + "step": 12622 + }, + { + "epoch": 0.01, + "learning_rate": 4.99806510521582e-05, + "loss": 1.1697, + "step": 12623 + }, + { + "epoch": 0.01, + "learning_rate": 4.998064796228779e-05, + "loss": 1.2553, + "step": 12624 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980644872170765e-05, + "loss": 1.0182, + "step": 12625 + }, + { + "epoch": 0.01, + "learning_rate": 4.998064178180716e-05, + "loss": 1.2193, + "step": 12626 + }, + { + "epoch": 0.01, + "learning_rate": 4.998063869119695e-05, + "loss": 1.0012, + "step": 12627 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980635600340134e-05, + "loss": 1.1328, + "step": 12628 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980632509236724e-05, + "loss": 1.2161, + "step": 12629 + }, + { + "epoch": 0.01, + "learning_rate": 4.998062941788671e-05, + "loss": 0.9976, + "step": 12630 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980626326290114e-05, + "loss": 1.162, + "step": 12631 + }, + { + "epoch": 0.01, + "learning_rate": 4.998062323444691e-05, + "loss": 1.1436, + "step": 12632 + }, + { + "epoch": 0.01, + "learning_rate": 4.99806201423571e-05, + "loss": 1.2007, + "step": 12633 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980617050020704e-05, + "loss": 1.162, + "step": 12634 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980613957437704e-05, + "loss": 1.2691, + "step": 12635 + }, + { + "epoch": 0.01, + "learning_rate": 4.998061086460811e-05, + "loss": 1.2555, + "step": 12636 + }, + { + "epoch": 0.01, + "learning_rate": 4.998060777153192e-05, + "loss": 0.7833, + "step": 12637 + }, + { + "epoch": 0.01, + "learning_rate": 4.998060467820913e-05, + "loss": 0.6888, + "step": 12638 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980601584639735e-05, + "loss": 1.0737, + "step": 12639 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980598490823746e-05, + "loss": 1.1638, + "step": 12640 + }, + { + "epoch": 0.01, + "learning_rate": 4.998059539676116e-05, + "loss": 1.257, + "step": 12641 + }, + { + "epoch": 0.01, + "learning_rate": 4.998059230245198e-05, + "loss": 0.9882, + "step": 12642 + }, + { + "epoch": 0.01, + "learning_rate": 4.99805892078962e-05, + "loss": 1.2478, + "step": 12643 + }, + { + "epoch": 0.01, + "learning_rate": 4.998058611309382e-05, + "loss": 1.2171, + "step": 12644 + }, + { + "epoch": 0.01, + "learning_rate": 4.998058301804484e-05, + "loss": 0.894, + "step": 12645 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980579922749266e-05, + "loss": 1.1506, + "step": 12646 + }, + { + "epoch": 0.01, + "learning_rate": 4.998057682720709e-05, + "loss": 1.0116, + "step": 12647 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980573731418324e-05, + "loss": 1.2914, + "step": 12648 + }, + { + "epoch": 0.01, + "learning_rate": 4.998057063538295e-05, + "loss": 1.2192, + "step": 12649 + }, + { + "epoch": 0.01, + "learning_rate": 4.998056753910099e-05, + "loss": 1.235, + "step": 12650 + }, + { + "epoch": 0.01, + "learning_rate": 4.998056444257242e-05, + "loss": 1.0878, + "step": 12651 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980561345797264e-05, + "loss": 1.2564, + "step": 12652 + }, + { + "epoch": 0.01, + "learning_rate": 4.99805582487755e-05, + "loss": 1.0418, + "step": 12653 + }, + { + "epoch": 0.01, + "learning_rate": 4.998055515150715e-05, + "loss": 1.008, + "step": 12654 + }, + { + "epoch": 0.01, + "learning_rate": 4.998055205399219e-05, + "loss": 1.3084, + "step": 12655 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980548956230644e-05, + "loss": 1.2171, + "step": 12656 + }, + { + "epoch": 0.01, + "learning_rate": 4.998054585822249e-05, + "loss": 0.9538, + "step": 12657 + }, + { + "epoch": 0.01, + "learning_rate": 4.998054275996774e-05, + "loss": 1.1178, + "step": 12658 + }, + { + "epoch": 0.01, + "learning_rate": 4.99805396614664e-05, + "loss": 1.0045, + "step": 12659 + }, + { + "epoch": 0.01, + "learning_rate": 4.998053656271846e-05, + "loss": 1.2685, + "step": 12660 + }, + { + "epoch": 0.01, + "learning_rate": 4.998053346372392e-05, + "loss": 1.0377, + "step": 12661 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980530364482786e-05, + "loss": 0.9865, + "step": 12662 + }, + { + "epoch": 0.01, + "learning_rate": 4.998052726499505e-05, + "loss": 1.273, + "step": 12663 + }, + { + "epoch": 0.01, + "learning_rate": 4.998052416526072e-05, + "loss": 0.9645, + "step": 12664 + }, + { + "epoch": 0.01, + "learning_rate": 4.99805210652798e-05, + "loss": 1.2464, + "step": 12665 + }, + { + "epoch": 0.01, + "learning_rate": 4.998051796505227e-05, + "loss": 1.0677, + "step": 12666 + }, + { + "epoch": 0.01, + "learning_rate": 4.998051486457815e-05, + "loss": 0.9309, + "step": 12667 + }, + { + "epoch": 0.01, + "learning_rate": 4.998051176385743e-05, + "loss": 1.0929, + "step": 12668 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980508662890114e-05, + "loss": 0.8478, + "step": 12669 + }, + { + "epoch": 0.01, + "learning_rate": 4.998050556167621e-05, + "loss": 0.6965, + "step": 12670 + }, + { + "epoch": 0.01, + "learning_rate": 4.998050246021569e-05, + "loss": 0.7587, + "step": 12671 + }, + { + "epoch": 0.01, + "learning_rate": 4.998049935850858e-05, + "loss": 0.8848, + "step": 12672 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980496256554885e-05, + "loss": 0.9169, + "step": 12673 + }, + { + "epoch": 0.01, + "learning_rate": 4.998049315435458e-05, + "loss": 1.0632, + "step": 12674 + }, + { + "epoch": 0.01, + "learning_rate": 4.998049005190768e-05, + "loss": 1.4337, + "step": 12675 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980486949214186e-05, + "loss": 0.9088, + "step": 12676 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980483846274094e-05, + "loss": 1.1794, + "step": 12677 + }, + { + "epoch": 0.01, + "learning_rate": 4.998048074308741e-05, + "loss": 1.1089, + "step": 12678 + }, + { + "epoch": 0.01, + "learning_rate": 4.998047763965412e-05, + "loss": 1.0994, + "step": 12679 + }, + { + "epoch": 0.01, + "learning_rate": 4.998047453597424e-05, + "loss": 1.035, + "step": 12680 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980471432047757e-05, + "loss": 0.5826, + "step": 12681 + }, + { + "epoch": 0.01, + "learning_rate": 4.998046832787468e-05, + "loss": 0.378, + "step": 12682 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980465223455014e-05, + "loss": 0.582, + "step": 12683 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980462118788744e-05, + "loss": 0.6371, + "step": 12684 + }, + { + "epoch": 0.01, + "learning_rate": 4.998045901387588e-05, + "loss": 1.3622, + "step": 12685 + }, + { + "epoch": 0.01, + "learning_rate": 4.998045590871642e-05, + "loss": 0.9852, + "step": 12686 + }, + { + "epoch": 0.01, + "learning_rate": 4.998045280331036e-05, + "loss": 1.2683, + "step": 12687 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980449697657705e-05, + "loss": 1.1576, + "step": 12688 + }, + { + "epoch": 0.01, + "learning_rate": 4.998044659175845e-05, + "loss": 1.333, + "step": 12689 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980443485612606e-05, + "loss": 1.3333, + "step": 12690 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980440379220163e-05, + "loss": 1.0612, + "step": 12691 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980437272581126e-05, + "loss": 1.1116, + "step": 12692 + }, + { + "epoch": 0.01, + "learning_rate": 4.998043416569549e-05, + "loss": 1.3357, + "step": 12693 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980431058563246e-05, + "loss": 0.7662, + "step": 12694 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980427951184424e-05, + "loss": 0.2322, + "step": 12695 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980424843559e-05, + "loss": 0.4027, + "step": 12696 + }, + { + "epoch": 0.01, + "learning_rate": 4.998042173568698e-05, + "loss": 0.4733, + "step": 12697 + }, + { + "epoch": 0.01, + "learning_rate": 4.998041862756836e-05, + "loss": 0.6448, + "step": 12698 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980415519203144e-05, + "loss": 0.1055, + "step": 12699 + }, + { + "epoch": 0.01, + "learning_rate": 4.998041241059133e-05, + "loss": 0.7113, + "step": 12700 + }, + { + "epoch": 0.01, + "learning_rate": 4.998040930173292e-05, + "loss": 0.0967, + "step": 12701 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980406192627926e-05, + "loss": 0.5632, + "step": 12702 + }, + { + "epoch": 0.01, + "learning_rate": 4.998040308327633e-05, + "loss": 0.9085, + "step": 12703 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980399973678126e-05, + "loss": 1.2519, + "step": 12704 + }, + { + "epoch": 0.01, + "learning_rate": 4.998039686383334e-05, + "loss": 1.1727, + "step": 12705 + }, + { + "epoch": 0.01, + "learning_rate": 4.998039375374195e-05, + "loss": 1.2409, + "step": 12706 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980390643403976e-05, + "loss": 1.1744, + "step": 12707 + }, + { + "epoch": 0.01, + "learning_rate": 4.998038753281939e-05, + "loss": 1.0515, + "step": 12708 + }, + { + "epoch": 0.01, + "learning_rate": 4.998038442198822e-05, + "loss": 0.8253, + "step": 12709 + }, + { + "epoch": 0.01, + "learning_rate": 4.998038131091045e-05, + "loss": 0.9936, + "step": 12710 + }, + { + "epoch": 0.01, + "learning_rate": 4.998037819958608e-05, + "loss": 1.1891, + "step": 12711 + }, + { + "epoch": 0.01, + "learning_rate": 4.998037508801512e-05, + "loss": 1.3771, + "step": 12712 + }, + { + "epoch": 0.01, + "learning_rate": 4.998037197619756e-05, + "loss": 1.0651, + "step": 12713 + }, + { + "epoch": 0.01, + "learning_rate": 4.998036886413341e-05, + "loss": 1.07, + "step": 12714 + }, + { + "epoch": 0.01, + "learning_rate": 4.998036575182266e-05, + "loss": 1.0145, + "step": 12715 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980362639265316e-05, + "loss": 1.0349, + "step": 12716 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980359526461376e-05, + "loss": 1.2096, + "step": 12717 + }, + { + "epoch": 0.01, + "learning_rate": 4.998035641341084e-05, + "loss": 0.9221, + "step": 12718 + }, + { + "epoch": 0.01, + "learning_rate": 4.998035330011371e-05, + "loss": 1.2303, + "step": 12719 + }, + { + "epoch": 0.01, + "learning_rate": 4.998035018656998e-05, + "loss": 0.8573, + "step": 12720 + }, + { + "epoch": 0.01, + "learning_rate": 4.998034707277966e-05, + "loss": 1.3344, + "step": 12721 + }, + { + "epoch": 0.01, + "learning_rate": 4.998034395874274e-05, + "loss": 0.8321, + "step": 12722 + }, + { + "epoch": 0.01, + "learning_rate": 4.998034084445923e-05, + "loss": 0.8328, + "step": 12723 + }, + { + "epoch": 0.01, + "learning_rate": 4.998033772992912e-05, + "loss": 0.8378, + "step": 12724 + }, + { + "epoch": 0.01, + "learning_rate": 4.998033461515242e-05, + "loss": 0.7233, + "step": 12725 + }, + { + "epoch": 0.01, + "learning_rate": 4.998033150012912e-05, + "loss": 0.5925, + "step": 12726 + }, + { + "epoch": 0.01, + "learning_rate": 4.998032838485922e-05, + "loss": 0.6885, + "step": 12727 + }, + { + "epoch": 0.01, + "learning_rate": 4.998032526934273e-05, + "loss": 0.9141, + "step": 12728 + }, + { + "epoch": 0.01, + "learning_rate": 4.998032215357965e-05, + "loss": 2.9479, + "step": 12729 + }, + { + "epoch": 0.01, + "learning_rate": 4.998031903756997e-05, + "loss": 1.1008, + "step": 12730 + }, + { + "epoch": 0.01, + "learning_rate": 4.998031592131369e-05, + "loss": 0.9494, + "step": 12731 + }, + { + "epoch": 0.01, + "learning_rate": 4.998031280481083e-05, + "loss": 1.0106, + "step": 12732 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980309688061354e-05, + "loss": 1.2855, + "step": 12733 + }, + { + "epoch": 0.01, + "learning_rate": 4.998030657106529e-05, + "loss": 1.1777, + "step": 12734 + }, + { + "epoch": 0.01, + "learning_rate": 4.998030345382264e-05, + "loss": 1.3056, + "step": 12735 + }, + { + "epoch": 0.01, + "learning_rate": 4.998030033633339e-05, + "loss": 1.0951, + "step": 12736 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980297218597545e-05, + "loss": 1.4119, + "step": 12737 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980294100615104e-05, + "loss": 1.2088, + "step": 12738 + }, + { + "epoch": 0.01, + "learning_rate": 4.998029098238607e-05, + "loss": 1.381, + "step": 12739 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980287863910435e-05, + "loss": 1.1912, + "step": 12740 + }, + { + "epoch": 0.01, + "learning_rate": 4.998028474518821e-05, + "loss": 0.9648, + "step": 12741 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980281626219394e-05, + "loss": 1.0769, + "step": 12742 + }, + { + "epoch": 0.01, + "learning_rate": 4.998027850700398e-05, + "loss": 1.1291, + "step": 12743 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980275387541966e-05, + "loss": 1.2115, + "step": 12744 + }, + { + "epoch": 0.01, + "learning_rate": 4.998027226783336e-05, + "loss": 1.1611, + "step": 12745 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980269147878165e-05, + "loss": 1.1745, + "step": 12746 + }, + { + "epoch": 0.01, + "learning_rate": 4.998026602767637e-05, + "loss": 0.7889, + "step": 12747 + }, + { + "epoch": 0.01, + "learning_rate": 4.998026290722798e-05, + "loss": 0.973, + "step": 12748 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980259786533e-05, + "loss": 0.8657, + "step": 12749 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980256665591415e-05, + "loss": 1.1629, + "step": 12750 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980253544403245e-05, + "loss": 0.835, + "step": 12751 + }, + { + "epoch": 0.01, + "learning_rate": 4.998025042296848e-05, + "loss": 1.2045, + "step": 12752 + }, + { + "epoch": 0.01, + "learning_rate": 4.998024730128712e-05, + "loss": 0.9768, + "step": 12753 + }, + { + "epoch": 0.01, + "learning_rate": 4.998024417935916e-05, + "loss": 0.8842, + "step": 12754 + }, + { + "epoch": 0.01, + "learning_rate": 4.998024105718461e-05, + "loss": 1.3598, + "step": 12755 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980237934763464e-05, + "loss": 1.1881, + "step": 12756 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980234812095724e-05, + "loss": 1.1587, + "step": 12757 + }, + { + "epoch": 0.01, + "learning_rate": 4.998023168918139e-05, + "loss": 1.0493, + "step": 12758 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980228566020466e-05, + "loss": 0.9941, + "step": 12759 + }, + { + "epoch": 0.01, + "learning_rate": 4.998022544261294e-05, + "loss": 1.0987, + "step": 12760 + }, + { + "epoch": 0.01, + "learning_rate": 4.998022231895883e-05, + "loss": 0.8508, + "step": 12761 + }, + { + "epoch": 0.01, + "learning_rate": 4.998021919505811e-05, + "loss": 1.0839, + "step": 12762 + }, + { + "epoch": 0.01, + "learning_rate": 4.998021607091081e-05, + "loss": 1.209, + "step": 12763 + }, + { + "epoch": 0.01, + "learning_rate": 4.998021294651691e-05, + "loss": 1.2368, + "step": 12764 + }, + { + "epoch": 0.01, + "learning_rate": 4.998020982187642e-05, + "loss": 0.9204, + "step": 12765 + }, + { + "epoch": 0.01, + "learning_rate": 4.998020669698933e-05, + "loss": 1.0901, + "step": 12766 + }, + { + "epoch": 0.01, + "learning_rate": 4.998020357185564e-05, + "loss": 1.3258, + "step": 12767 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980200446475376e-05, + "loss": 1.1116, + "step": 12768 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980197320848496e-05, + "loss": 1.1504, + "step": 12769 + }, + { + "epoch": 0.01, + "learning_rate": 4.998019419497504e-05, + "loss": 0.928, + "step": 12770 + }, + { + "epoch": 0.01, + "learning_rate": 4.998019106885498e-05, + "loss": 1.0945, + "step": 12771 + }, + { + "epoch": 0.01, + "learning_rate": 4.998018794248833e-05, + "loss": 1.0755, + "step": 12772 + }, + { + "epoch": 0.01, + "learning_rate": 4.998018481587509e-05, + "loss": 0.9464, + "step": 12773 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980181689015245e-05, + "loss": 0.9836, + "step": 12774 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980178561908816e-05, + "loss": 0.7677, + "step": 12775 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980175434555784e-05, + "loss": 0.9265, + "step": 12776 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980172306956164e-05, + "loss": 0.9324, + "step": 12777 + }, + { + "epoch": 0.01, + "learning_rate": 4.998016917910995e-05, + "loss": 1.0801, + "step": 12778 + }, + { + "epoch": 0.01, + "learning_rate": 4.998016605101714e-05, + "loss": 1.2038, + "step": 12779 + }, + { + "epoch": 0.01, + "learning_rate": 4.998016292267774e-05, + "loss": 0.5574, + "step": 12780 + }, + { + "epoch": 0.01, + "learning_rate": 4.998015979409175e-05, + "loss": 1.1233, + "step": 12781 + }, + { + "epoch": 0.01, + "learning_rate": 4.998015666525916e-05, + "loss": 0.9915, + "step": 12782 + }, + { + "epoch": 0.01, + "learning_rate": 4.998015353617997e-05, + "loss": 1.1283, + "step": 12783 + }, + { + "epoch": 0.01, + "learning_rate": 4.99801504068542e-05, + "loss": 1.2538, + "step": 12784 + }, + { + "epoch": 0.01, + "learning_rate": 4.998014727728183e-05, + "loss": 1.0479, + "step": 12785 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980144147462866e-05, + "loss": 1.1323, + "step": 12786 + }, + { + "epoch": 0.01, + "learning_rate": 4.998014101739731e-05, + "loss": 1.2417, + "step": 12787 + }, + { + "epoch": 0.01, + "learning_rate": 4.998013788708516e-05, + "loss": 1.0424, + "step": 12788 + }, + { + "epoch": 0.01, + "learning_rate": 4.998013475652642e-05, + "loss": 1.068, + "step": 12789 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980131625721084e-05, + "loss": 1.3921, + "step": 12790 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980128494669155e-05, + "loss": 1.0747, + "step": 12791 + }, + { + "epoch": 0.01, + "learning_rate": 4.998012536337063e-05, + "loss": 1.2593, + "step": 12792 + }, + { + "epoch": 0.01, + "learning_rate": 4.998012223182551e-05, + "loss": 1.1864, + "step": 12793 + }, + { + "epoch": 0.01, + "learning_rate": 4.99801191000338e-05, + "loss": 1.0663, + "step": 12794 + }, + { + "epoch": 0.01, + "learning_rate": 4.99801159679955e-05, + "loss": 1.13, + "step": 12795 + }, + { + "epoch": 0.01, + "learning_rate": 4.99801128357106e-05, + "loss": 1.0854, + "step": 12796 + }, + { + "epoch": 0.01, + "learning_rate": 4.998010970317911e-05, + "loss": 1.0491, + "step": 12797 + }, + { + "epoch": 0.01, + "learning_rate": 4.998010657040103e-05, + "loss": 1.0693, + "step": 12798 + }, + { + "epoch": 0.01, + "learning_rate": 4.998010343737636e-05, + "loss": 1.0836, + "step": 12799 + }, + { + "epoch": 0.01, + "learning_rate": 4.998010030410508e-05, + "loss": 1.1157, + "step": 12800 + }, + { + "epoch": 0.01, + "learning_rate": 4.998009717058722e-05, + "loss": 1.3284, + "step": 12801 + }, + { + "epoch": 0.01, + "learning_rate": 4.998009403682277e-05, + "loss": 0.8892, + "step": 12802 + }, + { + "epoch": 0.01, + "learning_rate": 4.998009090281172e-05, + "loss": 1.1005, + "step": 12803 + }, + { + "epoch": 0.01, + "learning_rate": 4.998008776855408e-05, + "loss": 0.7738, + "step": 12804 + }, + { + "epoch": 0.01, + "learning_rate": 4.998008463404985e-05, + "loss": 0.8511, + "step": 12805 + }, + { + "epoch": 0.01, + "learning_rate": 4.998008149929903e-05, + "loss": 0.9628, + "step": 12806 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980078364301606e-05, + "loss": 1.1077, + "step": 12807 + }, + { + "epoch": 0.01, + "learning_rate": 4.998007522905759e-05, + "loss": 0.8475, + "step": 12808 + }, + { + "epoch": 0.01, + "learning_rate": 4.998007209356699e-05, + "loss": 0.328, + "step": 12809 + }, + { + "epoch": 0.01, + "learning_rate": 4.998006895782978e-05, + "loss": 0.5646, + "step": 12810 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980065821846e-05, + "loss": 1.0582, + "step": 12811 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980062685615615e-05, + "loss": 1.2618, + "step": 12812 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980059549138636e-05, + "loss": 1.1225, + "step": 12813 + }, + { + "epoch": 0.01, + "learning_rate": 4.998005641241507e-05, + "loss": 0.8237, + "step": 12814 + }, + { + "epoch": 0.01, + "learning_rate": 4.998005327544491e-05, + "loss": 0.933, + "step": 12815 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980050138228156e-05, + "loss": 0.808, + "step": 12816 + }, + { + "epoch": 0.01, + "learning_rate": 4.998004700076481e-05, + "loss": 0.7977, + "step": 12817 + }, + { + "epoch": 0.01, + "learning_rate": 4.998004386305487e-05, + "loss": 1.1029, + "step": 12818 + }, + { + "epoch": 0.01, + "learning_rate": 4.998004072509834e-05, + "loss": 1.0664, + "step": 12819 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980037586895225e-05, + "loss": 0.919, + "step": 12820 + }, + { + "epoch": 0.01, + "learning_rate": 4.99800344484455e-05, + "loss": 0.8339, + "step": 12821 + }, + { + "epoch": 0.01, + "learning_rate": 4.99800313097492e-05, + "loss": 0.8931, + "step": 12822 + }, + { + "epoch": 0.01, + "learning_rate": 4.998002817080629e-05, + "loss": 1.1265, + "step": 12823 + }, + { + "epoch": 0.01, + "learning_rate": 4.998002503161681e-05, + "loss": 1.0405, + "step": 12824 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980021892180715e-05, + "loss": 1.2704, + "step": 12825 + }, + { + "epoch": 0.01, + "learning_rate": 4.9980018752498035e-05, + "loss": 0.983, + "step": 12826 + }, + { + "epoch": 0.01, + "learning_rate": 4.998001561256877e-05, + "loss": 1.1701, + "step": 12827 + }, + { + "epoch": 0.01, + "learning_rate": 4.998001247239291e-05, + "loss": 0.9842, + "step": 12828 + }, + { + "epoch": 0.01, + "learning_rate": 4.998000933197046e-05, + "loss": 0.9698, + "step": 12829 + }, + { + "epoch": 0.01, + "learning_rate": 4.998000619130141e-05, + "loss": 0.813, + "step": 12830 + }, + { + "epoch": 0.01, + "learning_rate": 4.998000305038577e-05, + "loss": 0.8723, + "step": 12831 + }, + { + "epoch": 0.01, + "learning_rate": 4.997999990922354e-05, + "loss": 1.0961, + "step": 12832 + }, + { + "epoch": 0.01, + "learning_rate": 4.997999676781472e-05, + "loss": 1.2978, + "step": 12833 + }, + { + "epoch": 0.01, + "learning_rate": 4.997999362615931e-05, + "loss": 1.3828, + "step": 12834 + }, + { + "epoch": 0.01, + "learning_rate": 4.99799904842573e-05, + "loss": 1.0502, + "step": 12835 + }, + { + "epoch": 0.01, + "learning_rate": 4.99799873421087e-05, + "loss": 1.2325, + "step": 12836 + }, + { + "epoch": 0.01, + "learning_rate": 4.997998419971351e-05, + "loss": 1.2105, + "step": 12837 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979981057071725e-05, + "loss": 1.4, + "step": 12838 + }, + { + "epoch": 0.01, + "learning_rate": 4.997997791418336e-05, + "loss": 0.9596, + "step": 12839 + }, + { + "epoch": 0.01, + "learning_rate": 4.997997477104839e-05, + "loss": 0.9279, + "step": 12840 + }, + { + "epoch": 0.01, + "learning_rate": 4.997997162766683e-05, + "loss": 1.3523, + "step": 12841 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979968484038684e-05, + "loss": 1.2956, + "step": 12842 + }, + { + "epoch": 0.01, + "learning_rate": 4.997996534016395e-05, + "loss": 1.2156, + "step": 12843 + }, + { + "epoch": 0.01, + "learning_rate": 4.997996219604261e-05, + "loss": 1.2605, + "step": 12844 + }, + { + "epoch": 0.01, + "learning_rate": 4.997995905167468e-05, + "loss": 1.11, + "step": 12845 + }, + { + "epoch": 0.01, + "learning_rate": 4.997995590706017e-05, + "loss": 1.1698, + "step": 12846 + }, + { + "epoch": 0.01, + "learning_rate": 4.997995276219907e-05, + "loss": 1.0783, + "step": 12847 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979949617091366e-05, + "loss": 1.4209, + "step": 12848 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979946471737075e-05, + "loss": 1.0297, + "step": 12849 + }, + { + "epoch": 0.01, + "learning_rate": 4.997994332613619e-05, + "loss": 1.1634, + "step": 12850 + }, + { + "epoch": 0.01, + "learning_rate": 4.997994018028872e-05, + "loss": 1.3217, + "step": 12851 + }, + { + "epoch": 0.01, + "learning_rate": 4.997993703419466e-05, + "loss": 0.9882, + "step": 12852 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979933887854e-05, + "loss": 1.0151, + "step": 12853 + }, + { + "epoch": 0.01, + "learning_rate": 4.997993074126675e-05, + "loss": 1.0105, + "step": 12854 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979927594432916e-05, + "loss": 1.0664, + "step": 12855 + }, + { + "epoch": 0.01, + "learning_rate": 4.997992444735248e-05, + "loss": 1.1174, + "step": 12856 + }, + { + "epoch": 0.01, + "learning_rate": 4.997992130002546e-05, + "loss": 1.2691, + "step": 12857 + }, + { + "epoch": 0.01, + "learning_rate": 4.997991815245185e-05, + "loss": 1.1248, + "step": 12858 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979915004631644e-05, + "loss": 1.1102, + "step": 12859 + }, + { + "epoch": 0.01, + "learning_rate": 4.997991185656485e-05, + "loss": 1.1124, + "step": 12860 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979908708251465e-05, + "loss": 1.1699, + "step": 12861 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979905559691487e-05, + "loss": 1.2475, + "step": 12862 + }, + { + "epoch": 0.01, + "learning_rate": 4.997990241088491e-05, + "loss": 1.0424, + "step": 12863 + }, + { + "epoch": 0.01, + "learning_rate": 4.997989926183175e-05, + "loss": 1.1789, + "step": 12864 + }, + { + "epoch": 0.01, + "learning_rate": 4.997989611253201e-05, + "loss": 0.8412, + "step": 12865 + }, + { + "epoch": 0.01, + "learning_rate": 4.997989296298566e-05, + "loss": 0.9935, + "step": 12866 + }, + { + "epoch": 0.01, + "learning_rate": 4.997988981319273e-05, + "loss": 1.3858, + "step": 12867 + }, + { + "epoch": 0.01, + "learning_rate": 4.997988666315321e-05, + "loss": 1.2902, + "step": 12868 + }, + { + "epoch": 0.01, + "learning_rate": 4.997988351286709e-05, + "loss": 1.7678, + "step": 12869 + }, + { + "epoch": 0.01, + "learning_rate": 4.997988036233439e-05, + "loss": 1.1934, + "step": 12870 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979877211555094e-05, + "loss": 1.1657, + "step": 12871 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979874060529206e-05, + "loss": 0.9137, + "step": 12872 + }, + { + "epoch": 0.01, + "learning_rate": 4.997987090925672e-05, + "loss": 1.0796, + "step": 12873 + }, + { + "epoch": 0.01, + "learning_rate": 4.997986775773766e-05, + "loss": 1.1675, + "step": 12874 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979864605972e-05, + "loss": 1.1648, + "step": 12875 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979861453959745e-05, + "loss": 1.8118, + "step": 12876 + }, + { + "epoch": 0.01, + "learning_rate": 4.997985830170091e-05, + "loss": 0.8699, + "step": 12877 + }, + { + "epoch": 0.01, + "learning_rate": 4.997985514919548e-05, + "loss": 1.2232, + "step": 12878 + }, + { + "epoch": 0.01, + "learning_rate": 4.997985199644345e-05, + "loss": 1.1903, + "step": 12879 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979848843444845e-05, + "loss": 1.1403, + "step": 12880 + }, + { + "epoch": 0.01, + "learning_rate": 4.997984569019964e-05, + "loss": 1.2959, + "step": 12881 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979842536707845e-05, + "loss": 1.0923, + "step": 12882 + }, + { + "epoch": 0.01, + "learning_rate": 4.997983938296946e-05, + "loss": 0.8126, + "step": 12883 + }, + { + "epoch": 0.01, + "learning_rate": 4.997983622898449e-05, + "loss": 1.1587, + "step": 12884 + }, + { + "epoch": 0.01, + "learning_rate": 4.997983307475292e-05, + "loss": 0.941, + "step": 12885 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979829920274766e-05, + "loss": 1.2729, + "step": 12886 + }, + { + "epoch": 0.01, + "learning_rate": 4.997982676555002e-05, + "loss": 1.2418, + "step": 12887 + }, + { + "epoch": 0.01, + "learning_rate": 4.997982361057868e-05, + "loss": 1.1064, + "step": 12888 + }, + { + "epoch": 0.01, + "learning_rate": 4.997982045536076e-05, + "loss": 1.249, + "step": 12889 + }, + { + "epoch": 0.01, + "learning_rate": 4.997981729989624e-05, + "loss": 1.2328, + "step": 12890 + }, + { + "epoch": 0.01, + "learning_rate": 4.997981414418513e-05, + "loss": 1.0787, + "step": 12891 + }, + { + "epoch": 0.01, + "learning_rate": 4.997981098822744e-05, + "loss": 1.0769, + "step": 12892 + }, + { + "epoch": 0.01, + "learning_rate": 4.997980783202315e-05, + "loss": 0.9737, + "step": 12893 + }, + { + "epoch": 0.01, + "learning_rate": 4.997980467557227e-05, + "loss": 0.965, + "step": 12894 + }, + { + "epoch": 0.01, + "learning_rate": 4.99798015188748e-05, + "loss": 0.9079, + "step": 12895 + }, + { + "epoch": 0.01, + "learning_rate": 4.997979836193075e-05, + "loss": 0.7914, + "step": 12896 + }, + { + "epoch": 0.01, + "learning_rate": 4.99797952047401e-05, + "loss": 0.9449, + "step": 12897 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979792047302863e-05, + "loss": 0.9801, + "step": 12898 + }, + { + "epoch": 0.01, + "learning_rate": 4.997978888961904e-05, + "loss": 1.3363, + "step": 12899 + }, + { + "epoch": 0.01, + "learning_rate": 4.997978573168862e-05, + "loss": 1.0618, + "step": 12900 + }, + { + "epoch": 0.01, + "learning_rate": 4.997978257351161e-05, + "loss": 0.2838, + "step": 12901 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979779415088016e-05, + "loss": 0.3652, + "step": 12902 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979776256417825e-05, + "loss": 0.9881, + "step": 12903 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979773097501046e-05, + "loss": 1.7913, + "step": 12904 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979769938337685e-05, + "loss": 1.7617, + "step": 12905 + }, + { + "epoch": 0.01, + "learning_rate": 4.997976677892773e-05, + "loss": 1.3546, + "step": 12906 + }, + { + "epoch": 0.01, + "learning_rate": 4.997976361927118e-05, + "loss": 1.2825, + "step": 12907 + }, + { + "epoch": 0.01, + "learning_rate": 4.997976045936804e-05, + "loss": 1.0407, + "step": 12908 + }, + { + "epoch": 0.01, + "learning_rate": 4.997975729921831e-05, + "loss": 0.9894, + "step": 12909 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979754138822e-05, + "loss": 1.2142, + "step": 12910 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979750978179094e-05, + "loss": 1.1767, + "step": 12911 + }, + { + "epoch": 0.01, + "learning_rate": 4.99797478172896e-05, + "loss": 0.8767, + "step": 12912 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979744656153516e-05, + "loss": 1.2397, + "step": 12913 + }, + { + "epoch": 0.01, + "learning_rate": 4.997974149477085e-05, + "loss": 1.2256, + "step": 12914 + }, + { + "epoch": 0.01, + "learning_rate": 4.997973833314158e-05, + "loss": 0.8299, + "step": 12915 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979735171265726e-05, + "loss": 1.1286, + "step": 12916 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979732009143286e-05, + "loss": 0.9414, + "step": 12917 + }, + { + "epoch": 0.01, + "learning_rate": 4.997972884677425e-05, + "loss": 1.035, + "step": 12918 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979725684158636e-05, + "loss": 1.084, + "step": 12919 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979722521296424e-05, + "loss": 0.8994, + "step": 12920 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979719358187625e-05, + "loss": 1.2614, + "step": 12921 + }, + { + "epoch": 0.01, + "learning_rate": 4.997971619483224e-05, + "loss": 1.0149, + "step": 12922 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979713031230255e-05, + "loss": 1.0084, + "step": 12923 + }, + { + "epoch": 0.01, + "learning_rate": 4.99797098673817e-05, + "loss": 1.2544, + "step": 12924 + }, + { + "epoch": 0.01, + "learning_rate": 4.997970670328654e-05, + "loss": 0.9306, + "step": 12925 + }, + { + "epoch": 0.01, + "learning_rate": 4.997970353894479e-05, + "loss": 1.2647, + "step": 12926 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979700374356455e-05, + "loss": 1.2281, + "step": 12927 + }, + { + "epoch": 0.01, + "learning_rate": 4.997969720952154e-05, + "loss": 0.5042, + "step": 12928 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979694044440026e-05, + "loss": 0.6964, + "step": 12929 + }, + { + "epoch": 0.01, + "learning_rate": 4.997969087911192e-05, + "loss": 1.3595, + "step": 12930 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979687713537224e-05, + "loss": 1.0151, + "step": 12931 + }, + { + "epoch": 0.01, + "learning_rate": 4.997968454771595e-05, + "loss": 1.1689, + "step": 12932 + }, + { + "epoch": 0.01, + "learning_rate": 4.997968138164808e-05, + "loss": 1.1825, + "step": 12933 + }, + { + "epoch": 0.01, + "learning_rate": 4.997967821533362e-05, + "loss": 0.9164, + "step": 12934 + }, + { + "epoch": 0.01, + "learning_rate": 4.997967504877258e-05, + "loss": 1.018, + "step": 12935 + }, + { + "epoch": 0.01, + "learning_rate": 4.997967188196494e-05, + "loss": 0.7397, + "step": 12936 + }, + { + "epoch": 0.01, + "learning_rate": 4.997966871491072e-05, + "loss": 1.758, + "step": 12937 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979665547609914e-05, + "loss": 1.2953, + "step": 12938 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979662380062506e-05, + "loss": 1.0949, + "step": 12939 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979659212268516e-05, + "loss": 1.389, + "step": 12940 + }, + { + "epoch": 0.01, + "learning_rate": 4.997965604422794e-05, + "loss": 1.1682, + "step": 12941 + }, + { + "epoch": 0.01, + "learning_rate": 4.997965287594077e-05, + "loss": 1.4026, + "step": 12942 + }, + { + "epoch": 0.01, + "learning_rate": 4.997964970740702e-05, + "loss": 1.5013, + "step": 12943 + }, + { + "epoch": 0.01, + "learning_rate": 4.997964653862667e-05, + "loss": 1.0865, + "step": 12944 + }, + { + "epoch": 0.01, + "learning_rate": 4.997964336959974e-05, + "loss": 1.0767, + "step": 12945 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979640200326214e-05, + "loss": 1.4635, + "step": 12946 + }, + { + "epoch": 0.01, + "learning_rate": 4.997963703080611e-05, + "loss": 1.2607, + "step": 12947 + }, + { + "epoch": 0.01, + "learning_rate": 4.997963386103941e-05, + "loss": 1.0479, + "step": 12948 + }, + { + "epoch": 0.01, + "learning_rate": 4.997963069102613e-05, + "loss": 1.2824, + "step": 12949 + }, + { + "epoch": 0.01, + "learning_rate": 4.997962752076625e-05, + "loss": 1.3158, + "step": 12950 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979624350259776e-05, + "loss": 1.0888, + "step": 12951 + }, + { + "epoch": 0.01, + "learning_rate": 4.997962117950673e-05, + "loss": 0.8503, + "step": 12952 + }, + { + "epoch": 0.01, + "learning_rate": 4.997961800850709e-05, + "loss": 0.7097, + "step": 12953 + }, + { + "epoch": 0.01, + "learning_rate": 4.997961483726086e-05, + "loss": 0.8083, + "step": 12954 + }, + { + "epoch": 0.01, + "learning_rate": 4.997961166576805e-05, + "loss": 1.0741, + "step": 12955 + }, + { + "epoch": 0.01, + "learning_rate": 4.997960849402864e-05, + "loss": 1.1947, + "step": 12956 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979605322042644e-05, + "loss": 0.9949, + "step": 12957 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979602149810066e-05, + "loss": 1.1383, + "step": 12958 + }, + { + "epoch": 0.01, + "learning_rate": 4.99795989773309e-05, + "loss": 0.9662, + "step": 12959 + }, + { + "epoch": 0.01, + "learning_rate": 4.997959580460514e-05, + "loss": 0.824, + "step": 12960 + }, + { + "epoch": 0.01, + "learning_rate": 4.997959263163279e-05, + "loss": 1.1551, + "step": 12961 + }, + { + "epoch": 0.01, + "learning_rate": 4.997958945841386e-05, + "loss": 1.2205, + "step": 12962 + }, + { + "epoch": 0.01, + "learning_rate": 4.997958628494834e-05, + "loss": 0.4988, + "step": 12963 + }, + { + "epoch": 0.01, + "learning_rate": 4.997958311123623e-05, + "loss": 0.2457, + "step": 12964 + }, + { + "epoch": 0.01, + "learning_rate": 4.997957993727753e-05, + "loss": 0.2279, + "step": 12965 + }, + { + "epoch": 0.01, + "learning_rate": 4.997957676307224e-05, + "loss": 0.1507, + "step": 12966 + }, + { + "epoch": 0.01, + "learning_rate": 4.997957358862038e-05, + "loss": 0.39, + "step": 12967 + }, + { + "epoch": 0.01, + "learning_rate": 4.997957041392192e-05, + "loss": 0.773, + "step": 12968 + }, + { + "epoch": 0.01, + "learning_rate": 4.997956723897686e-05, + "loss": 0.7261, + "step": 12969 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979564063785226e-05, + "loss": 0.7207, + "step": 12970 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979560888347e-05, + "loss": 0.6963, + "step": 12971 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979557712662196e-05, + "loss": 0.5992, + "step": 12972 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979554536730795e-05, + "loss": 0.7173, + "step": 12973 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979551360552806e-05, + "loss": 0.7709, + "step": 12974 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979548184128235e-05, + "loss": 0.6878, + "step": 12975 + }, + { + "epoch": 0.01, + "learning_rate": 4.997954500745707e-05, + "loss": 0.5592, + "step": 12976 + }, + { + "epoch": 0.01, + "learning_rate": 4.997954183053932e-05, + "loss": 0.645, + "step": 12977 + }, + { + "epoch": 0.01, + "learning_rate": 4.997953865337498e-05, + "loss": 0.5575, + "step": 12978 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979535475964056e-05, + "loss": 0.5624, + "step": 12979 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979532298306544e-05, + "loss": 0.562, + "step": 12980 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979529120402444e-05, + "loss": 0.5335, + "step": 12981 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979525942251756e-05, + "loss": 0.4988, + "step": 12982 + }, + { + "epoch": 0.01, + "learning_rate": 4.997952276385448e-05, + "loss": 0.502, + "step": 12983 + }, + { + "epoch": 0.01, + "learning_rate": 4.997951958521062e-05, + "loss": 0.4721, + "step": 12984 + }, + { + "epoch": 0.01, + "learning_rate": 4.997951640632017e-05, + "loss": 0.4373, + "step": 12985 + }, + { + "epoch": 0.01, + "learning_rate": 4.997951322718314e-05, + "loss": 0.4362, + "step": 12986 + }, + { + "epoch": 0.01, + "learning_rate": 4.997951004779952e-05, + "loss": 0.4396, + "step": 12987 + }, + { + "epoch": 0.01, + "learning_rate": 4.99795068681693e-05, + "loss": 0.4634, + "step": 12988 + }, + { + "epoch": 0.01, + "learning_rate": 4.997950368829251e-05, + "loss": 0.5829, + "step": 12989 + }, + { + "epoch": 0.01, + "learning_rate": 4.997950050816912e-05, + "loss": 0.5246, + "step": 12990 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979497327799145e-05, + "loss": 0.4869, + "step": 12991 + }, + { + "epoch": 0.01, + "learning_rate": 4.997949414718259e-05, + "loss": 0.498, + "step": 12992 + }, + { + "epoch": 0.01, + "learning_rate": 4.997949096631944e-05, + "loss": 0.4482, + "step": 12993 + }, + { + "epoch": 0.01, + "learning_rate": 4.997948778520971e-05, + "loss": 0.4193, + "step": 12994 + }, + { + "epoch": 0.01, + "learning_rate": 4.997948460385339e-05, + "loss": 0.4703, + "step": 12995 + }, + { + "epoch": 0.01, + "learning_rate": 4.997948142225048e-05, + "loss": 0.5105, + "step": 12996 + }, + { + "epoch": 0.01, + "learning_rate": 4.997947824040099e-05, + "loss": 0.4898, + "step": 12997 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979475058304906e-05, + "loss": 0.4599, + "step": 12998 + }, + { + "epoch": 0.01, + "learning_rate": 4.997947187596224e-05, + "loss": 0.4422, + "step": 12999 + }, + { + "epoch": 0.01, + "learning_rate": 4.997946869337298e-05, + "loss": 0.4958, + "step": 13000 + }, + { + "epoch": 0.01, + "eval_loss": 1.0994994640350342, + "eval_runtime": 84.1793, + "eval_samples_per_second": 16.453, + "eval_steps_per_second": 4.122, + "step": 13000 + }, + { + "epoch": 0.01, + "learning_rate": 4.997946551053714e-05, + "loss": 0.5183, + "step": 13001 + }, + { + "epoch": 0.01, + "learning_rate": 4.997946232745471e-05, + "loss": 0.4278, + "step": 13002 + }, + { + "epoch": 0.01, + "learning_rate": 4.99794591441257e-05, + "loss": 0.4494, + "step": 13003 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979455960550096e-05, + "loss": 0.4726, + "step": 13004 + }, + { + "epoch": 0.01, + "learning_rate": 4.997945277672791e-05, + "loss": 0.47, + "step": 13005 + }, + { + "epoch": 0.01, + "learning_rate": 4.997944959265913e-05, + "loss": 0.5201, + "step": 13006 + }, + { + "epoch": 0.01, + "learning_rate": 4.997944640834377e-05, + "loss": 0.5025, + "step": 13007 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979443223781824e-05, + "loss": 0.525, + "step": 13008 + }, + { + "epoch": 0.01, + "learning_rate": 4.997944003897329e-05, + "loss": 0.4876, + "step": 13009 + }, + { + "epoch": 0.01, + "learning_rate": 4.997943685391817e-05, + "loss": 0.8739, + "step": 13010 + }, + { + "epoch": 0.01, + "learning_rate": 4.997943366861646e-05, + "loss": 1.0577, + "step": 13011 + }, + { + "epoch": 0.01, + "learning_rate": 4.997943048306817e-05, + "loss": 1.1702, + "step": 13012 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979427297273286e-05, + "loss": 1.2193, + "step": 13013 + }, + { + "epoch": 0.01, + "learning_rate": 4.997942411123182e-05, + "loss": 1.2447, + "step": 13014 + }, + { + "epoch": 0.01, + "learning_rate": 4.997942092494376e-05, + "loss": 1.5422, + "step": 13015 + }, + { + "epoch": 0.01, + "learning_rate": 4.997941773840913e-05, + "loss": 1.0676, + "step": 13016 + }, + { + "epoch": 0.01, + "learning_rate": 4.99794145516279e-05, + "loss": 1.3999, + "step": 13017 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979411364600095e-05, + "loss": 0.8781, + "step": 13018 + }, + { + "epoch": 0.01, + "learning_rate": 4.997940817732569e-05, + "loss": 1.171, + "step": 13019 + }, + { + "epoch": 0.01, + "learning_rate": 4.997940498980471e-05, + "loss": 1.1286, + "step": 13020 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979401802037135e-05, + "loss": 1.2802, + "step": 13021 + }, + { + "epoch": 0.01, + "learning_rate": 4.997939861402298e-05, + "loss": 0.9592, + "step": 13022 + }, + { + "epoch": 0.01, + "learning_rate": 4.997939542576223e-05, + "loss": 1.0764, + "step": 13023 + }, + { + "epoch": 0.01, + "learning_rate": 4.997939223725491e-05, + "loss": 1.2927, + "step": 13024 + }, + { + "epoch": 0.01, + "learning_rate": 4.997938904850099e-05, + "loss": 1.04, + "step": 13025 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979385859500497e-05, + "loss": 1.0126, + "step": 13026 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979382670253406e-05, + "loss": 1.3136, + "step": 13027 + }, + { + "epoch": 0.01, + "learning_rate": 4.997937948075973e-05, + "loss": 1.3859, + "step": 13028 + }, + { + "epoch": 0.01, + "learning_rate": 4.997937629101947e-05, + "loss": 1.697, + "step": 13029 + }, + { + "epoch": 0.01, + "learning_rate": 4.997937310103263e-05, + "loss": 1.0814, + "step": 13030 + }, + { + "epoch": 0.01, + "learning_rate": 4.99793699107992e-05, + "loss": 0.9613, + "step": 13031 + }, + { + "epoch": 0.01, + "learning_rate": 4.997936672031918e-05, + "loss": 1.0867, + "step": 13032 + }, + { + "epoch": 0.01, + "learning_rate": 4.997936352959258e-05, + "loss": 1.217, + "step": 13033 + }, + { + "epoch": 0.01, + "learning_rate": 4.997936033861939e-05, + "loss": 1.0016, + "step": 13034 + }, + { + "epoch": 0.01, + "learning_rate": 4.997935714739962e-05, + "loss": 0.9677, + "step": 13035 + }, + { + "epoch": 0.01, + "learning_rate": 4.997935395593326e-05, + "loss": 1.2878, + "step": 13036 + }, + { + "epoch": 0.01, + "learning_rate": 4.997935076422031e-05, + "loss": 1.1637, + "step": 13037 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979347572260785e-05, + "loss": 0.9675, + "step": 13038 + }, + { + "epoch": 0.01, + "learning_rate": 4.997934438005467e-05, + "loss": 1.9872, + "step": 13039 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979341187601965e-05, + "loss": 0.9205, + "step": 13040 + }, + { + "epoch": 0.01, + "learning_rate": 4.997933799490267e-05, + "loss": 0.8962, + "step": 13041 + }, + { + "epoch": 0.01, + "learning_rate": 4.99793348019568e-05, + "loss": 1.0186, + "step": 13042 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979331608764344e-05, + "loss": 1.146, + "step": 13043 + }, + { + "epoch": 0.01, + "learning_rate": 4.99793284153253e-05, + "loss": 1.4336, + "step": 13044 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979325221639675e-05, + "loss": 1.1118, + "step": 13045 + }, + { + "epoch": 0.01, + "learning_rate": 4.997932202770746e-05, + "loss": 0.8323, + "step": 13046 + }, + { + "epoch": 0.01, + "learning_rate": 4.997931883352866e-05, + "loss": 2.7557, + "step": 13047 + }, + { + "epoch": 0.01, + "learning_rate": 4.997931563910328e-05, + "loss": 0.3497, + "step": 13048 + }, + { + "epoch": 0.01, + "learning_rate": 4.99793124444313e-05, + "loss": 1.1898, + "step": 13049 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979309249512754e-05, + "loss": 1.3632, + "step": 13050 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979306054347607e-05, + "loss": 1.1675, + "step": 13051 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979302858935884e-05, + "loss": 1.0783, + "step": 13052 + }, + { + "epoch": 0.01, + "learning_rate": 4.997929966327757e-05, + "loss": 1.0931, + "step": 13053 + }, + { + "epoch": 0.01, + "learning_rate": 4.997929646737267e-05, + "loss": 1.1867, + "step": 13054 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979293271221195e-05, + "loss": 1.4483, + "step": 13055 + }, + { + "epoch": 0.01, + "learning_rate": 4.997929007482313e-05, + "loss": 1.4049, + "step": 13056 + }, + { + "epoch": 0.01, + "learning_rate": 4.997928687817848e-05, + "loss": 1.5023, + "step": 13057 + }, + { + "epoch": 0.01, + "learning_rate": 4.997928368128724e-05, + "loss": 0.9887, + "step": 13058 + }, + { + "epoch": 0.01, + "learning_rate": 4.997928048414941e-05, + "loss": 1.3957, + "step": 13059 + }, + { + "epoch": 0.01, + "learning_rate": 4.997927728676501e-05, + "loss": 1.084, + "step": 13060 + }, + { + "epoch": 0.01, + "learning_rate": 4.997927408913402e-05, + "loss": 1.2527, + "step": 13061 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979270891256445e-05, + "loss": 1.4548, + "step": 13062 + }, + { + "epoch": 0.01, + "learning_rate": 4.997926769313228e-05, + "loss": 1.1246, + "step": 13063 + }, + { + "epoch": 0.01, + "learning_rate": 4.997926449476153e-05, + "loss": 0.9704, + "step": 13064 + }, + { + "epoch": 0.01, + "learning_rate": 4.997926129614421e-05, + "loss": 1.164, + "step": 13065 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979258097280294e-05, + "loss": 1.577, + "step": 13066 + }, + { + "epoch": 0.01, + "learning_rate": 4.997925489816979e-05, + "loss": 1.0228, + "step": 13067 + }, + { + "epoch": 0.01, + "learning_rate": 4.997925169881271e-05, + "loss": 0.8215, + "step": 13068 + }, + { + "epoch": 0.01, + "learning_rate": 4.997924849920904e-05, + "loss": 0.9098, + "step": 13069 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979245299358785e-05, + "loss": 1.2712, + "step": 13070 + }, + { + "epoch": 0.01, + "learning_rate": 4.997924209926195e-05, + "loss": 1.2067, + "step": 13071 + }, + { + "epoch": 0.01, + "learning_rate": 4.997923889891852e-05, + "loss": 1.1048, + "step": 13072 + }, + { + "epoch": 0.01, + "learning_rate": 4.997923569832852e-05, + "loss": 1.1711, + "step": 13073 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979232497491925e-05, + "loss": 1.35, + "step": 13074 + }, + { + "epoch": 0.01, + "learning_rate": 4.997922929640875e-05, + "loss": 0.7622, + "step": 13075 + }, + { + "epoch": 0.01, + "learning_rate": 4.997922609507898e-05, + "loss": 1.3351, + "step": 13076 + }, + { + "epoch": 0.01, + "learning_rate": 4.997922289350264e-05, + "loss": 0.9535, + "step": 13077 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979219691679715e-05, + "loss": 0.9206, + "step": 13078 + }, + { + "epoch": 0.01, + "learning_rate": 4.99792164896102e-05, + "loss": 1.1329, + "step": 13079 + }, + { + "epoch": 0.01, + "learning_rate": 4.99792132872941e-05, + "loss": 0.8848, + "step": 13080 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979210084731414e-05, + "loss": 0.7544, + "step": 13081 + }, + { + "epoch": 0.01, + "learning_rate": 4.997920688192215e-05, + "loss": 0.8494, + "step": 13082 + }, + { + "epoch": 0.01, + "learning_rate": 4.99792036788663e-05, + "loss": 1.131, + "step": 13083 + }, + { + "epoch": 0.01, + "learning_rate": 4.997920047556387e-05, + "loss": 0.7298, + "step": 13084 + }, + { + "epoch": 0.01, + "learning_rate": 4.997919727201484e-05, + "loss": 0.1745, + "step": 13085 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979194068219235e-05, + "loss": 0.1724, + "step": 13086 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979190864177053e-05, + "loss": 0.1669, + "step": 13087 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979187659888284e-05, + "loss": 0.1473, + "step": 13088 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979184455352926e-05, + "loss": 0.5057, + "step": 13089 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979181250570986e-05, + "loss": 0.536, + "step": 13090 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979178045542465e-05, + "loss": 0.6683, + "step": 13091 + }, + { + "epoch": 0.01, + "learning_rate": 4.997917484026735e-05, + "loss": 0.6792, + "step": 13092 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979171634745665e-05, + "loss": 0.6064, + "step": 13093 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979168428977386e-05, + "loss": 0.5759, + "step": 13094 + }, + { + "epoch": 0.01, + "learning_rate": 4.997916522296253e-05, + "loss": 0.5447, + "step": 13095 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979162016701084e-05, + "loss": 0.4281, + "step": 13096 + }, + { + "epoch": 0.01, + "learning_rate": 4.997915881019306e-05, + "loss": 0.5687, + "step": 13097 + }, + { + "epoch": 0.01, + "learning_rate": 4.997915560343845e-05, + "loss": 0.5711, + "step": 13098 + }, + { + "epoch": 0.01, + "learning_rate": 4.997915239643726e-05, + "loss": 0.532, + "step": 13099 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979149189189476e-05, + "loss": 0.4495, + "step": 13100 + }, + { + "epoch": 0.01, + "learning_rate": 4.997914598169512e-05, + "loss": 0.5383, + "step": 13101 + }, + { + "epoch": 0.01, + "learning_rate": 4.997914277395417e-05, + "loss": 0.6257, + "step": 13102 + }, + { + "epoch": 0.01, + "learning_rate": 4.997913956596664e-05, + "loss": 0.4699, + "step": 13103 + }, + { + "epoch": 0.01, + "learning_rate": 4.997913635773253e-05, + "loss": 0.4719, + "step": 13104 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979133149251834e-05, + "loss": 0.4495, + "step": 13105 + }, + { + "epoch": 0.01, + "learning_rate": 4.997912994052456e-05, + "loss": 0.4682, + "step": 13106 + }, + { + "epoch": 0.01, + "learning_rate": 4.997912673155069e-05, + "loss": 0.403, + "step": 13107 + }, + { + "epoch": 0.01, + "learning_rate": 4.997912352233025e-05, + "loss": 0.3921, + "step": 13108 + }, + { + "epoch": 0.01, + "learning_rate": 4.997912031286322e-05, + "loss": 0.3934, + "step": 13109 + }, + { + "epoch": 0.01, + "learning_rate": 4.997911710314961e-05, + "loss": 0.4463, + "step": 13110 + }, + { + "epoch": 0.01, + "learning_rate": 4.997911389318941e-05, + "loss": 0.5515, + "step": 13111 + }, + { + "epoch": 0.01, + "learning_rate": 4.997911068298263e-05, + "loss": 0.5984, + "step": 13112 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979107472529266e-05, + "loss": 0.4758, + "step": 13113 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979104261829324e-05, + "loss": 0.462, + "step": 13114 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979101050882794e-05, + "loss": 0.4119, + "step": 13115 + }, + { + "epoch": 0.01, + "learning_rate": 4.997909783968968e-05, + "loss": 0.4225, + "step": 13116 + }, + { + "epoch": 0.01, + "learning_rate": 4.997909462824999e-05, + "loss": 0.4138, + "step": 13117 + }, + { + "epoch": 0.01, + "learning_rate": 4.997909141656371e-05, + "loss": 0.5493, + "step": 13118 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979088204630845e-05, + "loss": 0.5572, + "step": 13119 + }, + { + "epoch": 0.01, + "learning_rate": 4.997908499245141e-05, + "loss": 0.4677, + "step": 13120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979081780025376e-05, + "loss": 0.4902, + "step": 13121 + }, + { + "epoch": 0.01, + "learning_rate": 4.997907856735277e-05, + "loss": 0.4652, + "step": 13122 + }, + { + "epoch": 0.01, + "learning_rate": 4.997907535443357e-05, + "loss": 0.5826, + "step": 13123 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979072141267796e-05, + "loss": 0.4451, + "step": 13124 + }, + { + "epoch": 0.01, + "learning_rate": 4.997906892785544e-05, + "loss": 0.4576, + "step": 13125 + }, + { + "epoch": 0.01, + "learning_rate": 4.997906571419649e-05, + "loss": 0.4687, + "step": 13126 + }, + { + "epoch": 0.01, + "learning_rate": 4.997906250029097e-05, + "loss": 0.4176, + "step": 13127 + }, + { + "epoch": 0.01, + "learning_rate": 4.997905928613886e-05, + "loss": 0.4297, + "step": 13128 + }, + { + "epoch": 0.01, + "learning_rate": 4.997905607174017e-05, + "loss": 0.4745, + "step": 13129 + }, + { + "epoch": 0.01, + "learning_rate": 4.99790528570949e-05, + "loss": 0.5335, + "step": 13130 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979049642203045e-05, + "loss": 0.482, + "step": 13131 + }, + { + "epoch": 0.01, + "learning_rate": 4.997904642706461e-05, + "loss": 0.5525, + "step": 13132 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979043211679586e-05, + "loss": 0.8318, + "step": 13133 + }, + { + "epoch": 0.01, + "learning_rate": 4.997903999604798e-05, + "loss": 1.1823, + "step": 13134 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979036780169795e-05, + "loss": 1.3099, + "step": 13135 + }, + { + "epoch": 0.01, + "learning_rate": 4.997903356404503e-05, + "loss": 0.9838, + "step": 13136 + }, + { + "epoch": 0.01, + "learning_rate": 4.997903034767368e-05, + "loss": 1.0567, + "step": 13137 + }, + { + "epoch": 0.01, + "learning_rate": 4.997902713105575e-05, + "loss": 1.2956, + "step": 13138 + }, + { + "epoch": 0.01, + "learning_rate": 4.997902391419123e-05, + "loss": 1.2132, + "step": 13139 + }, + { + "epoch": 0.01, + "learning_rate": 4.997902069708013e-05, + "loss": 1.2639, + "step": 13140 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979017479722446e-05, + "loss": 1.2568, + "step": 13141 + }, + { + "epoch": 0.01, + "learning_rate": 4.997901426211818e-05, + "loss": 1.2874, + "step": 13142 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979011044267345e-05, + "loss": 1.105, + "step": 13143 + }, + { + "epoch": 0.01, + "learning_rate": 4.997900782616991e-05, + "loss": 1.1131, + "step": 13144 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979004607825904e-05, + "loss": 1.1554, + "step": 13145 + }, + { + "epoch": 0.01, + "learning_rate": 4.9979001389235315e-05, + "loss": 0.9998, + "step": 13146 + }, + { + "epoch": 0.01, + "learning_rate": 4.997899817039814e-05, + "loss": 1.0174, + "step": 13147 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978994951314386e-05, + "loss": 1.1904, + "step": 13148 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978991731984046e-05, + "loss": 0.8066, + "step": 13149 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978988512407124e-05, + "loss": 0.5018, + "step": 13150 + }, + { + "epoch": 0.01, + "learning_rate": 4.997898529258362e-05, + "loss": 0.3624, + "step": 13151 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978982072513536e-05, + "loss": 0.3332, + "step": 13152 + }, + { + "epoch": 0.01, + "learning_rate": 4.997897885219687e-05, + "loss": 0.2909, + "step": 13153 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978975631633616e-05, + "loss": 0.2502, + "step": 13154 + }, + { + "epoch": 0.01, + "learning_rate": 4.997897241082379e-05, + "loss": 1.0549, + "step": 13155 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978969189767377e-05, + "loss": 1.093, + "step": 13156 + }, + { + "epoch": 0.01, + "learning_rate": 4.997896596846438e-05, + "loss": 1.2906, + "step": 13157 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978962746914805e-05, + "loss": 0.7933, + "step": 13158 + }, + { + "epoch": 0.01, + "learning_rate": 4.997895952511865e-05, + "loss": 2.054, + "step": 13159 + }, + { + "epoch": 0.01, + "learning_rate": 4.997895630307591e-05, + "loss": 1.0205, + "step": 13160 + }, + { + "epoch": 0.01, + "learning_rate": 4.997895308078658e-05, + "loss": 1.2437, + "step": 13161 + }, + { + "epoch": 0.01, + "learning_rate": 4.997894985825068e-05, + "loss": 1.0688, + "step": 13162 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978946635468196e-05, + "loss": 0.919, + "step": 13163 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978943412439134e-05, + "loss": 0.8673, + "step": 13164 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978940189163483e-05, + "loss": 0.4756, + "step": 13165 + }, + { + "epoch": 0.01, + "learning_rate": 4.997893696564125e-05, + "loss": 0.872, + "step": 13166 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978933741872445e-05, + "loss": 1.2752, + "step": 13167 + }, + { + "epoch": 0.01, + "learning_rate": 4.997893051785705e-05, + "loss": 1.132, + "step": 13168 + }, + { + "epoch": 0.01, + "learning_rate": 4.997892729359508e-05, + "loss": 0.8541, + "step": 13169 + }, + { + "epoch": 0.01, + "learning_rate": 4.997892406908652e-05, + "loss": 1.2013, + "step": 13170 + }, + { + "epoch": 0.01, + "learning_rate": 4.997892084433139e-05, + "loss": 1.0825, + "step": 13171 + }, + { + "epoch": 0.01, + "learning_rate": 4.997891761932967e-05, + "loss": 1.1343, + "step": 13172 + }, + { + "epoch": 0.01, + "learning_rate": 4.997891439408137e-05, + "loss": 1.1043, + "step": 13173 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978911168586486e-05, + "loss": 1.1395, + "step": 13174 + }, + { + "epoch": 0.01, + "learning_rate": 4.997890794284502e-05, + "loss": 1.1256, + "step": 13175 + }, + { + "epoch": 0.01, + "learning_rate": 4.997890471685698e-05, + "loss": 1.09, + "step": 13176 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978901490622354e-05, + "loss": 1.0381, + "step": 13177 + }, + { + "epoch": 0.01, + "learning_rate": 4.997889826414115e-05, + "loss": 0.9623, + "step": 13178 + }, + { + "epoch": 0.01, + "learning_rate": 4.997889503741337e-05, + "loss": 0.575, + "step": 13179 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978891810438995e-05, + "loss": 0.7448, + "step": 13180 + }, + { + "epoch": 0.01, + "learning_rate": 4.997888858321804e-05, + "loss": 0.6096, + "step": 13181 + }, + { + "epoch": 0.01, + "learning_rate": 4.997888535575051e-05, + "loss": 1.2684, + "step": 13182 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978882128036404e-05, + "loss": 1.1718, + "step": 13183 + }, + { + "epoch": 0.01, + "learning_rate": 4.997887890007571e-05, + "loss": 1.1802, + "step": 13184 + }, + { + "epoch": 0.01, + "learning_rate": 4.997887567186844e-05, + "loss": 0.7917, + "step": 13185 + }, + { + "epoch": 0.01, + "learning_rate": 4.997887244341458e-05, + "loss": 1.0472, + "step": 13186 + }, + { + "epoch": 0.01, + "learning_rate": 4.997886921471415e-05, + "loss": 1.371, + "step": 13187 + }, + { + "epoch": 0.01, + "learning_rate": 4.997886598576713e-05, + "loss": 1.1039, + "step": 13188 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978862756573535e-05, + "loss": 1.1748, + "step": 13189 + }, + { + "epoch": 0.01, + "learning_rate": 4.997885952713336e-05, + "loss": 1.0792, + "step": 13190 + }, + { + "epoch": 0.01, + "learning_rate": 4.99788562974466e-05, + "loss": 1.1257, + "step": 13191 + }, + { + "epoch": 0.01, + "learning_rate": 4.997885306751326e-05, + "loss": 1.1188, + "step": 13192 + }, + { + "epoch": 0.01, + "learning_rate": 4.997884983733334e-05, + "loss": 1.0585, + "step": 13193 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978846606906834e-05, + "loss": 0.8592, + "step": 13194 + }, + { + "epoch": 0.01, + "learning_rate": 4.997884337623376e-05, + "loss": 1.3862, + "step": 13195 + }, + { + "epoch": 0.01, + "learning_rate": 4.99788401453141e-05, + "loss": 1.1329, + "step": 13196 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978836914147855e-05, + "loss": 1.3172, + "step": 13197 + }, + { + "epoch": 0.01, + "learning_rate": 4.997883368273503e-05, + "loss": 1.1406, + "step": 13198 + }, + { + "epoch": 0.01, + "learning_rate": 4.997883045107563e-05, + "loss": 1.0109, + "step": 13199 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978827219169643e-05, + "loss": 1.282, + "step": 13200 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978823987017074e-05, + "loss": 0.9855, + "step": 13201 + }, + { + "epoch": 0.01, + "learning_rate": 4.997882075461793e-05, + "loss": 0.7936, + "step": 13202 + }, + { + "epoch": 0.01, + "learning_rate": 4.997881752197221e-05, + "loss": 1.0041, + "step": 13203 + }, + { + "epoch": 0.01, + "learning_rate": 4.99788142890799e-05, + "loss": 1.1821, + "step": 13204 + }, + { + "epoch": 0.01, + "learning_rate": 4.997881105594102e-05, + "loss": 1.3851, + "step": 13205 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978807822555554e-05, + "loss": 0.9859, + "step": 13206 + }, + { + "epoch": 0.01, + "learning_rate": 4.99788045889235e-05, + "loss": 0.9828, + "step": 13207 + }, + { + "epoch": 0.01, + "learning_rate": 4.997880135504488e-05, + "loss": 0.9934, + "step": 13208 + }, + { + "epoch": 0.01, + "learning_rate": 4.997879812091967e-05, + "loss": 0.9731, + "step": 13209 + }, + { + "epoch": 0.01, + "learning_rate": 4.997879488654789e-05, + "loss": 1.0061, + "step": 13210 + }, + { + "epoch": 0.01, + "learning_rate": 4.997879165192951e-05, + "loss": 1.2834, + "step": 13211 + }, + { + "epoch": 0.01, + "learning_rate": 4.997878841706457e-05, + "loss": 1.2319, + "step": 13212 + }, + { + "epoch": 0.01, + "learning_rate": 4.997878518195304e-05, + "loss": 1.1768, + "step": 13213 + }, + { + "epoch": 0.01, + "learning_rate": 4.997878194659493e-05, + "loss": 1.0656, + "step": 13214 + }, + { + "epoch": 0.01, + "learning_rate": 4.997877871099025e-05, + "loss": 1.0897, + "step": 13215 + }, + { + "epoch": 0.01, + "learning_rate": 4.997877547513898e-05, + "loss": 1.3611, + "step": 13216 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978772239041134e-05, + "loss": 0.9368, + "step": 13217 + }, + { + "epoch": 0.01, + "learning_rate": 4.99787690026967e-05, + "loss": 1.487, + "step": 13218 + }, + { + "epoch": 0.01, + "learning_rate": 4.997876576610569e-05, + "loss": 1.0307, + "step": 13219 + }, + { + "epoch": 0.01, + "learning_rate": 4.997876252926811e-05, + "loss": 1.1302, + "step": 13220 + }, + { + "epoch": 0.01, + "learning_rate": 4.997875929218394e-05, + "loss": 1.1502, + "step": 13221 + }, + { + "epoch": 0.01, + "learning_rate": 4.997875605485319e-05, + "loss": 1.1711, + "step": 13222 + }, + { + "epoch": 0.01, + "learning_rate": 4.997875281727587e-05, + "loss": 0.9221, + "step": 13223 + }, + { + "epoch": 0.01, + "learning_rate": 4.997874957945196e-05, + "loss": 0.903, + "step": 13224 + }, + { + "epoch": 0.01, + "learning_rate": 4.997874634138148e-05, + "loss": 1.0384, + "step": 13225 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978743103064415e-05, + "loss": 1.1008, + "step": 13226 + }, + { + "epoch": 0.01, + "learning_rate": 4.997873986450077e-05, + "loss": 1.1846, + "step": 13227 + }, + { + "epoch": 0.01, + "learning_rate": 4.997873662569054e-05, + "loss": 1.3003, + "step": 13228 + }, + { + "epoch": 0.01, + "learning_rate": 4.997873338663374e-05, + "loss": 1.1651, + "step": 13229 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978730147330355e-05, + "loss": 0.9041, + "step": 13230 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978726907780396e-05, + "loss": 1.1599, + "step": 13231 + }, + { + "epoch": 0.01, + "learning_rate": 4.997872366798385e-05, + "loss": 1.2287, + "step": 13232 + }, + { + "epoch": 0.01, + "learning_rate": 4.997872042794073e-05, + "loss": 1.3725, + "step": 13233 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978717187651025e-05, + "loss": 1.2067, + "step": 13234 + }, + { + "epoch": 0.01, + "learning_rate": 4.997871394711474e-05, + "loss": 1.1195, + "step": 13235 + }, + { + "epoch": 0.01, + "learning_rate": 4.997871070633188e-05, + "loss": 1.0614, + "step": 13236 + }, + { + "epoch": 0.01, + "learning_rate": 4.997870746530244e-05, + "loss": 0.9316, + "step": 13237 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978704224026426e-05, + "loss": 0.9898, + "step": 13238 + }, + { + "epoch": 0.01, + "learning_rate": 4.997870098250382e-05, + "loss": 0.7764, + "step": 13239 + }, + { + "epoch": 0.01, + "learning_rate": 4.997869774073465e-05, + "loss": 0.5434, + "step": 13240 + }, + { + "epoch": 0.01, + "learning_rate": 4.997869449871889e-05, + "loss": 1.0947, + "step": 13241 + }, + { + "epoch": 0.01, + "learning_rate": 4.997869125645656e-05, + "loss": 1.0978, + "step": 13242 + }, + { + "epoch": 0.01, + "learning_rate": 4.997868801394764e-05, + "loss": 1.242, + "step": 13243 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978684771192144e-05, + "loss": 1.0641, + "step": 13244 + }, + { + "epoch": 0.01, + "learning_rate": 4.997868152819007e-05, + "loss": 1.096, + "step": 13245 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978678284941426e-05, + "loss": 1.0832, + "step": 13246 + }, + { + "epoch": 0.01, + "learning_rate": 4.997867504144619e-05, + "loss": 1.04, + "step": 13247 + }, + { + "epoch": 0.01, + "learning_rate": 4.997867179770438e-05, + "loss": 1.0938, + "step": 13248 + }, + { + "epoch": 0.01, + "learning_rate": 4.997866855371599e-05, + "loss": 1.0092, + "step": 13249 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978665309481025e-05, + "loss": 1.2841, + "step": 13250 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978662064999485e-05, + "loss": 1.3628, + "step": 13251 + }, + { + "epoch": 0.01, + "learning_rate": 4.997865882027135e-05, + "loss": 1.3454, + "step": 13252 + }, + { + "epoch": 0.01, + "learning_rate": 4.997865557529665e-05, + "loss": 1.1328, + "step": 13253 + }, + { + "epoch": 0.01, + "learning_rate": 4.997865233007537e-05, + "loss": 1.2953, + "step": 13254 + }, + { + "epoch": 0.01, + "learning_rate": 4.99786490846075e-05, + "loss": 1.0408, + "step": 13255 + }, + { + "epoch": 0.01, + "learning_rate": 4.997864583889307e-05, + "loss": 1.0113, + "step": 13256 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978642592932046e-05, + "loss": 0.99, + "step": 13257 + }, + { + "epoch": 0.01, + "learning_rate": 4.997863934672445e-05, + "loss": 1.3442, + "step": 13258 + }, + { + "epoch": 0.01, + "learning_rate": 4.997863610027027e-05, + "loss": 1.1026, + "step": 13259 + }, + { + "epoch": 0.01, + "learning_rate": 4.997863285356952e-05, + "loss": 1.1804, + "step": 13260 + }, + { + "epoch": 0.01, + "learning_rate": 4.997862960662218e-05, + "loss": 1.118, + "step": 13261 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978626359428274e-05, + "loss": 0.8905, + "step": 13262 + }, + { + "epoch": 0.01, + "learning_rate": 4.997862311198778e-05, + "loss": 0.859, + "step": 13263 + }, + { + "epoch": 0.01, + "learning_rate": 4.997861986430071e-05, + "loss": 0.9503, + "step": 13264 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978616616367065e-05, + "loss": 0.837, + "step": 13265 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978613368186837e-05, + "loss": 0.8207, + "step": 13266 + }, + { + "epoch": 0.01, + "learning_rate": 4.997861011976004e-05, + "loss": 1.2311, + "step": 13267 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978606871086656e-05, + "loss": 1.1697, + "step": 13268 + }, + { + "epoch": 0.01, + "learning_rate": 4.997860362216669e-05, + "loss": 0.8688, + "step": 13269 + }, + { + "epoch": 0.01, + "learning_rate": 4.997860037300015e-05, + "loss": 0.7533, + "step": 13270 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978597123587035e-05, + "loss": 0.8438, + "step": 13271 + }, + { + "epoch": 0.01, + "learning_rate": 4.997859387392734e-05, + "loss": 1.1314, + "step": 13272 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978590624021074e-05, + "loss": 1.059, + "step": 13273 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978587373868214e-05, + "loss": 1.2934, + "step": 13274 + }, + { + "epoch": 0.01, + "learning_rate": 4.997858412346879e-05, + "loss": 1.2194, + "step": 13275 + }, + { + "epoch": 0.01, + "learning_rate": 4.997858087282278e-05, + "loss": 1.388, + "step": 13276 + }, + { + "epoch": 0.01, + "learning_rate": 4.997857762193019e-05, + "loss": 1.0481, + "step": 13277 + }, + { + "epoch": 0.01, + "learning_rate": 4.997857437079103e-05, + "loss": 0.8995, + "step": 13278 + }, + { + "epoch": 0.01, + "learning_rate": 4.997857111940529e-05, + "loss": 1.1896, + "step": 13279 + }, + { + "epoch": 0.01, + "learning_rate": 4.997856786777297e-05, + "loss": 1.0073, + "step": 13280 + }, + { + "epoch": 0.01, + "learning_rate": 4.997856461589407e-05, + "loss": 0.8425, + "step": 13281 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978561363768595e-05, + "loss": 1.5723, + "step": 13282 + }, + { + "epoch": 0.01, + "learning_rate": 4.997855811139654e-05, + "loss": 1.0289, + "step": 13283 + }, + { + "epoch": 0.01, + "learning_rate": 4.997855485877792e-05, + "loss": 0.913, + "step": 13284 + }, + { + "epoch": 0.01, + "learning_rate": 4.99785516059127e-05, + "loss": 1.1144, + "step": 13285 + }, + { + "epoch": 0.01, + "learning_rate": 4.997854835280091e-05, + "loss": 1.0904, + "step": 13286 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978545099442555e-05, + "loss": 0.9672, + "step": 13287 + }, + { + "epoch": 0.01, + "learning_rate": 4.997854184583761e-05, + "loss": 1.1621, + "step": 13288 + }, + { + "epoch": 0.01, + "learning_rate": 4.997853859198609e-05, + "loss": 0.9643, + "step": 13289 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978535337887994e-05, + "loss": 1.0162, + "step": 13290 + }, + { + "epoch": 0.01, + "learning_rate": 4.997853208354332e-05, + "loss": 1.1104, + "step": 13291 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978528828952066e-05, + "loss": 1.0895, + "step": 13292 + }, + { + "epoch": 0.01, + "learning_rate": 4.997852557411424e-05, + "loss": 0.9818, + "step": 13293 + }, + { + "epoch": 0.01, + "learning_rate": 4.997852231902983e-05, + "loss": 1.1447, + "step": 13294 + }, + { + "epoch": 0.01, + "learning_rate": 4.997851906369885e-05, + "loss": 1.1971, + "step": 13295 + }, + { + "epoch": 0.01, + "learning_rate": 4.997851580812129e-05, + "loss": 0.8869, + "step": 13296 + }, + { + "epoch": 0.01, + "learning_rate": 4.997851255229714e-05, + "loss": 1.4195, + "step": 13297 + }, + { + "epoch": 0.01, + "learning_rate": 4.997850929622643e-05, + "loss": 1.0647, + "step": 13298 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978506039909135e-05, + "loss": 0.9029, + "step": 13299 + }, + { + "epoch": 0.01, + "learning_rate": 4.997850278334526e-05, + "loss": 1.104, + "step": 13300 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978499526534816e-05, + "loss": 1.2008, + "step": 13301 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978496269477784e-05, + "loss": 0.8578, + "step": 13302 + }, + { + "epoch": 0.01, + "learning_rate": 4.997849301217419e-05, + "loss": 1.1522, + "step": 13303 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978489754624e-05, + "loss": 1.3556, + "step": 13304 + }, + { + "epoch": 0.01, + "learning_rate": 4.997848649682725e-05, + "loss": 1.2666, + "step": 13305 + }, + { + "epoch": 0.01, + "learning_rate": 4.997848323878391e-05, + "loss": 1.2673, + "step": 13306 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978479980494005e-05, + "loss": 1.0229, + "step": 13307 + }, + { + "epoch": 0.01, + "learning_rate": 4.997847672195751e-05, + "loss": 1.771, + "step": 13308 + }, + { + "epoch": 0.01, + "learning_rate": 4.997847346317445e-05, + "loss": 0.9576, + "step": 13309 + }, + { + "epoch": 0.01, + "learning_rate": 4.997847020414481e-05, + "loss": 1.0482, + "step": 13310 + }, + { + "epoch": 0.01, + "learning_rate": 4.997846694486858e-05, + "loss": 0.9268, + "step": 13311 + }, + { + "epoch": 0.01, + "learning_rate": 4.997846368534579e-05, + "loss": 1.0142, + "step": 13312 + }, + { + "epoch": 0.01, + "learning_rate": 4.997846042557642e-05, + "loss": 0.9943, + "step": 13313 + }, + { + "epoch": 0.01, + "learning_rate": 4.997845716556047e-05, + "loss": 0.9842, + "step": 13314 + }, + { + "epoch": 0.01, + "learning_rate": 4.997845390529794e-05, + "loss": 1.0571, + "step": 13315 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978450644788834e-05, + "loss": 1.0647, + "step": 13316 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978447384033155e-05, + "loss": 1.3043, + "step": 13317 + }, + { + "epoch": 0.01, + "learning_rate": 4.99784441230309e-05, + "loss": 1.1047, + "step": 13318 + }, + { + "epoch": 0.01, + "learning_rate": 4.997844086178206e-05, + "loss": 1.0826, + "step": 13319 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978437600286656e-05, + "loss": 0.7416, + "step": 13320 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978434338544665e-05, + "loss": 0.9795, + "step": 13321 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978431076556106e-05, + "loss": 1.1324, + "step": 13322 + }, + { + "epoch": 0.01, + "learning_rate": 4.997842781432096e-05, + "loss": 0.931, + "step": 13323 + }, + { + "epoch": 0.01, + "learning_rate": 4.997842455183925e-05, + "loss": 0.9355, + "step": 13324 + }, + { + "epoch": 0.01, + "learning_rate": 4.997842128911095e-05, + "loss": 1.0136, + "step": 13325 + }, + { + "epoch": 0.01, + "learning_rate": 4.997841802613609e-05, + "loss": 1.1904, + "step": 13326 + }, + { + "epoch": 0.01, + "learning_rate": 4.997841476291464e-05, + "loss": 0.8844, + "step": 13327 + }, + { + "epoch": 0.01, + "learning_rate": 4.997841149944662e-05, + "loss": 1.1935, + "step": 13328 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978408235732025e-05, + "loss": 1.3043, + "step": 13329 + }, + { + "epoch": 0.01, + "learning_rate": 4.997840497177084e-05, + "loss": 1.4427, + "step": 13330 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978401707563096e-05, + "loss": 1.121, + "step": 13331 + }, + { + "epoch": 0.01, + "learning_rate": 4.997839844310876e-05, + "loss": 1.0611, + "step": 13332 + }, + { + "epoch": 0.01, + "learning_rate": 4.997839517840786e-05, + "loss": 1.2253, + "step": 13333 + }, + { + "epoch": 0.01, + "learning_rate": 4.997839191346038e-05, + "loss": 1.2705, + "step": 13334 + }, + { + "epoch": 0.01, + "learning_rate": 4.997838864826633e-05, + "loss": 0.8734, + "step": 13335 + }, + { + "epoch": 0.01, + "learning_rate": 4.997838538282569e-05, + "loss": 1.1791, + "step": 13336 + }, + { + "epoch": 0.01, + "learning_rate": 4.997838211713848e-05, + "loss": 1.1229, + "step": 13337 + }, + { + "epoch": 0.01, + "learning_rate": 4.99783788512047e-05, + "loss": 1.0637, + "step": 13338 + }, + { + "epoch": 0.01, + "learning_rate": 4.997837558502434e-05, + "loss": 1.0856, + "step": 13339 + }, + { + "epoch": 0.01, + "learning_rate": 4.99783723185974e-05, + "loss": 1.2392, + "step": 13340 + }, + { + "epoch": 0.01, + "learning_rate": 4.997836905192389e-05, + "loss": 0.953, + "step": 13341 + }, + { + "epoch": 0.01, + "learning_rate": 4.99783657850038e-05, + "loss": 1.0463, + "step": 13342 + }, + { + "epoch": 0.01, + "learning_rate": 4.997836251783714e-05, + "loss": 1.5255, + "step": 13343 + }, + { + "epoch": 0.01, + "learning_rate": 4.99783592504239e-05, + "loss": 1.0337, + "step": 13344 + }, + { + "epoch": 0.01, + "learning_rate": 4.997835598276408e-05, + "loss": 1.0521, + "step": 13345 + }, + { + "epoch": 0.01, + "learning_rate": 4.997835271485769e-05, + "loss": 1.051, + "step": 13346 + }, + { + "epoch": 0.01, + "learning_rate": 4.997834944670472e-05, + "loss": 1.206, + "step": 13347 + }, + { + "epoch": 0.01, + "learning_rate": 4.997834617830518e-05, + "loss": 0.962, + "step": 13348 + }, + { + "epoch": 0.01, + "learning_rate": 4.997834290965906e-05, + "loss": 0.9511, + "step": 13349 + }, + { + "epoch": 0.01, + "learning_rate": 4.997833964076637e-05, + "loss": 1.265, + "step": 13350 + }, + { + "epoch": 0.01, + "learning_rate": 4.99783363716271e-05, + "loss": 1.032, + "step": 13351 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978333102241247e-05, + "loss": 1.1215, + "step": 13352 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978329832608825e-05, + "loss": 0.7762, + "step": 13353 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978326562729835e-05, + "loss": 1.1542, + "step": 13354 + }, + { + "epoch": 0.01, + "learning_rate": 4.997832329260426e-05, + "loss": 1.1663, + "step": 13355 + }, + { + "epoch": 0.01, + "learning_rate": 4.997832002223211e-05, + "loss": 1.0352, + "step": 13356 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978316751613384e-05, + "loss": 0.9754, + "step": 13357 + }, + { + "epoch": 0.01, + "learning_rate": 4.997831348074808e-05, + "loss": 1.048, + "step": 13358 + }, + { + "epoch": 0.01, + "learning_rate": 4.997831020963621e-05, + "loss": 1.1207, + "step": 13359 + }, + { + "epoch": 0.01, + "learning_rate": 4.997830693827776e-05, + "loss": 0.8837, + "step": 13360 + }, + { + "epoch": 0.01, + "learning_rate": 4.997830366667274e-05, + "loss": 1.3253, + "step": 13361 + }, + { + "epoch": 0.01, + "learning_rate": 4.997830039482114e-05, + "loss": 1.2562, + "step": 13362 + }, + { + "epoch": 0.01, + "learning_rate": 4.997829712272296e-05, + "loss": 1.3778, + "step": 13363 + }, + { + "epoch": 0.01, + "learning_rate": 4.997829385037821e-05, + "loss": 0.9268, + "step": 13364 + }, + { + "epoch": 0.01, + "learning_rate": 4.997829057778689e-05, + "loss": 1.0259, + "step": 13365 + }, + { + "epoch": 0.01, + "learning_rate": 4.997828730494899e-05, + "loss": 1.2288, + "step": 13366 + }, + { + "epoch": 0.01, + "learning_rate": 4.997828403186451e-05, + "loss": 1.1526, + "step": 13367 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978280758533455e-05, + "loss": 1.0788, + "step": 13368 + }, + { + "epoch": 0.01, + "learning_rate": 4.997827748495583e-05, + "loss": 1.1302, + "step": 13369 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978274211131633e-05, + "loss": 1.4603, + "step": 13370 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978270937060854e-05, + "loss": 1.3785, + "step": 13371 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978267662743506e-05, + "loss": 1.2116, + "step": 13372 + }, + { + "epoch": 0.01, + "learning_rate": 4.997826438817958e-05, + "loss": 1.2782, + "step": 13373 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978261113369074e-05, + "loss": 1.1429, + "step": 13374 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978257838312e-05, + "loss": 1.1785, + "step": 13375 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978254563008356e-05, + "loss": 0.774, + "step": 13376 + }, + { + "epoch": 0.01, + "learning_rate": 4.997825128745812e-05, + "loss": 1.0027, + "step": 13377 + }, + { + "epoch": 0.01, + "learning_rate": 4.997824801166132e-05, + "loss": 0.9855, + "step": 13378 + }, + { + "epoch": 0.01, + "learning_rate": 4.997824473561795e-05, + "loss": 1.1095, + "step": 13379 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978241459328004e-05, + "loss": 1.1271, + "step": 13380 + }, + { + "epoch": 0.01, + "learning_rate": 4.997823818279147e-05, + "loss": 1.0207, + "step": 13381 + }, + { + "epoch": 0.01, + "learning_rate": 4.997823490600838e-05, + "loss": 1.0327, + "step": 13382 + }, + { + "epoch": 0.01, + "learning_rate": 4.99782316289787e-05, + "loss": 1.0698, + "step": 13383 + }, + { + "epoch": 0.01, + "learning_rate": 4.997822835170245e-05, + "loss": 1.2344, + "step": 13384 + }, + { + "epoch": 0.01, + "learning_rate": 4.997822507417963e-05, + "loss": 1.2526, + "step": 13385 + }, + { + "epoch": 0.01, + "learning_rate": 4.997822179641023e-05, + "loss": 1.2137, + "step": 13386 + }, + { + "epoch": 0.01, + "learning_rate": 4.997821851839426e-05, + "loss": 0.9287, + "step": 13387 + }, + { + "epoch": 0.01, + "learning_rate": 4.997821524013171e-05, + "loss": 1.1097, + "step": 13388 + }, + { + "epoch": 0.01, + "learning_rate": 4.997821196162259e-05, + "loss": 1.0661, + "step": 13389 + }, + { + "epoch": 0.01, + "learning_rate": 4.99782086828669e-05, + "loss": 0.9219, + "step": 13390 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978205403864634e-05, + "loss": 1.1103, + "step": 13391 + }, + { + "epoch": 0.01, + "learning_rate": 4.997820212461578e-05, + "loss": 0.8699, + "step": 13392 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978198845120364e-05, + "loss": 0.889, + "step": 13393 + }, + { + "epoch": 0.01, + "learning_rate": 4.997819556537837e-05, + "loss": 1.1113, + "step": 13394 + }, + { + "epoch": 0.01, + "learning_rate": 4.997819228538981e-05, + "loss": 1.1604, + "step": 13395 + }, + { + "epoch": 0.01, + "learning_rate": 4.997818900515466e-05, + "loss": 1.141, + "step": 13396 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978185724672954e-05, + "loss": 0.5784, + "step": 13397 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978182443944665e-05, + "loss": 1.2137, + "step": 13398 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978179162969794e-05, + "loss": 1.1008, + "step": 13399 + }, + { + "epoch": 0.01, + "learning_rate": 4.997817588174836e-05, + "loss": 0.967, + "step": 13400 + }, + { + "epoch": 0.01, + "learning_rate": 4.997817260028035e-05, + "loss": 1.0273, + "step": 13401 + }, + { + "epoch": 0.01, + "learning_rate": 4.997816931856577e-05, + "loss": 1.1577, + "step": 13402 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978166036604606e-05, + "loss": 1.2207, + "step": 13403 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978162754396875e-05, + "loss": 1.4783, + "step": 13404 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978159471942563e-05, + "loss": 1.1987, + "step": 13405 + }, + { + "epoch": 0.01, + "learning_rate": 4.997815618924169e-05, + "loss": 1.4815, + "step": 13406 + }, + { + "epoch": 0.01, + "learning_rate": 4.997815290629423e-05, + "loss": 1.1812, + "step": 13407 + }, + { + "epoch": 0.01, + "learning_rate": 4.99781496231002e-05, + "loss": 0.9092, + "step": 13408 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978146339659596e-05, + "loss": 1.0899, + "step": 13409 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978143055972424e-05, + "loss": 1.3144, + "step": 13410 + }, + { + "epoch": 0.01, + "learning_rate": 4.997813977203867e-05, + "loss": 1.1962, + "step": 13411 + }, + { + "epoch": 0.01, + "learning_rate": 4.997813648785835e-05, + "loss": 1.2084, + "step": 13412 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978133203431454e-05, + "loss": 1.0493, + "step": 13413 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978129918757984e-05, + "loss": 1.1011, + "step": 13414 + }, + { + "epoch": 0.01, + "learning_rate": 4.997812663383794e-05, + "loss": 1.3777, + "step": 13415 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978123348671326e-05, + "loss": 1.651, + "step": 13416 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978120063258125e-05, + "loss": 1.5813, + "step": 13417 + }, + { + "epoch": 0.01, + "learning_rate": 4.997811677759836e-05, + "loss": 1.5543, + "step": 13418 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978113491692025e-05, + "loss": 1.2562, + "step": 13419 + }, + { + "epoch": 0.01, + "learning_rate": 4.997811020553911e-05, + "loss": 1.6136, + "step": 13420 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978106919139634e-05, + "loss": 1.7078, + "step": 13421 + }, + { + "epoch": 0.01, + "learning_rate": 4.997810363249357e-05, + "loss": 1.173, + "step": 13422 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978100345600944e-05, + "loss": 1.5011, + "step": 13423 + }, + { + "epoch": 0.01, + "learning_rate": 4.997809705846174e-05, + "loss": 1.6022, + "step": 13424 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978093771075955e-05, + "loss": 1.2602, + "step": 13425 + }, + { + "epoch": 0.01, + "learning_rate": 4.997809048344361e-05, + "loss": 1.3651, + "step": 13426 + }, + { + "epoch": 0.01, + "learning_rate": 4.997808719556468e-05, + "loss": 1.3125, + "step": 13427 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978083907439185e-05, + "loss": 1.3091, + "step": 13428 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978080619067115e-05, + "loss": 0.9429, + "step": 13429 + }, + { + "epoch": 0.01, + "learning_rate": 4.997807733044847e-05, + "loss": 1.1973, + "step": 13430 + }, + { + "epoch": 0.01, + "learning_rate": 4.997807404158325e-05, + "loss": 1.0719, + "step": 13431 + }, + { + "epoch": 0.01, + "learning_rate": 4.997807075247146e-05, + "loss": 1.0395, + "step": 13432 + }, + { + "epoch": 0.01, + "learning_rate": 4.99780674631131e-05, + "loss": 1.1366, + "step": 13433 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978064173508164e-05, + "loss": 1.019, + "step": 13434 + }, + { + "epoch": 0.01, + "learning_rate": 4.997806088365665e-05, + "loss": 1.0591, + "step": 13435 + }, + { + "epoch": 0.01, + "learning_rate": 4.997805759355857e-05, + "loss": 1.0975, + "step": 13436 + }, + { + "epoch": 0.01, + "learning_rate": 4.997805430321392e-05, + "loss": 1.2038, + "step": 13437 + }, + { + "epoch": 0.01, + "learning_rate": 4.997805101262269e-05, + "loss": 1.2305, + "step": 13438 + }, + { + "epoch": 0.01, + "learning_rate": 4.997804772178489e-05, + "loss": 1.1124, + "step": 13439 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978044430700514e-05, + "loss": 1.1629, + "step": 13440 + }, + { + "epoch": 0.01, + "learning_rate": 4.997804113936957e-05, + "loss": 1.0923, + "step": 13441 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978037847792054e-05, + "loss": 0.9277, + "step": 13442 + }, + { + "epoch": 0.01, + "learning_rate": 4.997803455596796e-05, + "loss": 0.981, + "step": 13443 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978031263897295e-05, + "loss": 1.1139, + "step": 13444 + }, + { + "epoch": 0.01, + "learning_rate": 4.997802797158006e-05, + "loss": 1.2224, + "step": 13445 + }, + { + "epoch": 0.01, + "learning_rate": 4.997802467901626e-05, + "loss": 0.944, + "step": 13446 + }, + { + "epoch": 0.01, + "learning_rate": 4.997802138620587e-05, + "loss": 1.0513, + "step": 13447 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978018093148915e-05, + "loss": 1.0209, + "step": 13448 + }, + { + "epoch": 0.01, + "learning_rate": 4.997801479984539e-05, + "loss": 1.0556, + "step": 13449 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978011506295294e-05, + "loss": 1.188, + "step": 13450 + }, + { + "epoch": 0.01, + "learning_rate": 4.9978008212498615e-05, + "loss": 1.4315, + "step": 13451 + }, + { + "epoch": 0.01, + "learning_rate": 4.997800491845538e-05, + "loss": 1.6668, + "step": 13452 + }, + { + "epoch": 0.01, + "learning_rate": 4.997800162416556e-05, + "loss": 1.1929, + "step": 13453 + }, + { + "epoch": 0.01, + "learning_rate": 4.997799832962917e-05, + "loss": 1.0631, + "step": 13454 + }, + { + "epoch": 0.01, + "learning_rate": 4.997799503484621e-05, + "loss": 1.1428, + "step": 13455 + }, + { + "epoch": 0.01, + "learning_rate": 4.997799173981668e-05, + "loss": 0.8503, + "step": 13456 + }, + { + "epoch": 0.01, + "learning_rate": 4.997798844454057e-05, + "loss": 0.9871, + "step": 13457 + }, + { + "epoch": 0.01, + "learning_rate": 4.99779851490179e-05, + "loss": 1.2522, + "step": 13458 + }, + { + "epoch": 0.01, + "learning_rate": 4.997798185324865e-05, + "loss": 1.2566, + "step": 13459 + }, + { + "epoch": 0.01, + "learning_rate": 4.997797855723283e-05, + "loss": 0.9995, + "step": 13460 + }, + { + "epoch": 0.01, + "learning_rate": 4.997797526097043e-05, + "loss": 0.896, + "step": 13461 + }, + { + "epoch": 0.01, + "learning_rate": 4.997797196446147e-05, + "loss": 1.0634, + "step": 13462 + }, + { + "epoch": 0.01, + "learning_rate": 4.997796866770593e-05, + "loss": 0.6953, + "step": 13463 + }, + { + "epoch": 0.01, + "learning_rate": 4.997796537070382e-05, + "loss": 1.043, + "step": 13464 + }, + { + "epoch": 0.01, + "learning_rate": 4.997796207345514e-05, + "loss": 1.3509, + "step": 13465 + }, + { + "epoch": 0.01, + "learning_rate": 4.997795877595989e-05, + "loss": 1.317, + "step": 13466 + }, + { + "epoch": 0.01, + "learning_rate": 4.997795547821806e-05, + "loss": 1.2163, + "step": 13467 + }, + { + "epoch": 0.01, + "learning_rate": 4.997795218022966e-05, + "loss": 1.1202, + "step": 13468 + }, + { + "epoch": 0.01, + "learning_rate": 4.99779488819947e-05, + "loss": 1.0508, + "step": 13469 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977945583513155e-05, + "loss": 1.203, + "step": 13470 + }, + { + "epoch": 0.01, + "learning_rate": 4.997794228478504e-05, + "loss": 1.082, + "step": 13471 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977938985810356e-05, + "loss": 0.8471, + "step": 13472 + }, + { + "epoch": 0.01, + "learning_rate": 4.99779356865891e-05, + "loss": 0.4605, + "step": 13473 + }, + { + "epoch": 0.01, + "learning_rate": 4.997793238712127e-05, + "loss": 0.2344, + "step": 13474 + }, + { + "epoch": 0.01, + "learning_rate": 4.997792908740687e-05, + "loss": 0.3221, + "step": 13475 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977925787445904e-05, + "loss": 1.3215, + "step": 13476 + }, + { + "epoch": 0.01, + "learning_rate": 4.997792248723836e-05, + "loss": 1.2194, + "step": 13477 + }, + { + "epoch": 0.01, + "learning_rate": 4.997791918678425e-05, + "loss": 0.6998, + "step": 13478 + }, + { + "epoch": 0.01, + "learning_rate": 4.997791588608356e-05, + "loss": 1.1459, + "step": 13479 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977912585136304e-05, + "loss": 0.9691, + "step": 13480 + }, + { + "epoch": 0.01, + "learning_rate": 4.997790928394248e-05, + "loss": 0.8361, + "step": 13481 + }, + { + "epoch": 0.01, + "learning_rate": 4.997790598250208e-05, + "loss": 1.3147, + "step": 13482 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977902680815105e-05, + "loss": 0.9627, + "step": 13483 + }, + { + "epoch": 0.01, + "learning_rate": 4.997789937888156e-05, + "loss": 0.7577, + "step": 13484 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977896076701446e-05, + "loss": 0.5085, + "step": 13485 + }, + { + "epoch": 0.01, + "learning_rate": 4.997789277427477e-05, + "loss": 0.6398, + "step": 13486 + }, + { + "epoch": 0.01, + "learning_rate": 4.997788947160151e-05, + "loss": 1.1449, + "step": 13487 + }, + { + "epoch": 0.01, + "learning_rate": 4.997788616868168e-05, + "loss": 1.0784, + "step": 13488 + }, + { + "epoch": 0.01, + "learning_rate": 4.997788286551528e-05, + "loss": 0.9939, + "step": 13489 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977879562102316e-05, + "loss": 1.1397, + "step": 13490 + }, + { + "epoch": 0.01, + "learning_rate": 4.997787625844277e-05, + "loss": 1.3888, + "step": 13491 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977872954536666e-05, + "loss": 1.2933, + "step": 13492 + }, + { + "epoch": 0.01, + "learning_rate": 4.997786965038398e-05, + "loss": 1.1572, + "step": 13493 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977866345984724e-05, + "loss": 1.1605, + "step": 13494 + }, + { + "epoch": 0.01, + "learning_rate": 4.99778630413389e-05, + "loss": 1.1386, + "step": 13495 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977859736446503e-05, + "loss": 1.1907, + "step": 13496 + }, + { + "epoch": 0.01, + "learning_rate": 4.997785643130754e-05, + "loss": 1.1242, + "step": 13497 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977853125922004e-05, + "loss": 0.7879, + "step": 13498 + }, + { + "epoch": 0.01, + "learning_rate": 4.997784982028989e-05, + "loss": 0.6311, + "step": 13499 + }, + { + "epoch": 0.01, + "learning_rate": 4.997784651441121e-05, + "loss": 0.883, + "step": 13500 + }, + { + "epoch": 0.01, + "eval_loss": 1.0440155267715454, + "eval_runtime": 86.2254, + "eval_samples_per_second": 16.063, + "eval_steps_per_second": 4.024, + "step": 13500 + }, + { + "epoch": 0.01, + "learning_rate": 4.997784320828596e-05, + "loss": 0.7417, + "step": 13501 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977839901914144e-05, + "loss": 0.8161, + "step": 13502 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977836595295744e-05, + "loss": 0.7489, + "step": 13503 + }, + { + "epoch": 0.01, + "learning_rate": 4.997783328843079e-05, + "loss": 0.5493, + "step": 13504 + }, + { + "epoch": 0.01, + "learning_rate": 4.997782998131926e-05, + "loss": 0.4742, + "step": 13505 + }, + { + "epoch": 0.01, + "learning_rate": 4.997782667396115e-05, + "loss": 0.4656, + "step": 13506 + }, + { + "epoch": 0.01, + "learning_rate": 4.997782336635648e-05, + "loss": 0.4238, + "step": 13507 + }, + { + "epoch": 0.01, + "learning_rate": 4.997782005850523e-05, + "loss": 0.438, + "step": 13508 + }, + { + "epoch": 0.01, + "learning_rate": 4.997781675040742e-05, + "loss": 0.5162, + "step": 13509 + }, + { + "epoch": 0.01, + "learning_rate": 4.997781344206303e-05, + "loss": 0.4822, + "step": 13510 + }, + { + "epoch": 0.01, + "learning_rate": 4.997781013347208e-05, + "loss": 0.4873, + "step": 13511 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977806824634554e-05, + "loss": 0.5565, + "step": 13512 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977803515550455e-05, + "loss": 0.4265, + "step": 13513 + }, + { + "epoch": 0.01, + "learning_rate": 4.997780020621978e-05, + "loss": 0.4158, + "step": 13514 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977796896642546e-05, + "loss": 0.4371, + "step": 13515 + }, + { + "epoch": 0.01, + "learning_rate": 4.997779358681874e-05, + "loss": 0.6408, + "step": 13516 + }, + { + "epoch": 0.01, + "learning_rate": 4.997779027674836e-05, + "loss": 0.5063, + "step": 13517 + }, + { + "epoch": 0.01, + "learning_rate": 4.997778696643142e-05, + "loss": 0.4855, + "step": 13518 + }, + { + "epoch": 0.01, + "learning_rate": 4.997778365586789e-05, + "loss": 0.5207, + "step": 13519 + }, + { + "epoch": 0.01, + "learning_rate": 4.997778034505781e-05, + "loss": 0.4506, + "step": 13520 + }, + { + "epoch": 0.01, + "learning_rate": 4.997777703400115e-05, + "loss": 0.4622, + "step": 13521 + }, + { + "epoch": 0.01, + "learning_rate": 4.997777372269792e-05, + "loss": 0.4479, + "step": 13522 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977770411148125e-05, + "loss": 0.4662, + "step": 13523 + }, + { + "epoch": 0.01, + "learning_rate": 4.997776709935176e-05, + "loss": 0.4799, + "step": 13524 + }, + { + "epoch": 0.01, + "learning_rate": 4.997776378730882e-05, + "loss": 0.4619, + "step": 13525 + }, + { + "epoch": 0.01, + "learning_rate": 4.997776047501931e-05, + "loss": 0.4242, + "step": 13526 + }, + { + "epoch": 0.01, + "learning_rate": 4.997775716248323e-05, + "loss": 0.4824, + "step": 13527 + }, + { + "epoch": 0.01, + "learning_rate": 4.997775384970058e-05, + "loss": 0.463, + "step": 13528 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977750536671366e-05, + "loss": 0.4151, + "step": 13529 + }, + { + "epoch": 0.01, + "learning_rate": 4.997774722339557e-05, + "loss": 0.416, + "step": 13530 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977743909873216e-05, + "loss": 0.4624, + "step": 13531 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977740596104285e-05, + "loss": 0.4563, + "step": 13532 + }, + { + "epoch": 0.01, + "learning_rate": 4.997773728208879e-05, + "loss": 0.4372, + "step": 13533 + }, + { + "epoch": 0.01, + "learning_rate": 4.997773396782672e-05, + "loss": 0.5665, + "step": 13534 + }, + { + "epoch": 0.01, + "learning_rate": 4.997773065331809e-05, + "loss": 0.4993, + "step": 13535 + }, + { + "epoch": 0.01, + "learning_rate": 4.997772733856288e-05, + "loss": 0.4868, + "step": 13536 + }, + { + "epoch": 0.01, + "learning_rate": 4.99777240235611e-05, + "loss": 0.5029, + "step": 13537 + }, + { + "epoch": 0.01, + "learning_rate": 4.997772070831276e-05, + "loss": 0.51, + "step": 13538 + }, + { + "epoch": 0.01, + "learning_rate": 4.997771739281784e-05, + "loss": 0.5196, + "step": 13539 + }, + { + "epoch": 0.01, + "learning_rate": 4.997771407707635e-05, + "loss": 0.4814, + "step": 13540 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977710761088304e-05, + "loss": 0.4599, + "step": 13541 + }, + { + "epoch": 0.01, + "learning_rate": 4.997770744485368e-05, + "loss": 0.4118, + "step": 13542 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977704128372483e-05, + "loss": 0.4171, + "step": 13543 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977700811644725e-05, + "loss": 0.4187, + "step": 13544 + }, + { + "epoch": 0.01, + "learning_rate": 4.997769749467039e-05, + "loss": 0.4279, + "step": 13545 + }, + { + "epoch": 0.01, + "learning_rate": 4.997769417744949e-05, + "loss": 0.4406, + "step": 13546 + }, + { + "epoch": 0.01, + "learning_rate": 4.997769085998202e-05, + "loss": 0.4483, + "step": 13547 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977687542267984e-05, + "loss": 0.4418, + "step": 13548 + }, + { + "epoch": 0.01, + "learning_rate": 4.997768422430737e-05, + "loss": 0.4313, + "step": 13549 + }, + { + "epoch": 0.01, + "learning_rate": 4.997768090610019e-05, + "loss": 0.4078, + "step": 13550 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977677587646446e-05, + "loss": 0.4018, + "step": 13551 + }, + { + "epoch": 0.01, + "learning_rate": 4.997767426894613e-05, + "loss": 0.5116, + "step": 13552 + }, + { + "epoch": 0.01, + "learning_rate": 4.997767094999925e-05, + "loss": 0.7394, + "step": 13553 + }, + { + "epoch": 0.01, + "learning_rate": 4.997766763080579e-05, + "loss": 0.5174, + "step": 13554 + }, + { + "epoch": 0.01, + "learning_rate": 4.997766431136577e-05, + "loss": 0.4871, + "step": 13555 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977660991679176e-05, + "loss": 0.7955, + "step": 13556 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977657671746014e-05, + "loss": 1.3388, + "step": 13557 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977654351566285e-05, + "loss": 0.621, + "step": 13558 + }, + { + "epoch": 0.01, + "learning_rate": 4.997765103113998e-05, + "loss": 0.3241, + "step": 13559 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977647710467115e-05, + "loss": 0.6977, + "step": 13560 + }, + { + "epoch": 0.01, + "learning_rate": 4.997764438954768e-05, + "loss": 1.4052, + "step": 13561 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977641068381674e-05, + "loss": 1.2271, + "step": 13562 + }, + { + "epoch": 0.01, + "learning_rate": 4.99776377469691e-05, + "loss": 1.379, + "step": 13563 + }, + { + "epoch": 0.01, + "learning_rate": 4.997763442530996e-05, + "loss": 1.7043, + "step": 13564 + }, + { + "epoch": 0.01, + "learning_rate": 4.997763110340424e-05, + "loss": 1.728, + "step": 13565 + }, + { + "epoch": 0.01, + "learning_rate": 4.997762778125197e-05, + "loss": 1.3264, + "step": 13566 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977624458853116e-05, + "loss": 1.2016, + "step": 13567 + }, + { + "epoch": 0.01, + "learning_rate": 4.99776211362077e-05, + "loss": 0.9734, + "step": 13568 + }, + { + "epoch": 0.01, + "learning_rate": 4.997761781331571e-05, + "loss": 1.1336, + "step": 13569 + }, + { + "epoch": 0.01, + "learning_rate": 4.997761449017716e-05, + "loss": 1.1087, + "step": 13570 + }, + { + "epoch": 0.01, + "learning_rate": 4.997761116679204e-05, + "loss": 1.0512, + "step": 13571 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977607843160346e-05, + "loss": 1.1937, + "step": 13572 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977604519282085e-05, + "loss": 1.1724, + "step": 13573 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977601195157255e-05, + "loss": 1.1913, + "step": 13574 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977597870785864e-05, + "loss": 1.0414, + "step": 13575 + }, + { + "epoch": 0.01, + "learning_rate": 4.997759454616789e-05, + "loss": 1.2591, + "step": 13576 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977591221303366e-05, + "loss": 0.7551, + "step": 13577 + }, + { + "epoch": 0.01, + "learning_rate": 4.997758789619226e-05, + "loss": 1.1312, + "step": 13578 + }, + { + "epoch": 0.01, + "learning_rate": 4.997758457083459e-05, + "loss": 0.9839, + "step": 13579 + }, + { + "epoch": 0.01, + "learning_rate": 4.997758124523035e-05, + "loss": 1.0888, + "step": 13580 + }, + { + "epoch": 0.01, + "learning_rate": 4.997757791937955e-05, + "loss": 1.0604, + "step": 13581 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977574593282174e-05, + "loss": 1.04, + "step": 13582 + }, + { + "epoch": 0.01, + "learning_rate": 4.997757126693823e-05, + "loss": 1.0527, + "step": 13583 + }, + { + "epoch": 0.01, + "learning_rate": 4.997756794034772e-05, + "loss": 0.9416, + "step": 13584 + }, + { + "epoch": 0.01, + "learning_rate": 4.997756461351064e-05, + "loss": 1.3833, + "step": 13585 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977561286427e-05, + "loss": 1.0416, + "step": 13586 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977557959096785e-05, + "loss": 0.8595, + "step": 13587 + }, + { + "epoch": 0.01, + "learning_rate": 4.997755463152e-05, + "loss": 0.4039, + "step": 13588 + }, + { + "epoch": 0.01, + "learning_rate": 4.997755130369664e-05, + "loss": 0.312, + "step": 13589 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977547975626734e-05, + "loss": 0.8537, + "step": 13590 + }, + { + "epoch": 0.01, + "learning_rate": 4.997754464731025e-05, + "loss": 0.9302, + "step": 13591 + }, + { + "epoch": 0.01, + "learning_rate": 4.997754131874719e-05, + "loss": 1.1255, + "step": 13592 + }, + { + "epoch": 0.01, + "learning_rate": 4.997753798993757e-05, + "loss": 1.3073, + "step": 13593 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977534660881384e-05, + "loss": 1.0837, + "step": 13594 + }, + { + "epoch": 0.01, + "learning_rate": 4.997753133157863e-05, + "loss": 0.9998, + "step": 13595 + }, + { + "epoch": 0.01, + "learning_rate": 4.99775280020293e-05, + "loss": 1.1223, + "step": 13596 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977524672233414e-05, + "loss": 0.9514, + "step": 13597 + }, + { + "epoch": 0.01, + "learning_rate": 4.997752134219095e-05, + "loss": 1.4154, + "step": 13598 + }, + { + "epoch": 0.01, + "learning_rate": 4.997751801190193e-05, + "loss": 1.2093, + "step": 13599 + }, + { + "epoch": 0.01, + "learning_rate": 4.997751468136632e-05, + "loss": 1.1492, + "step": 13600 + }, + { + "epoch": 0.01, + "learning_rate": 4.997751135058416e-05, + "loss": 1.0651, + "step": 13601 + }, + { + "epoch": 0.01, + "learning_rate": 4.997750801955543e-05, + "loss": 1.1727, + "step": 13602 + }, + { + "epoch": 0.01, + "learning_rate": 4.997750468828013e-05, + "loss": 1.0718, + "step": 13603 + }, + { + "epoch": 0.01, + "learning_rate": 4.997750135675827e-05, + "loss": 1.1734, + "step": 13604 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977498024989845e-05, + "loss": 1.1093, + "step": 13605 + }, + { + "epoch": 0.01, + "learning_rate": 4.997749469297484e-05, + "loss": 0.9622, + "step": 13606 + }, + { + "epoch": 0.01, + "learning_rate": 4.997749136071327e-05, + "loss": 1.1411, + "step": 13607 + }, + { + "epoch": 0.01, + "learning_rate": 4.997748802820514e-05, + "loss": 1.2939, + "step": 13608 + }, + { + "epoch": 0.01, + "learning_rate": 4.997748469545044e-05, + "loss": 1.1855, + "step": 13609 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977481362449174e-05, + "loss": 1.1944, + "step": 13610 + }, + { + "epoch": 0.01, + "learning_rate": 4.997747802920133e-05, + "loss": 1.3632, + "step": 13611 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977474695706936e-05, + "loss": 0.9715, + "step": 13612 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977471361965965e-05, + "loss": 0.9561, + "step": 13613 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977468027978426e-05, + "loss": 1.3744, + "step": 13614 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977464693744326e-05, + "loss": 1.0924, + "step": 13615 + }, + { + "epoch": 0.01, + "learning_rate": 4.997746135926366e-05, + "loss": 1.1654, + "step": 13616 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977458024536416e-05, + "loss": 1.1529, + "step": 13617 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977454689562605e-05, + "loss": 0.9529, + "step": 13618 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977451354342233e-05, + "loss": 1.2269, + "step": 13619 + }, + { + "epoch": 0.01, + "learning_rate": 4.99774480188753e-05, + "loss": 1.3335, + "step": 13620 + }, + { + "epoch": 0.01, + "learning_rate": 4.997744468316179e-05, + "loss": 1.3038, + "step": 13621 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977441347201725e-05, + "loss": 1.1051, + "step": 13622 + }, + { + "epoch": 0.01, + "learning_rate": 4.997743801099508e-05, + "loss": 1.0032, + "step": 13623 + }, + { + "epoch": 0.01, + "learning_rate": 4.997743467454188e-05, + "loss": 1.327, + "step": 13624 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977431337842105e-05, + "loss": 1.2184, + "step": 13625 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977428000895765e-05, + "loss": 1.1238, + "step": 13626 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977424663702857e-05, + "loss": 1.5121, + "step": 13627 + }, + { + "epoch": 0.01, + "learning_rate": 4.997742132626339e-05, + "loss": 1.0176, + "step": 13628 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977417988577343e-05, + "loss": 0.9828, + "step": 13629 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977414650644745e-05, + "loss": 0.8862, + "step": 13630 + }, + { + "epoch": 0.01, + "learning_rate": 4.997741131246557e-05, + "loss": 0.8507, + "step": 13631 + }, + { + "epoch": 0.01, + "learning_rate": 4.997740797403984e-05, + "loss": 0.8304, + "step": 13632 + }, + { + "epoch": 0.01, + "learning_rate": 4.997740463536753e-05, + "loss": 0.7821, + "step": 13633 + }, + { + "epoch": 0.01, + "learning_rate": 4.997740129644865e-05, + "loss": 0.7177, + "step": 13634 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977397957283215e-05, + "loss": 0.8141, + "step": 13635 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977394617871216e-05, + "loss": 0.684, + "step": 13636 + }, + { + "epoch": 0.01, + "learning_rate": 4.997739127821264e-05, + "loss": 0.7407, + "step": 13637 + }, + { + "epoch": 0.01, + "learning_rate": 4.997738793830751e-05, + "loss": 0.973, + "step": 13638 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977384598155805e-05, + "loss": 1.2165, + "step": 13639 + }, + { + "epoch": 0.01, + "learning_rate": 4.997738125775754e-05, + "loss": 1.1518, + "step": 13640 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977377917112697e-05, + "loss": 1.25, + "step": 13641 + }, + { + "epoch": 0.01, + "learning_rate": 4.99773745762213e-05, + "loss": 0.8807, + "step": 13642 + }, + { + "epoch": 0.01, + "learning_rate": 4.997737123508333e-05, + "loss": 1.2195, + "step": 13643 + }, + { + "epoch": 0.01, + "learning_rate": 4.99773678936988e-05, + "loss": 1.1316, + "step": 13644 + }, + { + "epoch": 0.01, + "learning_rate": 4.99773645520677e-05, + "loss": 1.1911, + "step": 13645 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977361210190033e-05, + "loss": 0.9835, + "step": 13646 + }, + { + "epoch": 0.01, + "learning_rate": 4.99773578680658e-05, + "loss": 0.8384, + "step": 13647 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977354525695e-05, + "loss": 1.0689, + "step": 13648 + }, + { + "epoch": 0.01, + "learning_rate": 4.997735118307764e-05, + "loss": 1.3812, + "step": 13649 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977347840213704e-05, + "loss": 1.0354, + "step": 13650 + }, + { + "epoch": 0.01, + "learning_rate": 4.997734449710322e-05, + "loss": 1.2625, + "step": 13651 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977341153746155e-05, + "loss": 1.1085, + "step": 13652 + }, + { + "epoch": 0.01, + "learning_rate": 4.997733781014253e-05, + "loss": 0.6286, + "step": 13653 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977334466292335e-05, + "loss": 1.4704, + "step": 13654 + }, + { + "epoch": 0.01, + "learning_rate": 4.997733112219557e-05, + "loss": 0.4904, + "step": 13655 + }, + { + "epoch": 0.01, + "learning_rate": 4.997732777785225e-05, + "loss": 0.2927, + "step": 13656 + }, + { + "epoch": 0.01, + "learning_rate": 4.997732443326236e-05, + "loss": 0.2228, + "step": 13657 + }, + { + "epoch": 0.01, + "learning_rate": 4.997732108842591e-05, + "loss": 0.7885, + "step": 13658 + }, + { + "epoch": 0.01, + "learning_rate": 4.997731774334289e-05, + "loss": 1.2204, + "step": 13659 + }, + { + "epoch": 0.01, + "learning_rate": 4.99773143980133e-05, + "loss": 1.2167, + "step": 13660 + }, + { + "epoch": 0.01, + "learning_rate": 4.997731105243715e-05, + "loss": 1.1974, + "step": 13661 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977307706614427e-05, + "loss": 1.1804, + "step": 13662 + }, + { + "epoch": 0.01, + "learning_rate": 4.997730436054515e-05, + "loss": 1.1207, + "step": 13663 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977301014229296e-05, + "loss": 0.9005, + "step": 13664 + }, + { + "epoch": 0.01, + "learning_rate": 4.997729766766688e-05, + "loss": 0.9722, + "step": 13665 + }, + { + "epoch": 0.01, + "learning_rate": 4.997729432085791e-05, + "loss": 0.9487, + "step": 13666 + }, + { + "epoch": 0.01, + "learning_rate": 4.997729097380236e-05, + "loss": 1.0496, + "step": 13667 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977287626500246e-05, + "loss": 1.1762, + "step": 13668 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977284278951575e-05, + "loss": 0.9811, + "step": 13669 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977280931156335e-05, + "loss": 0.8552, + "step": 13670 + }, + { + "epoch": 0.01, + "learning_rate": 4.997727758311453e-05, + "loss": 1.0674, + "step": 13671 + }, + { + "epoch": 0.01, + "learning_rate": 4.997727423482616e-05, + "loss": 1.0416, + "step": 13672 + }, + { + "epoch": 0.01, + "learning_rate": 4.997727088629123e-05, + "loss": 1.0856, + "step": 13673 + }, + { + "epoch": 0.01, + "learning_rate": 4.997726753750972e-05, + "loss": 1.3287, + "step": 13674 + }, + { + "epoch": 0.01, + "learning_rate": 4.997726418848165e-05, + "loss": 1.2644, + "step": 13675 + }, + { + "epoch": 0.01, + "learning_rate": 4.997726083920703e-05, + "loss": 0.824, + "step": 13676 + }, + { + "epoch": 0.01, + "learning_rate": 4.997725748968583e-05, + "loss": 1.0309, + "step": 13677 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977254139918073e-05, + "loss": 1.0639, + "step": 13678 + }, + { + "epoch": 0.01, + "learning_rate": 4.997725078990374e-05, + "loss": 1.3336, + "step": 13679 + }, + { + "epoch": 0.01, + "learning_rate": 4.997724743964285e-05, + "loss": 1.3766, + "step": 13680 + }, + { + "epoch": 0.01, + "learning_rate": 4.99772440891354e-05, + "loss": 0.897, + "step": 13681 + }, + { + "epoch": 0.01, + "learning_rate": 4.997724073838138e-05, + "loss": 1.1617, + "step": 13682 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977237387380796e-05, + "loss": 1.2439, + "step": 13683 + }, + { + "epoch": 0.01, + "learning_rate": 4.997723403613364e-05, + "loss": 1.3029, + "step": 13684 + }, + { + "epoch": 0.01, + "learning_rate": 4.997723068463993e-05, + "loss": 1.1788, + "step": 13685 + }, + { + "epoch": 0.01, + "learning_rate": 4.997722733289964e-05, + "loss": 1.1932, + "step": 13686 + }, + { + "epoch": 0.01, + "learning_rate": 4.99772239809128e-05, + "loss": 1.3462, + "step": 13687 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977220628679395e-05, + "loss": 1.4004, + "step": 13688 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977217276199426e-05, + "loss": 1.3485, + "step": 13689 + }, + { + "epoch": 0.01, + "learning_rate": 4.997721392347289e-05, + "loss": 1.3562, + "step": 13690 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977210570499784e-05, + "loss": 1.2233, + "step": 13691 + }, + { + "epoch": 0.01, + "learning_rate": 4.997720721728011e-05, + "loss": 1.0757, + "step": 13692 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977203863813884e-05, + "loss": 0.9933, + "step": 13693 + }, + { + "epoch": 0.01, + "learning_rate": 4.997720051010108e-05, + "loss": 1.1821, + "step": 13694 + }, + { + "epoch": 0.01, + "learning_rate": 4.997719715614173e-05, + "loss": 1.3159, + "step": 13695 + }, + { + "epoch": 0.01, + "learning_rate": 4.997719380193581e-05, + "loss": 1.2139, + "step": 13696 + }, + { + "epoch": 0.01, + "learning_rate": 4.997719044748331e-05, + "loss": 2.1006, + "step": 13697 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977187092784264e-05, + "loss": 1.1615, + "step": 13698 + }, + { + "epoch": 0.01, + "learning_rate": 4.997718373783864e-05, + "loss": 1.2347, + "step": 13699 + }, + { + "epoch": 0.01, + "learning_rate": 4.997718038264646e-05, + "loss": 1.1515, + "step": 13700 + }, + { + "epoch": 0.01, + "learning_rate": 4.997717702720772e-05, + "loss": 0.917, + "step": 13701 + }, + { + "epoch": 0.01, + "learning_rate": 4.99771736715224e-05, + "loss": 0.9968, + "step": 13702 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977170315590535e-05, + "loss": 1.0793, + "step": 13703 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977166959412095e-05, + "loss": 0.9374, + "step": 13704 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977163602987094e-05, + "loss": 1.0369, + "step": 13705 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977160246315525e-05, + "loss": 1.1964, + "step": 13706 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977156889397394e-05, + "loss": 0.9708, + "step": 13707 + }, + { + "epoch": 0.01, + "learning_rate": 4.99771535322327e-05, + "loss": 1.1762, + "step": 13708 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977150174821444e-05, + "loss": 1.1125, + "step": 13709 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977146817163623e-05, + "loss": 1.1945, + "step": 13710 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977143459259235e-05, + "loss": 0.8692, + "step": 13711 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977140101108286e-05, + "loss": 0.5989, + "step": 13712 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977136742710775e-05, + "loss": 0.9482, + "step": 13713 + }, + { + "epoch": 0.01, + "learning_rate": 4.99771333840667e-05, + "loss": 0.6659, + "step": 13714 + }, + { + "epoch": 0.01, + "learning_rate": 4.997713002517606e-05, + "loss": 1.0247, + "step": 13715 + }, + { + "epoch": 0.01, + "learning_rate": 4.997712666603885e-05, + "loss": 1.2016, + "step": 13716 + }, + { + "epoch": 0.01, + "learning_rate": 4.997712330665508e-05, + "loss": 0.8925, + "step": 13717 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977119947024744e-05, + "loss": 1.1947, + "step": 13718 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977116587147854e-05, + "loss": 1.1535, + "step": 13719 + }, + { + "epoch": 0.01, + "learning_rate": 4.997711322702439e-05, + "loss": 1.2114, + "step": 13720 + }, + { + "epoch": 0.01, + "learning_rate": 4.997710986665437e-05, + "loss": 0.9377, + "step": 13721 + }, + { + "epoch": 0.01, + "learning_rate": 4.997710650603779e-05, + "loss": 1.2785, + "step": 13722 + }, + { + "epoch": 0.01, + "learning_rate": 4.997710314517463e-05, + "loss": 1.0253, + "step": 13723 + }, + { + "epoch": 0.01, + "learning_rate": 4.997709978406492e-05, + "loss": 1.0243, + "step": 13724 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977096422708643e-05, + "loss": 1.2286, + "step": 13725 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977093061105804e-05, + "loss": 1.0534, + "step": 13726 + }, + { + "epoch": 0.01, + "learning_rate": 4.99770896992564e-05, + "loss": 1.121, + "step": 13727 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977086337160436e-05, + "loss": 1.4994, + "step": 13728 + }, + { + "epoch": 0.01, + "learning_rate": 4.99770829748179e-05, + "loss": 0.7856, + "step": 13729 + }, + { + "epoch": 0.01, + "learning_rate": 4.99770796122288e-05, + "loss": 0.9881, + "step": 13730 + }, + { + "epoch": 0.01, + "learning_rate": 4.997707624939315e-05, + "loss": 0.8364, + "step": 13731 + }, + { + "epoch": 0.01, + "learning_rate": 4.997707288631093e-05, + "loss": 1.0286, + "step": 13732 + }, + { + "epoch": 0.01, + "learning_rate": 4.997706952298215e-05, + "loss": 1.2626, + "step": 13733 + }, + { + "epoch": 0.01, + "learning_rate": 4.99770661594068e-05, + "loss": 1.2507, + "step": 13734 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977062795584893e-05, + "loss": 1.246, + "step": 13735 + }, + { + "epoch": 0.01, + "learning_rate": 4.997705943151643e-05, + "loss": 1.1015, + "step": 13736 + }, + { + "epoch": 0.01, + "learning_rate": 4.997705606720139e-05, + "loss": 0.9253, + "step": 13737 + }, + { + "epoch": 0.01, + "learning_rate": 4.997705270263979e-05, + "loss": 1.2107, + "step": 13738 + }, + { + "epoch": 0.01, + "learning_rate": 4.997704933783163e-05, + "loss": 1.1612, + "step": 13739 + }, + { + "epoch": 0.01, + "learning_rate": 4.99770459727769e-05, + "loss": 1.1212, + "step": 13740 + }, + { + "epoch": 0.01, + "learning_rate": 4.997704260747562e-05, + "loss": 1.1298, + "step": 13741 + }, + { + "epoch": 0.01, + "learning_rate": 4.997703924192777e-05, + "loss": 1.1805, + "step": 13742 + }, + { + "epoch": 0.01, + "learning_rate": 4.997703587613336e-05, + "loss": 0.9882, + "step": 13743 + }, + { + "epoch": 0.01, + "learning_rate": 4.997703251009238e-05, + "loss": 0.9589, + "step": 13744 + }, + { + "epoch": 0.01, + "learning_rate": 4.997702914380484e-05, + "loss": 1.1904, + "step": 13745 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977025777270746e-05, + "loss": 1.1239, + "step": 13746 + }, + { + "epoch": 0.01, + "learning_rate": 4.997702241049008e-05, + "loss": 1.209, + "step": 13747 + }, + { + "epoch": 0.01, + "learning_rate": 4.997701904346286e-05, + "loss": 0.9033, + "step": 13748 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977015676189065e-05, + "loss": 0.9875, + "step": 13749 + }, + { + "epoch": 0.01, + "learning_rate": 4.997701230866871e-05, + "loss": 1.0884, + "step": 13750 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977008940901804e-05, + "loss": 1.3568, + "step": 13751 + }, + { + "epoch": 0.01, + "learning_rate": 4.997700557288832e-05, + "loss": 1.1509, + "step": 13752 + }, + { + "epoch": 0.01, + "learning_rate": 4.9977002204628284e-05, + "loss": 1.2721, + "step": 13753 + }, + { + "epoch": 0.01, + "learning_rate": 4.997699883612169e-05, + "loss": 1.1617, + "step": 13754 + }, + { + "epoch": 0.01, + "learning_rate": 4.997699546736853e-05, + "loss": 1.172, + "step": 13755 + }, + { + "epoch": 0.01, + "learning_rate": 4.99769920983688e-05, + "loss": 1.154, + "step": 13756 + }, + { + "epoch": 0.01, + "learning_rate": 4.997698872912251e-05, + "loss": 1.2378, + "step": 13757 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976985359629656e-05, + "loss": 1.0012, + "step": 13758 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976981989890245e-05, + "loss": 1.0612, + "step": 13759 + }, + { + "epoch": 0.01, + "learning_rate": 4.997697861990427e-05, + "loss": 1.1235, + "step": 13760 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976975249671734e-05, + "loss": 1.1316, + "step": 13761 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976971879192633e-05, + "loss": 0.9852, + "step": 13762 + }, + { + "epoch": 0.01, + "learning_rate": 4.997696850846697e-05, + "loss": 1.066, + "step": 13763 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976965137494756e-05, + "loss": 1.265, + "step": 13764 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976961766275965e-05, + "loss": 1.0337, + "step": 13765 + }, + { + "epoch": 0.01, + "learning_rate": 4.997695839481062e-05, + "loss": 1.1193, + "step": 13766 + }, + { + "epoch": 0.01, + "learning_rate": 4.997695502309871e-05, + "loss": 1.2403, + "step": 13767 + }, + { + "epoch": 0.01, + "learning_rate": 4.997695165114023e-05, + "loss": 1.0786, + "step": 13768 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976948278935204e-05, + "loss": 0.9102, + "step": 13769 + }, + { + "epoch": 0.01, + "learning_rate": 4.997694490648361e-05, + "loss": 1.4613, + "step": 13770 + }, + { + "epoch": 0.01, + "learning_rate": 4.997694153378545e-05, + "loss": 1.3508, + "step": 13771 + }, + { + "epoch": 0.01, + "learning_rate": 4.997693816084073e-05, + "loss": 1.226, + "step": 13772 + }, + { + "epoch": 0.01, + "learning_rate": 4.997693478764945e-05, + "loss": 1.1583, + "step": 13773 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976931414211604e-05, + "loss": 0.9677, + "step": 13774 + }, + { + "epoch": 0.01, + "learning_rate": 4.99769280405272e-05, + "loss": 1.0938, + "step": 13775 + }, + { + "epoch": 0.01, + "learning_rate": 4.997692466659624e-05, + "loss": 1.1531, + "step": 13776 + }, + { + "epoch": 0.01, + "learning_rate": 4.997692129241871e-05, + "loss": 1.0068, + "step": 13777 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976917917994615e-05, + "loss": 1.1943, + "step": 13778 + }, + { + "epoch": 0.01, + "learning_rate": 4.997691454332397e-05, + "loss": 1.0498, + "step": 13779 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976911168406754e-05, + "loss": 1.2163, + "step": 13780 + }, + { + "epoch": 0.01, + "learning_rate": 4.997690779324298e-05, + "loss": 1.2518, + "step": 13781 + }, + { + "epoch": 0.01, + "learning_rate": 4.997690441783265e-05, + "loss": 1.278, + "step": 13782 + }, + { + "epoch": 0.01, + "learning_rate": 4.997690104217575e-05, + "loss": 1.1028, + "step": 13783 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976897666272285e-05, + "loss": 0.6773, + "step": 13784 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976894290122265e-05, + "loss": 1.1019, + "step": 13785 + }, + { + "epoch": 0.01, + "learning_rate": 4.997689091372569e-05, + "loss": 1.1219, + "step": 13786 + }, + { + "epoch": 0.01, + "learning_rate": 4.997688753708255e-05, + "loss": 1.1479, + "step": 13787 + }, + { + "epoch": 0.01, + "learning_rate": 4.997688416019284e-05, + "loss": 1.0981, + "step": 13788 + }, + { + "epoch": 0.01, + "learning_rate": 4.997688078305658e-05, + "loss": 0.9778, + "step": 13789 + }, + { + "epoch": 0.01, + "learning_rate": 4.997687740567375e-05, + "loss": 1.0922, + "step": 13790 + }, + { + "epoch": 0.01, + "learning_rate": 4.997687402804436e-05, + "loss": 1.0741, + "step": 13791 + }, + { + "epoch": 0.01, + "learning_rate": 4.997687065016841e-05, + "loss": 0.9276, + "step": 13792 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976867272045904e-05, + "loss": 1.2373, + "step": 13793 + }, + { + "epoch": 0.01, + "learning_rate": 4.997686389367683e-05, + "loss": 0.9606, + "step": 13794 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976860515061196e-05, + "loss": 1.0542, + "step": 13795 + }, + { + "epoch": 0.01, + "learning_rate": 4.997685713619901e-05, + "loss": 1.1851, + "step": 13796 + }, + { + "epoch": 0.01, + "learning_rate": 4.997685375709025e-05, + "loss": 1.0827, + "step": 13797 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976850377734935e-05, + "loss": 0.9717, + "step": 13798 + }, + { + "epoch": 0.01, + "learning_rate": 4.997684699813306e-05, + "loss": 1.2958, + "step": 13799 + }, + { + "epoch": 0.01, + "learning_rate": 4.997684361828462e-05, + "loss": 0.897, + "step": 13800 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976840238189624e-05, + "loss": 1.0937, + "step": 13801 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976836857848065e-05, + "loss": 0.9241, + "step": 13802 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976833477259944e-05, + "loss": 0.94, + "step": 13803 + }, + { + "epoch": 0.01, + "learning_rate": 4.997683009642526e-05, + "loss": 1.1254, + "step": 13804 + }, + { + "epoch": 0.01, + "learning_rate": 4.997682671534402e-05, + "loss": 0.9907, + "step": 13805 + }, + { + "epoch": 0.01, + "learning_rate": 4.997682333401622e-05, + "loss": 1.2535, + "step": 13806 + }, + { + "epoch": 0.01, + "learning_rate": 4.997681995244186e-05, + "loss": 1.0171, + "step": 13807 + }, + { + "epoch": 0.01, + "learning_rate": 4.997681657062093e-05, + "loss": 0.9388, + "step": 13808 + }, + { + "epoch": 0.01, + "learning_rate": 4.997681318855345e-05, + "loss": 0.8717, + "step": 13809 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976809806239404e-05, + "loss": 1.3064, + "step": 13810 + }, + { + "epoch": 0.01, + "learning_rate": 4.99768064236788e-05, + "loss": 1.0957, + "step": 13811 + }, + { + "epoch": 0.01, + "learning_rate": 4.997680304087163e-05, + "loss": 1.0988, + "step": 13812 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976799657817906e-05, + "loss": 1.0316, + "step": 13813 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976796274517627e-05, + "loss": 0.9699, + "step": 13814 + }, + { + "epoch": 0.01, + "learning_rate": 4.997679289097077e-05, + "loss": 0.864, + "step": 13815 + }, + { + "epoch": 0.01, + "learning_rate": 4.997678950717737e-05, + "loss": 0.9019, + "step": 13816 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976786123137395e-05, + "loss": 1.2512, + "step": 13817 + }, + { + "epoch": 0.01, + "learning_rate": 4.997678273885087e-05, + "loss": 1.2744, + "step": 13818 + }, + { + "epoch": 0.01, + "learning_rate": 4.997677935431778e-05, + "loss": 1.0352, + "step": 13819 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976775969538134e-05, + "loss": 1.2294, + "step": 13820 + }, + { + "epoch": 0.01, + "learning_rate": 4.997677258451193e-05, + "loss": 1.4391, + "step": 13821 + }, + { + "epoch": 0.01, + "learning_rate": 4.997676919923916e-05, + "loss": 0.9864, + "step": 13822 + }, + { + "epoch": 0.01, + "learning_rate": 4.997676581371983e-05, + "loss": 1.2285, + "step": 13823 + }, + { + "epoch": 0.01, + "learning_rate": 4.997676242795394e-05, + "loss": 1.2572, + "step": 13824 + }, + { + "epoch": 0.01, + "learning_rate": 4.997675904194149e-05, + "loss": 1.0138, + "step": 13825 + }, + { + "epoch": 0.01, + "learning_rate": 4.997675565568248e-05, + "loss": 0.3226, + "step": 13826 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976752269176914e-05, + "loss": 1.0268, + "step": 13827 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976748882424786e-05, + "loss": 1.1284, + "step": 13828 + }, + { + "epoch": 0.01, + "learning_rate": 4.997674549542609e-05, + "loss": 1.2994, + "step": 13829 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976742108180846e-05, + "loss": 0.5558, + "step": 13830 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976738720689034e-05, + "loss": 0.984, + "step": 13831 + }, + { + "epoch": 0.01, + "learning_rate": 4.997673533295067e-05, + "loss": 1.2748, + "step": 13832 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976731944965735e-05, + "loss": 1.2244, + "step": 13833 + }, + { + "epoch": 0.01, + "learning_rate": 4.997672855673425e-05, + "loss": 1.4405, + "step": 13834 + }, + { + "epoch": 0.01, + "learning_rate": 4.99767251682562e-05, + "loss": 1.2221, + "step": 13835 + }, + { + "epoch": 0.01, + "learning_rate": 4.99767217795316e-05, + "loss": 1.0759, + "step": 13836 + }, + { + "epoch": 0.01, + "learning_rate": 4.997671839056043e-05, + "loss": 0.7912, + "step": 13837 + }, + { + "epoch": 0.01, + "learning_rate": 4.99767150013427e-05, + "loss": 0.9559, + "step": 13838 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976711611878416e-05, + "loss": 1.0427, + "step": 13839 + }, + { + "epoch": 0.01, + "learning_rate": 4.997670822216757e-05, + "loss": 1.1443, + "step": 13840 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976704832210165e-05, + "loss": 0.9395, + "step": 13841 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976701442006195e-05, + "loss": 1.1513, + "step": 13842 + }, + { + "epoch": 0.01, + "learning_rate": 4.997669805155567e-05, + "loss": 1.2022, + "step": 13843 + }, + { + "epoch": 0.01, + "learning_rate": 4.997669466085859e-05, + "loss": 1.0524, + "step": 13844 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976691269914944e-05, + "loss": 1.107, + "step": 13845 + }, + { + "epoch": 0.01, + "learning_rate": 4.997668787872474e-05, + "loss": 0.8735, + "step": 13846 + }, + { + "epoch": 0.01, + "learning_rate": 4.997668448728799e-05, + "loss": 1.0215, + "step": 13847 + }, + { + "epoch": 0.01, + "learning_rate": 4.997668109560466e-05, + "loss": 1.1086, + "step": 13848 + }, + { + "epoch": 0.01, + "learning_rate": 4.997667770367478e-05, + "loss": 1.0567, + "step": 13849 + }, + { + "epoch": 0.01, + "learning_rate": 4.997667431149834e-05, + "loss": 1.3272, + "step": 13850 + }, + { + "epoch": 0.01, + "learning_rate": 4.997667091907534e-05, + "loss": 1.423, + "step": 13851 + }, + { + "epoch": 0.01, + "learning_rate": 4.997666752640578e-05, + "loss": 0.9504, + "step": 13852 + }, + { + "epoch": 0.01, + "learning_rate": 4.997666413348966e-05, + "loss": 1.1183, + "step": 13853 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976660740326985e-05, + "loss": 0.8809, + "step": 13854 + }, + { + "epoch": 0.01, + "learning_rate": 4.997665734691775e-05, + "loss": 1.2235, + "step": 13855 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976653953261955e-05, + "loss": 1.0271, + "step": 13856 + }, + { + "epoch": 0.01, + "learning_rate": 4.99766505593596e-05, + "loss": 1.1365, + "step": 13857 + }, + { + "epoch": 0.01, + "learning_rate": 4.997664716521069e-05, + "loss": 1.0548, + "step": 13858 + }, + { + "epoch": 0.01, + "learning_rate": 4.997664377081521e-05, + "loss": 1.1949, + "step": 13859 + }, + { + "epoch": 0.01, + "learning_rate": 4.997664037617319e-05, + "loss": 0.6866, + "step": 13860 + }, + { + "epoch": 0.01, + "learning_rate": 4.99766369812846e-05, + "loss": 0.8584, + "step": 13861 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976633586149444e-05, + "loss": 1.0817, + "step": 13862 + }, + { + "epoch": 0.01, + "learning_rate": 4.997663019076774e-05, + "loss": 1.0412, + "step": 13863 + }, + { + "epoch": 0.01, + "learning_rate": 4.997662679513948e-05, + "loss": 0.7251, + "step": 13864 + }, + { + "epoch": 0.01, + "learning_rate": 4.997662339926465e-05, + "loss": 1.0131, + "step": 13865 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976620003143265e-05, + "loss": 1.0863, + "step": 13866 + }, + { + "epoch": 0.01, + "learning_rate": 4.997661660677533e-05, + "loss": 1.1812, + "step": 13867 + }, + { + "epoch": 0.01, + "learning_rate": 4.997661321016083e-05, + "loss": 0.8374, + "step": 13868 + }, + { + "epoch": 0.01, + "learning_rate": 4.997660981329977e-05, + "loss": 1.0997, + "step": 13869 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976606416192154e-05, + "loss": 1.0978, + "step": 13870 + }, + { + "epoch": 0.01, + "learning_rate": 4.997660301883797e-05, + "loss": 1.1241, + "step": 13871 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976599621237236e-05, + "loss": 0.9462, + "step": 13872 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976596223389945e-05, + "loss": 1.0272, + "step": 13873 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976592825296086e-05, + "loss": 1.0391, + "step": 13874 + }, + { + "epoch": 0.01, + "learning_rate": 4.997658942695568e-05, + "loss": 1.1377, + "step": 13875 + }, + { + "epoch": 0.01, + "learning_rate": 4.997658602836871e-05, + "loss": 0.995, + "step": 13876 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976582629535184e-05, + "loss": 1.0341, + "step": 13877 + }, + { + "epoch": 0.01, + "learning_rate": 4.99765792304551e-05, + "loss": 1.0076, + "step": 13878 + }, + { + "epoch": 0.01, + "learning_rate": 4.997657583112846e-05, + "loss": 0.9456, + "step": 13879 + }, + { + "epoch": 0.01, + "learning_rate": 4.997657243155526e-05, + "loss": 1.1291, + "step": 13880 + }, + { + "epoch": 0.01, + "learning_rate": 4.99765690317355e-05, + "loss": 0.8903, + "step": 13881 + }, + { + "epoch": 0.01, + "learning_rate": 4.997656563166918e-05, + "loss": 1.1877, + "step": 13882 + }, + { + "epoch": 0.01, + "learning_rate": 4.99765622313563e-05, + "loss": 1.0815, + "step": 13883 + }, + { + "epoch": 0.01, + "learning_rate": 4.997655883079687e-05, + "loss": 0.9905, + "step": 13884 + }, + { + "epoch": 0.01, + "learning_rate": 4.997655542999087e-05, + "loss": 1.0751, + "step": 13885 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976552028938326e-05, + "loss": 0.8339, + "step": 13886 + }, + { + "epoch": 0.01, + "learning_rate": 4.997654862763922e-05, + "loss": 1.188, + "step": 13887 + }, + { + "epoch": 0.01, + "learning_rate": 4.997654522609355e-05, + "loss": 1.1361, + "step": 13888 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976541824301324e-05, + "loss": 1.0927, + "step": 13889 + }, + { + "epoch": 0.01, + "learning_rate": 4.997653842226254e-05, + "loss": 1.1153, + "step": 13890 + }, + { + "epoch": 0.01, + "learning_rate": 4.99765350199772e-05, + "loss": 1.0066, + "step": 13891 + }, + { + "epoch": 0.01, + "learning_rate": 4.997653161744531e-05, + "loss": 0.7331, + "step": 13892 + }, + { + "epoch": 0.01, + "learning_rate": 4.997652821466685e-05, + "loss": 0.8633, + "step": 13893 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976524811641834e-05, + "loss": 1.037, + "step": 13894 + }, + { + "epoch": 0.01, + "learning_rate": 4.997652140837027e-05, + "loss": 0.7927, + "step": 13895 + }, + { + "epoch": 0.01, + "learning_rate": 4.997651800485214e-05, + "loss": 0.9617, + "step": 13896 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976514601087454e-05, + "loss": 0.7226, + "step": 13897 + }, + { + "epoch": 0.01, + "learning_rate": 4.99765111970762e-05, + "loss": 1.095, + "step": 13898 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976507792818404e-05, + "loss": 1.0521, + "step": 13899 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976504388314044e-05, + "loss": 1.1347, + "step": 13900 + }, + { + "epoch": 0.01, + "learning_rate": 4.997650098356313e-05, + "loss": 1.0732, + "step": 13901 + }, + { + "epoch": 0.01, + "learning_rate": 4.997649757856565e-05, + "loss": 1.0931, + "step": 13902 + }, + { + "epoch": 0.01, + "learning_rate": 4.997649417332162e-05, + "loss": 1.1045, + "step": 13903 + }, + { + "epoch": 0.01, + "learning_rate": 4.997649076783103e-05, + "loss": 1.1639, + "step": 13904 + }, + { + "epoch": 0.01, + "learning_rate": 4.997648736209389e-05, + "loss": 0.9362, + "step": 13905 + }, + { + "epoch": 0.01, + "learning_rate": 4.997648395611018e-05, + "loss": 1.2209, + "step": 13906 + }, + { + "epoch": 0.01, + "learning_rate": 4.997648054987992e-05, + "loss": 1.1402, + "step": 13907 + }, + { + "epoch": 0.01, + "learning_rate": 4.99764771434031e-05, + "loss": 1.189, + "step": 13908 + }, + { + "epoch": 0.01, + "learning_rate": 4.997647373667973e-05, + "loss": 1.1464, + "step": 13909 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976470329709794e-05, + "loss": 1.0142, + "step": 13910 + }, + { + "epoch": 0.01, + "learning_rate": 4.99764669224933e-05, + "loss": 1.0851, + "step": 13911 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976463515030256e-05, + "loss": 1.3289, + "step": 13912 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976460107320656e-05, + "loss": 1.1941, + "step": 13913 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976456699364494e-05, + "loss": 0.8055, + "step": 13914 + }, + { + "epoch": 0.01, + "learning_rate": 4.997645329116177e-05, + "loss": 0.929, + "step": 13915 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976449882712495e-05, + "loss": 1.5083, + "step": 13916 + }, + { + "epoch": 0.01, + "learning_rate": 4.997644647401666e-05, + "loss": 1.3804, + "step": 13917 + }, + { + "epoch": 0.01, + "learning_rate": 4.997644306507428e-05, + "loss": 1.2218, + "step": 13918 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976439655885324e-05, + "loss": 1.1761, + "step": 13919 + }, + { + "epoch": 0.01, + "learning_rate": 4.997643624644982e-05, + "loss": 0.9478, + "step": 13920 + }, + { + "epoch": 0.01, + "learning_rate": 4.997643283676776e-05, + "loss": 1.2176, + "step": 13921 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976429426839144e-05, + "loss": 1.4068, + "step": 13922 + }, + { + "epoch": 0.01, + "learning_rate": 4.997642601666397e-05, + "loss": 1.0524, + "step": 13923 + }, + { + "epoch": 0.01, + "learning_rate": 4.997642260624225e-05, + "loss": 1.092, + "step": 13924 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976419195573955e-05, + "loss": 0.937, + "step": 13925 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976415784659114e-05, + "loss": 1.1258, + "step": 13926 + }, + { + "epoch": 0.01, + "learning_rate": 4.997641237349771e-05, + "loss": 1.0027, + "step": 13927 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976408962089756e-05, + "loss": 1.1325, + "step": 13928 + }, + { + "epoch": 0.01, + "learning_rate": 4.997640555043524e-05, + "loss": 1.1987, + "step": 13929 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976402138534173e-05, + "loss": 1.3612, + "step": 13930 + }, + { + "epoch": 0.01, + "learning_rate": 4.997639872638654e-05, + "loss": 1.2419, + "step": 13931 + }, + { + "epoch": 0.01, + "learning_rate": 4.997639531399236e-05, + "loss": 1.1688, + "step": 13932 + }, + { + "epoch": 0.01, + "learning_rate": 4.997639190135162e-05, + "loss": 1.1912, + "step": 13933 + }, + { + "epoch": 0.01, + "learning_rate": 4.997638848846432e-05, + "loss": 1.076, + "step": 13934 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976385075330466e-05, + "loss": 1.4147, + "step": 13935 + }, + { + "epoch": 0.01, + "learning_rate": 4.997638166195006e-05, + "loss": 0.9483, + "step": 13936 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976378248323095e-05, + "loss": 1.2198, + "step": 13937 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976374834449575e-05, + "loss": 0.9229, + "step": 13938 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976371420329494e-05, + "loss": 1.1425, + "step": 13939 + }, + { + "epoch": 0.01, + "learning_rate": 4.997636800596286e-05, + "loss": 1.1703, + "step": 13940 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976364591349675e-05, + "loss": 1.1516, + "step": 13941 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976361176489924e-05, + "loss": 0.9866, + "step": 13942 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976357761383626e-05, + "loss": 1.1424, + "step": 13943 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976354346030766e-05, + "loss": 1.1475, + "step": 13944 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976350930431345e-05, + "loss": 0.7793, + "step": 13945 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976347514585377e-05, + "loss": 0.8283, + "step": 13946 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976344098492854e-05, + "loss": 1.0096, + "step": 13947 + }, + { + "epoch": 0.01, + "learning_rate": 4.997634068215376e-05, + "loss": 0.7455, + "step": 13948 + }, + { + "epoch": 0.01, + "learning_rate": 4.997633726556813e-05, + "loss": 1.2219, + "step": 13949 + }, + { + "epoch": 0.01, + "learning_rate": 4.997633384873593e-05, + "loss": 1.2689, + "step": 13950 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976330431657185e-05, + "loss": 0.9038, + "step": 13951 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976327014331884e-05, + "loss": 0.9423, + "step": 13952 + }, + { + "epoch": 0.01, + "learning_rate": 4.997632359676002e-05, + "loss": 1.2045, + "step": 13953 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976320178941605e-05, + "loss": 1.2527, + "step": 13954 + }, + { + "epoch": 0.01, + "learning_rate": 4.997631676087663e-05, + "loss": 1.2215, + "step": 13955 + }, + { + "epoch": 0.01, + "learning_rate": 4.99763133425651e-05, + "loss": 0.9627, + "step": 13956 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976309924007015e-05, + "loss": 1.0087, + "step": 13957 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976306505202374e-05, + "loss": 0.7504, + "step": 13958 + }, + { + "epoch": 0.01, + "learning_rate": 4.997630308615118e-05, + "loss": 1.0684, + "step": 13959 + }, + { + "epoch": 0.01, + "learning_rate": 4.997629966685343e-05, + "loss": 1.1437, + "step": 13960 + }, + { + "epoch": 0.01, + "learning_rate": 4.997629624730912e-05, + "loss": 1.1618, + "step": 13961 + }, + { + "epoch": 0.01, + "learning_rate": 4.997629282751826e-05, + "loss": 1.2139, + "step": 13962 + }, + { + "epoch": 0.01, + "learning_rate": 4.997628940748084e-05, + "loss": 1.0534, + "step": 13963 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976285987196866e-05, + "loss": 1.0269, + "step": 13964 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976282566666345e-05, + "loss": 1.1003, + "step": 13965 + }, + { + "epoch": 0.01, + "learning_rate": 4.997627914588926e-05, + "loss": 1.0463, + "step": 13966 + }, + { + "epoch": 0.01, + "learning_rate": 4.997627572486562e-05, + "loss": 1.1097, + "step": 13967 + }, + { + "epoch": 0.01, + "learning_rate": 4.997627230359542e-05, + "loss": 1.1212, + "step": 13968 + }, + { + "epoch": 0.01, + "learning_rate": 4.997626888207867e-05, + "loss": 1.0862, + "step": 13969 + }, + { + "epoch": 0.01, + "learning_rate": 4.997626546031537e-05, + "loss": 1.2666, + "step": 13970 + }, + { + "epoch": 0.01, + "learning_rate": 4.997626203830551e-05, + "loss": 1.1089, + "step": 13971 + }, + { + "epoch": 0.01, + "learning_rate": 4.997625861604909e-05, + "loss": 1.1409, + "step": 13972 + }, + { + "epoch": 0.01, + "learning_rate": 4.997625519354612e-05, + "loss": 1.0216, + "step": 13973 + }, + { + "epoch": 0.01, + "learning_rate": 4.99762517707966e-05, + "loss": 1.1534, + "step": 13974 + }, + { + "epoch": 0.01, + "learning_rate": 4.997624834780051e-05, + "loss": 1.0097, + "step": 13975 + }, + { + "epoch": 0.01, + "learning_rate": 4.997624492455788e-05, + "loss": 0.8927, + "step": 13976 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976241501068686e-05, + "loss": 1.0883, + "step": 13977 + }, + { + "epoch": 0.01, + "learning_rate": 4.997623807733295e-05, + "loss": 1.0862, + "step": 13978 + }, + { + "epoch": 0.01, + "learning_rate": 4.997623465335065e-05, + "loss": 1.1607, + "step": 13979 + }, + { + "epoch": 0.01, + "learning_rate": 4.99762312291218e-05, + "loss": 1.1474, + "step": 13980 + }, + { + "epoch": 0.01, + "learning_rate": 4.997622780464638e-05, + "loss": 1.0373, + "step": 13981 + }, + { + "epoch": 0.01, + "learning_rate": 4.997622437992442e-05, + "loss": 0.5321, + "step": 13982 + }, + { + "epoch": 0.01, + "learning_rate": 4.99762209549559e-05, + "loss": 0.8732, + "step": 13983 + }, + { + "epoch": 0.01, + "learning_rate": 4.997621752974082e-05, + "loss": 1.2515, + "step": 13984 + }, + { + "epoch": 0.01, + "learning_rate": 4.99762141042792e-05, + "loss": 1.18, + "step": 13985 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976210678571015e-05, + "loss": 0.95, + "step": 13986 + }, + { + "epoch": 0.01, + "learning_rate": 4.997620725261627e-05, + "loss": 1.0746, + "step": 13987 + }, + { + "epoch": 0.01, + "learning_rate": 4.997620382641498e-05, + "loss": 0.9309, + "step": 13988 + }, + { + "epoch": 0.01, + "learning_rate": 4.997620039996713e-05, + "loss": 1.0832, + "step": 13989 + }, + { + "epoch": 0.01, + "learning_rate": 4.997619697327273e-05, + "loss": 1.0483, + "step": 13990 + }, + { + "epoch": 0.01, + "learning_rate": 4.997619354633177e-05, + "loss": 1.0112, + "step": 13991 + }, + { + "epoch": 0.01, + "learning_rate": 4.997619011914426e-05, + "loss": 1.1311, + "step": 13992 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976186691710204e-05, + "loss": 1.0354, + "step": 13993 + }, + { + "epoch": 0.01, + "learning_rate": 4.997618326402958e-05, + "loss": 1.3114, + "step": 13994 + }, + { + "epoch": 0.01, + "learning_rate": 4.997617983610241e-05, + "loss": 1.0396, + "step": 13995 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976176407928674e-05, + "loss": 1.159, + "step": 13996 + }, + { + "epoch": 0.01, + "learning_rate": 4.997617297950839e-05, + "loss": 0.9205, + "step": 13997 + }, + { + "epoch": 0.01, + "learning_rate": 4.997616955084156e-05, + "loss": 1.0981, + "step": 13998 + }, + { + "epoch": 0.01, + "learning_rate": 4.997616612192817e-05, + "loss": 1.3084, + "step": 13999 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976162692768224e-05, + "loss": 1.3947, + "step": 14000 + }, + { + "epoch": 0.01, + "eval_loss": 1.0414711236953735, + "eval_runtime": 85.5056, + "eval_samples_per_second": 16.198, + "eval_steps_per_second": 4.058, + "step": 14000 + }, + { + "epoch": 0.01, + "learning_rate": 4.997615926336172e-05, + "loss": 1.2238, + "step": 14001 + }, + { + "epoch": 0.01, + "learning_rate": 4.997615583370867e-05, + "loss": 1.2037, + "step": 14002 + }, + { + "epoch": 0.01, + "learning_rate": 4.997615240380906e-05, + "loss": 1.175, + "step": 14003 + }, + { + "epoch": 0.01, + "learning_rate": 4.99761489736629e-05, + "loss": 1.0471, + "step": 14004 + }, + { + "epoch": 0.01, + "learning_rate": 4.997614554327019e-05, + "loss": 0.9915, + "step": 14005 + }, + { + "epoch": 0.01, + "learning_rate": 4.997614211263092e-05, + "loss": 0.9342, + "step": 14006 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976138681745094e-05, + "loss": 1.1484, + "step": 14007 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976135250612715e-05, + "loss": 1.0165, + "step": 14008 + }, + { + "epoch": 0.01, + "learning_rate": 4.997613181923378e-05, + "loss": 0.9837, + "step": 14009 + }, + { + "epoch": 0.01, + "learning_rate": 4.99761283876083e-05, + "loss": 0.9486, + "step": 14010 + }, + { + "epoch": 0.01, + "learning_rate": 4.997612495573626e-05, + "loss": 1.0191, + "step": 14011 + }, + { + "epoch": 0.01, + "learning_rate": 4.997612152361767e-05, + "loss": 1.5051, + "step": 14012 + }, + { + "epoch": 0.01, + "learning_rate": 4.997611809125252e-05, + "loss": 1.4434, + "step": 14013 + }, + { + "epoch": 0.01, + "learning_rate": 4.997611465864082e-05, + "loss": 1.3862, + "step": 14014 + }, + { + "epoch": 0.01, + "learning_rate": 4.997611122578256e-05, + "loss": 1.4054, + "step": 14015 + }, + { + "epoch": 0.01, + "learning_rate": 4.997610779267776e-05, + "loss": 1.4269, + "step": 14016 + }, + { + "epoch": 0.01, + "learning_rate": 4.99761043593264e-05, + "loss": 1.4767, + "step": 14017 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976100925728485e-05, + "loss": 1.5355, + "step": 14018 + }, + { + "epoch": 0.01, + "learning_rate": 4.997609749188401e-05, + "loss": 1.3726, + "step": 14019 + }, + { + "epoch": 0.01, + "learning_rate": 4.997609405779299e-05, + "loss": 1.4025, + "step": 14020 + }, + { + "epoch": 0.01, + "learning_rate": 4.997609062345542e-05, + "loss": 1.4298, + "step": 14021 + }, + { + "epoch": 0.01, + "learning_rate": 4.997608718887129e-05, + "loss": 1.4052, + "step": 14022 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976083754040606e-05, + "loss": 1.2636, + "step": 14023 + }, + { + "epoch": 0.01, + "learning_rate": 4.997608031896337e-05, + "loss": 0.9864, + "step": 14024 + }, + { + "epoch": 0.01, + "learning_rate": 4.997607688363958e-05, + "loss": 1.2486, + "step": 14025 + }, + { + "epoch": 0.01, + "learning_rate": 4.997607344806924e-05, + "loss": 1.2348, + "step": 14026 + }, + { + "epoch": 0.01, + "learning_rate": 4.997607001225235e-05, + "loss": 1.0257, + "step": 14027 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976066576188895e-05, + "loss": 1.235, + "step": 14028 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976063139878895e-05, + "loss": 1.1075, + "step": 14029 + }, + { + "epoch": 0.01, + "learning_rate": 4.997605970332234e-05, + "loss": 1.0367, + "step": 14030 + }, + { + "epoch": 0.01, + "learning_rate": 4.997605626651923e-05, + "loss": 0.9863, + "step": 14031 + }, + { + "epoch": 0.01, + "learning_rate": 4.997605282946957e-05, + "loss": 1.1902, + "step": 14032 + }, + { + "epoch": 0.01, + "learning_rate": 4.997604939217335e-05, + "loss": 0.9979, + "step": 14033 + }, + { + "epoch": 0.01, + "learning_rate": 4.997604595463059e-05, + "loss": 1.7464, + "step": 14034 + }, + { + "epoch": 0.01, + "learning_rate": 4.997604251684127e-05, + "loss": 1.6848, + "step": 14035 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976039078805395e-05, + "loss": 1.3606, + "step": 14036 + }, + { + "epoch": 0.01, + "learning_rate": 4.997603564052297e-05, + "loss": 0.3411, + "step": 14037 + }, + { + "epoch": 0.01, + "learning_rate": 4.997603220199399e-05, + "loss": 0.2637, + "step": 14038 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976028763218455e-05, + "loss": 0.2964, + "step": 14039 + }, + { + "epoch": 0.01, + "learning_rate": 4.997602532419637e-05, + "loss": 0.2212, + "step": 14040 + }, + { + "epoch": 0.01, + "learning_rate": 4.997602188492774e-05, + "loss": 0.3276, + "step": 14041 + }, + { + "epoch": 0.01, + "learning_rate": 4.997601844541254e-05, + "loss": 0.339, + "step": 14042 + }, + { + "epoch": 0.01, + "learning_rate": 4.99760150056508e-05, + "loss": 0.2881, + "step": 14043 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976011565642504e-05, + "loss": 0.191, + "step": 14044 + }, + { + "epoch": 0.01, + "learning_rate": 4.9976008125387655e-05, + "loss": 0.1706, + "step": 14045 + }, + { + "epoch": 0.01, + "learning_rate": 4.997600468488626e-05, + "loss": 0.2245, + "step": 14046 + }, + { + "epoch": 0.01, + "learning_rate": 4.997600124413831e-05, + "loss": 0.27, + "step": 14047 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975997803143795e-05, + "loss": 0.2916, + "step": 14048 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975994361902735e-05, + "loss": 0.2542, + "step": 14049 + }, + { + "epoch": 0.01, + "learning_rate": 4.997599092041513e-05, + "loss": 0.1982, + "step": 14050 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975987478680966e-05, + "loss": 0.4146, + "step": 14051 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975984036700244e-05, + "loss": 0.6991, + "step": 14052 + }, + { + "epoch": 0.01, + "learning_rate": 4.997598059447297e-05, + "loss": 0.6499, + "step": 14053 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975977151999155e-05, + "loss": 0.3083, + "step": 14054 + }, + { + "epoch": 0.01, + "learning_rate": 4.997597370927878e-05, + "loss": 0.2072, + "step": 14055 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975970266311856e-05, + "loss": 0.4152, + "step": 14056 + }, + { + "epoch": 0.01, + "learning_rate": 4.997596682309838e-05, + "loss": 0.7276, + "step": 14057 + }, + { + "epoch": 0.01, + "learning_rate": 4.997596337963835e-05, + "loss": 1.0309, + "step": 14058 + }, + { + "epoch": 0.01, + "learning_rate": 4.997595993593177e-05, + "loss": 1.3804, + "step": 14059 + }, + { + "epoch": 0.01, + "learning_rate": 4.997595649197864e-05, + "loss": 2.1518, + "step": 14060 + }, + { + "epoch": 0.01, + "learning_rate": 4.997595304777894e-05, + "loss": 1.1852, + "step": 14061 + }, + { + "epoch": 0.01, + "learning_rate": 4.99759496033327e-05, + "loss": 0.7476, + "step": 14062 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975946158639916e-05, + "loss": 0.4246, + "step": 14063 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975942713700575e-05, + "loss": 0.7485, + "step": 14064 + }, + { + "epoch": 0.01, + "learning_rate": 4.997593926851468e-05, + "loss": 0.3086, + "step": 14065 + }, + { + "epoch": 0.01, + "learning_rate": 4.997593582308223e-05, + "loss": 0.2025, + "step": 14066 + }, + { + "epoch": 0.01, + "learning_rate": 4.997593237740323e-05, + "loss": 0.1456, + "step": 14067 + }, + { + "epoch": 0.01, + "learning_rate": 4.997592893147769e-05, + "loss": 1.0141, + "step": 14068 + }, + { + "epoch": 0.01, + "learning_rate": 4.997592548530559e-05, + "loss": 1.0374, + "step": 14069 + }, + { + "epoch": 0.01, + "learning_rate": 4.997592203888693e-05, + "loss": 1.0126, + "step": 14070 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975918592221726e-05, + "loss": 1.0492, + "step": 14071 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975915145309964e-05, + "loss": 1.3127, + "step": 14072 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975911698151654e-05, + "loss": 1.4635, + "step": 14073 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975908250746796e-05, + "loss": 1.2857, + "step": 14074 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975904803095384e-05, + "loss": 0.9974, + "step": 14075 + }, + { + "epoch": 0.01, + "learning_rate": 4.997590135519742e-05, + "loss": 1.5194, + "step": 14076 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975897907052904e-05, + "loss": 1.1631, + "step": 14077 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975894458661836e-05, + "loss": 1.0396, + "step": 14078 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975891010024214e-05, + "loss": 1.4006, + "step": 14079 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975887561140044e-05, + "loss": 1.1684, + "step": 14080 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975884112009326e-05, + "loss": 1.2567, + "step": 14081 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975880662632054e-05, + "loss": 1.0493, + "step": 14082 + }, + { + "epoch": 0.01, + "learning_rate": 4.997587721300823e-05, + "loss": 1.4118, + "step": 14083 + }, + { + "epoch": 0.01, + "learning_rate": 4.997587376313785e-05, + "loss": 1.0582, + "step": 14084 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975870313020926e-05, + "loss": 0.9975, + "step": 14085 + }, + { + "epoch": 0.01, + "learning_rate": 4.997586686265745e-05, + "loss": 1.1511, + "step": 14086 + }, + { + "epoch": 0.01, + "learning_rate": 4.997586341204742e-05, + "loss": 0.9706, + "step": 14087 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975859961190835e-05, + "loss": 1.076, + "step": 14088 + }, + { + "epoch": 0.01, + "learning_rate": 4.99758565100877e-05, + "loss": 1.3168, + "step": 14089 + }, + { + "epoch": 0.01, + "learning_rate": 4.997585305873802e-05, + "loss": 0.8597, + "step": 14090 + }, + { + "epoch": 0.01, + "learning_rate": 4.997584960714179e-05, + "loss": 0.7687, + "step": 14091 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975846155299e-05, + "loss": 0.7132, + "step": 14092 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975842703209665e-05, + "loss": 0.6899, + "step": 14093 + }, + { + "epoch": 0.01, + "learning_rate": 4.997583925087378e-05, + "loss": 0.668, + "step": 14094 + }, + { + "epoch": 0.01, + "learning_rate": 4.997583579829134e-05, + "loss": 0.7043, + "step": 14095 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975832345462345e-05, + "loss": 0.6641, + "step": 14096 + }, + { + "epoch": 0.01, + "learning_rate": 4.997582889238681e-05, + "loss": 0.5732, + "step": 14097 + }, + { + "epoch": 0.01, + "learning_rate": 4.997582543906472e-05, + "loss": 0.6086, + "step": 14098 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975821985496076e-05, + "loss": 0.5433, + "step": 14099 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975818531680886e-05, + "loss": 0.78, + "step": 14100 + }, + { + "epoch": 0.01, + "learning_rate": 4.997581507761914e-05, + "loss": 1.2859, + "step": 14101 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975811623310845e-05, + "loss": 0.9706, + "step": 14102 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975808168756e-05, + "loss": 1.0174, + "step": 14103 + }, + { + "epoch": 0.01, + "learning_rate": 4.997580471395461e-05, + "loss": 0.9041, + "step": 14104 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975801258906665e-05, + "loss": 1.0138, + "step": 14105 + }, + { + "epoch": 0.01, + "learning_rate": 4.997579780361217e-05, + "loss": 1.1986, + "step": 14106 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975794348071114e-05, + "loss": 1.0084, + "step": 14107 + }, + { + "epoch": 0.01, + "learning_rate": 4.997579089228352e-05, + "loss": 1.2811, + "step": 14108 + }, + { + "epoch": 0.01, + "learning_rate": 4.997578743624937e-05, + "loss": 1.0046, + "step": 14109 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975783979968673e-05, + "loss": 1.2217, + "step": 14110 + }, + { + "epoch": 0.01, + "learning_rate": 4.997578052344143e-05, + "loss": 1.2312, + "step": 14111 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975777066667626e-05, + "loss": 1.3594, + "step": 14112 + }, + { + "epoch": 0.01, + "learning_rate": 4.997577360964728e-05, + "loss": 0.9843, + "step": 14113 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975770152380375e-05, + "loss": 1.0723, + "step": 14114 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975766694866924e-05, + "loss": 1.1024, + "step": 14115 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975763237106927e-05, + "loss": 1.0341, + "step": 14116 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975759779100374e-05, + "loss": 0.527, + "step": 14117 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975756320847274e-05, + "loss": 0.4008, + "step": 14118 + }, + { + "epoch": 0.01, + "learning_rate": 4.997575286234763e-05, + "loss": 0.41, + "step": 14119 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975749403601425e-05, + "loss": 0.3649, + "step": 14120 + }, + { + "epoch": 0.01, + "learning_rate": 4.997574594460867e-05, + "loss": 0.2659, + "step": 14121 + }, + { + "epoch": 0.01, + "learning_rate": 4.997574248536937e-05, + "loss": 0.1907, + "step": 14122 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975739025883514e-05, + "loss": 0.25, + "step": 14123 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975735566151115e-05, + "loss": 1.0159, + "step": 14124 + }, + { + "epoch": 0.01, + "learning_rate": 4.997573210617217e-05, + "loss": 1.2458, + "step": 14125 + }, + { + "epoch": 0.01, + "learning_rate": 4.997572864594666e-05, + "loss": 1.1723, + "step": 14126 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975725185474606e-05, + "loss": 1.1799, + "step": 14127 + }, + { + "epoch": 0.01, + "learning_rate": 4.997572172475601e-05, + "loss": 1.0264, + "step": 14128 + }, + { + "epoch": 0.01, + "learning_rate": 4.997571826379086e-05, + "loss": 1.2867, + "step": 14129 + }, + { + "epoch": 0.01, + "learning_rate": 4.997571480257916e-05, + "loss": 1.1588, + "step": 14130 + }, + { + "epoch": 0.01, + "learning_rate": 4.997571134112091e-05, + "loss": 1.269, + "step": 14131 + }, + { + "epoch": 0.01, + "learning_rate": 4.997570787941611e-05, + "loss": 1.105, + "step": 14132 + }, + { + "epoch": 0.01, + "learning_rate": 4.997570441746476e-05, + "loss": 1.1432, + "step": 14133 + }, + { + "epoch": 0.01, + "learning_rate": 4.997570095526686e-05, + "loss": 1.0189, + "step": 14134 + }, + { + "epoch": 0.01, + "learning_rate": 4.997569749282241e-05, + "loss": 1.552, + "step": 14135 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975694030131415e-05, + "loss": 1.186, + "step": 14136 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975690567193864e-05, + "loss": 0.9589, + "step": 14137 + }, + { + "epoch": 0.01, + "learning_rate": 4.997568710400977e-05, + "loss": 1.0039, + "step": 14138 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975683640579126e-05, + "loss": 0.9679, + "step": 14139 + }, + { + "epoch": 0.01, + "learning_rate": 4.997568017690193e-05, + "loss": 1.2845, + "step": 14140 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975676712978183e-05, + "loss": 1.432, + "step": 14141 + }, + { + "epoch": 0.01, + "learning_rate": 4.997567324880789e-05, + "loss": 1.1023, + "step": 14142 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975669784391044e-05, + "loss": 1.1681, + "step": 14143 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975666319727646e-05, + "loss": 1.1895, + "step": 14144 + }, + { + "epoch": 0.01, + "learning_rate": 4.997566285481771e-05, + "loss": 1.0524, + "step": 14145 + }, + { + "epoch": 0.01, + "learning_rate": 4.997565938966121e-05, + "loss": 1.0158, + "step": 14146 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975655924258175e-05, + "loss": 0.8595, + "step": 14147 + }, + { + "epoch": 0.01, + "learning_rate": 4.997565245860858e-05, + "loss": 1.0239, + "step": 14148 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975648992712444e-05, + "loss": 1.0904, + "step": 14149 + }, + { + "epoch": 0.01, + "learning_rate": 4.997564552656976e-05, + "loss": 1.1, + "step": 14150 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975642060180517e-05, + "loss": 0.9147, + "step": 14151 + }, + { + "epoch": 0.01, + "learning_rate": 4.997563859354473e-05, + "loss": 0.966, + "step": 14152 + }, + { + "epoch": 0.01, + "learning_rate": 4.99756351266624e-05, + "loss": 1.1773, + "step": 14153 + }, + { + "epoch": 0.01, + "learning_rate": 4.997563165953351e-05, + "loss": 0.8457, + "step": 14154 + }, + { + "epoch": 0.01, + "learning_rate": 4.997562819215808e-05, + "loss": 0.676, + "step": 14155 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975624724536095e-05, + "loss": 1.0086, + "step": 14156 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975621256667565e-05, + "loss": 0.637, + "step": 14157 + }, + { + "epoch": 0.01, + "learning_rate": 4.997561778855249e-05, + "loss": 0.7949, + "step": 14158 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975614320190856e-05, + "loss": 1.2112, + "step": 14159 + }, + { + "epoch": 0.01, + "learning_rate": 4.997561085158268e-05, + "loss": 1.006, + "step": 14160 + }, + { + "epoch": 0.01, + "learning_rate": 4.997560738272795e-05, + "loss": 1.1773, + "step": 14161 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975603913626675e-05, + "loss": 1.1284, + "step": 14162 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975600444278853e-05, + "loss": 1.0709, + "step": 14163 + }, + { + "epoch": 0.01, + "learning_rate": 4.997559697468448e-05, + "loss": 0.9037, + "step": 14164 + }, + { + "epoch": 0.01, + "learning_rate": 4.997559350484356e-05, + "loss": 1.2146, + "step": 14165 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975590034756095e-05, + "loss": 1.0601, + "step": 14166 + }, + { + "epoch": 0.01, + "learning_rate": 4.997558656442207e-05, + "loss": 1.1863, + "step": 14167 + }, + { + "epoch": 0.01, + "learning_rate": 4.99755830938415e-05, + "loss": 1.3709, + "step": 14168 + }, + { + "epoch": 0.01, + "learning_rate": 4.997557962301439e-05, + "loss": 1.3823, + "step": 14169 + }, + { + "epoch": 0.01, + "learning_rate": 4.997557615194073e-05, + "loss": 1.089, + "step": 14170 + }, + { + "epoch": 0.01, + "learning_rate": 4.997557268062052e-05, + "loss": 1.1908, + "step": 14171 + }, + { + "epoch": 0.01, + "learning_rate": 4.997556920905376e-05, + "loss": 1.1416, + "step": 14172 + }, + { + "epoch": 0.01, + "learning_rate": 4.997556573724045e-05, + "loss": 0.6685, + "step": 14173 + }, + { + "epoch": 0.01, + "learning_rate": 4.99755622651806e-05, + "loss": 1.248, + "step": 14174 + }, + { + "epoch": 0.01, + "learning_rate": 4.997555879287419e-05, + "loss": 0.8823, + "step": 14175 + }, + { + "epoch": 0.01, + "learning_rate": 4.997555532032124e-05, + "loss": 1.2691, + "step": 14176 + }, + { + "epoch": 0.01, + "learning_rate": 4.997555184752174e-05, + "loss": 0.9468, + "step": 14177 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975548374475686e-05, + "loss": 1.1383, + "step": 14178 + }, + { + "epoch": 0.01, + "learning_rate": 4.997554490118309e-05, + "loss": 1.1812, + "step": 14179 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975541427643945e-05, + "loss": 1.1472, + "step": 14180 + }, + { + "epoch": 0.01, + "learning_rate": 4.997553795385825e-05, + "loss": 1.1894, + "step": 14181 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975534479826014e-05, + "loss": 1.0304, + "step": 14182 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975531005547227e-05, + "loss": 1.3028, + "step": 14183 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975527531021885e-05, + "loss": 0.9467, + "step": 14184 + }, + { + "epoch": 0.01, + "learning_rate": 4.997552405625e-05, + "loss": 1.227, + "step": 14185 + }, + { + "epoch": 0.01, + "learning_rate": 4.997552058123157e-05, + "loss": 1.0007, + "step": 14186 + }, + { + "epoch": 0.01, + "learning_rate": 4.997551710596658e-05, + "loss": 1.0721, + "step": 14187 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975513630455064e-05, + "loss": 1.1782, + "step": 14188 + }, + { + "epoch": 0.01, + "learning_rate": 4.997551015469698e-05, + "loss": 1.1244, + "step": 14189 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975506678692365e-05, + "loss": 1.1925, + "step": 14190 + }, + { + "epoch": 0.01, + "learning_rate": 4.997550320244119e-05, + "loss": 1.1091, + "step": 14191 + }, + { + "epoch": 0.01, + "learning_rate": 4.997549972594347e-05, + "loss": 1.0063, + "step": 14192 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975496249199206e-05, + "loss": 1.0792, + "step": 14193 + }, + { + "epoch": 0.01, + "learning_rate": 4.997549277220839e-05, + "loss": 0.9771, + "step": 14194 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975489294971024e-05, + "loss": 1.3387, + "step": 14195 + }, + { + "epoch": 0.01, + "learning_rate": 4.997548581748712e-05, + "loss": 1.0673, + "step": 14196 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975482339756665e-05, + "loss": 0.997, + "step": 14197 + }, + { + "epoch": 0.01, + "learning_rate": 4.997547886177966e-05, + "loss": 1.1778, + "step": 14198 + }, + { + "epoch": 0.01, + "learning_rate": 4.99754753835561e-05, + "loss": 1.0958, + "step": 14199 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975471905086e-05, + "loss": 1.2378, + "step": 14200 + }, + { + "epoch": 0.01, + "learning_rate": 4.997546842636936e-05, + "loss": 1.1805, + "step": 14201 + }, + { + "epoch": 0.01, + "learning_rate": 4.997546494740617e-05, + "loss": 1.2093, + "step": 14202 + }, + { + "epoch": 0.01, + "learning_rate": 4.997546146819643e-05, + "loss": 1.1227, + "step": 14203 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975457988740135e-05, + "loss": 1.2625, + "step": 14204 + }, + { + "epoch": 0.01, + "learning_rate": 4.99754545090373e-05, + "loss": 1.0928, + "step": 14205 + }, + { + "epoch": 0.01, + "learning_rate": 4.997545102908791e-05, + "loss": 1.148, + "step": 14206 + }, + { + "epoch": 0.01, + "learning_rate": 4.997544754889199e-05, + "loss": 0.9633, + "step": 14207 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975444068449515e-05, + "loss": 1.0229, + "step": 14208 + }, + { + "epoch": 0.01, + "learning_rate": 4.997544058776049e-05, + "loss": 1.1975, + "step": 14209 + }, + { + "epoch": 0.01, + "learning_rate": 4.997543710682492e-05, + "loss": 0.9083, + "step": 14210 + }, + { + "epoch": 0.01, + "learning_rate": 4.99754336256428e-05, + "loss": 1.2467, + "step": 14211 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975430144214134e-05, + "loss": 1.0118, + "step": 14212 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975426662538926e-05, + "loss": 0.7819, + "step": 14213 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975423180617164e-05, + "loss": 1.1299, + "step": 14214 + }, + { + "epoch": 0.01, + "learning_rate": 4.997541969844886e-05, + "loss": 1.0817, + "step": 14215 + }, + { + "epoch": 0.01, + "learning_rate": 4.997541621603401e-05, + "loss": 1.1529, + "step": 14216 + }, + { + "epoch": 0.01, + "learning_rate": 4.997541273337261e-05, + "loss": 1.2344, + "step": 14217 + }, + { + "epoch": 0.01, + "learning_rate": 4.997540925046467e-05, + "loss": 0.7974, + "step": 14218 + }, + { + "epoch": 0.01, + "learning_rate": 4.997540576731017e-05, + "loss": 1.2864, + "step": 14219 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975402283909135e-05, + "loss": 0.9274, + "step": 14220 + }, + { + "epoch": 0.01, + "learning_rate": 4.997539880026155e-05, + "loss": 1.1259, + "step": 14221 + }, + { + "epoch": 0.01, + "learning_rate": 4.997539531636741e-05, + "loss": 0.8732, + "step": 14222 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975391832226735e-05, + "loss": 1.1752, + "step": 14223 + }, + { + "epoch": 0.01, + "learning_rate": 4.997538834783951e-05, + "loss": 0.9616, + "step": 14224 + }, + { + "epoch": 0.01, + "learning_rate": 4.997538486320574e-05, + "loss": 0.9051, + "step": 14225 + }, + { + "epoch": 0.01, + "learning_rate": 4.997538137832542e-05, + "loss": 1.0937, + "step": 14226 + }, + { + "epoch": 0.01, + "learning_rate": 4.997537789319855e-05, + "loss": 1.0993, + "step": 14227 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975374407825146e-05, + "loss": 1.1841, + "step": 14228 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975370922205184e-05, + "loss": 0.7506, + "step": 14229 + }, + { + "epoch": 0.01, + "learning_rate": 4.997536743633868e-05, + "loss": 1.4471, + "step": 14230 + }, + { + "epoch": 0.01, + "learning_rate": 4.997536395022563e-05, + "loss": 0.5568, + "step": 14231 + }, + { + "epoch": 0.01, + "learning_rate": 4.997536046386603e-05, + "loss": 0.832, + "step": 14232 + }, + { + "epoch": 0.01, + "learning_rate": 4.997535697725989e-05, + "loss": 1.2028, + "step": 14233 + }, + { + "epoch": 0.01, + "learning_rate": 4.99753534904072e-05, + "loss": 1.0788, + "step": 14234 + }, + { + "epoch": 0.01, + "learning_rate": 4.997535000330796e-05, + "loss": 0.7651, + "step": 14235 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975346515962186e-05, + "loss": 0.9412, + "step": 14236 + }, + { + "epoch": 0.01, + "learning_rate": 4.997534302836986e-05, + "loss": 1.28, + "step": 14237 + }, + { + "epoch": 0.01, + "learning_rate": 4.997533954053099e-05, + "loss": 1.1412, + "step": 14238 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975336052445564e-05, + "loss": 0.7331, + "step": 14239 + }, + { + "epoch": 0.01, + "learning_rate": 4.99753325641136e-05, + "loss": 0.9254, + "step": 14240 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975329075535086e-05, + "loss": 0.9398, + "step": 14241 + }, + { + "epoch": 0.01, + "learning_rate": 4.997532558671003e-05, + "loss": 1.412, + "step": 14242 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975322097638426e-05, + "loss": 1.1725, + "step": 14243 + }, + { + "epoch": 0.01, + "learning_rate": 4.997531860832028e-05, + "loss": 1.2248, + "step": 14244 + }, + { + "epoch": 0.01, + "learning_rate": 4.997531511875558e-05, + "loss": 1.0279, + "step": 14245 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975311628944344e-05, + "loss": 1.0378, + "step": 14246 + }, + { + "epoch": 0.01, + "learning_rate": 4.997530813888655e-05, + "loss": 1.2247, + "step": 14247 + }, + { + "epoch": 0.01, + "learning_rate": 4.997530464858222e-05, + "loss": 0.4484, + "step": 14248 + }, + { + "epoch": 0.01, + "learning_rate": 4.997530115803135e-05, + "loss": 0.9846, + "step": 14249 + }, + { + "epoch": 0.01, + "learning_rate": 4.997529766723392e-05, + "loss": 0.8921, + "step": 14250 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975294176189954e-05, + "loss": 1.0155, + "step": 14251 + }, + { + "epoch": 0.01, + "learning_rate": 4.997529068489943e-05, + "loss": 0.9831, + "step": 14252 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975287193362375e-05, + "loss": 1.0879, + "step": 14253 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975283701578765e-05, + "loss": 1.1171, + "step": 14254 + }, + { + "epoch": 0.01, + "learning_rate": 4.997528020954862e-05, + "loss": 1.0931, + "step": 14255 + }, + { + "epoch": 0.01, + "learning_rate": 4.997527671727192e-05, + "loss": 1.1142, + "step": 14256 + }, + { + "epoch": 0.01, + "learning_rate": 4.997527322474868e-05, + "loss": 1.0804, + "step": 14257 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975269731978894e-05, + "loss": 1.1162, + "step": 14258 + }, + { + "epoch": 0.01, + "learning_rate": 4.997526623896256e-05, + "loss": 1.1853, + "step": 14259 + }, + { + "epoch": 0.01, + "learning_rate": 4.997526274569968e-05, + "loss": 1.082, + "step": 14260 + }, + { + "epoch": 0.01, + "learning_rate": 4.997525925219026e-05, + "loss": 0.9478, + "step": 14261 + }, + { + "epoch": 0.01, + "learning_rate": 4.997525575843429e-05, + "loss": 1.033, + "step": 14262 + }, + { + "epoch": 0.01, + "learning_rate": 4.997525226443178e-05, + "loss": 1.1039, + "step": 14263 + }, + { + "epoch": 0.01, + "learning_rate": 4.997524877018271e-05, + "loss": 1.0438, + "step": 14264 + }, + { + "epoch": 0.01, + "learning_rate": 4.997524527568711e-05, + "loss": 1.1779, + "step": 14265 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975241780944964e-05, + "loss": 1.1588, + "step": 14266 + }, + { + "epoch": 0.01, + "learning_rate": 4.997523828595627e-05, + "loss": 1.1946, + "step": 14267 + }, + { + "epoch": 0.01, + "learning_rate": 4.997523479072103e-05, + "loss": 1.1071, + "step": 14268 + }, + { + "epoch": 0.01, + "learning_rate": 4.997523129523924e-05, + "loss": 1.0114, + "step": 14269 + }, + { + "epoch": 0.01, + "learning_rate": 4.997522779951091e-05, + "loss": 0.9748, + "step": 14270 + }, + { + "epoch": 0.01, + "learning_rate": 4.997522430353604e-05, + "loss": 1.3441, + "step": 14271 + }, + { + "epoch": 0.01, + "learning_rate": 4.997522080731463e-05, + "loss": 1.0202, + "step": 14272 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975217310846653e-05, + "loss": 1.1202, + "step": 14273 + }, + { + "epoch": 0.01, + "learning_rate": 4.997521381413215e-05, + "loss": 0.757, + "step": 14274 + }, + { + "epoch": 0.01, + "learning_rate": 4.99752103171711e-05, + "loss": 0.4076, + "step": 14275 + }, + { + "epoch": 0.01, + "learning_rate": 4.99752068199635e-05, + "loss": 0.6337, + "step": 14276 + }, + { + "epoch": 0.01, + "learning_rate": 4.997520332250936e-05, + "loss": 0.3864, + "step": 14277 + }, + { + "epoch": 0.01, + "learning_rate": 4.997519982480867e-05, + "loss": 0.4376, + "step": 14278 + }, + { + "epoch": 0.01, + "learning_rate": 4.997519632686144e-05, + "loss": 0.6838, + "step": 14279 + }, + { + "epoch": 0.01, + "learning_rate": 4.997519282866766e-05, + "loss": 0.4501, + "step": 14280 + }, + { + "epoch": 0.01, + "learning_rate": 4.997518933022733e-05, + "loss": 0.2631, + "step": 14281 + }, + { + "epoch": 0.01, + "learning_rate": 4.997518583154047e-05, + "loss": 0.3098, + "step": 14282 + }, + { + "epoch": 0.01, + "learning_rate": 4.997518233260706e-05, + "loss": 0.1209, + "step": 14283 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975178833427106e-05, + "loss": 0.4564, + "step": 14284 + }, + { + "epoch": 0.01, + "learning_rate": 4.997517533400061e-05, + "loss": 0.7584, + "step": 14285 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975171834327564e-05, + "loss": 0.8234, + "step": 14286 + }, + { + "epoch": 0.01, + "learning_rate": 4.997516833440797e-05, + "loss": 1.2015, + "step": 14287 + }, + { + "epoch": 0.01, + "learning_rate": 4.997516483424184e-05, + "loss": 1.2761, + "step": 14288 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975161333829165e-05, + "loss": 1.1576, + "step": 14289 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975157833169936e-05, + "loss": 0.9933, + "step": 14290 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975154332264174e-05, + "loss": 1.3237, + "step": 14291 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975150831111864e-05, + "loss": 0.9063, + "step": 14292 + }, + { + "epoch": 0.01, + "learning_rate": 4.997514732971301e-05, + "loss": 1.0895, + "step": 14293 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975143828067615e-05, + "loss": 1.091, + "step": 14294 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975140326175676e-05, + "loss": 1.0007, + "step": 14295 + }, + { + "epoch": 0.01, + "learning_rate": 4.997513682403718e-05, + "loss": 1.4644, + "step": 14296 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975133321652155e-05, + "loss": 0.9079, + "step": 14297 + }, + { + "epoch": 0.01, + "learning_rate": 4.997512981902058e-05, + "loss": 1.1363, + "step": 14298 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975126316142464e-05, + "loss": 1.3871, + "step": 14299 + }, + { + "epoch": 0.01, + "learning_rate": 4.997512281301781e-05, + "loss": 1.1419, + "step": 14300 + }, + { + "epoch": 0.01, + "learning_rate": 4.99751193096466e-05, + "loss": 1.207, + "step": 14301 + }, + { + "epoch": 0.01, + "learning_rate": 4.997511580602885e-05, + "loss": 0.5183, + "step": 14302 + }, + { + "epoch": 0.01, + "learning_rate": 4.997511230216455e-05, + "loss": 1.3572, + "step": 14303 + }, + { + "epoch": 0.01, + "learning_rate": 4.997510879805372e-05, + "loss": 0.7195, + "step": 14304 + }, + { + "epoch": 0.01, + "learning_rate": 4.997510529369633e-05, + "loss": 0.7084, + "step": 14305 + }, + { + "epoch": 0.01, + "learning_rate": 4.997510178909241e-05, + "loss": 0.7775, + "step": 14306 + }, + { + "epoch": 0.01, + "learning_rate": 4.997509828424194e-05, + "loss": 1.1075, + "step": 14307 + }, + { + "epoch": 0.01, + "learning_rate": 4.997509477914493e-05, + "loss": 1.1303, + "step": 14308 + }, + { + "epoch": 0.01, + "learning_rate": 4.997509127380138e-05, + "loss": 1.0402, + "step": 14309 + }, + { + "epoch": 0.01, + "learning_rate": 4.997508776821127e-05, + "loss": 1.3226, + "step": 14310 + }, + { + "epoch": 0.01, + "learning_rate": 4.997508426237463e-05, + "loss": 1.282, + "step": 14311 + }, + { + "epoch": 0.01, + "learning_rate": 4.997508075629145e-05, + "loss": 1.2724, + "step": 14312 + }, + { + "epoch": 0.01, + "learning_rate": 4.997507724996172e-05, + "loss": 1.2906, + "step": 14313 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975073743385444e-05, + "loss": 1.2708, + "step": 14314 + }, + { + "epoch": 0.01, + "learning_rate": 4.997507023656263e-05, + "loss": 0.7824, + "step": 14315 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975066729493264e-05, + "loss": 1.1854, + "step": 14316 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975063222177366e-05, + "loss": 0.9226, + "step": 14317 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975059714614914e-05, + "loss": 0.824, + "step": 14318 + }, + { + "epoch": 0.01, + "learning_rate": 4.997505620680593e-05, + "loss": 1.1231, + "step": 14319 + }, + { + "epoch": 0.01, + "learning_rate": 4.997505269875039e-05, + "loss": 1.0022, + "step": 14320 + }, + { + "epoch": 0.01, + "learning_rate": 4.997504919044831e-05, + "loss": 0.5094, + "step": 14321 + }, + { + "epoch": 0.01, + "learning_rate": 4.99750456818997e-05, + "loss": 1.136, + "step": 14322 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975042173104535e-05, + "loss": 1.4898, + "step": 14323 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975038664062825e-05, + "loss": 1.0795, + "step": 14324 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975035154774574e-05, + "loss": 1.8042, + "step": 14325 + }, + { + "epoch": 0.01, + "learning_rate": 4.997503164523979e-05, + "loss": 1.0174, + "step": 14326 + }, + { + "epoch": 0.01, + "learning_rate": 4.997502813545845e-05, + "loss": 1.4137, + "step": 14327 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975024625430576e-05, + "loss": 1.2082, + "step": 14328 + }, + { + "epoch": 0.01, + "learning_rate": 4.997502111515615e-05, + "loss": 1.0201, + "step": 14329 + }, + { + "epoch": 0.01, + "learning_rate": 4.9975017604635186e-05, + "loss": 1.3109, + "step": 14330 + }, + { + "epoch": 0.01, + "learning_rate": 4.997501409386768e-05, + "loss": 1.0131, + "step": 14331 + }, + { + "epoch": 0.01, + "learning_rate": 4.997501058285363e-05, + "loss": 0.9856, + "step": 14332 + }, + { + "epoch": 0.01, + "learning_rate": 4.997500707159304e-05, + "loss": 1.3251, + "step": 14333 + }, + { + "epoch": 0.01, + "learning_rate": 4.997500356008591e-05, + "loss": 1.2006, + "step": 14334 + }, + { + "epoch": 0.01, + "learning_rate": 4.997500004833223e-05, + "loss": 1.5733, + "step": 14335 + }, + { + "epoch": 0.01, + "learning_rate": 4.997499653633201e-05, + "loss": 1.4137, + "step": 14336 + }, + { + "epoch": 0.01, + "learning_rate": 4.997499302408525e-05, + "loss": 1.1408, + "step": 14337 + }, + { + "epoch": 0.01, + "learning_rate": 4.997498951159194e-05, + "loss": 1.2687, + "step": 14338 + }, + { + "epoch": 0.01, + "learning_rate": 4.997498599885209e-05, + "loss": 1.5553, + "step": 14339 + }, + { + "epoch": 0.01, + "learning_rate": 4.99749824858657e-05, + "loss": 0.52, + "step": 14340 + }, + { + "epoch": 0.01, + "learning_rate": 4.997497897263277e-05, + "loss": 0.8567, + "step": 14341 + }, + { + "epoch": 0.01, + "learning_rate": 4.99749754591533e-05, + "loss": 1.132, + "step": 14342 + }, + { + "epoch": 0.01, + "learning_rate": 4.997497194542727e-05, + "loss": 1.0302, + "step": 14343 + }, + { + "epoch": 0.01, + "learning_rate": 4.997496843145472e-05, + "loss": 0.9373, + "step": 14344 + }, + { + "epoch": 0.01, + "learning_rate": 4.997496491723561e-05, + "loss": 1.1295, + "step": 14345 + }, + { + "epoch": 0.01, + "learning_rate": 4.997496140276997e-05, + "loss": 1.3824, + "step": 14346 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974957888057786e-05, + "loss": 1.354, + "step": 14347 + }, + { + "epoch": 0.01, + "learning_rate": 4.997495437309905e-05, + "loss": 1.3216, + "step": 14348 + }, + { + "epoch": 0.01, + "learning_rate": 4.997495085789378e-05, + "loss": 1.06, + "step": 14349 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974947342441966e-05, + "loss": 1.1986, + "step": 14350 + }, + { + "epoch": 0.01, + "learning_rate": 4.997494382674361e-05, + "loss": 1.0807, + "step": 14351 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974940310798714e-05, + "loss": 1.24, + "step": 14352 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974936794607266e-05, + "loss": 1.1952, + "step": 14353 + }, + { + "epoch": 0.01, + "learning_rate": 4.997493327816929e-05, + "loss": 1.5186, + "step": 14354 + }, + { + "epoch": 0.01, + "learning_rate": 4.997492976148476e-05, + "loss": 1.1785, + "step": 14355 + }, + { + "epoch": 0.01, + "learning_rate": 4.99749262445537e-05, + "loss": 0.9407, + "step": 14356 + }, + { + "epoch": 0.01, + "learning_rate": 4.997492272737608e-05, + "loss": 1.2702, + "step": 14357 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974919209951936e-05, + "loss": 1.2608, + "step": 14358 + }, + { + "epoch": 0.01, + "learning_rate": 4.997491569228124e-05, + "loss": 1.2639, + "step": 14359 + }, + { + "epoch": 0.01, + "learning_rate": 4.997491217436401e-05, + "loss": 0.932, + "step": 14360 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974908656200236e-05, + "loss": 1.3442, + "step": 14361 + }, + { + "epoch": 0.01, + "learning_rate": 4.997490513778992e-05, + "loss": 1.2339, + "step": 14362 + }, + { + "epoch": 0.01, + "learning_rate": 4.997490161913306e-05, + "loss": 1.2823, + "step": 14363 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974898100229655e-05, + "loss": 1.1675, + "step": 14364 + }, + { + "epoch": 0.01, + "learning_rate": 4.997489458107971e-05, + "loss": 0.7062, + "step": 14365 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974891061683226e-05, + "loss": 1.0909, + "step": 14366 + }, + { + "epoch": 0.01, + "learning_rate": 4.997488754204021e-05, + "loss": 1.2318, + "step": 14367 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974884022150634e-05, + "loss": 1.6796, + "step": 14368 + }, + { + "epoch": 0.01, + "learning_rate": 4.997488050201453e-05, + "loss": 1.1237, + "step": 14369 + }, + { + "epoch": 0.01, + "learning_rate": 4.997487698163188e-05, + "loss": 1.7231, + "step": 14370 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974873461002683e-05, + "loss": 0.9219, + "step": 14371 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974869940126954e-05, + "loss": 0.7085, + "step": 14372 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974866419004677e-05, + "loss": 1.2581, + "step": 14373 + }, + { + "epoch": 0.01, + "learning_rate": 4.997486289763586e-05, + "loss": 1.0116, + "step": 14374 + }, + { + "epoch": 0.01, + "learning_rate": 4.99748593760205e-05, + "loss": 1.3663, + "step": 14375 + }, + { + "epoch": 0.01, + "learning_rate": 4.99748558541586e-05, + "loss": 1.0475, + "step": 14376 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974852332050167e-05, + "loss": 0.9837, + "step": 14377 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974848809695185e-05, + "loss": 0.8915, + "step": 14378 + }, + { + "epoch": 0.01, + "learning_rate": 4.997484528709366e-05, + "loss": 1.2295, + "step": 14379 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974841764245594e-05, + "loss": 1.2447, + "step": 14380 + }, + { + "epoch": 0.01, + "learning_rate": 4.997483824115099e-05, + "loss": 1.198, + "step": 14381 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974834717809846e-05, + "loss": 1.3973, + "step": 14382 + }, + { + "epoch": 0.01, + "learning_rate": 4.997483119422216e-05, + "loss": 1.1056, + "step": 14383 + }, + { + "epoch": 0.01, + "learning_rate": 4.997482767038793e-05, + "loss": 1.1403, + "step": 14384 + }, + { + "epoch": 0.01, + "learning_rate": 4.997482414630716e-05, + "loss": 1.6749, + "step": 14385 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974820621979853e-05, + "loss": 1.3194, + "step": 14386 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974817097406e-05, + "loss": 0.8325, + "step": 14387 + }, + { + "epoch": 0.01, + "learning_rate": 4.997481357258561e-05, + "loss": 1.031, + "step": 14388 + }, + { + "epoch": 0.01, + "learning_rate": 4.997481004751868e-05, + "loss": 1.1606, + "step": 14389 + }, + { + "epoch": 0.01, + "learning_rate": 4.997480652220521e-05, + "loss": 0.8679, + "step": 14390 + }, + { + "epoch": 0.01, + "learning_rate": 4.997480299664519e-05, + "loss": 0.9806, + "step": 14391 + }, + { + "epoch": 0.01, + "learning_rate": 4.997479947083864e-05, + "loss": 1.0224, + "step": 14392 + }, + { + "epoch": 0.01, + "learning_rate": 4.997479594478554e-05, + "loss": 1.1977, + "step": 14393 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974792418485904e-05, + "loss": 1.0153, + "step": 14394 + }, + { + "epoch": 0.01, + "learning_rate": 4.997478889193973e-05, + "loss": 1.1233, + "step": 14395 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974785365147013e-05, + "loss": 0.9666, + "step": 14396 + }, + { + "epoch": 0.01, + "learning_rate": 4.997478183810776e-05, + "loss": 1.2151, + "step": 14397 + }, + { + "epoch": 0.01, + "learning_rate": 4.997477831082196e-05, + "loss": 1.1125, + "step": 14398 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974774783289615e-05, + "loss": 1.289, + "step": 14399 + }, + { + "epoch": 0.01, + "learning_rate": 4.997477125551073e-05, + "loss": 1.0788, + "step": 14400 + }, + { + "epoch": 0.01, + "learning_rate": 4.997476772748532e-05, + "loss": 0.9865, + "step": 14401 + }, + { + "epoch": 0.01, + "learning_rate": 4.997476419921335e-05, + "loss": 0.8677, + "step": 14402 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974760670694855e-05, + "loss": 1.1269, + "step": 14403 + }, + { + "epoch": 0.01, + "learning_rate": 4.997475714192982e-05, + "loss": 1.0795, + "step": 14404 + }, + { + "epoch": 0.01, + "learning_rate": 4.997475361291824e-05, + "loss": 1.1291, + "step": 14405 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974750083660114e-05, + "loss": 1.2704, + "step": 14406 + }, + { + "epoch": 0.01, + "learning_rate": 4.997474655415545e-05, + "loss": 1.1482, + "step": 14407 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974743024404244e-05, + "loss": 1.0979, + "step": 14408 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974739494406505e-05, + "loss": 0.8416, + "step": 14409 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974735964162225e-05, + "loss": 0.921, + "step": 14410 + }, + { + "epoch": 0.01, + "learning_rate": 4.99747324336714e-05, + "loss": 1.1402, + "step": 14411 + }, + { + "epoch": 0.01, + "learning_rate": 4.997472890293404e-05, + "loss": 1.2226, + "step": 14412 + }, + { + "epoch": 0.01, + "learning_rate": 4.997472537195014e-05, + "loss": 1.346, + "step": 14413 + }, + { + "epoch": 0.01, + "learning_rate": 4.99747218407197e-05, + "loss": 0.9469, + "step": 14414 + }, + { + "epoch": 0.01, + "learning_rate": 4.997471830924272e-05, + "loss": 1.1293, + "step": 14415 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974714777519196e-05, + "loss": 0.9046, + "step": 14416 + }, + { + "epoch": 0.01, + "learning_rate": 4.997471124554913e-05, + "loss": 0.8587, + "step": 14417 + }, + { + "epoch": 0.01, + "learning_rate": 4.997470771333253e-05, + "loss": 1.1661, + "step": 14418 + }, + { + "epoch": 0.01, + "learning_rate": 4.997470418086939e-05, + "loss": 0.9759, + "step": 14419 + }, + { + "epoch": 0.01, + "learning_rate": 4.997470064815971e-05, + "loss": 1.3513, + "step": 14420 + }, + { + "epoch": 0.01, + "learning_rate": 4.997469711520349e-05, + "loss": 0.7998, + "step": 14421 + }, + { + "epoch": 0.01, + "learning_rate": 4.997469358200073e-05, + "loss": 0.4826, + "step": 14422 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974690048551434e-05, + "loss": 0.681, + "step": 14423 + }, + { + "epoch": 0.01, + "learning_rate": 4.997468651485558e-05, + "loss": 0.6375, + "step": 14424 + }, + { + "epoch": 0.01, + "learning_rate": 4.997468298091321e-05, + "loss": 0.532, + "step": 14425 + }, + { + "epoch": 0.01, + "learning_rate": 4.997467944672429e-05, + "loss": 0.3822, + "step": 14426 + }, + { + "epoch": 0.01, + "learning_rate": 4.997467591228883e-05, + "loss": 0.2592, + "step": 14427 + }, + { + "epoch": 0.01, + "learning_rate": 4.997467237760683e-05, + "loss": 0.264, + "step": 14428 + }, + { + "epoch": 0.01, + "learning_rate": 4.99746688426783e-05, + "loss": 0.1805, + "step": 14429 + }, + { + "epoch": 0.01, + "learning_rate": 4.997466530750322e-05, + "loss": 1.1733, + "step": 14430 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974661772081605e-05, + "loss": 1.3639, + "step": 14431 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974658236413455e-05, + "loss": 1.1364, + "step": 14432 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974654700498757e-05, + "loss": 0.8945, + "step": 14433 + }, + { + "epoch": 0.01, + "learning_rate": 4.997465116433752e-05, + "loss": 0.6625, + "step": 14434 + }, + { + "epoch": 0.01, + "learning_rate": 4.997464762792975e-05, + "loss": 0.7756, + "step": 14435 + }, + { + "epoch": 0.01, + "learning_rate": 4.997464409127544e-05, + "loss": 1.1861, + "step": 14436 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974640554374584e-05, + "loss": 1.0582, + "step": 14437 + }, + { + "epoch": 0.01, + "learning_rate": 4.997463701722719e-05, + "loss": 1.1725, + "step": 14438 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974633479833266e-05, + "loss": 1.078, + "step": 14439 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974629942192796e-05, + "loss": 0.9228, + "step": 14440 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974626404305785e-05, + "loss": 0.5655, + "step": 14441 + }, + { + "epoch": 0.01, + "learning_rate": 4.997462286617224e-05, + "loss": 0.4891, + "step": 14442 + }, + { + "epoch": 0.01, + "learning_rate": 4.997461932779215e-05, + "loss": 0.484, + "step": 14443 + }, + { + "epoch": 0.01, + "learning_rate": 4.997461578916552e-05, + "loss": 0.9818, + "step": 14444 + }, + { + "epoch": 0.01, + "learning_rate": 4.997461225029236e-05, + "loss": 1.0619, + "step": 14445 + }, + { + "epoch": 0.01, + "learning_rate": 4.997460871117267e-05, + "loss": 1.129, + "step": 14446 + }, + { + "epoch": 0.01, + "learning_rate": 4.997460517180642e-05, + "loss": 1.2111, + "step": 14447 + }, + { + "epoch": 0.01, + "learning_rate": 4.997460163219364e-05, + "loss": 0.4887, + "step": 14448 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974598092334325e-05, + "loss": 1.174, + "step": 14449 + }, + { + "epoch": 0.01, + "learning_rate": 4.997459455222847e-05, + "loss": 1.7233, + "step": 14450 + }, + { + "epoch": 0.01, + "learning_rate": 4.997459101187607e-05, + "loss": 1.5597, + "step": 14451 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974587471277137e-05, + "loss": 0.8462, + "step": 14452 + }, + { + "epoch": 0.01, + "learning_rate": 4.997458393043166e-05, + "loss": 1.3633, + "step": 14453 + }, + { + "epoch": 0.01, + "learning_rate": 4.997458038933965e-05, + "loss": 0.6299, + "step": 14454 + }, + { + "epoch": 0.01, + "learning_rate": 4.99745768480011e-05, + "loss": 0.1442, + "step": 14455 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974573306416005e-05, + "loss": 0.9584, + "step": 14456 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974569764584376e-05, + "loss": 1.3043, + "step": 14457 + }, + { + "epoch": 0.01, + "learning_rate": 4.997456622250621e-05, + "loss": 0.9104, + "step": 14458 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974562680181514e-05, + "loss": 1.1106, + "step": 14459 + }, + { + "epoch": 0.01, + "learning_rate": 4.997455913761027e-05, + "loss": 1.1653, + "step": 14460 + }, + { + "epoch": 0.01, + "learning_rate": 4.997455559479249e-05, + "loss": 0.9697, + "step": 14461 + }, + { + "epoch": 0.01, + "learning_rate": 4.997455205172817e-05, + "loss": 1.2126, + "step": 14462 + }, + { + "epoch": 0.01, + "learning_rate": 4.997454850841731e-05, + "loss": 0.9386, + "step": 14463 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974544964859914e-05, + "loss": 1.1982, + "step": 14464 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974541421055985e-05, + "loss": 1.0994, + "step": 14465 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974537877005515e-05, + "loss": 1.2372, + "step": 14466 + }, + { + "epoch": 0.01, + "learning_rate": 4.99745343327085e-05, + "loss": 1.0711, + "step": 14467 + }, + { + "epoch": 0.01, + "learning_rate": 4.997453078816495e-05, + "loss": 1.279, + "step": 14468 + }, + { + "epoch": 0.01, + "learning_rate": 4.997452724337486e-05, + "loss": 1.1346, + "step": 14469 + }, + { + "epoch": 0.01, + "learning_rate": 4.997452369833824e-05, + "loss": 1.0648, + "step": 14470 + }, + { + "epoch": 0.01, + "learning_rate": 4.997452015305508e-05, + "loss": 1.367, + "step": 14471 + }, + { + "epoch": 0.01, + "learning_rate": 4.997451660752538e-05, + "loss": 1.1264, + "step": 14472 + }, + { + "epoch": 0.01, + "learning_rate": 4.997451306174914e-05, + "loss": 1.1409, + "step": 14473 + }, + { + "epoch": 0.01, + "learning_rate": 4.997450951572636e-05, + "loss": 0.9711, + "step": 14474 + }, + { + "epoch": 0.01, + "learning_rate": 4.997450596945705e-05, + "loss": 0.9547, + "step": 14475 + }, + { + "epoch": 0.01, + "learning_rate": 4.99745024229412e-05, + "loss": 1.4691, + "step": 14476 + }, + { + "epoch": 0.01, + "learning_rate": 4.997449887617881e-05, + "loss": 1.1123, + "step": 14477 + }, + { + "epoch": 0.01, + "learning_rate": 4.997449532916988e-05, + "loss": 1.3364, + "step": 14478 + }, + { + "epoch": 0.01, + "learning_rate": 4.997449178191442e-05, + "loss": 1.0358, + "step": 14479 + }, + { + "epoch": 0.01, + "learning_rate": 4.997448823441241e-05, + "loss": 1.5155, + "step": 14480 + }, + { + "epoch": 0.01, + "learning_rate": 4.997448468666388e-05, + "loss": 1.4649, + "step": 14481 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974481138668796e-05, + "loss": 1.2042, + "step": 14482 + }, + { + "epoch": 0.01, + "learning_rate": 4.997447759042718e-05, + "loss": 0.8581, + "step": 14483 + }, + { + "epoch": 0.01, + "learning_rate": 4.997447404193903e-05, + "loss": 1.2848, + "step": 14484 + }, + { + "epoch": 0.01, + "learning_rate": 4.997447049320434e-05, + "loss": 0.6986, + "step": 14485 + }, + { + "epoch": 0.01, + "learning_rate": 4.997446694422311e-05, + "loss": 0.9755, + "step": 14486 + }, + { + "epoch": 0.01, + "learning_rate": 4.997446339499535e-05, + "loss": 1.2767, + "step": 14487 + }, + { + "epoch": 0.01, + "learning_rate": 4.997445984552104e-05, + "loss": 1.1844, + "step": 14488 + }, + { + "epoch": 0.01, + "learning_rate": 4.99744562958002e-05, + "loss": 1.7483, + "step": 14489 + }, + { + "epoch": 0.01, + "learning_rate": 4.997445274583282e-05, + "loss": 1.4285, + "step": 14490 + }, + { + "epoch": 0.01, + "learning_rate": 4.997444919561891e-05, + "loss": 1.0301, + "step": 14491 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974445645158464e-05, + "loss": 1.2763, + "step": 14492 + }, + { + "epoch": 0.01, + "learning_rate": 4.997444209445147e-05, + "loss": 1.1808, + "step": 14493 + }, + { + "epoch": 0.01, + "learning_rate": 4.997443854349795e-05, + "loss": 1.2185, + "step": 14494 + }, + { + "epoch": 0.01, + "learning_rate": 4.997443499229788e-05, + "loss": 1.1837, + "step": 14495 + }, + { + "epoch": 0.01, + "learning_rate": 4.997443144085128e-05, + "loss": 0.8031, + "step": 14496 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974427889158146e-05, + "loss": 1.2143, + "step": 14497 + }, + { + "epoch": 0.01, + "learning_rate": 4.997442433721847e-05, + "loss": 1.1006, + "step": 14498 + }, + { + "epoch": 0.01, + "learning_rate": 4.997442078503226e-05, + "loss": 1.2395, + "step": 14499 + }, + { + "epoch": 0.01, + "learning_rate": 4.997441723259951e-05, + "loss": 1.1118, + "step": 14500 + }, + { + "epoch": 0.01, + "eval_loss": 1.032680630683899, + "eval_runtime": 83.9404, + "eval_samples_per_second": 16.5, + "eval_steps_per_second": 4.134, + "step": 14500 + }, + { + "epoch": 0.01, + "learning_rate": 4.997441367992023e-05, + "loss": 1.2032, + "step": 14501 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974410126994404e-05, + "loss": 0.9744, + "step": 14502 + }, + { + "epoch": 0.01, + "learning_rate": 4.997440657382204e-05, + "loss": 1.0883, + "step": 14503 + }, + { + "epoch": 0.01, + "learning_rate": 4.997440302040315e-05, + "loss": 1.1888, + "step": 14504 + }, + { + "epoch": 0.01, + "learning_rate": 4.997439946673772e-05, + "loss": 1.1701, + "step": 14505 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974395912825744e-05, + "loss": 1.5214, + "step": 14506 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974392358667243e-05, + "loss": 1.174, + "step": 14507 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974388804262196e-05, + "loss": 1.1367, + "step": 14508 + }, + { + "epoch": 0.01, + "learning_rate": 4.997438524961062e-05, + "loss": 0.7751, + "step": 14509 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974381694712505e-05, + "loss": 1.1393, + "step": 14510 + }, + { + "epoch": 0.01, + "learning_rate": 4.997437813956785e-05, + "loss": 1.1476, + "step": 14511 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974374584176665e-05, + "loss": 1.2118, + "step": 14512 + }, + { + "epoch": 0.01, + "learning_rate": 4.997437102853893e-05, + "loss": 1.3143, + "step": 14513 + }, + { + "epoch": 0.01, + "learning_rate": 4.997436747265467e-05, + "loss": 1.0653, + "step": 14514 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974363916523875e-05, + "loss": 1.2056, + "step": 14515 + }, + { + "epoch": 0.01, + "learning_rate": 4.997436036014654e-05, + "loss": 0.8385, + "step": 14516 + }, + { + "epoch": 0.01, + "learning_rate": 4.997435680352267e-05, + "loss": 1.083, + "step": 14517 + }, + { + "epoch": 0.01, + "learning_rate": 4.997435324665226e-05, + "loss": 1.1865, + "step": 14518 + }, + { + "epoch": 0.01, + "learning_rate": 4.997434968953532e-05, + "loss": 1.0839, + "step": 14519 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974346132171834e-05, + "loss": 1.1288, + "step": 14520 + }, + { + "epoch": 0.01, + "learning_rate": 4.997434257456182e-05, + "loss": 1.1401, + "step": 14521 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974339016705265e-05, + "loss": 0.758, + "step": 14522 + }, + { + "epoch": 0.01, + "learning_rate": 4.997433545860219e-05, + "loss": 1.0797, + "step": 14523 + }, + { + "epoch": 0.01, + "learning_rate": 4.997433190025256e-05, + "loss": 1.3489, + "step": 14524 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974328341656394e-05, + "loss": 1.1971, + "step": 14525 + }, + { + "epoch": 0.01, + "learning_rate": 4.99743247828137e-05, + "loss": 0.9595, + "step": 14526 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974321223724465e-05, + "loss": 0.982, + "step": 14527 + }, + { + "epoch": 0.01, + "learning_rate": 4.997431766438869e-05, + "loss": 1.2661, + "step": 14528 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974314104806394e-05, + "loss": 0.7192, + "step": 14529 + }, + { + "epoch": 0.01, + "learning_rate": 4.997431054497756e-05, + "loss": 1.1649, + "step": 14530 + }, + { + "epoch": 0.01, + "learning_rate": 4.997430698490217e-05, + "loss": 1.2051, + "step": 14531 + }, + { + "epoch": 0.01, + "learning_rate": 4.997430342458026e-05, + "loss": 1.1533, + "step": 14532 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974299864011815e-05, + "loss": 0.7428, + "step": 14533 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974296303196835e-05, + "loss": 0.5662, + "step": 14534 + }, + { + "epoch": 0.01, + "learning_rate": 4.997429274213531e-05, + "loss": 1.1641, + "step": 14535 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974289180827253e-05, + "loss": 1.0921, + "step": 14536 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974285619272665e-05, + "loss": 0.9288, + "step": 14537 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974282057471535e-05, + "loss": 1.2262, + "step": 14538 + }, + { + "epoch": 0.01, + "learning_rate": 4.997427849542388e-05, + "loss": 0.985, + "step": 14539 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974274933129675e-05, + "loss": 1.1805, + "step": 14540 + }, + { + "epoch": 0.01, + "learning_rate": 4.997427137058894e-05, + "loss": 1.1029, + "step": 14541 + }, + { + "epoch": 0.01, + "learning_rate": 4.997426780780168e-05, + "loss": 0.5841, + "step": 14542 + }, + { + "epoch": 0.01, + "learning_rate": 4.997426424476787e-05, + "loss": 0.9382, + "step": 14543 + }, + { + "epoch": 0.01, + "learning_rate": 4.997426068148753e-05, + "loss": 0.9929, + "step": 14544 + }, + { + "epoch": 0.01, + "learning_rate": 4.997425711796066e-05, + "loss": 1.0577, + "step": 14545 + }, + { + "epoch": 0.01, + "learning_rate": 4.997425355418724e-05, + "loss": 1.0238, + "step": 14546 + }, + { + "epoch": 0.01, + "learning_rate": 4.99742499901673e-05, + "loss": 1.0698, + "step": 14547 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974246425900816e-05, + "loss": 1.0617, + "step": 14548 + }, + { + "epoch": 0.01, + "learning_rate": 4.99742428613878e-05, + "loss": 0.9446, + "step": 14549 + }, + { + "epoch": 0.01, + "learning_rate": 4.997423929662825e-05, + "loss": 0.715, + "step": 14550 + }, + { + "epoch": 0.01, + "learning_rate": 4.997423573162216e-05, + "loss": 0.6863, + "step": 14551 + }, + { + "epoch": 0.01, + "learning_rate": 4.997423216636954e-05, + "loss": 1.0478, + "step": 14552 + }, + { + "epoch": 0.01, + "learning_rate": 4.997422860087039e-05, + "loss": 1.3427, + "step": 14553 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974225035124696e-05, + "loss": 1.3418, + "step": 14554 + }, + { + "epoch": 0.01, + "learning_rate": 4.997422146913247e-05, + "loss": 1.3523, + "step": 14555 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974217902893706e-05, + "loss": 1.4194, + "step": 14556 + }, + { + "epoch": 0.01, + "learning_rate": 4.997421433640841e-05, + "loss": 1.1343, + "step": 14557 + }, + { + "epoch": 0.01, + "learning_rate": 4.997421076967658e-05, + "loss": 1.0051, + "step": 14558 + }, + { + "epoch": 0.01, + "learning_rate": 4.997420720269821e-05, + "loss": 1.3802, + "step": 14559 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974203635473304e-05, + "loss": 1.1661, + "step": 14560 + }, + { + "epoch": 0.01, + "learning_rate": 4.997420006800187e-05, + "loss": 1.1622, + "step": 14561 + }, + { + "epoch": 0.01, + "learning_rate": 4.99741965002839e-05, + "loss": 1.3033, + "step": 14562 + }, + { + "epoch": 0.01, + "learning_rate": 4.99741929323194e-05, + "loss": 0.9048, + "step": 14563 + }, + { + "epoch": 0.01, + "learning_rate": 4.997418936410835e-05, + "loss": 2.2987, + "step": 14564 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974185795650776e-05, + "loss": 1.6775, + "step": 14565 + }, + { + "epoch": 0.01, + "learning_rate": 4.997418222694667e-05, + "loss": 0.9796, + "step": 14566 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974178657996024e-05, + "loss": 1.2108, + "step": 14567 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974175088798846e-05, + "loss": 0.3747, + "step": 14568 + }, + { + "epoch": 0.01, + "learning_rate": 4.997417151935513e-05, + "loss": 1.1472, + "step": 14569 + }, + { + "epoch": 0.01, + "learning_rate": 4.997416794966488e-05, + "loss": 1.1262, + "step": 14570 + }, + { + "epoch": 0.01, + "learning_rate": 4.99741643797281e-05, + "loss": 0.9259, + "step": 14571 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974160809544784e-05, + "loss": 0.2318, + "step": 14572 + }, + { + "epoch": 0.01, + "learning_rate": 4.997415723911493e-05, + "loss": 0.7396, + "step": 14573 + }, + { + "epoch": 0.01, + "learning_rate": 4.997415366843855e-05, + "loss": 1.0218, + "step": 14574 + }, + { + "epoch": 0.01, + "learning_rate": 4.997415009751563e-05, + "loss": 1.2151, + "step": 14575 + }, + { + "epoch": 0.01, + "learning_rate": 4.997414652634617e-05, + "loss": 1.0314, + "step": 14576 + }, + { + "epoch": 0.01, + "learning_rate": 4.997414295493018e-05, + "loss": 1.1444, + "step": 14577 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974139383267657e-05, + "loss": 1.1257, + "step": 14578 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974135811358605e-05, + "loss": 1.1311, + "step": 14579 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974132239203006e-05, + "loss": 1.2159, + "step": 14580 + }, + { + "epoch": 0.01, + "learning_rate": 4.997412866680089e-05, + "loss": 1.1511, + "step": 14581 + }, + { + "epoch": 0.01, + "learning_rate": 4.997412509415223e-05, + "loss": 1.0625, + "step": 14582 + }, + { + "epoch": 0.01, + "learning_rate": 4.997412152125703e-05, + "loss": 1.8244, + "step": 14583 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974117948115304e-05, + "loss": 1.1516, + "step": 14584 + }, + { + "epoch": 0.01, + "learning_rate": 4.997411437472704e-05, + "loss": 0.9846, + "step": 14585 + }, + { + "epoch": 0.01, + "learning_rate": 4.997411080109225e-05, + "loss": 0.9495, + "step": 14586 + }, + { + "epoch": 0.01, + "learning_rate": 4.997410722721092e-05, + "loss": 1.0236, + "step": 14587 + }, + { + "epoch": 0.01, + "learning_rate": 4.997410365308306e-05, + "loss": 1.0757, + "step": 14588 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974100078708666e-05, + "loss": 1.1619, + "step": 14589 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974096504087734e-05, + "loss": 0.9697, + "step": 14590 + }, + { + "epoch": 0.01, + "learning_rate": 4.997409292922027e-05, + "loss": 1.085, + "step": 14591 + }, + { + "epoch": 0.01, + "learning_rate": 4.997408935410627e-05, + "loss": 1.4168, + "step": 14592 + }, + { + "epoch": 0.01, + "learning_rate": 4.997408577874574e-05, + "loss": 1.0358, + "step": 14593 + }, + { + "epoch": 0.01, + "learning_rate": 4.997408220313867e-05, + "loss": 0.9766, + "step": 14594 + }, + { + "epoch": 0.01, + "learning_rate": 4.997407862728507e-05, + "loss": 1.0095, + "step": 14595 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974075051184945e-05, + "loss": 1.1314, + "step": 14596 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974071474838275e-05, + "loss": 1.4483, + "step": 14597 + }, + { + "epoch": 0.01, + "learning_rate": 4.997406789824508e-05, + "loss": 0.9681, + "step": 14598 + }, + { + "epoch": 0.01, + "learning_rate": 4.997406432140534e-05, + "loss": 1.0439, + "step": 14599 + }, + { + "epoch": 0.01, + "learning_rate": 4.997406074431907e-05, + "loss": 1.0032, + "step": 14600 + }, + { + "epoch": 0.01, + "learning_rate": 4.997405716698628e-05, + "loss": 1.1184, + "step": 14601 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974053589406946e-05, + "loss": 1.1359, + "step": 14602 + }, + { + "epoch": 0.01, + "learning_rate": 4.997405001158108e-05, + "loss": 1.0905, + "step": 14603 + }, + { + "epoch": 0.01, + "learning_rate": 4.997404643350867e-05, + "loss": 1.1548, + "step": 14604 + }, + { + "epoch": 0.01, + "learning_rate": 4.997404285518974e-05, + "loss": 0.956, + "step": 14605 + }, + { + "epoch": 0.01, + "learning_rate": 4.997403927662428e-05, + "loss": 1.1926, + "step": 14606 + }, + { + "epoch": 0.01, + "learning_rate": 4.997403569781227e-05, + "loss": 1.0824, + "step": 14607 + }, + { + "epoch": 0.01, + "learning_rate": 4.997403211875374e-05, + "loss": 1.0924, + "step": 14608 + }, + { + "epoch": 0.01, + "learning_rate": 4.997402853944868e-05, + "loss": 1.0906, + "step": 14609 + }, + { + "epoch": 0.01, + "learning_rate": 4.997402495989708e-05, + "loss": 1.2025, + "step": 14610 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974021380098944e-05, + "loss": 1.3244, + "step": 14611 + }, + { + "epoch": 0.01, + "learning_rate": 4.997401780005428e-05, + "loss": 1.0882, + "step": 14612 + }, + { + "epoch": 0.01, + "learning_rate": 4.997401421976308e-05, + "loss": 1.0168, + "step": 14613 + }, + { + "epoch": 0.01, + "learning_rate": 4.997401063922535e-05, + "loss": 0.8935, + "step": 14614 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974007058441084e-05, + "loss": 1.1162, + "step": 14615 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974003477410295e-05, + "loss": 0.8198, + "step": 14616 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973999896132964e-05, + "loss": 0.7165, + "step": 14617 + }, + { + "epoch": 0.01, + "learning_rate": 4.997399631460909e-05, + "loss": 1.4271, + "step": 14618 + }, + { + "epoch": 0.01, + "learning_rate": 4.99739927328387e-05, + "loss": 1.2403, + "step": 14619 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973989150821775e-05, + "loss": 1.1047, + "step": 14620 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973985568558315e-05, + "loss": 0.9785, + "step": 14621 + }, + { + "epoch": 0.01, + "learning_rate": 4.997398198604832e-05, + "loss": 0.9175, + "step": 14622 + }, + { + "epoch": 0.01, + "learning_rate": 4.997397840329179e-05, + "loss": 0.9679, + "step": 14623 + }, + { + "epoch": 0.01, + "learning_rate": 4.997397482028874e-05, + "loss": 1.4641, + "step": 14624 + }, + { + "epoch": 0.01, + "learning_rate": 4.997397123703914e-05, + "loss": 1.4326, + "step": 14625 + }, + { + "epoch": 0.01, + "learning_rate": 4.997396765354302e-05, + "loss": 0.6138, + "step": 14626 + }, + { + "epoch": 0.01, + "learning_rate": 4.997396406980036e-05, + "loss": 0.5195, + "step": 14627 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973960485811176e-05, + "loss": 0.5505, + "step": 14628 + }, + { + "epoch": 0.01, + "learning_rate": 4.997395690157546e-05, + "loss": 1.2871, + "step": 14629 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973953317093205e-05, + "loss": 1.3319, + "step": 14630 + }, + { + "epoch": 0.01, + "learning_rate": 4.997394973236442e-05, + "loss": 1.171, + "step": 14631 + }, + { + "epoch": 0.01, + "learning_rate": 4.997394614738909e-05, + "loss": 1.0855, + "step": 14632 + }, + { + "epoch": 0.01, + "learning_rate": 4.997394256216725e-05, + "loss": 0.9017, + "step": 14633 + }, + { + "epoch": 0.01, + "learning_rate": 4.997393897669886e-05, + "loss": 0.8217, + "step": 14634 + }, + { + "epoch": 0.01, + "learning_rate": 4.997393539098395e-05, + "loss": 0.5106, + "step": 14635 + }, + { + "epoch": 0.01, + "learning_rate": 4.99739318050225e-05, + "loss": 0.6605, + "step": 14636 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973928218814525e-05, + "loss": 1.1345, + "step": 14637 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973924632360014e-05, + "loss": 1.3363, + "step": 14638 + }, + { + "epoch": 0.01, + "learning_rate": 4.997392104565897e-05, + "loss": 1.2334, + "step": 14639 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973917458711397e-05, + "loss": 1.2053, + "step": 14640 + }, + { + "epoch": 0.01, + "learning_rate": 4.997391387151728e-05, + "loss": 1.3028, + "step": 14641 + }, + { + "epoch": 0.01, + "learning_rate": 4.997391028407665e-05, + "loss": 1.1134, + "step": 14642 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973906696389475e-05, + "loss": 1.096, + "step": 14643 + }, + { + "epoch": 0.01, + "learning_rate": 4.997390310845578e-05, + "loss": 1.1165, + "step": 14644 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973899520275544e-05, + "loss": 1.0725, + "step": 14645 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973895931848774e-05, + "loss": 0.9818, + "step": 14646 + }, + { + "epoch": 0.01, + "learning_rate": 4.997389234317548e-05, + "loss": 0.951, + "step": 14647 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973888754255646e-05, + "loss": 1.2996, + "step": 14648 + }, + { + "epoch": 0.01, + "learning_rate": 4.997388516508929e-05, + "loss": 1.0835, + "step": 14649 + }, + { + "epoch": 0.01, + "learning_rate": 4.997388157567639e-05, + "loss": 1.3664, + "step": 14650 + }, + { + "epoch": 0.01, + "learning_rate": 4.997387798601696e-05, + "loss": 1.3241, + "step": 14651 + }, + { + "epoch": 0.01, + "learning_rate": 4.997387439611101e-05, + "loss": 0.752, + "step": 14652 + }, + { + "epoch": 0.01, + "learning_rate": 4.997387080595852e-05, + "loss": 0.9959, + "step": 14653 + }, + { + "epoch": 0.01, + "learning_rate": 4.99738672155595e-05, + "loss": 1.0986, + "step": 14654 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973863624913944e-05, + "loss": 1.1846, + "step": 14655 + }, + { + "epoch": 0.01, + "learning_rate": 4.997386003402187e-05, + "loss": 1.1293, + "step": 14656 + }, + { + "epoch": 0.01, + "learning_rate": 4.997385644288325e-05, + "loss": 1.0859, + "step": 14657 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973852851498107e-05, + "loss": 1.0269, + "step": 14658 + }, + { + "epoch": 0.01, + "learning_rate": 4.997384925986643e-05, + "loss": 1.2403, + "step": 14659 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973845667988216e-05, + "loss": 0.9942, + "step": 14660 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973842075863483e-05, + "loss": 1.0663, + "step": 14661 + }, + { + "epoch": 0.01, + "learning_rate": 4.997383848349221e-05, + "loss": 1.0268, + "step": 14662 + }, + { + "epoch": 0.01, + "learning_rate": 4.99738348908744e-05, + "loss": 1.1241, + "step": 14663 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973831298010074e-05, + "loss": 1.0425, + "step": 14664 + }, + { + "epoch": 0.01, + "learning_rate": 4.997382770489921e-05, + "loss": 0.7101, + "step": 14665 + }, + { + "epoch": 0.01, + "learning_rate": 4.997382411154181e-05, + "loss": 0.8925, + "step": 14666 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973820517937886e-05, + "loss": 1.0695, + "step": 14667 + }, + { + "epoch": 0.01, + "learning_rate": 4.997381692408743e-05, + "loss": 1.1682, + "step": 14668 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973813329990444e-05, + "loss": 1.0653, + "step": 14669 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973809735646926e-05, + "loss": 1.0438, + "step": 14670 + }, + { + "epoch": 0.01, + "learning_rate": 4.997380614105687e-05, + "loss": 0.848, + "step": 14671 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973802546220293e-05, + "loss": 0.9973, + "step": 14672 + }, + { + "epoch": 0.01, + "learning_rate": 4.997379895113718e-05, + "loss": 1.1369, + "step": 14673 + }, + { + "epoch": 0.01, + "learning_rate": 4.997379535580754e-05, + "loss": 1.0308, + "step": 14674 + }, + { + "epoch": 0.01, + "learning_rate": 4.997379176023136e-05, + "loss": 1.2479, + "step": 14675 + }, + { + "epoch": 0.01, + "learning_rate": 4.997378816440866e-05, + "loss": 0.8342, + "step": 14676 + }, + { + "epoch": 0.01, + "learning_rate": 4.997378456833942e-05, + "loss": 1.2351, + "step": 14677 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973780972023654e-05, + "loss": 1.0975, + "step": 14678 + }, + { + "epoch": 0.01, + "learning_rate": 4.997377737546136e-05, + "loss": 1.1363, + "step": 14679 + }, + { + "epoch": 0.01, + "learning_rate": 4.997377377865253e-05, + "loss": 0.975, + "step": 14680 + }, + { + "epoch": 0.01, + "learning_rate": 4.997377018159718e-05, + "loss": 0.724, + "step": 14681 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973766584295287e-05, + "loss": 0.3843, + "step": 14682 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973762986746866e-05, + "loss": 0.3052, + "step": 14683 + }, + { + "epoch": 0.01, + "learning_rate": 4.997375938895192e-05, + "loss": 0.3796, + "step": 14684 + }, + { + "epoch": 0.01, + "learning_rate": 4.997375579091044e-05, + "loss": 0.3063, + "step": 14685 + }, + { + "epoch": 0.01, + "learning_rate": 4.997375219262243e-05, + "loss": 0.8041, + "step": 14686 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973748594087886e-05, + "loss": 1.13, + "step": 14687 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973744995306816e-05, + "loss": 1.1825, + "step": 14688 + }, + { + "epoch": 0.01, + "learning_rate": 4.997374139627922e-05, + "loss": 0.9396, + "step": 14689 + }, + { + "epoch": 0.01, + "learning_rate": 4.997373779700508e-05, + "loss": 1.1783, + "step": 14690 + }, + { + "epoch": 0.01, + "learning_rate": 4.997373419748442e-05, + "loss": 1.1474, + "step": 14691 + }, + { + "epoch": 0.01, + "learning_rate": 4.997373059771723e-05, + "loss": 1.1786, + "step": 14692 + }, + { + "epoch": 0.01, + "learning_rate": 4.997372699770351e-05, + "loss": 1.0563, + "step": 14693 + }, + { + "epoch": 0.01, + "learning_rate": 4.997372339744326e-05, + "loss": 0.9318, + "step": 14694 + }, + { + "epoch": 0.01, + "learning_rate": 4.997371979693648e-05, + "loss": 0.858, + "step": 14695 + }, + { + "epoch": 0.01, + "learning_rate": 4.997371619618316e-05, + "loss": 0.8403, + "step": 14696 + }, + { + "epoch": 0.01, + "learning_rate": 4.997371259518332e-05, + "loss": 0.8338, + "step": 14697 + }, + { + "epoch": 0.01, + "learning_rate": 4.997370899393695e-05, + "loss": 1.2301, + "step": 14698 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973705392444045e-05, + "loss": 1.0208, + "step": 14699 + }, + { + "epoch": 0.01, + "learning_rate": 4.997370179070462e-05, + "loss": 1.033, + "step": 14700 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973698188718656e-05, + "loss": 0.8073, + "step": 14701 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973694586486164e-05, + "loss": 1.0833, + "step": 14702 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973690984007145e-05, + "loss": 1.1689, + "step": 14703 + }, + { + "epoch": 0.01, + "learning_rate": 4.997368738128159e-05, + "loss": 1.225, + "step": 14704 + }, + { + "epoch": 0.01, + "learning_rate": 4.997368377830951e-05, + "loss": 1.1056, + "step": 14705 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973680175090896e-05, + "loss": 0.8393, + "step": 14706 + }, + { + "epoch": 0.01, + "learning_rate": 4.997367657162576e-05, + "loss": 0.91, + "step": 14707 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973672967914085e-05, + "loss": 0.9734, + "step": 14708 + }, + { + "epoch": 0.01, + "learning_rate": 4.997366936395589e-05, + "loss": 1.1687, + "step": 14709 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973665759751166e-05, + "loss": 0.9984, + "step": 14710 + }, + { + "epoch": 0.01, + "learning_rate": 4.997366215529991e-05, + "loss": 1.1054, + "step": 14711 + }, + { + "epoch": 0.01, + "learning_rate": 4.997365855060212e-05, + "loss": 1.0267, + "step": 14712 + }, + { + "epoch": 0.01, + "learning_rate": 4.99736549456578e-05, + "loss": 1.1135, + "step": 14713 + }, + { + "epoch": 0.01, + "learning_rate": 4.997365134046695e-05, + "loss": 0.9772, + "step": 14714 + }, + { + "epoch": 0.01, + "learning_rate": 4.997364773502958e-05, + "loss": 1.2222, + "step": 14715 + }, + { + "epoch": 0.01, + "learning_rate": 4.997364412934567e-05, + "loss": 0.9664, + "step": 14716 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973640523415236e-05, + "loss": 0.8003, + "step": 14717 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973636917238274e-05, + "loss": 0.6631, + "step": 14718 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973633310814785e-05, + "loss": 0.7529, + "step": 14719 + }, + { + "epoch": 0.01, + "learning_rate": 4.997362970414476e-05, + "loss": 0.9658, + "step": 14720 + }, + { + "epoch": 0.01, + "learning_rate": 4.997362609722821e-05, + "loss": 1.2275, + "step": 14721 + }, + { + "epoch": 0.01, + "learning_rate": 4.997362249006513e-05, + "loss": 1.1193, + "step": 14722 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973618882655515e-05, + "loss": 1.2217, + "step": 14723 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973615274999377e-05, + "loss": 1.0647, + "step": 14724 + }, + { + "epoch": 0.01, + "learning_rate": 4.997361166709671e-05, + "loss": 0.6829, + "step": 14725 + }, + { + "epoch": 0.01, + "learning_rate": 4.997360805894751e-05, + "loss": 1.0552, + "step": 14726 + }, + { + "epoch": 0.01, + "learning_rate": 4.997360445055179e-05, + "loss": 1.0175, + "step": 14727 + }, + { + "epoch": 0.01, + "learning_rate": 4.997360084190953e-05, + "loss": 1.0626, + "step": 14728 + }, + { + "epoch": 0.01, + "learning_rate": 4.997359723302075e-05, + "loss": 0.8943, + "step": 14729 + }, + { + "epoch": 0.01, + "learning_rate": 4.997359362388544e-05, + "loss": 1.1485, + "step": 14730 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973590014503594e-05, + "loss": 1.4467, + "step": 14731 + }, + { + "epoch": 0.01, + "learning_rate": 4.997358640487523e-05, + "loss": 0.9085, + "step": 14732 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973582795000326e-05, + "loss": 1.0796, + "step": 14733 + }, + { + "epoch": 0.01, + "learning_rate": 4.997357918487891e-05, + "loss": 1.1598, + "step": 14734 + }, + { + "epoch": 0.01, + "learning_rate": 4.997357557451094e-05, + "loss": 1.1493, + "step": 14735 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973571963896456e-05, + "loss": 1.0093, + "step": 14736 + }, + { + "epoch": 0.01, + "learning_rate": 4.997356835303545e-05, + "loss": 1.0047, + "step": 14737 + }, + { + "epoch": 0.01, + "learning_rate": 4.99735647419279e-05, + "loss": 1.1759, + "step": 14738 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973561130573834e-05, + "loss": 1.0909, + "step": 14739 + }, + { + "epoch": 0.01, + "learning_rate": 4.997355751897323e-05, + "loss": 0.8766, + "step": 14740 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973553907126117e-05, + "loss": 1.4397, + "step": 14741 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973550295032453e-05, + "loss": 0.9025, + "step": 14742 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973546682692277e-05, + "loss": 1.2771, + "step": 14743 + }, + { + "epoch": 0.01, + "learning_rate": 4.997354307010556e-05, + "loss": 1.2829, + "step": 14744 + }, + { + "epoch": 0.01, + "learning_rate": 4.997353945727232e-05, + "loss": 0.755, + "step": 14745 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973535844192555e-05, + "loss": 1.2094, + "step": 14746 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973532230866256e-05, + "loss": 1.1977, + "step": 14747 + }, + { + "epoch": 0.01, + "learning_rate": 4.997352861729343e-05, + "loss": 1.0674, + "step": 14748 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973525003474075e-05, + "loss": 1.1, + "step": 14749 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973521389408194e-05, + "loss": 1.5613, + "step": 14750 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973517775095786e-05, + "loss": 1.5573, + "step": 14751 + }, + { + "epoch": 0.01, + "learning_rate": 4.997351416053685e-05, + "loss": 1.4173, + "step": 14752 + }, + { + "epoch": 0.01, + "learning_rate": 4.997351054573138e-05, + "loss": 1.2348, + "step": 14753 + }, + { + "epoch": 0.01, + "learning_rate": 4.997350693067939e-05, + "loss": 1.2222, + "step": 14754 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973503315380866e-05, + "loss": 1.1506, + "step": 14755 + }, + { + "epoch": 0.01, + "learning_rate": 4.997349969983582e-05, + "loss": 0.746, + "step": 14756 + }, + { + "epoch": 0.01, + "learning_rate": 4.997349608404424e-05, + "loss": 0.935, + "step": 14757 + }, + { + "epoch": 0.01, + "learning_rate": 4.997349246800613e-05, + "loss": 1.1727, + "step": 14758 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973488851721504e-05, + "loss": 1.2878, + "step": 14759 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973485235190344e-05, + "loss": 1.2053, + "step": 14760 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973481618412656e-05, + "loss": 1.1233, + "step": 14761 + }, + { + "epoch": 0.01, + "learning_rate": 4.997347800138844e-05, + "loss": 1.3196, + "step": 14762 + }, + { + "epoch": 0.01, + "learning_rate": 4.99734743841177e-05, + "loss": 1.0109, + "step": 14763 + }, + { + "epoch": 0.01, + "learning_rate": 4.997347076660043e-05, + "loss": 1.2367, + "step": 14764 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973467148836626e-05, + "loss": 1.101, + "step": 14765 + }, + { + "epoch": 0.01, + "learning_rate": 4.99734635308263e-05, + "loss": 1.1594, + "step": 14766 + }, + { + "epoch": 0.01, + "learning_rate": 4.997345991256945e-05, + "loss": 1.2238, + "step": 14767 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973456294066066e-05, + "loss": 1.1648, + "step": 14768 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973452675316154e-05, + "loss": 1.1207, + "step": 14769 + }, + { + "epoch": 0.01, + "learning_rate": 4.997344905631972e-05, + "loss": 0.9738, + "step": 14770 + }, + { + "epoch": 0.01, + "learning_rate": 4.997344543707676e-05, + "loss": 1.0473, + "step": 14771 + }, + { + "epoch": 0.01, + "learning_rate": 4.997344181758726e-05, + "loss": 0.9454, + "step": 14772 + }, + { + "epoch": 0.01, + "learning_rate": 4.997343819785125e-05, + "loss": 0.8451, + "step": 14773 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973434577868704e-05, + "loss": 1.2283, + "step": 14774 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973430957639635e-05, + "loss": 0.9389, + "step": 14775 + }, + { + "epoch": 0.01, + "learning_rate": 4.997342733716403e-05, + "loss": 1.1241, + "step": 14776 + }, + { + "epoch": 0.01, + "learning_rate": 4.99734237164419e-05, + "loss": 0.9569, + "step": 14777 + }, + { + "epoch": 0.01, + "learning_rate": 4.997342009547325e-05, + "loss": 1.0957, + "step": 14778 + }, + { + "epoch": 0.01, + "learning_rate": 4.997341647425807e-05, + "loss": 0.6861, + "step": 14779 + }, + { + "epoch": 0.01, + "learning_rate": 4.997341285279637e-05, + "loss": 1.0632, + "step": 14780 + }, + { + "epoch": 0.01, + "learning_rate": 4.997340923108813e-05, + "loss": 1.0148, + "step": 14781 + }, + { + "epoch": 0.01, + "learning_rate": 4.997340560913336e-05, + "loss": 1.6072, + "step": 14782 + }, + { + "epoch": 0.01, + "learning_rate": 4.997340198693208e-05, + "loss": 1.3777, + "step": 14783 + }, + { + "epoch": 0.01, + "learning_rate": 4.997339836448426e-05, + "loss": 1.1734, + "step": 14784 + }, + { + "epoch": 0.01, + "learning_rate": 4.997339474178991e-05, + "loss": 0.4705, + "step": 14785 + }, + { + "epoch": 0.01, + "learning_rate": 4.997339111884904e-05, + "loss": 1.7411, + "step": 14786 + }, + { + "epoch": 0.01, + "learning_rate": 4.997338749566165e-05, + "loss": 2.3054, + "step": 14787 + }, + { + "epoch": 0.01, + "learning_rate": 4.997338387222773e-05, + "loss": 2.3027, + "step": 14788 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973380248547273e-05, + "loss": 2.2947, + "step": 14789 + }, + { + "epoch": 0.01, + "learning_rate": 4.99733766246203e-05, + "loss": 2.2729, + "step": 14790 + }, + { + "epoch": 0.01, + "learning_rate": 4.99733730004468e-05, + "loss": 2.2439, + "step": 14791 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973369376026765e-05, + "loss": 2.2121, + "step": 14792 + }, + { + "epoch": 0.01, + "learning_rate": 4.997336575136021e-05, + "loss": 1.6621, + "step": 14793 + }, + { + "epoch": 0.01, + "learning_rate": 4.997336212644713e-05, + "loss": 0.1036, + "step": 14794 + }, + { + "epoch": 0.01, + "learning_rate": 4.997335850128751e-05, + "loss": 0.0036, + "step": 14795 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973354875881384e-05, + "loss": 0.4035, + "step": 14796 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973351250228716e-05, + "loss": 1.617, + "step": 14797 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973347624329534e-05, + "loss": 1.0411, + "step": 14798 + }, + { + "epoch": 0.01, + "learning_rate": 4.997334399818382e-05, + "loss": 1.1408, + "step": 14799 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973340371791574e-05, + "loss": 1.0667, + "step": 14800 + }, + { + "epoch": 0.01, + "learning_rate": 4.997333674515281e-05, + "loss": 1.0492, + "step": 14801 + }, + { + "epoch": 0.01, + "learning_rate": 4.997333311826751e-05, + "loss": 1.1013, + "step": 14802 + }, + { + "epoch": 0.01, + "learning_rate": 4.997332949113569e-05, + "loss": 1.6289, + "step": 14803 + }, + { + "epoch": 0.01, + "learning_rate": 4.997332586375735e-05, + "loss": 1.1567, + "step": 14804 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973322236132475e-05, + "loss": 0.9798, + "step": 14805 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973318608261075e-05, + "loss": 1.0451, + "step": 14806 + }, + { + "epoch": 0.01, + "learning_rate": 4.997331498014315e-05, + "loss": 1.3002, + "step": 14807 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973311351778693e-05, + "loss": 0.9813, + "step": 14808 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973307723167725e-05, + "loss": 0.9681, + "step": 14809 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973304094310216e-05, + "loss": 0.894, + "step": 14810 + }, + { + "epoch": 0.01, + "learning_rate": 4.997330046520619e-05, + "loss": 0.7351, + "step": 14811 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973296835855636e-05, + "loss": 0.6698, + "step": 14812 + }, + { + "epoch": 0.01, + "learning_rate": 4.997329320625855e-05, + "loss": 0.6868, + "step": 14813 + }, + { + "epoch": 0.01, + "learning_rate": 4.997328957641495e-05, + "loss": 1.1907, + "step": 14814 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973285946324816e-05, + "loss": 1.1407, + "step": 14815 + }, + { + "epoch": 0.01, + "learning_rate": 4.997328231598816e-05, + "loss": 1.2337, + "step": 14816 + }, + { + "epoch": 0.01, + "learning_rate": 4.997327868540498e-05, + "loss": 1.1565, + "step": 14817 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973275054575264e-05, + "loss": 1.3925, + "step": 14818 + }, + { + "epoch": 0.01, + "learning_rate": 4.997327142349904e-05, + "loss": 1.2359, + "step": 14819 + }, + { + "epoch": 0.01, + "learning_rate": 4.997326779217627e-05, + "loss": 1.3999, + "step": 14820 + }, + { + "epoch": 0.01, + "learning_rate": 4.997326416060699e-05, + "loss": 0.9791, + "step": 14821 + }, + { + "epoch": 0.01, + "learning_rate": 4.997326052879118e-05, + "loss": 0.7765, + "step": 14822 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973256896728836e-05, + "loss": 1.2074, + "step": 14823 + }, + { + "epoch": 0.01, + "learning_rate": 4.997325326441997e-05, + "loss": 1.1995, + "step": 14824 + }, + { + "epoch": 0.01, + "learning_rate": 4.997324963186458e-05, + "loss": 0.9219, + "step": 14825 + }, + { + "epoch": 0.01, + "learning_rate": 4.997324599906267e-05, + "loss": 1.2767, + "step": 14826 + }, + { + "epoch": 0.01, + "learning_rate": 4.997324236601423e-05, + "loss": 1.108, + "step": 14827 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973238732719266e-05, + "loss": 0.6057, + "step": 14828 + }, + { + "epoch": 0.01, + "learning_rate": 4.997323509917778e-05, + "loss": 0.6592, + "step": 14829 + }, + { + "epoch": 0.01, + "learning_rate": 4.997323146538977e-05, + "loss": 1.0524, + "step": 14830 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973227831355226e-05, + "loss": 1.1696, + "step": 14831 + }, + { + "epoch": 0.01, + "learning_rate": 4.997322419707416e-05, + "loss": 0.9979, + "step": 14832 + }, + { + "epoch": 0.01, + "learning_rate": 4.997322056254657e-05, + "loss": 1.1966, + "step": 14833 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973216927772455e-05, + "loss": 1.2316, + "step": 14834 + }, + { + "epoch": 0.01, + "learning_rate": 4.997321329275181e-05, + "loss": 1.3542, + "step": 14835 + }, + { + "epoch": 0.01, + "learning_rate": 4.997320965748465e-05, + "loss": 1.1776, + "step": 14836 + }, + { + "epoch": 0.01, + "learning_rate": 4.997320602197096e-05, + "loss": 1.0939, + "step": 14837 + }, + { + "epoch": 0.01, + "learning_rate": 4.997320238621074e-05, + "loss": 1.1933, + "step": 14838 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973198750204e-05, + "loss": 0.9844, + "step": 14839 + }, + { + "epoch": 0.01, + "learning_rate": 4.997319511395074e-05, + "loss": 1.0345, + "step": 14840 + }, + { + "epoch": 0.01, + "learning_rate": 4.997319147745095e-05, + "loss": 0.9952, + "step": 14841 + }, + { + "epoch": 0.01, + "learning_rate": 4.997318784070463e-05, + "loss": 1.0174, + "step": 14842 + }, + { + "epoch": 0.01, + "learning_rate": 4.997318420371179e-05, + "loss": 0.8177, + "step": 14843 + }, + { + "epoch": 0.01, + "learning_rate": 4.997318056647243e-05, + "loss": 1.0, + "step": 14844 + }, + { + "epoch": 0.01, + "learning_rate": 4.997317692898654e-05, + "loss": 1.2882, + "step": 14845 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973173291254126e-05, + "loss": 1.1994, + "step": 14846 + }, + { + "epoch": 0.01, + "learning_rate": 4.997316965327519e-05, + "loss": 0.9474, + "step": 14847 + }, + { + "epoch": 0.01, + "learning_rate": 4.997316601504972e-05, + "loss": 1.3273, + "step": 14848 + }, + { + "epoch": 0.01, + "learning_rate": 4.997316237657774e-05, + "loss": 0.9667, + "step": 14849 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973158737859226e-05, + "loss": 0.8481, + "step": 14850 + }, + { + "epoch": 0.01, + "learning_rate": 4.997315509889419e-05, + "loss": 1.2891, + "step": 14851 + }, + { + "epoch": 0.01, + "learning_rate": 4.997315145968263e-05, + "loss": 1.218, + "step": 14852 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973147820224544e-05, + "loss": 1.0303, + "step": 14853 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973144180519924e-05, + "loss": 0.9202, + "step": 14854 + }, + { + "epoch": 0.01, + "learning_rate": 4.99731405405688e-05, + "loss": 1.045, + "step": 14855 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973136900371137e-05, + "loss": 1.4301, + "step": 14856 + }, + { + "epoch": 0.01, + "learning_rate": 4.997313325992695e-05, + "loss": 0.9022, + "step": 14857 + }, + { + "epoch": 0.01, + "learning_rate": 4.997312961923625e-05, + "loss": 1.0168, + "step": 14858 + }, + { + "epoch": 0.01, + "learning_rate": 4.997312597829902e-05, + "loss": 1.0202, + "step": 14859 + }, + { + "epoch": 0.01, + "learning_rate": 4.997312233711526e-05, + "loss": 1.1546, + "step": 14860 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973118695684985e-05, + "loss": 1.2422, + "step": 14861 + }, + { + "epoch": 0.01, + "learning_rate": 4.997311505400818e-05, + "loss": 0.9089, + "step": 14862 + }, + { + "epoch": 0.01, + "learning_rate": 4.997311141208485e-05, + "loss": 1.1787, + "step": 14863 + }, + { + "epoch": 0.01, + "learning_rate": 4.997310776991499e-05, + "loss": 1.0265, + "step": 14864 + }, + { + "epoch": 0.01, + "learning_rate": 4.997310412749862e-05, + "loss": 1.1003, + "step": 14865 + }, + { + "epoch": 0.01, + "learning_rate": 4.997310048483572e-05, + "loss": 1.1564, + "step": 14866 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973096841926294e-05, + "loss": 1.2079, + "step": 14867 + }, + { + "epoch": 0.01, + "learning_rate": 4.997309319877035e-05, + "loss": 1.1985, + "step": 14868 + }, + { + "epoch": 0.01, + "learning_rate": 4.997308955536788e-05, + "loss": 0.9917, + "step": 14869 + }, + { + "epoch": 0.01, + "learning_rate": 4.997308591171888e-05, + "loss": 1.0653, + "step": 14870 + }, + { + "epoch": 0.01, + "learning_rate": 4.997308226782336e-05, + "loss": 1.0926, + "step": 14871 + }, + { + "epoch": 0.01, + "learning_rate": 4.997307862368132e-05, + "loss": 0.8831, + "step": 14872 + }, + { + "epoch": 0.01, + "learning_rate": 4.997307497929276e-05, + "loss": 1.342, + "step": 14873 + }, + { + "epoch": 0.01, + "learning_rate": 4.997307133465766e-05, + "loss": 0.8392, + "step": 14874 + }, + { + "epoch": 0.01, + "learning_rate": 4.997306768977605e-05, + "loss": 1.1513, + "step": 14875 + }, + { + "epoch": 0.01, + "learning_rate": 4.997306404464791e-05, + "loss": 1.1801, + "step": 14876 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973060399273254e-05, + "loss": 1.3827, + "step": 14877 + }, + { + "epoch": 0.01, + "learning_rate": 4.997305675365206e-05, + "loss": 1.0725, + "step": 14878 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973053107784355e-05, + "loss": 0.8855, + "step": 14879 + }, + { + "epoch": 0.01, + "learning_rate": 4.997304946167013e-05, + "loss": 1.2834, + "step": 14880 + }, + { + "epoch": 0.01, + "learning_rate": 4.997304581530937e-05, + "loss": 1.0089, + "step": 14881 + }, + { + "epoch": 0.01, + "learning_rate": 4.997304216870209e-05, + "loss": 0.8269, + "step": 14882 + }, + { + "epoch": 0.01, + "learning_rate": 4.997303852184829e-05, + "loss": 1.0531, + "step": 14883 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973034874747964e-05, + "loss": 1.0446, + "step": 14884 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973031227401115e-05, + "loss": 2.3159, + "step": 14885 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973027579807745e-05, + "loss": 1.4127, + "step": 14886 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973023931967855e-05, + "loss": 1.1488, + "step": 14887 + }, + { + "epoch": 0.01, + "learning_rate": 4.997302028388143e-05, + "loss": 1.1259, + "step": 14888 + }, + { + "epoch": 0.01, + "learning_rate": 4.997301663554849e-05, + "loss": 1.1292, + "step": 14889 + }, + { + "epoch": 0.01, + "learning_rate": 4.997301298696903e-05, + "loss": 1.0684, + "step": 14890 + }, + { + "epoch": 0.01, + "learning_rate": 4.997300933814304e-05, + "loss": 0.9875, + "step": 14891 + }, + { + "epoch": 0.01, + "learning_rate": 4.997300568907053e-05, + "loss": 1.0519, + "step": 14892 + }, + { + "epoch": 0.01, + "learning_rate": 4.9973002039751496e-05, + "loss": 1.092, + "step": 14893 + }, + { + "epoch": 0.01, + "learning_rate": 4.997299839018594e-05, + "loss": 1.2802, + "step": 14894 + }, + { + "epoch": 0.01, + "learning_rate": 4.997299474037386e-05, + "loss": 1.1129, + "step": 14895 + }, + { + "epoch": 0.01, + "learning_rate": 4.997299109031526e-05, + "loss": 1.0056, + "step": 14896 + }, + { + "epoch": 0.01, + "learning_rate": 4.997298744001013e-05, + "loss": 1.1671, + "step": 14897 + }, + { + "epoch": 0.01, + "learning_rate": 4.997298378945849e-05, + "loss": 0.9901, + "step": 14898 + }, + { + "epoch": 0.01, + "learning_rate": 4.997298013866032e-05, + "loss": 0.94, + "step": 14899 + }, + { + "epoch": 0.01, + "learning_rate": 4.997297648761562e-05, + "loss": 0.9399, + "step": 14900 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972972836324405e-05, + "loss": 0.7191, + "step": 14901 + }, + { + "epoch": 0.01, + "learning_rate": 4.997296918478667e-05, + "loss": 0.909, + "step": 14902 + }, + { + "epoch": 0.01, + "learning_rate": 4.997296553300241e-05, + "loss": 1.1014, + "step": 14903 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972961880971624e-05, + "loss": 1.0286, + "step": 14904 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972958228694324e-05, + "loss": 1.1349, + "step": 14905 + }, + { + "epoch": 0.01, + "learning_rate": 4.997295457617049e-05, + "loss": 0.6595, + "step": 14906 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972950923400144e-05, + "loss": 0.7242, + "step": 14907 + }, + { + "epoch": 0.01, + "learning_rate": 4.997294727038326e-05, + "loss": 0.7094, + "step": 14908 + }, + { + "epoch": 0.01, + "learning_rate": 4.997294361711987e-05, + "loss": 0.6031, + "step": 14909 + }, + { + "epoch": 0.01, + "learning_rate": 4.997293996360995e-05, + "loss": 0.452, + "step": 14910 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972936309853504e-05, + "loss": 0.3931, + "step": 14911 + }, + { + "epoch": 0.01, + "learning_rate": 4.997293265585055e-05, + "loss": 0.7613, + "step": 14912 + }, + { + "epoch": 0.01, + "learning_rate": 4.997292900160106e-05, + "loss": 0.9411, + "step": 14913 + }, + { + "epoch": 0.01, + "learning_rate": 4.997292534710505e-05, + "loss": 0.8584, + "step": 14914 + }, + { + "epoch": 0.01, + "learning_rate": 4.997292169236252e-05, + "loss": 0.9696, + "step": 14915 + }, + { + "epoch": 0.01, + "learning_rate": 4.997291803737347e-05, + "loss": 0.8084, + "step": 14916 + }, + { + "epoch": 0.01, + "learning_rate": 4.99729143821379e-05, + "loss": 0.9684, + "step": 14917 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972910726655794e-05, + "loss": 1.0021, + "step": 14918 + }, + { + "epoch": 0.01, + "learning_rate": 4.997290707092718e-05, + "loss": 1.1548, + "step": 14919 + }, + { + "epoch": 0.01, + "learning_rate": 4.997290341495204e-05, + "loss": 1.0268, + "step": 14920 + }, + { + "epoch": 0.01, + "learning_rate": 4.997289975873038e-05, + "loss": 0.952, + "step": 14921 + }, + { + "epoch": 0.01, + "learning_rate": 4.997289610226219e-05, + "loss": 0.8096, + "step": 14922 + }, + { + "epoch": 0.01, + "learning_rate": 4.997289244554749e-05, + "loss": 0.567, + "step": 14923 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972888788586255e-05, + "loss": 1.1943, + "step": 14924 + }, + { + "epoch": 0.01, + "learning_rate": 4.997288513137851e-05, + "loss": 1.4243, + "step": 14925 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972881473924236e-05, + "loss": 0.9679, + "step": 14926 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972877816223446e-05, + "loss": 1.1361, + "step": 14927 + }, + { + "epoch": 0.01, + "learning_rate": 4.997287415827613e-05, + "loss": 1.0682, + "step": 14928 + }, + { + "epoch": 0.01, + "learning_rate": 4.997287050008229e-05, + "loss": 1.1659, + "step": 14929 + }, + { + "epoch": 0.01, + "learning_rate": 4.997286684164193e-05, + "loss": 1.0461, + "step": 14930 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972863182955054e-05, + "loss": 1.2201, + "step": 14931 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972859524021654e-05, + "loss": 1.1834, + "step": 14932 + }, + { + "epoch": 0.01, + "learning_rate": 4.997285586484173e-05, + "loss": 1.1125, + "step": 14933 + }, + { + "epoch": 0.01, + "learning_rate": 4.997285220541529e-05, + "loss": 1.0901, + "step": 14934 + }, + { + "epoch": 0.01, + "learning_rate": 4.997284854574233e-05, + "loss": 1.0592, + "step": 14935 + }, + { + "epoch": 0.01, + "learning_rate": 4.997284488582283e-05, + "loss": 1.0942, + "step": 14936 + }, + { + "epoch": 0.01, + "learning_rate": 4.997284122565682e-05, + "loss": 0.9409, + "step": 14937 + }, + { + "epoch": 0.01, + "learning_rate": 4.997283756524429e-05, + "loss": 1.2872, + "step": 14938 + }, + { + "epoch": 0.01, + "learning_rate": 4.997283390458524e-05, + "loss": 1.0326, + "step": 14939 + }, + { + "epoch": 0.01, + "learning_rate": 4.997283024367967e-05, + "loss": 1.243, + "step": 14940 + }, + { + "epoch": 0.01, + "learning_rate": 4.997282658252758e-05, + "loss": 1.1183, + "step": 14941 + }, + { + "epoch": 0.01, + "learning_rate": 4.997282292112896e-05, + "loss": 1.112, + "step": 14942 + }, + { + "epoch": 0.01, + "learning_rate": 4.997281925948382e-05, + "loss": 1.2697, + "step": 14943 + }, + { + "epoch": 0.01, + "learning_rate": 4.997281559759216e-05, + "loss": 1.11, + "step": 14944 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972811935453985e-05, + "loss": 0.5532, + "step": 14945 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972808273069286e-05, + "loss": 1.1706, + "step": 14946 + }, + { + "epoch": 0.01, + "learning_rate": 4.997280461043806e-05, + "loss": 1.176, + "step": 14947 + }, + { + "epoch": 0.01, + "learning_rate": 4.997280094756032e-05, + "loss": 1.0269, + "step": 14948 + }, + { + "epoch": 0.01, + "learning_rate": 4.997279728443606e-05, + "loss": 1.1924, + "step": 14949 + }, + { + "epoch": 0.01, + "learning_rate": 4.997279362106527e-05, + "loss": 1.3168, + "step": 14950 + }, + { + "epoch": 0.01, + "learning_rate": 4.997278995744797e-05, + "loss": 1.3441, + "step": 14951 + }, + { + "epoch": 0.01, + "learning_rate": 4.997278629358414e-05, + "loss": 1.2574, + "step": 14952 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972782629473796e-05, + "loss": 1.0412, + "step": 14953 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972778965116926e-05, + "loss": 1.0197, + "step": 14954 + }, + { + "epoch": 0.01, + "learning_rate": 4.997277530051354e-05, + "loss": 1.0773, + "step": 14955 + }, + { + "epoch": 0.01, + "learning_rate": 4.997277163566363e-05, + "loss": 1.0283, + "step": 14956 + }, + { + "epoch": 0.01, + "learning_rate": 4.99727679705672e-05, + "loss": 1.0716, + "step": 14957 + }, + { + "epoch": 0.01, + "learning_rate": 4.997276430522425e-05, + "loss": 1.0224, + "step": 14958 + }, + { + "epoch": 0.01, + "learning_rate": 4.997276063963478e-05, + "loss": 0.9932, + "step": 14959 + }, + { + "epoch": 0.01, + "learning_rate": 4.997275697379879e-05, + "loss": 0.7764, + "step": 14960 + }, + { + "epoch": 0.01, + "learning_rate": 4.997275330771628e-05, + "loss": 1.1036, + "step": 14961 + }, + { + "epoch": 0.01, + "learning_rate": 4.997274964138724e-05, + "loss": 0.9545, + "step": 14962 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972745974811696e-05, + "loss": 1.0358, + "step": 14963 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972742307989614e-05, + "loss": 1.1154, + "step": 14964 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972738640921026e-05, + "loss": 1.2059, + "step": 14965 + }, + { + "epoch": 0.01, + "learning_rate": 4.997273497360591e-05, + "loss": 1.0128, + "step": 14966 + }, + { + "epoch": 0.01, + "learning_rate": 4.997273130604428e-05, + "loss": 0.8856, + "step": 14967 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972727638236125e-05, + "loss": 0.4957, + "step": 14968 + }, + { + "epoch": 0.01, + "learning_rate": 4.997272397018145e-05, + "loss": 0.2882, + "step": 14969 + }, + { + "epoch": 0.01, + "learning_rate": 4.997272030188026e-05, + "loss": 0.2383, + "step": 14970 + }, + { + "epoch": 0.01, + "learning_rate": 4.997271663333254e-05, + "loss": 0.5858, + "step": 14971 + }, + { + "epoch": 0.01, + "learning_rate": 4.997271296453831e-05, + "loss": 1.2508, + "step": 14972 + }, + { + "epoch": 0.01, + "learning_rate": 4.997270929549754e-05, + "loss": 1.0206, + "step": 14973 + }, + { + "epoch": 0.01, + "learning_rate": 4.997270562621027e-05, + "loss": 1.0349, + "step": 14974 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972701956676476e-05, + "loss": 0.2786, + "step": 14975 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972698286896156e-05, + "loss": 0.2352, + "step": 14976 + }, + { + "epoch": 0.01, + "learning_rate": 4.997269461686933e-05, + "loss": 0.1715, + "step": 14977 + }, + { + "epoch": 0.01, + "learning_rate": 4.997269094659597e-05, + "loss": 0.1294, + "step": 14978 + }, + { + "epoch": 0.01, + "learning_rate": 4.997268727607609e-05, + "loss": 0.1131, + "step": 14979 + }, + { + "epoch": 0.01, + "learning_rate": 4.99726836053097e-05, + "loss": 0.1052, + "step": 14980 + }, + { + "epoch": 0.01, + "learning_rate": 4.997267993429678e-05, + "loss": 0.0949, + "step": 14981 + }, + { + "epoch": 0.01, + "learning_rate": 4.997267626303735e-05, + "loss": 0.5213, + "step": 14982 + }, + { + "epoch": 0.01, + "learning_rate": 4.99726725915314e-05, + "loss": 1.1957, + "step": 14983 + }, + { + "epoch": 0.01, + "learning_rate": 4.997266891977892e-05, + "loss": 0.9684, + "step": 14984 + }, + { + "epoch": 0.01, + "learning_rate": 4.997266524777993e-05, + "loss": 1.0212, + "step": 14985 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972661575534415e-05, + "loss": 1.2718, + "step": 14986 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972657903042383e-05, + "loss": 1.1513, + "step": 14987 + }, + { + "epoch": 0.01, + "learning_rate": 4.997265423030383e-05, + "loss": 1.2228, + "step": 14988 + }, + { + "epoch": 0.01, + "learning_rate": 4.997265055731876e-05, + "loss": 1.1608, + "step": 14989 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972646884087165e-05, + "loss": 0.9096, + "step": 14990 + }, + { + "epoch": 0.01, + "learning_rate": 4.997264321060906e-05, + "loss": 1.2469, + "step": 14991 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972639536884424e-05, + "loss": 1.3127, + "step": 14992 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972635862913276e-05, + "loss": 1.7783, + "step": 14993 + }, + { + "epoch": 0.01, + "learning_rate": 4.997263218869561e-05, + "loss": 0.7732, + "step": 14994 + }, + { + "epoch": 0.01, + "learning_rate": 4.997262851423142e-05, + "loss": 1.0013, + "step": 14995 + }, + { + "epoch": 0.01, + "learning_rate": 4.997262483952071e-05, + "loss": 2.2452, + "step": 14996 + }, + { + "epoch": 0.01, + "learning_rate": 4.997262116456348e-05, + "loss": 6.0102, + "step": 14997 + }, + { + "epoch": 0.01, + "learning_rate": 4.9972617489359736e-05, + "loss": 3.1733, + "step": 14998 + }, + { + "epoch": 0.01, + "learning_rate": 4.997261381390947e-05, + "loss": 1.0003, + "step": 14999 + }, + { + "epoch": 0.01, + "learning_rate": 4.997261013821268e-05, + "loss": 1.2808, + "step": 15000 + }, + { + "epoch": 0.01, + "eval_loss": 1.0408920049667358, + "eval_runtime": 84.1304, + "eval_samples_per_second": 16.463, + "eval_steps_per_second": 4.125, + "step": 15000 + } + ], + "max_steps": 1000000, + "num_train_epochs": 9223372036854775807, + "total_flos": 7.5185365450752e+17, + "trial_name": null, + "trial_params": null +}