diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,44803 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.99996650701678, + "eval_steps": 500, + "global_step": 7464, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 5e-05, + "loss": 2.9392, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998325294748127e-05, + "loss": 2.6567, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998325294748127e-05, + "loss": 1.7916, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 4.9998325294748127e-05, + "loss": 1.5362, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 4.999665058949625e-05, + "loss": 1.8376, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 4.9994975884244375e-05, + "loss": 3.8023, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 4.99933011789925e-05, + "loss": 2.5719, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 4.9991626473740623e-05, + "loss": 1.6241, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 4.998995176848875e-05, + "loss": 1.6154, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 4.998827706323687e-05, + "loss": 1.6939, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.998827706323687e-05, + "loss": 2.5177, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 4.998827706323687e-05, + "loss": 2.4488, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 4.9986602357984996e-05, + "loss": 2.3554, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 4.998492765273312e-05, + "loss": 1.3379, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 4.9983252947481244e-05, + "loss": 1.0237, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 4.998157824222937e-05, + "loss": 1.21, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 4.997990353697749e-05, + "loss": 1.3712, + "step": 17 + }, + { + "epoch": 0.0, + "learning_rate": 4.9978228831725624e-05, + "loss": 1.0884, + "step": 18 + }, + { + "epoch": 0.01, + "learning_rate": 4.997655412647374e-05, + "loss": 1.2374, + "step": 19 + }, + { + "epoch": 0.01, + "learning_rate": 4.9974879421221866e-05, + "loss": 1.5148, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.997320471596999e-05, + "loss": 1.5987, + "step": 21 + }, + { + "epoch": 0.01, + "learning_rate": 4.9971530010718114e-05, + "loss": 1.0174, + "step": 22 + }, + { + "epoch": 0.01, + "learning_rate": 4.996985530546624e-05, + "loss": 1.0426, + "step": 23 + }, + { + "epoch": 0.01, + "learning_rate": 4.996818060021436e-05, + "loss": 1.487, + "step": 24 + }, + { + "epoch": 0.01, + "learning_rate": 4.9966505894962487e-05, + "loss": 0.9066, + "step": 25 + }, + { + "epoch": 0.01, + "learning_rate": 4.996483118971062e-05, + "loss": 0.8649, + "step": 26 + }, + { + "epoch": 0.01, + "learning_rate": 4.996315648445874e-05, + "loss": 1.0143, + "step": 27 + }, + { + "epoch": 0.01, + "learning_rate": 4.9961481779206866e-05, + "loss": 0.9038, + "step": 28 + }, + { + "epoch": 0.01, + "learning_rate": 4.9959807073954983e-05, + "loss": 1.0776, + "step": 29 + }, + { + "epoch": 0.01, + "learning_rate": 4.995813236870311e-05, + "loss": 0.9807, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 4.995645766345123e-05, + "loss": 0.8628, + "step": 31 + }, + { + "epoch": 0.01, + "learning_rate": 4.9954782958199356e-05, + "loss": 0.9584, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 4.995310825294748e-05, + "loss": 1.045, + "step": 33 + }, + { + "epoch": 0.01, + "learning_rate": 4.995143354769561e-05, + "loss": 1.0932, + "step": 34 + }, + { + "epoch": 0.01, + "learning_rate": 4.9949758842443735e-05, + "loss": 0.944, + "step": 35 + }, + { + "epoch": 0.01, + "learning_rate": 4.994808413719186e-05, + "loss": 1.3057, + "step": 36 + }, + { + "epoch": 0.01, + "learning_rate": 4.9946409431939984e-05, + "loss": 0.9903, + "step": 37 + }, + { + "epoch": 0.01, + "learning_rate": 4.99447347266881e-05, + "loss": 0.9006, + "step": 38 + }, + { + "epoch": 0.01, + "learning_rate": 4.9943060021436226e-05, + "loss": 0.9935, + "step": 39 + }, + { + "epoch": 0.01, + "learning_rate": 4.994138531618435e-05, + "loss": 0.9132, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 4.993971061093248e-05, + "loss": 1.018, + "step": 41 + }, + { + "epoch": 0.01, + "learning_rate": 4.9938035905680605e-05, + "loss": 0.7663, + "step": 42 + }, + { + "epoch": 0.01, + "learning_rate": 4.993636120042873e-05, + "loss": 0.8465, + "step": 43 + }, + { + "epoch": 0.01, + "learning_rate": 4.993468649517685e-05, + "loss": 1.006, + "step": 44 + }, + { + "epoch": 0.01, + "learning_rate": 4.993301178992498e-05, + "loss": 0.9723, + "step": 45 + }, + { + "epoch": 0.01, + "learning_rate": 4.99313370846731e-05, + "loss": 0.8623, + "step": 46 + }, + { + "epoch": 0.01, + "learning_rate": 4.992966237942122e-05, + "loss": 1.0334, + "step": 47 + }, + { + "epoch": 0.01, + "learning_rate": 4.9927987674169343e-05, + "loss": 1.071, + "step": 48 + }, + { + "epoch": 0.01, + "learning_rate": 4.9926312968917474e-05, + "loss": 0.9592, + "step": 49 + }, + { + "epoch": 0.01, + "learning_rate": 4.99246382636656e-05, + "loss": 1.0811, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 4.992296355841372e-05, + "loss": 0.99, + "step": 51 + }, + { + "epoch": 0.01, + "learning_rate": 4.992128885316185e-05, + "loss": 0.9268, + "step": 52 + }, + { + "epoch": 0.01, + "learning_rate": 4.991961414790997e-05, + "loss": 0.8148, + "step": 53 + }, + { + "epoch": 0.01, + "learning_rate": 4.9917939442658095e-05, + "loss": 0.8892, + "step": 54 + }, + { + "epoch": 0.01, + "learning_rate": 4.991626473740622e-05, + "loss": 0.9298, + "step": 55 + }, + { + "epoch": 0.02, + "learning_rate": 4.9914590032154344e-05, + "loss": 0.9847, + "step": 56 + }, + { + "epoch": 0.02, + "learning_rate": 4.991291532690247e-05, + "loss": 0.8117, + "step": 57 + }, + { + "epoch": 0.02, + "learning_rate": 4.991124062165059e-05, + "loss": 0.8883, + "step": 58 + }, + { + "epoch": 0.02, + "learning_rate": 4.9909565916398716e-05, + "loss": 0.8508, + "step": 59 + }, + { + "epoch": 0.02, + "learning_rate": 4.990789121114684e-05, + "loss": 0.9061, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 4.9906216505894965e-05, + "loss": 1.036, + "step": 61 + }, + { + "epoch": 0.02, + "learning_rate": 4.990454180064309e-05, + "loss": 0.8241, + "step": 62 + }, + { + "epoch": 0.02, + "learning_rate": 4.990286709539121e-05, + "loss": 0.8742, + "step": 63 + }, + { + "epoch": 0.02, + "learning_rate": 4.990119239013934e-05, + "loss": 1.119, + "step": 64 + }, + { + "epoch": 0.02, + "learning_rate": 4.989951768488746e-05, + "loss": 0.8472, + "step": 65 + }, + { + "epoch": 0.02, + "learning_rate": 4.9897842979635586e-05, + "loss": 0.7608, + "step": 66 + }, + { + "epoch": 0.02, + "learning_rate": 4.989616827438371e-05, + "loss": 0.9237, + "step": 67 + }, + { + "epoch": 0.02, + "learning_rate": 4.9894493569131834e-05, + "loss": 0.8608, + "step": 68 + }, + { + "epoch": 0.02, + "learning_rate": 4.989281886387996e-05, + "loss": 0.8263, + "step": 69 + }, + { + "epoch": 0.02, + "learning_rate": 4.989114415862808e-05, + "loss": 0.811, + "step": 70 + }, + { + "epoch": 0.02, + "learning_rate": 4.988946945337621e-05, + "loss": 0.7966, + "step": 71 + }, + { + "epoch": 0.02, + "learning_rate": 4.988779474812433e-05, + "loss": 0.8125, + "step": 72 + }, + { + "epoch": 0.02, + "learning_rate": 4.9886120042872455e-05, + "loss": 0.8263, + "step": 73 + }, + { + "epoch": 0.02, + "learning_rate": 4.9884445337620586e-05, + "loss": 0.9121, + "step": 74 + }, + { + "epoch": 0.02, + "learning_rate": 4.9882770632368704e-05, + "loss": 0.8683, + "step": 75 + }, + { + "epoch": 0.02, + "learning_rate": 4.988109592711683e-05, + "loss": 0.8902, + "step": 76 + }, + { + "epoch": 0.02, + "learning_rate": 4.987942122186495e-05, + "loss": 0.7441, + "step": 77 + }, + { + "epoch": 0.02, + "learning_rate": 4.9877746516613076e-05, + "loss": 0.7684, + "step": 78 + }, + { + "epoch": 0.02, + "learning_rate": 4.98760718113612e-05, + "loss": 0.6747, + "step": 79 + }, + { + "epoch": 0.02, + "learning_rate": 4.9874397106109325e-05, + "loss": 0.8418, + "step": 80 + }, + { + "epoch": 0.02, + "learning_rate": 4.987272240085745e-05, + "loss": 0.8672, + "step": 81 + }, + { + "epoch": 0.02, + "learning_rate": 4.987104769560558e-05, + "loss": 0.9059, + "step": 82 + }, + { + "epoch": 0.02, + "learning_rate": 4.9869372990353704e-05, + "loss": 0.8624, + "step": 83 + }, + { + "epoch": 0.02, + "learning_rate": 4.986769828510183e-05, + "loss": 0.9739, + "step": 84 + }, + { + "epoch": 0.02, + "learning_rate": 4.9866023579849946e-05, + "loss": 0.8516, + "step": 85 + }, + { + "epoch": 0.02, + "learning_rate": 4.986434887459807e-05, + "loss": 0.9347, + "step": 86 + }, + { + "epoch": 0.02, + "learning_rate": 4.9862674169346194e-05, + "loss": 0.7635, + "step": 87 + }, + { + "epoch": 0.02, + "learning_rate": 4.986099946409432e-05, + "loss": 0.9123, + "step": 88 + }, + { + "epoch": 0.02, + "learning_rate": 4.985932475884244e-05, + "loss": 0.7038, + "step": 89 + }, + { + "epoch": 0.02, + "learning_rate": 4.9857650053590574e-05, + "loss": 0.8283, + "step": 90 + }, + { + "epoch": 0.02, + "learning_rate": 4.98559753483387e-05, + "loss": 0.842, + "step": 91 + }, + { + "epoch": 0.02, + "learning_rate": 4.985430064308682e-05, + "loss": 1.0438, + "step": 92 + }, + { + "epoch": 0.02, + "learning_rate": 4.9852625937834946e-05, + "loss": 0.7957, + "step": 93 + }, + { + "epoch": 0.03, + "learning_rate": 4.9850951232583064e-05, + "loss": 0.7972, + "step": 94 + }, + { + "epoch": 0.03, + "learning_rate": 4.984927652733119e-05, + "loss": 0.8797, + "step": 95 + }, + { + "epoch": 0.03, + "learning_rate": 4.984760182207931e-05, + "loss": 0.705, + "step": 96 + }, + { + "epoch": 0.03, + "learning_rate": 4.984592711682744e-05, + "loss": 0.695, + "step": 97 + }, + { + "epoch": 0.03, + "learning_rate": 4.984425241157557e-05, + "loss": 0.7079, + "step": 98 + }, + { + "epoch": 0.03, + "learning_rate": 4.984257770632369e-05, + "loss": 0.8164, + "step": 99 + }, + { + "epoch": 0.03, + "learning_rate": 4.9840903001071816e-05, + "loss": 0.8113, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 4.983922829581994e-05, + "loss": 0.7871, + "step": 101 + }, + { + "epoch": 0.03, + "learning_rate": 4.9837553590568064e-05, + "loss": 0.7747, + "step": 102 + }, + { + "epoch": 0.03, + "learning_rate": 4.983587888531618e-05, + "loss": 0.8373, + "step": 103 + }, + { + "epoch": 0.03, + "learning_rate": 4.9834204180064306e-05, + "loss": 0.8499, + "step": 104 + }, + { + "epoch": 0.03, + "learning_rate": 4.983252947481244e-05, + "loss": 0.9244, + "step": 105 + }, + { + "epoch": 0.03, + "learning_rate": 4.983085476956056e-05, + "loss": 0.8403, + "step": 106 + }, + { + "epoch": 0.03, + "learning_rate": 4.9829180064308685e-05, + "loss": 0.7506, + "step": 107 + }, + { + "epoch": 0.03, + "learning_rate": 4.982750535905681e-05, + "loss": 0.7682, + "step": 108 + }, + { + "epoch": 0.03, + "learning_rate": 4.9825830653804934e-05, + "loss": 0.8765, + "step": 109 + }, + { + "epoch": 0.03, + "learning_rate": 4.982415594855306e-05, + "loss": 0.7255, + "step": 110 + }, + { + "epoch": 0.03, + "learning_rate": 4.982248124330118e-05, + "loss": 0.6878, + "step": 111 + }, + { + "epoch": 0.03, + "learning_rate": 4.9820806538049306e-05, + "loss": 1.0935, + "step": 112 + }, + { + "epoch": 0.03, + "learning_rate": 4.981913183279743e-05, + "loss": 1.0073, + "step": 113 + }, + { + "epoch": 0.03, + "learning_rate": 4.9817457127545555e-05, + "loss": 0.8339, + "step": 114 + }, + { + "epoch": 0.03, + "learning_rate": 4.981578242229368e-05, + "loss": 0.9106, + "step": 115 + }, + { + "epoch": 0.03, + "learning_rate": 4.98141077170418e-05, + "loss": 0.7108, + "step": 116 + }, + { + "epoch": 0.03, + "learning_rate": 4.981243301178993e-05, + "loss": 0.847, + "step": 117 + }, + { + "epoch": 0.03, + "learning_rate": 4.981075830653805e-05, + "loss": 1.0562, + "step": 118 + }, + { + "epoch": 0.03, + "learning_rate": 4.9809083601286176e-05, + "loss": 0.9888, + "step": 119 + }, + { + "epoch": 0.03, + "learning_rate": 4.98074088960343e-05, + "loss": 1.0542, + "step": 120 + }, + { + "epoch": 0.03, + "learning_rate": 4.9805734190782424e-05, + "loss": 1.1191, + "step": 121 + }, + { + "epoch": 0.03, + "learning_rate": 4.980405948553055e-05, + "loss": 0.7751, + "step": 122 + }, + { + "epoch": 0.03, + "learning_rate": 4.980238478027867e-05, + "loss": 0.6972, + "step": 123 + }, + { + "epoch": 0.03, + "learning_rate": 4.98007100750268e-05, + "loss": 0.7465, + "step": 124 + }, + { + "epoch": 0.03, + "learning_rate": 4.979903536977492e-05, + "loss": 0.8096, + "step": 125 + }, + { + "epoch": 0.03, + "learning_rate": 4.9797360664523045e-05, + "loss": 0.6955, + "step": 126 + }, + { + "epoch": 0.03, + "learning_rate": 4.979568595927117e-05, + "loss": 0.8186, + "step": 127 + }, + { + "epoch": 0.03, + "learning_rate": 4.9794011254019294e-05, + "loss": 0.8265, + "step": 128 + }, + { + "epoch": 0.03, + "learning_rate": 4.979233654876742e-05, + "loss": 0.847, + "step": 129 + }, + { + "epoch": 0.03, + "learning_rate": 4.979066184351555e-05, + "loss": 0.7376, + "step": 130 + }, + { + "epoch": 0.04, + "learning_rate": 4.9788987138263666e-05, + "loss": 0.6868, + "step": 131 + }, + { + "epoch": 0.04, + "learning_rate": 4.978731243301179e-05, + "loss": 0.7772, + "step": 132 + }, + { + "epoch": 0.04, + "learning_rate": 4.9785637727759915e-05, + "loss": 0.8519, + "step": 133 + }, + { + "epoch": 0.04, + "learning_rate": 4.978396302250804e-05, + "loss": 0.8817, + "step": 134 + }, + { + "epoch": 0.04, + "learning_rate": 4.978228831725616e-05, + "loss": 0.6881, + "step": 135 + }, + { + "epoch": 0.04, + "learning_rate": 4.978061361200429e-05, + "loss": 0.8228, + "step": 136 + }, + { + "epoch": 0.04, + "learning_rate": 4.977893890675241e-05, + "loss": 0.9304, + "step": 137 + }, + { + "epoch": 0.04, + "learning_rate": 4.977726420150054e-05, + "loss": 0.6981, + "step": 138 + }, + { + "epoch": 0.04, + "learning_rate": 4.977558949624867e-05, + "loss": 0.892, + "step": 139 + }, + { + "epoch": 0.04, + "learning_rate": 4.977391479099679e-05, + "loss": 0.705, + "step": 140 + }, + { + "epoch": 0.04, + "learning_rate": 4.977224008574491e-05, + "loss": 0.8988, + "step": 141 + }, + { + "epoch": 0.04, + "learning_rate": 4.977056538049303e-05, + "loss": 0.7518, + "step": 142 + }, + { + "epoch": 0.04, + "learning_rate": 4.976889067524116e-05, + "loss": 0.9121, + "step": 143 + }, + { + "epoch": 0.04, + "learning_rate": 4.976721596998928e-05, + "loss": 0.7451, + "step": 144 + }, + { + "epoch": 0.04, + "learning_rate": 4.9765541264737405e-05, + "loss": 0.7913, + "step": 145 + }, + { + "epoch": 0.04, + "learning_rate": 4.9763866559485536e-05, + "loss": 1.1875, + "step": 146 + }, + { + "epoch": 0.04, + "learning_rate": 4.976219185423366e-05, + "loss": 0.7331, + "step": 147 + }, + { + "epoch": 0.04, + "learning_rate": 4.9760517148981785e-05, + "loss": 0.7801, + "step": 148 + }, + { + "epoch": 0.04, + "learning_rate": 4.975884244372991e-05, + "loss": 0.8059, + "step": 149 + }, + { + "epoch": 0.04, + "learning_rate": 4.9757167738478026e-05, + "loss": 0.7641, + "step": 150 + }, + { + "epoch": 0.04, + "learning_rate": 4.975549303322615e-05, + "loss": 0.8704, + "step": 151 + }, + { + "epoch": 0.04, + "learning_rate": 4.9753818327974275e-05, + "loss": 0.7007, + "step": 152 + }, + { + "epoch": 0.04, + "learning_rate": 4.9752143622722406e-05, + "loss": 0.9755, + "step": 153 + }, + { + "epoch": 0.04, + "learning_rate": 4.975046891747053e-05, + "loss": 0.7712, + "step": 154 + }, + { + "epoch": 0.04, + "learning_rate": 4.9748794212218654e-05, + "loss": 0.6177, + "step": 155 + }, + { + "epoch": 0.04, + "learning_rate": 4.974711950696678e-05, + "loss": 0.6841, + "step": 156 + }, + { + "epoch": 0.04, + "learning_rate": 4.97454448017149e-05, + "loss": 0.8, + "step": 157 + }, + { + "epoch": 0.04, + "learning_rate": 4.974377009646303e-05, + "loss": 0.7895, + "step": 158 + }, + { + "epoch": 0.04, + "learning_rate": 4.9742095391211144e-05, + "loss": 0.7135, + "step": 159 + }, + { + "epoch": 0.04, + "learning_rate": 4.974042068595927e-05, + "loss": 0.7765, + "step": 160 + }, + { + "epoch": 0.04, + "learning_rate": 4.97387459807074e-05, + "loss": 0.9654, + "step": 161 + }, + { + "epoch": 0.04, + "learning_rate": 4.9737071275455524e-05, + "loss": 0.6658, + "step": 162 + }, + { + "epoch": 0.04, + "learning_rate": 4.973539657020365e-05, + "loss": 0.7369, + "step": 163 + }, + { + "epoch": 0.04, + "learning_rate": 4.973372186495177e-05, + "loss": 0.6088, + "step": 164 + }, + { + "epoch": 0.04, + "learning_rate": 4.9732047159699896e-05, + "loss": 0.6448, + "step": 165 + }, + { + "epoch": 0.04, + "learning_rate": 4.973037245444802e-05, + "loss": 0.8946, + "step": 166 + }, + { + "epoch": 0.04, + "learning_rate": 4.9728697749196145e-05, + "loss": 0.8684, + "step": 167 + }, + { + "epoch": 0.05, + "learning_rate": 4.972702304394427e-05, + "loss": 0.6573, + "step": 168 + }, + { + "epoch": 0.05, + "learning_rate": 4.972534833869239e-05, + "loss": 0.9701, + "step": 169 + }, + { + "epoch": 0.05, + "learning_rate": 4.972367363344052e-05, + "loss": 0.6526, + "step": 170 + }, + { + "epoch": 0.05, + "learning_rate": 4.972199892818864e-05, + "loss": 0.7742, + "step": 171 + }, + { + "epoch": 0.05, + "learning_rate": 4.9720324222936766e-05, + "loss": 0.6985, + "step": 172 + }, + { + "epoch": 0.05, + "learning_rate": 4.971864951768489e-05, + "loss": 0.7701, + "step": 173 + }, + { + "epoch": 0.05, + "learning_rate": 4.9716974812433014e-05, + "loss": 1.0254, + "step": 174 + }, + { + "epoch": 0.05, + "learning_rate": 4.971530010718114e-05, + "loss": 0.8379, + "step": 175 + }, + { + "epoch": 0.05, + "learning_rate": 4.971362540192926e-05, + "loss": 0.8684, + "step": 176 + }, + { + "epoch": 0.05, + "learning_rate": 4.971195069667739e-05, + "loss": 0.6276, + "step": 177 + }, + { + "epoch": 0.05, + "learning_rate": 4.971027599142551e-05, + "loss": 0.8299, + "step": 178 + }, + { + "epoch": 0.05, + "learning_rate": 4.9708601286173635e-05, + "loss": 0.6408, + "step": 179 + }, + { + "epoch": 0.05, + "learning_rate": 4.970692658092176e-05, + "loss": 0.6637, + "step": 180 + }, + { + "epoch": 0.05, + "learning_rate": 4.9705251875669884e-05, + "loss": 0.6656, + "step": 181 + }, + { + "epoch": 0.05, + "learning_rate": 4.970357717041801e-05, + "loss": 0.6243, + "step": 182 + }, + { + "epoch": 0.05, + "learning_rate": 4.970190246516613e-05, + "loss": 0.679, + "step": 183 + }, + { + "epoch": 0.05, + "learning_rate": 4.9700227759914256e-05, + "loss": 0.7503, + "step": 184 + }, + { + "epoch": 0.05, + "learning_rate": 4.969855305466238e-05, + "loss": 0.7288, + "step": 185 + }, + { + "epoch": 0.05, + "learning_rate": 4.969687834941051e-05, + "loss": 0.9351, + "step": 186 + }, + { + "epoch": 0.05, + "learning_rate": 4.969520364415863e-05, + "loss": 0.6451, + "step": 187 + }, + { + "epoch": 0.05, + "learning_rate": 4.969352893890675e-05, + "loss": 0.7161, + "step": 188 + }, + { + "epoch": 0.05, + "learning_rate": 4.969185423365488e-05, + "loss": 1.0097, + "step": 189 + }, + { + "epoch": 0.05, + "learning_rate": 4.9690179528403e-05, + "loss": 0.7679, + "step": 190 + }, + { + "epoch": 0.05, + "learning_rate": 4.9688504823151126e-05, + "loss": 0.7004, + "step": 191 + }, + { + "epoch": 0.05, + "learning_rate": 4.968683011789925e-05, + "loss": 0.8749, + "step": 192 + }, + { + "epoch": 0.05, + "learning_rate": 4.9685155412647374e-05, + "loss": 0.8248, + "step": 193 + }, + { + "epoch": 0.05, + "learning_rate": 4.9683480707395505e-05, + "loss": 0.781, + "step": 194 + }, + { + "epoch": 0.05, + "learning_rate": 4.968180600214363e-05, + "loss": 0.5861, + "step": 195 + }, + { + "epoch": 0.05, + "learning_rate": 4.9680131296891754e-05, + "loss": 0.6953, + "step": 196 + }, + { + "epoch": 0.05, + "learning_rate": 4.967845659163987e-05, + "loss": 0.8304, + "step": 197 + }, + { + "epoch": 0.05, + "learning_rate": 4.9676781886387995e-05, + "loss": 0.693, + "step": 198 + }, + { + "epoch": 0.05, + "learning_rate": 4.967510718113612e-05, + "loss": 0.8778, + "step": 199 + }, + { + "epoch": 0.05, + "learning_rate": 4.9673432475884244e-05, + "loss": 0.8689, + "step": 200 + }, + { + "epoch": 0.05, + "learning_rate": 4.967175777063237e-05, + "loss": 0.7132, + "step": 201 + }, + { + "epoch": 0.05, + "learning_rate": 4.96700830653805e-05, + "loss": 0.7991, + "step": 202 + }, + { + "epoch": 0.05, + "learning_rate": 4.966840836012862e-05, + "loss": 0.6917, + "step": 203 + }, + { + "epoch": 0.05, + "learning_rate": 4.966673365487675e-05, + "loss": 0.805, + "step": 204 + }, + { + "epoch": 0.05, + "learning_rate": 4.966505894962487e-05, + "loss": 0.7778, + "step": 205 + }, + { + "epoch": 0.06, + "learning_rate": 4.966338424437299e-05, + "loss": 0.8075, + "step": 206 + }, + { + "epoch": 0.06, + "learning_rate": 4.966170953912111e-05, + "loss": 0.6705, + "step": 207 + }, + { + "epoch": 0.06, + "learning_rate": 4.966003483386924e-05, + "loss": 1.0956, + "step": 208 + }, + { + "epoch": 0.06, + "learning_rate": 4.965836012861737e-05, + "loss": 0.7419, + "step": 209 + }, + { + "epoch": 0.06, + "learning_rate": 4.965668542336549e-05, + "loss": 0.947, + "step": 210 + }, + { + "epoch": 0.06, + "learning_rate": 4.965501071811362e-05, + "loss": 0.7351, + "step": 211 + }, + { + "epoch": 0.06, + "learning_rate": 4.965333601286174e-05, + "loss": 0.7663, + "step": 212 + }, + { + "epoch": 0.06, + "learning_rate": 4.9651661307609865e-05, + "loss": 0.8233, + "step": 213 + }, + { + "epoch": 0.06, + "learning_rate": 4.964998660235799e-05, + "loss": 0.6215, + "step": 214 + }, + { + "epoch": 0.06, + "learning_rate": 4.964831189710611e-05, + "loss": 0.8252, + "step": 215 + }, + { + "epoch": 0.06, + "learning_rate": 4.964663719185423e-05, + "loss": 0.6647, + "step": 216 + }, + { + "epoch": 0.06, + "learning_rate": 4.964496248660236e-05, + "loss": 0.7588, + "step": 217 + }, + { + "epoch": 0.06, + "learning_rate": 4.9643287781350486e-05, + "loss": 0.8624, + "step": 218 + }, + { + "epoch": 0.06, + "learning_rate": 4.964161307609861e-05, + "loss": 0.7725, + "step": 219 + }, + { + "epoch": 0.06, + "learning_rate": 4.9639938370846735e-05, + "loss": 0.5739, + "step": 220 + }, + { + "epoch": 0.06, + "learning_rate": 4.963826366559486e-05, + "loss": 0.9099, + "step": 221 + }, + { + "epoch": 0.06, + "learning_rate": 4.963658896034298e-05, + "loss": 0.8543, + "step": 222 + }, + { + "epoch": 0.06, + "learning_rate": 4.963491425509111e-05, + "loss": 0.7859, + "step": 223 + }, + { + "epoch": 0.06, + "learning_rate": 4.963323954983923e-05, + "loss": 0.6921, + "step": 224 + }, + { + "epoch": 0.06, + "learning_rate": 4.9631564844587356e-05, + "loss": 0.7691, + "step": 225 + }, + { + "epoch": 0.06, + "learning_rate": 4.962989013933548e-05, + "loss": 0.8727, + "step": 226 + }, + { + "epoch": 0.06, + "learning_rate": 4.9628215434083604e-05, + "loss": 0.8126, + "step": 227 + }, + { + "epoch": 0.06, + "learning_rate": 4.962654072883173e-05, + "loss": 0.6654, + "step": 228 + }, + { + "epoch": 0.06, + "learning_rate": 4.962486602357985e-05, + "loss": 0.6062, + "step": 229 + }, + { + "epoch": 0.06, + "learning_rate": 4.962319131832798e-05, + "loss": 0.8343, + "step": 230 + }, + { + "epoch": 0.06, + "learning_rate": 4.96215166130761e-05, + "loss": 0.8167, + "step": 231 + }, + { + "epoch": 0.06, + "learning_rate": 4.9619841907824225e-05, + "loss": 0.9165, + "step": 232 + }, + { + "epoch": 0.06, + "learning_rate": 4.961816720257235e-05, + "loss": 0.6795, + "step": 233 + }, + { + "epoch": 0.06, + "learning_rate": 4.9616492497320473e-05, + "loss": 0.7137, + "step": 234 + }, + { + "epoch": 0.06, + "learning_rate": 4.96148177920686e-05, + "loss": 0.8759, + "step": 235 + }, + { + "epoch": 0.06, + "learning_rate": 4.961314308681672e-05, + "loss": 0.7053, + "step": 236 + }, + { + "epoch": 0.06, + "learning_rate": 4.9611468381564846e-05, + "loss": 0.6875, + "step": 237 + }, + { + "epoch": 0.06, + "learning_rate": 4.960979367631297e-05, + "loss": 0.7135, + "step": 238 + }, + { + "epoch": 0.06, + "learning_rate": 4.9608118971061095e-05, + "loss": 0.667, + "step": 239 + }, + { + "epoch": 0.06, + "learning_rate": 4.960644426580922e-05, + "loss": 0.771, + "step": 240 + }, + { + "epoch": 0.06, + "learning_rate": 4.960476956055734e-05, + "loss": 0.6026, + "step": 241 + }, + { + "epoch": 0.06, + "learning_rate": 4.9603094855305474e-05, + "loss": 0.901, + "step": 242 + }, + { + "epoch": 0.07, + "learning_rate": 4.960142015005359e-05, + "loss": 0.733, + "step": 243 + }, + { + "epoch": 0.07, + "learning_rate": 4.9599745444801716e-05, + "loss": 0.8894, + "step": 244 + }, + { + "epoch": 0.07, + "learning_rate": 4.959807073954984e-05, + "loss": 0.8381, + "step": 245 + }, + { + "epoch": 0.07, + "learning_rate": 4.9596396034297964e-05, + "loss": 0.7577, + "step": 246 + }, + { + "epoch": 0.07, + "learning_rate": 4.959472132904609e-05, + "loss": 0.783, + "step": 247 + }, + { + "epoch": 0.07, + "learning_rate": 4.959304662379421e-05, + "loss": 0.6368, + "step": 248 + }, + { + "epoch": 0.07, + "learning_rate": 4.959137191854234e-05, + "loss": 0.8555, + "step": 249 + }, + { + "epoch": 0.07, + "learning_rate": 4.958969721329047e-05, + "loss": 0.6398, + "step": 250 + }, + { + "epoch": 0.07, + "learning_rate": 4.958802250803859e-05, + "loss": 0.6149, + "step": 251 + }, + { + "epoch": 0.07, + "learning_rate": 4.9586347802786716e-05, + "loss": 0.8193, + "step": 252 + }, + { + "epoch": 0.07, + "learning_rate": 4.9584673097534833e-05, + "loss": 0.885, + "step": 253 + }, + { + "epoch": 0.07, + "learning_rate": 4.958299839228296e-05, + "loss": 0.6146, + "step": 254 + }, + { + "epoch": 0.07, + "learning_rate": 4.958132368703108e-05, + "loss": 0.6728, + "step": 255 + }, + { + "epoch": 0.07, + "learning_rate": 4.9579648981779206e-05, + "loss": 0.7272, + "step": 256 + }, + { + "epoch": 0.07, + "learning_rate": 4.957797427652733e-05, + "loss": 0.5939, + "step": 257 + }, + { + "epoch": 0.07, + "learning_rate": 4.957629957127546e-05, + "loss": 0.9532, + "step": 258 + }, + { + "epoch": 0.07, + "learning_rate": 4.9574624866023585e-05, + "loss": 0.7405, + "step": 259 + }, + { + "epoch": 0.07, + "learning_rate": 4.957295016077171e-05, + "loss": 0.7841, + "step": 260 + }, + { + "epoch": 0.07, + "learning_rate": 4.9571275455519834e-05, + "loss": 0.7465, + "step": 261 + }, + { + "epoch": 0.07, + "learning_rate": 4.956960075026795e-05, + "loss": 0.7444, + "step": 262 + }, + { + "epoch": 0.07, + "learning_rate": 4.9567926045016076e-05, + "loss": 0.6083, + "step": 263 + }, + { + "epoch": 0.07, + "learning_rate": 4.95662513397642e-05, + "loss": 0.6559, + "step": 264 + }, + { + "epoch": 0.07, + "learning_rate": 4.956457663451233e-05, + "loss": 0.6922, + "step": 265 + }, + { + "epoch": 0.07, + "learning_rate": 4.9562901929260455e-05, + "loss": 0.7649, + "step": 266 + }, + { + "epoch": 0.07, + "learning_rate": 4.956122722400858e-05, + "loss": 0.6724, + "step": 267 + }, + { + "epoch": 0.07, + "learning_rate": 4.95595525187567e-05, + "loss": 0.6113, + "step": 268 + }, + { + "epoch": 0.07, + "learning_rate": 4.955787781350483e-05, + "loss": 0.6903, + "step": 269 + }, + { + "epoch": 0.07, + "learning_rate": 4.955620310825295e-05, + "loss": 0.8329, + "step": 270 + }, + { + "epoch": 0.07, + "learning_rate": 4.955452840300107e-05, + "loss": 0.9347, + "step": 271 + }, + { + "epoch": 0.07, + "learning_rate": 4.9552853697749193e-05, + "loss": 0.79, + "step": 272 + }, + { + "epoch": 0.07, + "learning_rate": 4.9551178992497324e-05, + "loss": 0.6094, + "step": 273 + }, + { + "epoch": 0.07, + "learning_rate": 4.954950428724545e-05, + "loss": 0.826, + "step": 274 + }, + { + "epoch": 0.07, + "learning_rate": 4.954782958199357e-05, + "loss": 0.7519, + "step": 275 + }, + { + "epoch": 0.07, + "learning_rate": 4.95461548767417e-05, + "loss": 0.7387, + "step": 276 + }, + { + "epoch": 0.07, + "learning_rate": 4.954448017148982e-05, + "loss": 0.8139, + "step": 277 + }, + { + "epoch": 0.07, + "learning_rate": 4.9542805466237945e-05, + "loss": 0.8129, + "step": 278 + }, + { + "epoch": 0.07, + "learning_rate": 4.954113076098607e-05, + "loss": 0.6177, + "step": 279 + }, + { + "epoch": 0.08, + "learning_rate": 4.9539456055734194e-05, + "loss": 0.8297, + "step": 280 + }, + { + "epoch": 0.08, + "learning_rate": 4.953778135048232e-05, + "loss": 0.9876, + "step": 281 + }, + { + "epoch": 0.08, + "learning_rate": 4.953610664523044e-05, + "loss": 0.6102, + "step": 282 + }, + { + "epoch": 0.08, + "learning_rate": 4.9534431939978567e-05, + "loss": 1.0176, + "step": 283 + }, + { + "epoch": 0.08, + "learning_rate": 4.953275723472669e-05, + "loss": 0.7817, + "step": 284 + }, + { + "epoch": 0.08, + "learning_rate": 4.9531082529474815e-05, + "loss": 0.6881, + "step": 285 + }, + { + "epoch": 0.08, + "learning_rate": 4.952940782422294e-05, + "loss": 0.7522, + "step": 286 + }, + { + "epoch": 0.08, + "learning_rate": 4.952773311897106e-05, + "loss": 0.8622, + "step": 287 + }, + { + "epoch": 0.08, + "learning_rate": 4.952605841371919e-05, + "loss": 0.6972, + "step": 288 + }, + { + "epoch": 0.08, + "learning_rate": 4.952438370846731e-05, + "loss": 0.6779, + "step": 289 + }, + { + "epoch": 0.08, + "learning_rate": 4.9522709003215436e-05, + "loss": 0.6858, + "step": 290 + }, + { + "epoch": 0.08, + "learning_rate": 4.952103429796356e-05, + "loss": 0.7135, + "step": 291 + }, + { + "epoch": 0.08, + "learning_rate": 4.9519359592711684e-05, + "loss": 0.7291, + "step": 292 + }, + { + "epoch": 0.08, + "learning_rate": 4.951768488745981e-05, + "loss": 0.6154, + "step": 293 + }, + { + "epoch": 0.08, + "learning_rate": 4.951601018220793e-05, + "loss": 0.9425, + "step": 294 + }, + { + "epoch": 0.08, + "learning_rate": 4.951433547695606e-05, + "loss": 0.6451, + "step": 295 + }, + { + "epoch": 0.08, + "learning_rate": 4.951266077170418e-05, + "loss": 0.6018, + "step": 296 + }, + { + "epoch": 0.08, + "learning_rate": 4.9510986066452305e-05, + "loss": 0.7122, + "step": 297 + }, + { + "epoch": 0.08, + "learning_rate": 4.9509311361200436e-05, + "loss": 0.5883, + "step": 298 + }, + { + "epoch": 0.08, + "learning_rate": 4.9507636655948554e-05, + "loss": 0.7708, + "step": 299 + }, + { + "epoch": 0.08, + "learning_rate": 4.950596195069668e-05, + "loss": 0.7504, + "step": 300 + }, + { + "epoch": 0.08, + "learning_rate": 4.95042872454448e-05, + "loss": 0.7559, + "step": 301 + }, + { + "epoch": 0.08, + "learning_rate": 4.9502612540192927e-05, + "loss": 0.6839, + "step": 302 + }, + { + "epoch": 0.08, + "learning_rate": 4.950093783494105e-05, + "loss": 0.8109, + "step": 303 + }, + { + "epoch": 0.08, + "learning_rate": 4.9499263129689175e-05, + "loss": 0.7186, + "step": 304 + }, + { + "epoch": 0.08, + "learning_rate": 4.94975884244373e-05, + "loss": 0.9995, + "step": 305 + }, + { + "epoch": 0.08, + "learning_rate": 4.949591371918543e-05, + "loss": 0.892, + "step": 306 + }, + { + "epoch": 0.08, + "learning_rate": 4.9494239013933554e-05, + "loss": 0.5767, + "step": 307 + }, + { + "epoch": 0.08, + "learning_rate": 4.949256430868168e-05, + "loss": 0.5862, + "step": 308 + }, + { + "epoch": 0.08, + "learning_rate": 4.9490889603429796e-05, + "loss": 0.7002, + "step": 309 + }, + { + "epoch": 0.08, + "learning_rate": 4.948921489817792e-05, + "loss": 0.7858, + "step": 310 + }, + { + "epoch": 0.08, + "learning_rate": 4.9487540192926044e-05, + "loss": 0.7221, + "step": 311 + }, + { + "epoch": 0.08, + "learning_rate": 4.948586548767417e-05, + "loss": 0.7687, + "step": 312 + }, + { + "epoch": 0.08, + "learning_rate": 4.948419078242229e-05, + "loss": 0.738, + "step": 313 + }, + { + "epoch": 0.08, + "learning_rate": 4.9482516077170424e-05, + "loss": 0.642, + "step": 314 + }, + { + "epoch": 0.08, + "learning_rate": 4.948084137191855e-05, + "loss": 0.9598, + "step": 315 + }, + { + "epoch": 0.08, + "learning_rate": 4.947916666666667e-05, + "loss": 0.6905, + "step": 316 + }, + { + "epoch": 0.08, + "learning_rate": 4.9477491961414796e-05, + "loss": 0.664, + "step": 317 + }, + { + "epoch": 0.09, + "learning_rate": 4.9475817256162914e-05, + "loss": 0.5571, + "step": 318 + }, + { + "epoch": 0.09, + "learning_rate": 4.947414255091104e-05, + "loss": 0.7156, + "step": 319 + }, + { + "epoch": 0.09, + "learning_rate": 4.947246784565916e-05, + "loss": 0.7043, + "step": 320 + }, + { + "epoch": 0.09, + "learning_rate": 4.947079314040729e-05, + "loss": 0.8427, + "step": 321 + }, + { + "epoch": 0.09, + "learning_rate": 4.946911843515542e-05, + "loss": 0.8915, + "step": 322 + }, + { + "epoch": 0.09, + "learning_rate": 4.946744372990354e-05, + "loss": 0.8585, + "step": 323 + }, + { + "epoch": 0.09, + "learning_rate": 4.9465769024651666e-05, + "loss": 0.7424, + "step": 324 + }, + { + "epoch": 0.09, + "learning_rate": 4.946409431939979e-05, + "loss": 0.6935, + "step": 325 + }, + { + "epoch": 0.09, + "learning_rate": 4.9462419614147914e-05, + "loss": 1.1106, + "step": 326 + }, + { + "epoch": 0.09, + "learning_rate": 4.946074490889603e-05, + "loss": 0.6214, + "step": 327 + }, + { + "epoch": 0.09, + "learning_rate": 4.9459070203644156e-05, + "loss": 0.5844, + "step": 328 + }, + { + "epoch": 0.09, + "learning_rate": 4.945739549839229e-05, + "loss": 0.753, + "step": 329 + }, + { + "epoch": 0.09, + "learning_rate": 4.945572079314041e-05, + "loss": 0.9278, + "step": 330 + }, + { + "epoch": 0.09, + "learning_rate": 4.9454046087888535e-05, + "loss": 0.5755, + "step": 331 + }, + { + "epoch": 0.09, + "learning_rate": 4.945237138263666e-05, + "loss": 0.5777, + "step": 332 + }, + { + "epoch": 0.09, + "learning_rate": 4.9450696677384784e-05, + "loss": 0.7309, + "step": 333 + }, + { + "epoch": 0.09, + "learning_rate": 4.944902197213291e-05, + "loss": 0.6205, + "step": 334 + }, + { + "epoch": 0.09, + "learning_rate": 4.944734726688103e-05, + "loss": 0.8122, + "step": 335 + }, + { + "epoch": 0.09, + "learning_rate": 4.9445672561629156e-05, + "loss": 0.687, + "step": 336 + }, + { + "epoch": 0.09, + "learning_rate": 4.944399785637728e-05, + "loss": 0.7499, + "step": 337 + }, + { + "epoch": 0.09, + "learning_rate": 4.9442323151125405e-05, + "loss": 0.7949, + "step": 338 + }, + { + "epoch": 0.09, + "learning_rate": 4.944064844587353e-05, + "loss": 0.8329, + "step": 339 + }, + { + "epoch": 0.09, + "learning_rate": 4.943897374062165e-05, + "loss": 0.706, + "step": 340 + }, + { + "epoch": 0.09, + "learning_rate": 4.943729903536978e-05, + "loss": 0.8541, + "step": 341 + }, + { + "epoch": 0.09, + "learning_rate": 4.94356243301179e-05, + "loss": 0.5058, + "step": 342 + }, + { + "epoch": 0.09, + "learning_rate": 4.9433949624866026e-05, + "loss": 0.6338, + "step": 343 + }, + { + "epoch": 0.09, + "learning_rate": 4.943227491961415e-05, + "loss": 0.6761, + "step": 344 + }, + { + "epoch": 0.09, + "learning_rate": 4.9430600214362274e-05, + "loss": 0.8447, + "step": 345 + }, + { + "epoch": 0.09, + "learning_rate": 4.94289255091104e-05, + "loss": 1.0754, + "step": 346 + }, + { + "epoch": 0.09, + "learning_rate": 4.942725080385852e-05, + "loss": 0.6639, + "step": 347 + }, + { + "epoch": 0.09, + "learning_rate": 4.942557609860665e-05, + "loss": 0.7888, + "step": 348 + }, + { + "epoch": 0.09, + "learning_rate": 4.942390139335477e-05, + "loss": 0.7107, + "step": 349 + }, + { + "epoch": 0.09, + "learning_rate": 4.9422226688102895e-05, + "loss": 0.533, + "step": 350 + }, + { + "epoch": 0.09, + "learning_rate": 4.942055198285102e-05, + "loss": 0.6722, + "step": 351 + }, + { + "epoch": 0.09, + "learning_rate": 4.9418877277599144e-05, + "loss": 0.7985, + "step": 352 + }, + { + "epoch": 0.09, + "learning_rate": 4.941720257234727e-05, + "loss": 0.7341, + "step": 353 + }, + { + "epoch": 0.09, + "learning_rate": 4.94155278670954e-05, + "loss": 0.5513, + "step": 354 + }, + { + "epoch": 0.1, + "learning_rate": 4.9413853161843516e-05, + "loss": 0.7577, + "step": 355 + }, + { + "epoch": 0.1, + "learning_rate": 4.941217845659164e-05, + "loss": 0.5206, + "step": 356 + }, + { + "epoch": 0.1, + "learning_rate": 4.9410503751339765e-05, + "loss": 0.7914, + "step": 357 + }, + { + "epoch": 0.1, + "learning_rate": 4.940882904608789e-05, + "loss": 0.5254, + "step": 358 + }, + { + "epoch": 0.1, + "learning_rate": 4.940715434083601e-05, + "loss": 0.6953, + "step": 359 + }, + { + "epoch": 0.1, + "learning_rate": 4.940547963558414e-05, + "loss": 0.7409, + "step": 360 + }, + { + "epoch": 0.1, + "learning_rate": 4.940380493033226e-05, + "loss": 0.7373, + "step": 361 + }, + { + "epoch": 0.1, + "learning_rate": 4.940213022508039e-05, + "loss": 0.7339, + "step": 362 + }, + { + "epoch": 0.1, + "learning_rate": 4.940045551982852e-05, + "loss": 0.8254, + "step": 363 + }, + { + "epoch": 0.1, + "learning_rate": 4.939878081457664e-05, + "loss": 0.6094, + "step": 364 + }, + { + "epoch": 0.1, + "learning_rate": 4.939710610932476e-05, + "loss": 0.924, + "step": 365 + }, + { + "epoch": 0.1, + "learning_rate": 4.939543140407288e-05, + "loss": 0.5664, + "step": 366 + }, + { + "epoch": 0.1, + "learning_rate": 4.939375669882101e-05, + "loss": 0.6629, + "step": 367 + }, + { + "epoch": 0.1, + "learning_rate": 4.939208199356913e-05, + "loss": 0.6086, + "step": 368 + }, + { + "epoch": 0.1, + "learning_rate": 4.9390407288317255e-05, + "loss": 0.6981, + "step": 369 + }, + { + "epoch": 0.1, + "learning_rate": 4.9388732583065386e-05, + "loss": 0.7551, + "step": 370 + }, + { + "epoch": 0.1, + "learning_rate": 4.938705787781351e-05, + "loss": 0.6434, + "step": 371 + }, + { + "epoch": 0.1, + "learning_rate": 4.9385383172561635e-05, + "loss": 0.4818, + "step": 372 + }, + { + "epoch": 0.1, + "learning_rate": 4.938370846730976e-05, + "loss": 0.8338, + "step": 373 + }, + { + "epoch": 0.1, + "learning_rate": 4.9382033762057876e-05, + "loss": 0.5942, + "step": 374 + }, + { + "epoch": 0.1, + "learning_rate": 4.9380359056806e-05, + "loss": 0.7428, + "step": 375 + }, + { + "epoch": 0.1, + "learning_rate": 4.9378684351554125e-05, + "loss": 0.5972, + "step": 376 + }, + { + "epoch": 0.1, + "learning_rate": 4.9377009646302256e-05, + "loss": 0.6468, + "step": 377 + }, + { + "epoch": 0.1, + "learning_rate": 4.937533494105038e-05, + "loss": 0.7936, + "step": 378 + }, + { + "epoch": 0.1, + "learning_rate": 4.9373660235798504e-05, + "loss": 0.873, + "step": 379 + }, + { + "epoch": 0.1, + "learning_rate": 4.937198553054663e-05, + "loss": 0.596, + "step": 380 + }, + { + "epoch": 0.1, + "learning_rate": 4.937031082529475e-05, + "loss": 0.6179, + "step": 381 + }, + { + "epoch": 0.1, + "learning_rate": 4.936863612004288e-05, + "loss": 0.6481, + "step": 382 + }, + { + "epoch": 0.1, + "learning_rate": 4.9366961414790994e-05, + "loss": 0.7066, + "step": 383 + }, + { + "epoch": 0.1, + "learning_rate": 4.936528670953912e-05, + "loss": 0.5405, + "step": 384 + }, + { + "epoch": 0.1, + "learning_rate": 4.936361200428725e-05, + "loss": 0.688, + "step": 385 + }, + { + "epoch": 0.1, + "learning_rate": 4.9361937299035374e-05, + "loss": 0.9768, + "step": 386 + }, + { + "epoch": 0.1, + "learning_rate": 4.93602625937835e-05, + "loss": 0.8218, + "step": 387 + }, + { + "epoch": 0.1, + "learning_rate": 4.935858788853162e-05, + "loss": 0.5568, + "step": 388 + }, + { + "epoch": 0.1, + "learning_rate": 4.9356913183279746e-05, + "loss": 0.554, + "step": 389 + }, + { + "epoch": 0.1, + "learning_rate": 4.935523847802787e-05, + "loss": 0.6614, + "step": 390 + }, + { + "epoch": 0.1, + "learning_rate": 4.9353563772775995e-05, + "loss": 0.5583, + "step": 391 + }, + { + "epoch": 0.11, + "learning_rate": 4.935188906752412e-05, + "loss": 0.5822, + "step": 392 + }, + { + "epoch": 0.11, + "learning_rate": 4.935021436227224e-05, + "loss": 0.5775, + "step": 393 + }, + { + "epoch": 0.11, + "learning_rate": 4.934853965702037e-05, + "loss": 0.8739, + "step": 394 + }, + { + "epoch": 0.11, + "learning_rate": 4.934686495176849e-05, + "loss": 0.712, + "step": 395 + }, + { + "epoch": 0.11, + "learning_rate": 4.9345190246516616e-05, + "loss": 0.8277, + "step": 396 + }, + { + "epoch": 0.11, + "learning_rate": 4.934351554126474e-05, + "loss": 0.8826, + "step": 397 + }, + { + "epoch": 0.11, + "learning_rate": 4.9341840836012864e-05, + "loss": 0.694, + "step": 398 + }, + { + "epoch": 0.11, + "learning_rate": 4.934016613076099e-05, + "loss": 0.7858, + "step": 399 + }, + { + "epoch": 0.11, + "learning_rate": 4.933849142550911e-05, + "loss": 0.6727, + "step": 400 + }, + { + "epoch": 0.11, + "learning_rate": 4.933681672025724e-05, + "loss": 0.7368, + "step": 401 + }, + { + "epoch": 0.11, + "learning_rate": 4.933514201500536e-05, + "loss": 0.4607, + "step": 402 + }, + { + "epoch": 0.11, + "learning_rate": 4.9333467309753485e-05, + "loss": 0.5873, + "step": 403 + }, + { + "epoch": 0.11, + "learning_rate": 4.933179260450161e-05, + "loss": 0.6752, + "step": 404 + }, + { + "epoch": 0.11, + "learning_rate": 4.9330117899249734e-05, + "loss": 0.6381, + "step": 405 + }, + { + "epoch": 0.11, + "learning_rate": 4.932844319399786e-05, + "loss": 0.7982, + "step": 406 + }, + { + "epoch": 0.11, + "learning_rate": 4.932676848874598e-05, + "loss": 0.706, + "step": 407 + }, + { + "epoch": 0.11, + "learning_rate": 4.9325093783494106e-05, + "loss": 0.5222, + "step": 408 + }, + { + "epoch": 0.11, + "learning_rate": 4.932341907824223e-05, + "loss": 0.9413, + "step": 409 + }, + { + "epoch": 0.11, + "learning_rate": 4.932174437299036e-05, + "loss": 0.6366, + "step": 410 + }, + { + "epoch": 0.11, + "learning_rate": 4.932006966773848e-05, + "loss": 0.5808, + "step": 411 + }, + { + "epoch": 0.11, + "learning_rate": 4.93183949624866e-05, + "loss": 0.6832, + "step": 412 + }, + { + "epoch": 0.11, + "learning_rate": 4.931672025723473e-05, + "loss": 0.8269, + "step": 413 + }, + { + "epoch": 0.11, + "learning_rate": 4.931504555198285e-05, + "loss": 0.6398, + "step": 414 + }, + { + "epoch": 0.11, + "learning_rate": 4.9313370846730976e-05, + "loss": 0.9231, + "step": 415 + }, + { + "epoch": 0.11, + "learning_rate": 4.93116961414791e-05, + "loss": 0.6937, + "step": 416 + }, + { + "epoch": 0.11, + "learning_rate": 4.9310021436227224e-05, + "loss": 0.6307, + "step": 417 + }, + { + "epoch": 0.11, + "learning_rate": 4.9308346730975355e-05, + "loss": 0.6586, + "step": 418 + }, + { + "epoch": 0.11, + "learning_rate": 4.930667202572348e-05, + "loss": 0.7306, + "step": 419 + }, + { + "epoch": 0.11, + "learning_rate": 4.9304997320471604e-05, + "loss": 0.735, + "step": 420 + }, + { + "epoch": 0.11, + "learning_rate": 4.930332261521972e-05, + "loss": 0.5111, + "step": 421 + }, + { + "epoch": 0.11, + "learning_rate": 4.9301647909967845e-05, + "loss": 0.6461, + "step": 422 + }, + { + "epoch": 0.11, + "learning_rate": 4.929997320471597e-05, + "loss": 0.5742, + "step": 423 + }, + { + "epoch": 0.11, + "learning_rate": 4.9298298499464094e-05, + "loss": 0.4713, + "step": 424 + }, + { + "epoch": 0.11, + "learning_rate": 4.929662379421222e-05, + "loss": 0.7709, + "step": 425 + }, + { + "epoch": 0.11, + "learning_rate": 4.929494908896035e-05, + "loss": 0.5484, + "step": 426 + }, + { + "epoch": 0.11, + "learning_rate": 4.929327438370847e-05, + "loss": 0.693, + "step": 427 + }, + { + "epoch": 0.11, + "learning_rate": 4.92915996784566e-05, + "loss": 0.578, + "step": 428 + }, + { + "epoch": 0.11, + "learning_rate": 4.928992497320472e-05, + "loss": 0.6974, + "step": 429 + }, + { + "epoch": 0.12, + "learning_rate": 4.928825026795284e-05, + "loss": 0.6513, + "step": 430 + }, + { + "epoch": 0.12, + "learning_rate": 4.928657556270096e-05, + "loss": 0.556, + "step": 431 + }, + { + "epoch": 0.12, + "learning_rate": 4.928490085744909e-05, + "loss": 0.5532, + "step": 432 + }, + { + "epoch": 0.12, + "learning_rate": 4.928322615219722e-05, + "loss": 0.6711, + "step": 433 + }, + { + "epoch": 0.12, + "learning_rate": 4.928155144694534e-05, + "loss": 0.6061, + "step": 434 + }, + { + "epoch": 0.12, + "learning_rate": 4.927987674169347e-05, + "loss": 0.6318, + "step": 435 + }, + { + "epoch": 0.12, + "learning_rate": 4.927820203644159e-05, + "loss": 0.9248, + "step": 436 + }, + { + "epoch": 0.12, + "learning_rate": 4.9276527331189715e-05, + "loss": 0.7943, + "step": 437 + }, + { + "epoch": 0.12, + "learning_rate": 4.927485262593784e-05, + "loss": 0.5563, + "step": 438 + }, + { + "epoch": 0.12, + "learning_rate": 4.927317792068596e-05, + "loss": 0.5968, + "step": 439 + }, + { + "epoch": 0.12, + "learning_rate": 4.927150321543408e-05, + "loss": 0.6193, + "step": 440 + }, + { + "epoch": 0.12, + "learning_rate": 4.926982851018221e-05, + "loss": 0.679, + "step": 441 + }, + { + "epoch": 0.12, + "learning_rate": 4.9268153804930336e-05, + "loss": 0.8155, + "step": 442 + }, + { + "epoch": 0.12, + "learning_rate": 4.926647909967846e-05, + "loss": 0.5743, + "step": 443 + }, + { + "epoch": 0.12, + "learning_rate": 4.9264804394426585e-05, + "loss": 0.616, + "step": 444 + }, + { + "epoch": 0.12, + "learning_rate": 4.926312968917471e-05, + "loss": 0.5442, + "step": 445 + }, + { + "epoch": 0.12, + "learning_rate": 4.926145498392283e-05, + "loss": 0.7795, + "step": 446 + }, + { + "epoch": 0.12, + "learning_rate": 4.925978027867096e-05, + "loss": 0.6532, + "step": 447 + }, + { + "epoch": 0.12, + "learning_rate": 4.925810557341908e-05, + "loss": 0.6243, + "step": 448 + }, + { + "epoch": 0.12, + "learning_rate": 4.9256430868167206e-05, + "loss": 0.5761, + "step": 449 + }, + { + "epoch": 0.12, + "learning_rate": 4.925475616291533e-05, + "loss": 0.6381, + "step": 450 + }, + { + "epoch": 0.12, + "learning_rate": 4.9253081457663454e-05, + "loss": 0.6382, + "step": 451 + }, + { + "epoch": 0.12, + "learning_rate": 4.925140675241158e-05, + "loss": 0.6244, + "step": 452 + }, + { + "epoch": 0.12, + "learning_rate": 4.92497320471597e-05, + "loss": 0.7042, + "step": 453 + }, + { + "epoch": 0.12, + "learning_rate": 4.924805734190783e-05, + "loss": 0.6602, + "step": 454 + }, + { + "epoch": 0.12, + "learning_rate": 4.924638263665595e-05, + "loss": 0.5293, + "step": 455 + }, + { + "epoch": 0.12, + "learning_rate": 4.9244707931404075e-05, + "loss": 0.6792, + "step": 456 + }, + { + "epoch": 0.12, + "learning_rate": 4.92430332261522e-05, + "loss": 0.5308, + "step": 457 + }, + { + "epoch": 0.12, + "learning_rate": 4.9241358520900324e-05, + "loss": 0.7105, + "step": 458 + }, + { + "epoch": 0.12, + "learning_rate": 4.923968381564845e-05, + "loss": 0.5973, + "step": 459 + }, + { + "epoch": 0.12, + "learning_rate": 4.923800911039657e-05, + "loss": 0.6125, + "step": 460 + }, + { + "epoch": 0.12, + "learning_rate": 4.9236334405144696e-05, + "loss": 0.7767, + "step": 461 + }, + { + "epoch": 0.12, + "learning_rate": 4.923465969989282e-05, + "loss": 0.6887, + "step": 462 + }, + { + "epoch": 0.12, + "learning_rate": 4.9232984994640945e-05, + "loss": 0.8226, + "step": 463 + }, + { + "epoch": 0.12, + "learning_rate": 4.923131028938907e-05, + "loss": 0.8416, + "step": 464 + }, + { + "epoch": 0.12, + "learning_rate": 4.922963558413719e-05, + "loss": 0.6965, + "step": 465 + }, + { + "epoch": 0.12, + "learning_rate": 4.9227960878885324e-05, + "loss": 0.6548, + "step": 466 + }, + { + "epoch": 0.13, + "learning_rate": 4.922628617363344e-05, + "loss": 0.6315, + "step": 467 + }, + { + "epoch": 0.13, + "learning_rate": 4.9224611468381566e-05, + "loss": 0.7018, + "step": 468 + }, + { + "epoch": 0.13, + "learning_rate": 4.922293676312969e-05, + "loss": 0.9012, + "step": 469 + }, + { + "epoch": 0.13, + "learning_rate": 4.9221262057877814e-05, + "loss": 0.5438, + "step": 470 + }, + { + "epoch": 0.13, + "learning_rate": 4.921958735262594e-05, + "loss": 0.5598, + "step": 471 + }, + { + "epoch": 0.13, + "learning_rate": 4.921791264737406e-05, + "loss": 0.5083, + "step": 472 + }, + { + "epoch": 0.13, + "learning_rate": 4.921623794212219e-05, + "loss": 0.6387, + "step": 473 + }, + { + "epoch": 0.13, + "learning_rate": 4.921456323687032e-05, + "loss": 0.6432, + "step": 474 + }, + { + "epoch": 0.13, + "learning_rate": 4.921288853161844e-05, + "loss": 0.4721, + "step": 475 + }, + { + "epoch": 0.13, + "learning_rate": 4.9211213826366566e-05, + "loss": 0.5716, + "step": 476 + }, + { + "epoch": 0.13, + "learning_rate": 4.9209539121114683e-05, + "loss": 0.5428, + "step": 477 + }, + { + "epoch": 0.13, + "learning_rate": 4.920786441586281e-05, + "loss": 0.5744, + "step": 478 + }, + { + "epoch": 0.13, + "learning_rate": 4.920618971061093e-05, + "loss": 0.7858, + "step": 479 + }, + { + "epoch": 0.13, + "learning_rate": 4.9204515005359056e-05, + "loss": 0.6492, + "step": 480 + }, + { + "epoch": 0.13, + "learning_rate": 4.920284030010718e-05, + "loss": 0.6307, + "step": 481 + }, + { + "epoch": 0.13, + "learning_rate": 4.920116559485531e-05, + "loss": 0.5008, + "step": 482 + }, + { + "epoch": 0.13, + "learning_rate": 4.9199490889603436e-05, + "loss": 0.4459, + "step": 483 + }, + { + "epoch": 0.13, + "learning_rate": 4.919781618435156e-05, + "loss": 0.5621, + "step": 484 + }, + { + "epoch": 0.13, + "learning_rate": 4.9196141479099684e-05, + "loss": 0.9677, + "step": 485 + }, + { + "epoch": 0.13, + "learning_rate": 4.91944667738478e-05, + "loss": 0.5841, + "step": 486 + }, + { + "epoch": 0.13, + "learning_rate": 4.9192792068595926e-05, + "loss": 0.6475, + "step": 487 + }, + { + "epoch": 0.13, + "learning_rate": 4.919111736334405e-05, + "loss": 0.7843, + "step": 488 + }, + { + "epoch": 0.13, + "learning_rate": 4.918944265809218e-05, + "loss": 0.4911, + "step": 489 + }, + { + "epoch": 0.13, + "learning_rate": 4.9187767952840305e-05, + "loss": 0.743, + "step": 490 + }, + { + "epoch": 0.13, + "learning_rate": 4.918609324758843e-05, + "loss": 0.5661, + "step": 491 + }, + { + "epoch": 0.13, + "learning_rate": 4.9184418542336553e-05, + "loss": 0.554, + "step": 492 + }, + { + "epoch": 0.13, + "learning_rate": 4.918274383708468e-05, + "loss": 0.5278, + "step": 493 + }, + { + "epoch": 0.13, + "learning_rate": 4.91810691318328e-05, + "loss": 0.886, + "step": 494 + }, + { + "epoch": 0.13, + "learning_rate": 4.917939442658092e-05, + "loss": 0.8079, + "step": 495 + }, + { + "epoch": 0.13, + "learning_rate": 4.9177719721329043e-05, + "loss": 0.5454, + "step": 496 + }, + { + "epoch": 0.13, + "learning_rate": 4.9176045016077174e-05, + "loss": 0.8192, + "step": 497 + }, + { + "epoch": 0.13, + "learning_rate": 4.91743703108253e-05, + "loss": 0.5087, + "step": 498 + }, + { + "epoch": 0.13, + "learning_rate": 4.917269560557342e-05, + "loss": 0.7442, + "step": 499 + }, + { + "epoch": 0.13, + "learning_rate": 4.917102090032155e-05, + "loss": 0.9639, + "step": 500 + }, + { + "epoch": 0.13, + "learning_rate": 4.916934619506967e-05, + "loss": 0.5166, + "step": 501 + }, + { + "epoch": 0.13, + "learning_rate": 4.9167671489817796e-05, + "loss": 0.6133, + "step": 502 + }, + { + "epoch": 0.13, + "learning_rate": 4.916599678456592e-05, + "loss": 0.7108, + "step": 503 + }, + { + "epoch": 0.14, + "learning_rate": 4.9164322079314044e-05, + "loss": 0.604, + "step": 504 + }, + { + "epoch": 0.14, + "learning_rate": 4.916264737406217e-05, + "loss": 0.7355, + "step": 505 + }, + { + "epoch": 0.14, + "learning_rate": 4.916097266881029e-05, + "loss": 0.8937, + "step": 506 + }, + { + "epoch": 0.14, + "learning_rate": 4.9159297963558417e-05, + "loss": 0.7085, + "step": 507 + }, + { + "epoch": 0.14, + "learning_rate": 4.915762325830654e-05, + "loss": 0.5976, + "step": 508 + }, + { + "epoch": 0.14, + "learning_rate": 4.9155948553054665e-05, + "loss": 0.8211, + "step": 509 + }, + { + "epoch": 0.14, + "learning_rate": 4.915427384780279e-05, + "loss": 0.9394, + "step": 510 + }, + { + "epoch": 0.14, + "learning_rate": 4.9152599142550913e-05, + "loss": 0.6873, + "step": 511 + }, + { + "epoch": 0.14, + "learning_rate": 4.915092443729904e-05, + "loss": 0.6967, + "step": 512 + }, + { + "epoch": 0.14, + "learning_rate": 4.914924973204716e-05, + "loss": 0.8206, + "step": 513 + }, + { + "epoch": 0.14, + "learning_rate": 4.9147575026795286e-05, + "loss": 0.9044, + "step": 514 + }, + { + "epoch": 0.14, + "learning_rate": 4.914590032154341e-05, + "loss": 0.7174, + "step": 515 + }, + { + "epoch": 0.14, + "learning_rate": 4.9144225616291534e-05, + "loss": 0.7024, + "step": 516 + }, + { + "epoch": 0.14, + "learning_rate": 4.914255091103966e-05, + "loss": 0.6625, + "step": 517 + }, + { + "epoch": 0.14, + "learning_rate": 4.914087620578778e-05, + "loss": 0.4751, + "step": 518 + }, + { + "epoch": 0.14, + "learning_rate": 4.913920150053591e-05, + "loss": 0.4568, + "step": 519 + }, + { + "epoch": 0.14, + "learning_rate": 4.913752679528403e-05, + "loss": 0.5413, + "step": 520 + }, + { + "epoch": 0.14, + "learning_rate": 4.9135852090032155e-05, + "loss": 0.7547, + "step": 521 + }, + { + "epoch": 0.14, + "learning_rate": 4.9134177384780286e-05, + "loss": 0.6993, + "step": 522 + }, + { + "epoch": 0.14, + "learning_rate": 4.9132502679528404e-05, + "loss": 0.8688, + "step": 523 + }, + { + "epoch": 0.14, + "learning_rate": 4.913082797427653e-05, + "loss": 0.7383, + "step": 524 + }, + { + "epoch": 0.14, + "learning_rate": 4.912915326902465e-05, + "loss": 0.8582, + "step": 525 + }, + { + "epoch": 0.14, + "learning_rate": 4.9127478563772777e-05, + "loss": 0.5507, + "step": 526 + }, + { + "epoch": 0.14, + "learning_rate": 4.91258038585209e-05, + "loss": 0.61, + "step": 527 + }, + { + "epoch": 0.14, + "learning_rate": 4.9124129153269025e-05, + "loss": 0.5494, + "step": 528 + }, + { + "epoch": 0.14, + "learning_rate": 4.912245444801715e-05, + "loss": 0.7338, + "step": 529 + }, + { + "epoch": 0.14, + "learning_rate": 4.912077974276528e-05, + "loss": 0.4794, + "step": 530 + }, + { + "epoch": 0.14, + "learning_rate": 4.9119105037513404e-05, + "loss": 0.6882, + "step": 531 + }, + { + "epoch": 0.14, + "learning_rate": 4.911743033226153e-05, + "loss": 0.6454, + "step": 532 + }, + { + "epoch": 0.14, + "learning_rate": 4.9115755627009646e-05, + "loss": 0.4698, + "step": 533 + }, + { + "epoch": 0.14, + "learning_rate": 4.911408092175777e-05, + "loss": 0.5207, + "step": 534 + }, + { + "epoch": 0.14, + "learning_rate": 4.9112406216505894e-05, + "loss": 0.8656, + "step": 535 + }, + { + "epoch": 0.14, + "learning_rate": 4.911073151125402e-05, + "loss": 0.822, + "step": 536 + }, + { + "epoch": 0.14, + "learning_rate": 4.910905680600214e-05, + "loss": 0.6993, + "step": 537 + }, + { + "epoch": 0.14, + "learning_rate": 4.9107382100750274e-05, + "loss": 0.6345, + "step": 538 + }, + { + "epoch": 0.14, + "learning_rate": 4.91057073954984e-05, + "loss": 0.7872, + "step": 539 + }, + { + "epoch": 0.14, + "learning_rate": 4.910403269024652e-05, + "loss": 0.5202, + "step": 540 + }, + { + "epoch": 0.14, + "learning_rate": 4.9102357984994646e-05, + "loss": 0.8437, + "step": 541 + }, + { + "epoch": 0.15, + "learning_rate": 4.9100683279742764e-05, + "loss": 0.5666, + "step": 542 + }, + { + "epoch": 0.15, + "learning_rate": 4.909900857449089e-05, + "loss": 0.6412, + "step": 543 + }, + { + "epoch": 0.15, + "learning_rate": 4.909733386923901e-05, + "loss": 0.5822, + "step": 544 + }, + { + "epoch": 0.15, + "learning_rate": 4.909565916398714e-05, + "loss": 0.7064, + "step": 545 + }, + { + "epoch": 0.15, + "learning_rate": 4.909398445873527e-05, + "loss": 0.5982, + "step": 546 + }, + { + "epoch": 0.15, + "learning_rate": 4.909230975348339e-05, + "loss": 0.7906, + "step": 547 + }, + { + "epoch": 0.15, + "learning_rate": 4.9090635048231516e-05, + "loss": 0.5323, + "step": 548 + }, + { + "epoch": 0.15, + "learning_rate": 4.908896034297964e-05, + "loss": 0.5076, + "step": 549 + }, + { + "epoch": 0.15, + "learning_rate": 4.9087285637727764e-05, + "loss": 0.5066, + "step": 550 + }, + { + "epoch": 0.15, + "learning_rate": 4.908561093247588e-05, + "loss": 0.6245, + "step": 551 + }, + { + "epoch": 0.15, + "learning_rate": 4.9083936227224006e-05, + "loss": 0.796, + "step": 552 + }, + { + "epoch": 0.15, + "learning_rate": 4.908226152197214e-05, + "loss": 0.5509, + "step": 553 + }, + { + "epoch": 0.15, + "learning_rate": 4.908058681672026e-05, + "loss": 0.487, + "step": 554 + }, + { + "epoch": 0.15, + "learning_rate": 4.9078912111468385e-05, + "loss": 0.7054, + "step": 555 + }, + { + "epoch": 0.15, + "learning_rate": 4.907723740621651e-05, + "loss": 0.6816, + "step": 556 + }, + { + "epoch": 0.15, + "learning_rate": 4.9075562700964634e-05, + "loss": 0.4544, + "step": 557 + }, + { + "epoch": 0.15, + "learning_rate": 4.907388799571276e-05, + "loss": 0.5273, + "step": 558 + }, + { + "epoch": 0.15, + "learning_rate": 4.907221329046088e-05, + "loss": 0.5809, + "step": 559 + }, + { + "epoch": 0.15, + "learning_rate": 4.9070538585209006e-05, + "loss": 0.679, + "step": 560 + }, + { + "epoch": 0.15, + "learning_rate": 4.906886387995713e-05, + "loss": 0.7681, + "step": 561 + }, + { + "epoch": 0.15, + "learning_rate": 4.9067189174705255e-05, + "loss": 0.5853, + "step": 562 + }, + { + "epoch": 0.15, + "learning_rate": 4.906551446945338e-05, + "loss": 1.0705, + "step": 563 + }, + { + "epoch": 0.15, + "learning_rate": 4.90638397642015e-05, + "loss": 0.7398, + "step": 564 + }, + { + "epoch": 0.15, + "learning_rate": 4.906216505894963e-05, + "loss": 0.6177, + "step": 565 + }, + { + "epoch": 0.15, + "learning_rate": 4.906049035369775e-05, + "loss": 0.5334, + "step": 566 + }, + { + "epoch": 0.15, + "learning_rate": 4.9058815648445876e-05, + "loss": 0.4446, + "step": 567 + }, + { + "epoch": 0.15, + "learning_rate": 4.9057140943194e-05, + "loss": 0.5575, + "step": 568 + }, + { + "epoch": 0.15, + "learning_rate": 4.9055466237942124e-05, + "loss": 0.5451, + "step": 569 + }, + { + "epoch": 0.15, + "learning_rate": 4.905379153269025e-05, + "loss": 0.662, + "step": 570 + }, + { + "epoch": 0.15, + "learning_rate": 4.905211682743837e-05, + "loss": 0.6506, + "step": 571 + }, + { + "epoch": 0.15, + "learning_rate": 4.90504421221865e-05, + "loss": 0.5699, + "step": 572 + }, + { + "epoch": 0.15, + "learning_rate": 4.904876741693462e-05, + "loss": 0.4702, + "step": 573 + }, + { + "epoch": 0.15, + "learning_rate": 4.9047092711682745e-05, + "loss": 0.547, + "step": 574 + }, + { + "epoch": 0.15, + "learning_rate": 4.904541800643087e-05, + "loss": 0.471, + "step": 575 + }, + { + "epoch": 0.15, + "learning_rate": 4.9043743301178994e-05, + "loss": 0.4568, + "step": 576 + }, + { + "epoch": 0.15, + "learning_rate": 4.904206859592712e-05, + "loss": 0.5089, + "step": 577 + }, + { + "epoch": 0.15, + "learning_rate": 4.904039389067525e-05, + "loss": 0.7871, + "step": 578 + }, + { + "epoch": 0.16, + "learning_rate": 4.9038719185423366e-05, + "loss": 0.6905, + "step": 579 + }, + { + "epoch": 0.16, + "learning_rate": 4.903704448017149e-05, + "loss": 0.4868, + "step": 580 + }, + { + "epoch": 0.16, + "learning_rate": 4.9035369774919615e-05, + "loss": 0.5547, + "step": 581 + }, + { + "epoch": 0.16, + "learning_rate": 4.903369506966774e-05, + "loss": 0.6187, + "step": 582 + }, + { + "epoch": 0.16, + "learning_rate": 4.903202036441586e-05, + "loss": 0.4877, + "step": 583 + }, + { + "epoch": 0.16, + "learning_rate": 4.903034565916399e-05, + "loss": 0.5248, + "step": 584 + }, + { + "epoch": 0.16, + "learning_rate": 4.902867095391211e-05, + "loss": 0.6994, + "step": 585 + }, + { + "epoch": 0.16, + "learning_rate": 4.902699624866024e-05, + "loss": 0.7236, + "step": 586 + }, + { + "epoch": 0.16, + "learning_rate": 4.902532154340837e-05, + "loss": 0.724, + "step": 587 + }, + { + "epoch": 0.16, + "learning_rate": 4.902364683815649e-05, + "loss": 0.666, + "step": 588 + }, + { + "epoch": 0.16, + "learning_rate": 4.902197213290461e-05, + "loss": 0.4374, + "step": 589 + }, + { + "epoch": 0.16, + "learning_rate": 4.902029742765273e-05, + "loss": 0.7238, + "step": 590 + }, + { + "epoch": 0.16, + "learning_rate": 4.901862272240086e-05, + "loss": 0.8397, + "step": 591 + }, + { + "epoch": 0.16, + "learning_rate": 4.901694801714898e-05, + "loss": 0.4641, + "step": 592 + }, + { + "epoch": 0.16, + "learning_rate": 4.9015273311897105e-05, + "loss": 0.4994, + "step": 593 + }, + { + "epoch": 0.16, + "learning_rate": 4.9013598606645236e-05, + "loss": 0.634, + "step": 594 + }, + { + "epoch": 0.16, + "learning_rate": 4.901192390139336e-05, + "loss": 0.5212, + "step": 595 + }, + { + "epoch": 0.16, + "learning_rate": 4.9010249196141485e-05, + "loss": 0.5196, + "step": 596 + }, + { + "epoch": 0.16, + "learning_rate": 4.900857449088961e-05, + "loss": 0.8071, + "step": 597 + }, + { + "epoch": 0.16, + "learning_rate": 4.9006899785637726e-05, + "loss": 0.5391, + "step": 598 + }, + { + "epoch": 0.16, + "learning_rate": 4.900522508038585e-05, + "loss": 0.6757, + "step": 599 + }, + { + "epoch": 0.16, + "learning_rate": 4.9003550375133975e-05, + "loss": 0.5972, + "step": 600 + }, + { + "epoch": 0.16, + "learning_rate": 4.9001875669882106e-05, + "loss": 0.7262, + "step": 601 + }, + { + "epoch": 0.16, + "learning_rate": 4.900020096463023e-05, + "loss": 0.6166, + "step": 602 + }, + { + "epoch": 0.16, + "learning_rate": 4.8998526259378354e-05, + "loss": 0.749, + "step": 603 + }, + { + "epoch": 0.16, + "learning_rate": 4.899685155412648e-05, + "loss": 0.6869, + "step": 604 + }, + { + "epoch": 0.16, + "learning_rate": 4.89951768488746e-05, + "loss": 0.6335, + "step": 605 + }, + { + "epoch": 0.16, + "learning_rate": 4.899350214362273e-05, + "loss": 0.4721, + "step": 606 + }, + { + "epoch": 0.16, + "learning_rate": 4.8991827438370844e-05, + "loss": 0.7885, + "step": 607 + }, + { + "epoch": 0.16, + "learning_rate": 4.899015273311897e-05, + "loss": 0.6326, + "step": 608 + }, + { + "epoch": 0.16, + "learning_rate": 4.89884780278671e-05, + "loss": 0.6112, + "step": 609 + }, + { + "epoch": 0.16, + "learning_rate": 4.8986803322615224e-05, + "loss": 0.745, + "step": 610 + }, + { + "epoch": 0.16, + "learning_rate": 4.898512861736335e-05, + "loss": 0.4897, + "step": 611 + }, + { + "epoch": 0.16, + "learning_rate": 4.898345391211147e-05, + "loss": 0.6717, + "step": 612 + }, + { + "epoch": 0.16, + "learning_rate": 4.8981779206859596e-05, + "loss": 0.5506, + "step": 613 + }, + { + "epoch": 0.16, + "learning_rate": 4.898010450160772e-05, + "loss": 0.6906, + "step": 614 + }, + { + "epoch": 0.16, + "learning_rate": 4.8978429796355845e-05, + "loss": 0.5022, + "step": 615 + }, + { + "epoch": 0.17, + "learning_rate": 4.897675509110397e-05, + "loss": 0.555, + "step": 616 + }, + { + "epoch": 0.17, + "learning_rate": 4.897508038585209e-05, + "loss": 0.6413, + "step": 617 + }, + { + "epoch": 0.17, + "learning_rate": 4.897340568060022e-05, + "loss": 0.6382, + "step": 618 + }, + { + "epoch": 0.17, + "learning_rate": 4.897173097534834e-05, + "loss": 0.5449, + "step": 619 + }, + { + "epoch": 0.17, + "learning_rate": 4.8970056270096466e-05, + "loss": 0.6888, + "step": 620 + }, + { + "epoch": 0.17, + "learning_rate": 4.896838156484459e-05, + "loss": 0.7265, + "step": 621 + }, + { + "epoch": 0.17, + "learning_rate": 4.8966706859592714e-05, + "loss": 0.4481, + "step": 622 + }, + { + "epoch": 0.17, + "learning_rate": 4.896503215434084e-05, + "loss": 0.3733, + "step": 623 + }, + { + "epoch": 0.17, + "learning_rate": 4.896335744908896e-05, + "loss": 0.5829, + "step": 624 + }, + { + "epoch": 0.17, + "learning_rate": 4.896168274383709e-05, + "loss": 0.7613, + "step": 625 + }, + { + "epoch": 0.17, + "learning_rate": 4.896000803858521e-05, + "loss": 0.8834, + "step": 626 + }, + { + "epoch": 0.17, + "learning_rate": 4.8958333333333335e-05, + "loss": 0.8419, + "step": 627 + }, + { + "epoch": 0.17, + "learning_rate": 4.895665862808146e-05, + "loss": 0.7234, + "step": 628 + }, + { + "epoch": 0.17, + "learning_rate": 4.8954983922829584e-05, + "loss": 0.5537, + "step": 629 + }, + { + "epoch": 0.17, + "learning_rate": 4.895330921757771e-05, + "loss": 0.816, + "step": 630 + }, + { + "epoch": 0.17, + "learning_rate": 4.895163451232583e-05, + "loss": 0.5175, + "step": 631 + }, + { + "epoch": 0.17, + "learning_rate": 4.8949959807073956e-05, + "loss": 0.5863, + "step": 632 + }, + { + "epoch": 0.17, + "learning_rate": 4.894828510182208e-05, + "loss": 0.7017, + "step": 633 + }, + { + "epoch": 0.17, + "learning_rate": 4.894661039657021e-05, + "loss": 0.7277, + "step": 634 + }, + { + "epoch": 0.17, + "learning_rate": 4.894493569131833e-05, + "loss": 0.7293, + "step": 635 + }, + { + "epoch": 0.17, + "learning_rate": 4.894326098606645e-05, + "loss": 0.588, + "step": 636 + }, + { + "epoch": 0.17, + "learning_rate": 4.894158628081458e-05, + "loss": 0.6862, + "step": 637 + }, + { + "epoch": 0.17, + "learning_rate": 4.89399115755627e-05, + "loss": 0.6978, + "step": 638 + }, + { + "epoch": 0.17, + "learning_rate": 4.8938236870310826e-05, + "loss": 0.5733, + "step": 639 + }, + { + "epoch": 0.17, + "learning_rate": 4.893656216505895e-05, + "loss": 0.4836, + "step": 640 + }, + { + "epoch": 0.17, + "learning_rate": 4.8934887459807074e-05, + "loss": 0.3815, + "step": 641 + }, + { + "epoch": 0.17, + "learning_rate": 4.8933212754555205e-05, + "loss": 0.4497, + "step": 642 + }, + { + "epoch": 0.17, + "learning_rate": 4.893153804930333e-05, + "loss": 0.5169, + "step": 643 + }, + { + "epoch": 0.17, + "learning_rate": 4.8929863344051454e-05, + "loss": 0.4884, + "step": 644 + }, + { + "epoch": 0.17, + "learning_rate": 4.892818863879957e-05, + "loss": 0.7891, + "step": 645 + }, + { + "epoch": 0.17, + "learning_rate": 4.8926513933547695e-05, + "loss": 0.6605, + "step": 646 + }, + { + "epoch": 0.17, + "learning_rate": 4.892483922829582e-05, + "loss": 0.5124, + "step": 647 + }, + { + "epoch": 0.17, + "learning_rate": 4.8923164523043944e-05, + "loss": 0.6516, + "step": 648 + }, + { + "epoch": 0.17, + "learning_rate": 4.892148981779207e-05, + "loss": 0.5306, + "step": 649 + }, + { + "epoch": 0.17, + "learning_rate": 4.89198151125402e-05, + "loss": 0.5345, + "step": 650 + }, + { + "epoch": 0.17, + "learning_rate": 4.891814040728832e-05, + "loss": 0.5001, + "step": 651 + }, + { + "epoch": 0.17, + "learning_rate": 4.891646570203645e-05, + "loss": 0.6334, + "step": 652 + }, + { + "epoch": 0.17, + "learning_rate": 4.891479099678457e-05, + "loss": 0.57, + "step": 653 + }, + { + "epoch": 0.18, + "learning_rate": 4.891311629153269e-05, + "loss": 0.763, + "step": 654 + }, + { + "epoch": 0.18, + "learning_rate": 4.891144158628081e-05, + "loss": 0.5529, + "step": 655 + }, + { + "epoch": 0.18, + "learning_rate": 4.890976688102894e-05, + "loss": 0.4622, + "step": 656 + }, + { + "epoch": 0.18, + "learning_rate": 4.890809217577706e-05, + "loss": 0.5134, + "step": 657 + }, + { + "epoch": 0.18, + "learning_rate": 4.890641747052519e-05, + "loss": 0.4745, + "step": 658 + }, + { + "epoch": 0.18, + "learning_rate": 4.890474276527332e-05, + "loss": 0.6692, + "step": 659 + }, + { + "epoch": 0.18, + "learning_rate": 4.890306806002144e-05, + "loss": 0.653, + "step": 660 + }, + { + "epoch": 0.18, + "learning_rate": 4.8901393354769565e-05, + "loss": 0.5142, + "step": 661 + }, + { + "epoch": 0.18, + "learning_rate": 4.889971864951769e-05, + "loss": 0.6008, + "step": 662 + }, + { + "epoch": 0.18, + "learning_rate": 4.889804394426581e-05, + "loss": 0.4871, + "step": 663 + }, + { + "epoch": 0.18, + "learning_rate": 4.889636923901393e-05, + "loss": 0.6472, + "step": 664 + }, + { + "epoch": 0.18, + "learning_rate": 4.889469453376206e-05, + "loss": 0.6256, + "step": 665 + }, + { + "epoch": 0.18, + "learning_rate": 4.8893019828510186e-05, + "loss": 0.7861, + "step": 666 + }, + { + "epoch": 0.18, + "learning_rate": 4.889134512325831e-05, + "loss": 0.522, + "step": 667 + }, + { + "epoch": 0.18, + "learning_rate": 4.8889670418006435e-05, + "loss": 0.544, + "step": 668 + }, + { + "epoch": 0.18, + "learning_rate": 4.888799571275456e-05, + "loss": 0.5368, + "step": 669 + }, + { + "epoch": 0.18, + "learning_rate": 4.888632100750268e-05, + "loss": 0.8357, + "step": 670 + }, + { + "epoch": 0.18, + "learning_rate": 4.888464630225081e-05, + "loss": 0.513, + "step": 671 + }, + { + "epoch": 0.18, + "learning_rate": 4.888297159699893e-05, + "loss": 0.5639, + "step": 672 + }, + { + "epoch": 0.18, + "learning_rate": 4.8881296891747056e-05, + "loss": 0.6444, + "step": 673 + }, + { + "epoch": 0.18, + "learning_rate": 4.887962218649518e-05, + "loss": 0.9503, + "step": 674 + }, + { + "epoch": 0.18, + "learning_rate": 4.8877947481243304e-05, + "loss": 0.5138, + "step": 675 + }, + { + "epoch": 0.18, + "learning_rate": 4.887627277599143e-05, + "loss": 0.6987, + "step": 676 + }, + { + "epoch": 0.18, + "learning_rate": 4.887459807073955e-05, + "loss": 0.6879, + "step": 677 + }, + { + "epoch": 0.18, + "learning_rate": 4.887292336548768e-05, + "loss": 0.6902, + "step": 678 + }, + { + "epoch": 0.18, + "learning_rate": 4.88712486602358e-05, + "loss": 0.5976, + "step": 679 + }, + { + "epoch": 0.18, + "learning_rate": 4.8869573954983925e-05, + "loss": 0.7545, + "step": 680 + }, + { + "epoch": 0.18, + "learning_rate": 4.886789924973205e-05, + "loss": 0.5586, + "step": 681 + }, + { + "epoch": 0.18, + "learning_rate": 4.8866224544480174e-05, + "loss": 0.663, + "step": 682 + }, + { + "epoch": 0.18, + "learning_rate": 4.88645498392283e-05, + "loss": 0.5607, + "step": 683 + }, + { + "epoch": 0.18, + "learning_rate": 4.886287513397642e-05, + "loss": 0.5746, + "step": 684 + }, + { + "epoch": 0.18, + "learning_rate": 4.8861200428724546e-05, + "loss": 0.6201, + "step": 685 + }, + { + "epoch": 0.18, + "learning_rate": 4.885952572347267e-05, + "loss": 0.5761, + "step": 686 + }, + { + "epoch": 0.18, + "learning_rate": 4.8857851018220795e-05, + "loss": 0.701, + "step": 687 + }, + { + "epoch": 0.18, + "learning_rate": 4.885617631296892e-05, + "loss": 0.6007, + "step": 688 + }, + { + "epoch": 0.18, + "learning_rate": 4.885450160771704e-05, + "loss": 0.765, + "step": 689 + }, + { + "epoch": 0.18, + "learning_rate": 4.8852826902465174e-05, + "loss": 0.614, + "step": 690 + }, + { + "epoch": 0.19, + "learning_rate": 4.885115219721329e-05, + "loss": 0.4049, + "step": 691 + }, + { + "epoch": 0.19, + "learning_rate": 4.8849477491961416e-05, + "loss": 0.5944, + "step": 692 + }, + { + "epoch": 0.19, + "learning_rate": 4.884780278670954e-05, + "loss": 0.7553, + "step": 693 + }, + { + "epoch": 0.19, + "learning_rate": 4.8846128081457664e-05, + "loss": 0.7794, + "step": 694 + }, + { + "epoch": 0.19, + "learning_rate": 4.884445337620579e-05, + "loss": 0.6787, + "step": 695 + }, + { + "epoch": 0.19, + "learning_rate": 4.884277867095391e-05, + "loss": 0.6654, + "step": 696 + }, + { + "epoch": 0.19, + "learning_rate": 4.884110396570204e-05, + "loss": 0.6546, + "step": 697 + }, + { + "epoch": 0.19, + "learning_rate": 4.883942926045017e-05, + "loss": 0.574, + "step": 698 + }, + { + "epoch": 0.19, + "learning_rate": 4.883775455519829e-05, + "loss": 0.509, + "step": 699 + }, + { + "epoch": 0.19, + "learning_rate": 4.8836079849946416e-05, + "loss": 0.4976, + "step": 700 + }, + { + "epoch": 0.19, + "learning_rate": 4.8834405144694534e-05, + "loss": 0.6752, + "step": 701 + }, + { + "epoch": 0.19, + "learning_rate": 4.883273043944266e-05, + "loss": 0.4687, + "step": 702 + }, + { + "epoch": 0.19, + "learning_rate": 4.883105573419078e-05, + "loss": 0.5031, + "step": 703 + }, + { + "epoch": 0.19, + "learning_rate": 4.8829381028938906e-05, + "loss": 0.5777, + "step": 704 + }, + { + "epoch": 0.19, + "learning_rate": 4.882770632368703e-05, + "loss": 0.7101, + "step": 705 + }, + { + "epoch": 0.19, + "learning_rate": 4.882603161843516e-05, + "loss": 0.6479, + "step": 706 + }, + { + "epoch": 0.19, + "learning_rate": 4.8824356913183286e-05, + "loss": 0.6628, + "step": 707 + }, + { + "epoch": 0.19, + "learning_rate": 4.882268220793141e-05, + "loss": 0.5517, + "step": 708 + }, + { + "epoch": 0.19, + "learning_rate": 4.8821007502679534e-05, + "loss": 0.5095, + "step": 709 + }, + { + "epoch": 0.19, + "learning_rate": 4.881933279742765e-05, + "loss": 0.4674, + "step": 710 + }, + { + "epoch": 0.19, + "learning_rate": 4.8817658092175776e-05, + "loss": 0.6557, + "step": 711 + }, + { + "epoch": 0.19, + "learning_rate": 4.88159833869239e-05, + "loss": 0.7015, + "step": 712 + }, + { + "epoch": 0.19, + "learning_rate": 4.8814308681672024e-05, + "loss": 0.5715, + "step": 713 + }, + { + "epoch": 0.19, + "learning_rate": 4.8812633976420155e-05, + "loss": 0.6922, + "step": 714 + }, + { + "epoch": 0.19, + "learning_rate": 4.881095927116828e-05, + "loss": 0.5041, + "step": 715 + }, + { + "epoch": 0.19, + "learning_rate": 4.8809284565916403e-05, + "loss": 0.7989, + "step": 716 + }, + { + "epoch": 0.19, + "learning_rate": 4.880760986066453e-05, + "loss": 0.5931, + "step": 717 + }, + { + "epoch": 0.19, + "learning_rate": 4.880593515541265e-05, + "loss": 0.5369, + "step": 718 + }, + { + "epoch": 0.19, + "learning_rate": 4.880426045016077e-05, + "loss": 0.441, + "step": 719 + }, + { + "epoch": 0.19, + "learning_rate": 4.8802585744908894e-05, + "loss": 0.5159, + "step": 720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8800911039657025e-05, + "loss": 0.5373, + "step": 721 + }, + { + "epoch": 0.19, + "learning_rate": 4.879923633440515e-05, + "loss": 0.5399, + "step": 722 + }, + { + "epoch": 0.19, + "learning_rate": 4.879756162915327e-05, + "loss": 0.5766, + "step": 723 + }, + { + "epoch": 0.19, + "learning_rate": 4.87958869239014e-05, + "loss": 0.4172, + "step": 724 + }, + { + "epoch": 0.19, + "learning_rate": 4.879421221864952e-05, + "loss": 0.6196, + "step": 725 + }, + { + "epoch": 0.19, + "learning_rate": 4.8792537513397646e-05, + "loss": 0.6442, + "step": 726 + }, + { + "epoch": 0.19, + "learning_rate": 4.879086280814577e-05, + "loss": 0.7829, + "step": 727 + }, + { + "epoch": 0.2, + "learning_rate": 4.8789188102893894e-05, + "loss": 0.9478, + "step": 728 + }, + { + "epoch": 0.2, + "learning_rate": 4.878751339764202e-05, + "loss": 0.4395, + "step": 729 + }, + { + "epoch": 0.2, + "learning_rate": 4.878583869239014e-05, + "loss": 0.6211, + "step": 730 + }, + { + "epoch": 0.2, + "learning_rate": 4.8784163987138267e-05, + "loss": 0.4888, + "step": 731 + }, + { + "epoch": 0.2, + "learning_rate": 4.878248928188639e-05, + "loss": 0.706, + "step": 732 + }, + { + "epoch": 0.2, + "learning_rate": 4.8780814576634515e-05, + "loss": 0.7012, + "step": 733 + }, + { + "epoch": 0.2, + "learning_rate": 4.877913987138264e-05, + "loss": 0.6862, + "step": 734 + }, + { + "epoch": 0.2, + "learning_rate": 4.8777465166130763e-05, + "loss": 0.4734, + "step": 735 + }, + { + "epoch": 0.2, + "learning_rate": 4.877579046087889e-05, + "loss": 0.6355, + "step": 736 + }, + { + "epoch": 0.2, + "learning_rate": 4.877411575562701e-05, + "loss": 0.53, + "step": 737 + }, + { + "epoch": 0.2, + "learning_rate": 4.8772441050375136e-05, + "loss": 0.5598, + "step": 738 + }, + { + "epoch": 0.2, + "learning_rate": 4.877076634512326e-05, + "loss": 0.6358, + "step": 739 + }, + { + "epoch": 0.2, + "learning_rate": 4.8769091639871384e-05, + "loss": 0.6894, + "step": 740 + }, + { + "epoch": 0.2, + "learning_rate": 4.876741693461951e-05, + "loss": 0.5, + "step": 741 + }, + { + "epoch": 0.2, + "learning_rate": 4.876574222936763e-05, + "loss": 0.415, + "step": 742 + }, + { + "epoch": 0.2, + "learning_rate": 4.876406752411576e-05, + "loss": 0.4649, + "step": 743 + }, + { + "epoch": 0.2, + "learning_rate": 4.876239281886388e-05, + "loss": 0.5378, + "step": 744 + }, + { + "epoch": 0.2, + "learning_rate": 4.8760718113612006e-05, + "loss": 0.5317, + "step": 745 + }, + { + "epoch": 0.2, + "learning_rate": 4.8759043408360137e-05, + "loss": 0.3618, + "step": 746 + }, + { + "epoch": 0.2, + "learning_rate": 4.8757368703108254e-05, + "loss": 0.5133, + "step": 747 + }, + { + "epoch": 0.2, + "learning_rate": 4.875569399785638e-05, + "loss": 0.5289, + "step": 748 + }, + { + "epoch": 0.2, + "learning_rate": 4.87540192926045e-05, + "loss": 0.4139, + "step": 749 + }, + { + "epoch": 0.2, + "learning_rate": 4.8752344587352627e-05, + "loss": 0.4802, + "step": 750 + }, + { + "epoch": 0.2, + "learning_rate": 4.875066988210075e-05, + "loss": 0.4577, + "step": 751 + }, + { + "epoch": 0.2, + "learning_rate": 4.8748995176848875e-05, + "loss": 0.4153, + "step": 752 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747320471597e-05, + "loss": 0.4335, + "step": 753 + }, + { + "epoch": 0.2, + "learning_rate": 4.874564576634513e-05, + "loss": 0.6933, + "step": 754 + }, + { + "epoch": 0.2, + "learning_rate": 4.8743971061093254e-05, + "loss": 0.6059, + "step": 755 + }, + { + "epoch": 0.2, + "learning_rate": 4.874229635584138e-05, + "loss": 0.4575, + "step": 756 + }, + { + "epoch": 0.2, + "learning_rate": 4.8740621650589496e-05, + "loss": 0.606, + "step": 757 + }, + { + "epoch": 0.2, + "learning_rate": 4.873894694533762e-05, + "loss": 0.5232, + "step": 758 + }, + { + "epoch": 0.2, + "learning_rate": 4.8737272240085744e-05, + "loss": 0.8473, + "step": 759 + }, + { + "epoch": 0.2, + "learning_rate": 4.873559753483387e-05, + "loss": 0.8118, + "step": 760 + }, + { + "epoch": 0.2, + "learning_rate": 4.873392282958199e-05, + "loss": 0.6268, + "step": 761 + }, + { + "epoch": 0.2, + "learning_rate": 4.8732248124330124e-05, + "loss": 0.4456, + "step": 762 + }, + { + "epoch": 0.2, + "learning_rate": 4.873057341907825e-05, + "loss": 0.7699, + "step": 763 + }, + { + "epoch": 0.2, + "learning_rate": 4.872889871382637e-05, + "loss": 0.6609, + "step": 764 + }, + { + "epoch": 0.2, + "learning_rate": 4.8727224008574496e-05, + "loss": 0.8052, + "step": 765 + }, + { + "epoch": 0.21, + "learning_rate": 4.8725549303322614e-05, + "loss": 0.496, + "step": 766 + }, + { + "epoch": 0.21, + "learning_rate": 4.872387459807074e-05, + "loss": 0.4848, + "step": 767 + }, + { + "epoch": 0.21, + "learning_rate": 4.872219989281886e-05, + "loss": 0.4839, + "step": 768 + }, + { + "epoch": 0.21, + "learning_rate": 4.8720525187566987e-05, + "loss": 0.5354, + "step": 769 + }, + { + "epoch": 0.21, + "learning_rate": 4.871885048231512e-05, + "loss": 0.7097, + "step": 770 + }, + { + "epoch": 0.21, + "learning_rate": 4.871717577706324e-05, + "loss": 0.5773, + "step": 771 + }, + { + "epoch": 0.21, + "learning_rate": 4.8715501071811366e-05, + "loss": 0.5574, + "step": 772 + }, + { + "epoch": 0.21, + "learning_rate": 4.871382636655949e-05, + "loss": 0.6437, + "step": 773 + }, + { + "epoch": 0.21, + "learning_rate": 4.8712151661307614e-05, + "loss": 0.4009, + "step": 774 + }, + { + "epoch": 0.21, + "learning_rate": 4.871047695605573e-05, + "loss": 0.4763, + "step": 775 + }, + { + "epoch": 0.21, + "learning_rate": 4.8708802250803856e-05, + "loss": 0.4992, + "step": 776 + }, + { + "epoch": 0.21, + "learning_rate": 4.870712754555199e-05, + "loss": 0.5868, + "step": 777 + }, + { + "epoch": 0.21, + "learning_rate": 4.870545284030011e-05, + "loss": 0.685, + "step": 778 + }, + { + "epoch": 0.21, + "learning_rate": 4.8703778135048235e-05, + "loss": 0.5873, + "step": 779 + }, + { + "epoch": 0.21, + "learning_rate": 4.870210342979636e-05, + "loss": 0.5583, + "step": 780 + }, + { + "epoch": 0.21, + "learning_rate": 4.8700428724544484e-05, + "loss": 0.5661, + "step": 781 + }, + { + "epoch": 0.21, + "learning_rate": 4.869875401929261e-05, + "loss": 0.4534, + "step": 782 + }, + { + "epoch": 0.21, + "learning_rate": 4.869707931404073e-05, + "loss": 0.4766, + "step": 783 + }, + { + "epoch": 0.21, + "learning_rate": 4.8695404608788856e-05, + "loss": 0.8011, + "step": 784 + }, + { + "epoch": 0.21, + "learning_rate": 4.869372990353698e-05, + "loss": 0.8164, + "step": 785 + }, + { + "epoch": 0.21, + "learning_rate": 4.8692055198285105e-05, + "loss": 0.4942, + "step": 786 + }, + { + "epoch": 0.21, + "learning_rate": 4.869038049303323e-05, + "loss": 0.6497, + "step": 787 + }, + { + "epoch": 0.21, + "learning_rate": 4.868870578778135e-05, + "loss": 0.3483, + "step": 788 + }, + { + "epoch": 0.21, + "learning_rate": 4.868703108252948e-05, + "loss": 1.0048, + "step": 789 + }, + { + "epoch": 0.21, + "learning_rate": 4.86853563772776e-05, + "loss": 0.5312, + "step": 790 + }, + { + "epoch": 0.21, + "learning_rate": 4.8683681672025726e-05, + "loss": 0.591, + "step": 791 + }, + { + "epoch": 0.21, + "learning_rate": 4.868200696677385e-05, + "loss": 0.6649, + "step": 792 + }, + { + "epoch": 0.21, + "learning_rate": 4.8680332261521974e-05, + "loss": 0.5823, + "step": 793 + }, + { + "epoch": 0.21, + "learning_rate": 4.86786575562701e-05, + "loss": 0.6432, + "step": 794 + }, + { + "epoch": 0.21, + "learning_rate": 4.867698285101822e-05, + "loss": 0.5885, + "step": 795 + }, + { + "epoch": 0.21, + "learning_rate": 4.867530814576635e-05, + "loss": 0.5813, + "step": 796 + }, + { + "epoch": 0.21, + "learning_rate": 4.867363344051447e-05, + "loss": 0.4894, + "step": 797 + }, + { + "epoch": 0.21, + "learning_rate": 4.8671958735262595e-05, + "loss": 0.4354, + "step": 798 + }, + { + "epoch": 0.21, + "learning_rate": 4.867028403001072e-05, + "loss": 0.3853, + "step": 799 + }, + { + "epoch": 0.21, + "learning_rate": 4.8668609324758844e-05, + "loss": 0.4394, + "step": 800 + }, + { + "epoch": 0.21, + "learning_rate": 4.866693461950697e-05, + "loss": 0.5596, + "step": 801 + }, + { + "epoch": 0.21, + "learning_rate": 4.86652599142551e-05, + "loss": 0.4881, + "step": 802 + }, + { + "epoch": 0.22, + "learning_rate": 4.8663585209003216e-05, + "loss": 0.4963, + "step": 803 + }, + { + "epoch": 0.22, + "learning_rate": 4.866191050375134e-05, + "loss": 0.8357, + "step": 804 + }, + { + "epoch": 0.22, + "learning_rate": 4.8660235798499465e-05, + "loss": 0.4283, + "step": 805 + }, + { + "epoch": 0.22, + "learning_rate": 4.865856109324759e-05, + "loss": 0.5828, + "step": 806 + }, + { + "epoch": 0.22, + "learning_rate": 4.865688638799571e-05, + "loss": 0.4462, + "step": 807 + }, + { + "epoch": 0.22, + "learning_rate": 4.865521168274384e-05, + "loss": 0.5942, + "step": 808 + }, + { + "epoch": 0.22, + "learning_rate": 4.865353697749196e-05, + "loss": 0.5403, + "step": 809 + }, + { + "epoch": 0.22, + "learning_rate": 4.865186227224009e-05, + "loss": 0.6737, + "step": 810 + }, + { + "epoch": 0.22, + "learning_rate": 4.865018756698822e-05, + "loss": 0.7765, + "step": 811 + }, + { + "epoch": 0.22, + "learning_rate": 4.864851286173634e-05, + "loss": 0.4458, + "step": 812 + }, + { + "epoch": 0.22, + "learning_rate": 4.864683815648446e-05, + "loss": 0.7386, + "step": 813 + }, + { + "epoch": 0.22, + "learning_rate": 4.864516345123258e-05, + "loss": 0.3958, + "step": 814 + }, + { + "epoch": 0.22, + "learning_rate": 4.864348874598071e-05, + "loss": 0.508, + "step": 815 + }, + { + "epoch": 0.22, + "learning_rate": 4.864181404072883e-05, + "loss": 0.6366, + "step": 816 + }, + { + "epoch": 0.22, + "learning_rate": 4.8640139335476955e-05, + "loss": 0.5736, + "step": 817 + }, + { + "epoch": 0.22, + "learning_rate": 4.8638464630225086e-05, + "loss": 0.4471, + "step": 818 + }, + { + "epoch": 0.22, + "learning_rate": 4.863678992497321e-05, + "loss": 0.3859, + "step": 819 + }, + { + "epoch": 0.22, + "learning_rate": 4.8635115219721335e-05, + "loss": 0.6107, + "step": 820 + }, + { + "epoch": 0.22, + "learning_rate": 4.863344051446946e-05, + "loss": 0.5805, + "step": 821 + }, + { + "epoch": 0.22, + "learning_rate": 4.8631765809217576e-05, + "loss": 0.4405, + "step": 822 + }, + { + "epoch": 0.22, + "learning_rate": 4.86300911039657e-05, + "loss": 0.6442, + "step": 823 + }, + { + "epoch": 0.22, + "learning_rate": 4.8628416398713825e-05, + "loss": 0.8047, + "step": 824 + }, + { + "epoch": 0.22, + "learning_rate": 4.862674169346195e-05, + "loss": 0.6765, + "step": 825 + }, + { + "epoch": 0.22, + "learning_rate": 4.862506698821008e-05, + "loss": 0.5031, + "step": 826 + }, + { + "epoch": 0.22, + "learning_rate": 4.8623392282958204e-05, + "loss": 0.6808, + "step": 827 + }, + { + "epoch": 0.22, + "learning_rate": 4.862171757770633e-05, + "loss": 0.6014, + "step": 828 + }, + { + "epoch": 0.22, + "learning_rate": 4.862004287245445e-05, + "loss": 0.4878, + "step": 829 + }, + { + "epoch": 0.22, + "learning_rate": 4.861836816720258e-05, + "loss": 0.5679, + "step": 830 + }, + { + "epoch": 0.22, + "learning_rate": 4.8616693461950694e-05, + "loss": 0.7981, + "step": 831 + }, + { + "epoch": 0.22, + "learning_rate": 4.861501875669882e-05, + "loss": 0.5401, + "step": 832 + }, + { + "epoch": 0.22, + "learning_rate": 4.861334405144695e-05, + "loss": 0.4133, + "step": 833 + }, + { + "epoch": 0.22, + "learning_rate": 4.8611669346195074e-05, + "loss": 0.4782, + "step": 834 + }, + { + "epoch": 0.22, + "learning_rate": 4.86099946409432e-05, + "loss": 0.5932, + "step": 835 + }, + { + "epoch": 0.22, + "learning_rate": 4.860831993569132e-05, + "loss": 0.5205, + "step": 836 + }, + { + "epoch": 0.22, + "learning_rate": 4.8606645230439446e-05, + "loss": 0.528, + "step": 837 + }, + { + "epoch": 0.22, + "learning_rate": 4.860497052518757e-05, + "loss": 0.6522, + "step": 838 + }, + { + "epoch": 0.22, + "learning_rate": 4.8603295819935695e-05, + "loss": 0.5196, + "step": 839 + }, + { + "epoch": 0.23, + "learning_rate": 4.860162111468382e-05, + "loss": 0.4807, + "step": 840 + }, + { + "epoch": 0.23, + "learning_rate": 4.859994640943194e-05, + "loss": 0.4131, + "step": 841 + }, + { + "epoch": 0.23, + "learning_rate": 4.859827170418007e-05, + "loss": 0.4373, + "step": 842 + }, + { + "epoch": 0.23, + "learning_rate": 4.859659699892819e-05, + "loss": 0.7546, + "step": 843 + }, + { + "epoch": 0.23, + "learning_rate": 4.8594922293676316e-05, + "loss": 0.5079, + "step": 844 + }, + { + "epoch": 0.23, + "learning_rate": 4.859324758842444e-05, + "loss": 0.6204, + "step": 845 + }, + { + "epoch": 0.23, + "learning_rate": 4.8591572883172564e-05, + "loss": 0.5173, + "step": 846 + }, + { + "epoch": 0.23, + "learning_rate": 4.858989817792069e-05, + "loss": 0.4436, + "step": 847 + }, + { + "epoch": 0.23, + "learning_rate": 4.858822347266881e-05, + "loss": 0.6586, + "step": 848 + }, + { + "epoch": 0.23, + "learning_rate": 4.858654876741694e-05, + "loss": 0.6666, + "step": 849 + }, + { + "epoch": 0.23, + "learning_rate": 4.858487406216506e-05, + "loss": 0.5088, + "step": 850 + }, + { + "epoch": 0.23, + "learning_rate": 4.8583199356913185e-05, + "loss": 0.513, + "step": 851 + }, + { + "epoch": 0.23, + "learning_rate": 4.858152465166131e-05, + "loss": 0.469, + "step": 852 + }, + { + "epoch": 0.23, + "learning_rate": 4.8579849946409434e-05, + "loss": 0.4938, + "step": 853 + }, + { + "epoch": 0.23, + "learning_rate": 4.857817524115756e-05, + "loss": 0.504, + "step": 854 + }, + { + "epoch": 0.23, + "learning_rate": 4.857650053590568e-05, + "loss": 0.441, + "step": 855 + }, + { + "epoch": 0.23, + "learning_rate": 4.8574825830653806e-05, + "loss": 0.6497, + "step": 856 + }, + { + "epoch": 0.23, + "learning_rate": 4.857315112540193e-05, + "loss": 0.5277, + "step": 857 + }, + { + "epoch": 0.23, + "learning_rate": 4.857147642015006e-05, + "loss": 0.4454, + "step": 858 + }, + { + "epoch": 0.23, + "learning_rate": 4.856980171489818e-05, + "loss": 0.5388, + "step": 859 + }, + { + "epoch": 0.23, + "learning_rate": 4.85681270096463e-05, + "loss": 0.4469, + "step": 860 + }, + { + "epoch": 0.23, + "learning_rate": 4.856645230439443e-05, + "loss": 0.4353, + "step": 861 + }, + { + "epoch": 0.23, + "learning_rate": 4.856477759914255e-05, + "loss": 0.7186, + "step": 862 + }, + { + "epoch": 0.23, + "learning_rate": 4.8563102893890676e-05, + "loss": 0.6, + "step": 863 + }, + { + "epoch": 0.23, + "learning_rate": 4.85614281886388e-05, + "loss": 0.5035, + "step": 864 + }, + { + "epoch": 0.23, + "learning_rate": 4.8559753483386924e-05, + "loss": 0.6421, + "step": 865 + }, + { + "epoch": 0.23, + "learning_rate": 4.8558078778135055e-05, + "loss": 0.6842, + "step": 866 + }, + { + "epoch": 0.23, + "learning_rate": 4.855640407288318e-05, + "loss": 0.4593, + "step": 867 + }, + { + "epoch": 0.23, + "learning_rate": 4.8554729367631304e-05, + "loss": 0.4416, + "step": 868 + }, + { + "epoch": 0.23, + "learning_rate": 4.855305466237942e-05, + "loss": 0.4536, + "step": 869 + }, + { + "epoch": 0.23, + "learning_rate": 4.8551379957127545e-05, + "loss": 0.6327, + "step": 870 + }, + { + "epoch": 0.23, + "learning_rate": 4.854970525187567e-05, + "loss": 0.7537, + "step": 871 + }, + { + "epoch": 0.23, + "learning_rate": 4.8548030546623794e-05, + "loss": 0.4468, + "step": 872 + }, + { + "epoch": 0.23, + "learning_rate": 4.854635584137192e-05, + "loss": 0.4895, + "step": 873 + }, + { + "epoch": 0.23, + "learning_rate": 4.854468113612005e-05, + "loss": 0.5509, + "step": 874 + }, + { + "epoch": 0.23, + "learning_rate": 4.854300643086817e-05, + "loss": 0.644, + "step": 875 + }, + { + "epoch": 0.23, + "learning_rate": 4.85413317256163e-05, + "loss": 0.734, + "step": 876 + }, + { + "epoch": 0.23, + "learning_rate": 4.853965702036442e-05, + "loss": 0.4207, + "step": 877 + }, + { + "epoch": 0.24, + "learning_rate": 4.853798231511254e-05, + "loss": 0.7109, + "step": 878 + }, + { + "epoch": 0.24, + "learning_rate": 4.853630760986066e-05, + "loss": 0.9153, + "step": 879 + }, + { + "epoch": 0.24, + "learning_rate": 4.853463290460879e-05, + "loss": 0.5348, + "step": 880 + }, + { + "epoch": 0.24, + "learning_rate": 4.853295819935691e-05, + "loss": 0.6508, + "step": 881 + }, + { + "epoch": 0.24, + "learning_rate": 4.853128349410504e-05, + "loss": 0.6477, + "step": 882 + }, + { + "epoch": 0.24, + "learning_rate": 4.852960878885317e-05, + "loss": 0.4935, + "step": 883 + }, + { + "epoch": 0.24, + "learning_rate": 4.852793408360129e-05, + "loss": 0.574, + "step": 884 + }, + { + "epoch": 0.24, + "learning_rate": 4.8526259378349415e-05, + "loss": 0.7316, + "step": 885 + }, + { + "epoch": 0.24, + "learning_rate": 4.852458467309754e-05, + "loss": 0.3846, + "step": 886 + }, + { + "epoch": 0.24, + "learning_rate": 4.852290996784566e-05, + "loss": 0.5246, + "step": 887 + }, + { + "epoch": 0.24, + "learning_rate": 4.852123526259378e-05, + "loss": 0.571, + "step": 888 + }, + { + "epoch": 0.24, + "learning_rate": 4.851956055734191e-05, + "loss": 0.4578, + "step": 889 + }, + { + "epoch": 0.24, + "learning_rate": 4.8517885852090036e-05, + "loss": 1.2341, + "step": 890 + }, + { + "epoch": 0.24, + "learning_rate": 4.851621114683816e-05, + "loss": 0.4472, + "step": 891 + }, + { + "epoch": 0.24, + "learning_rate": 4.8514536441586285e-05, + "loss": 0.4993, + "step": 892 + }, + { + "epoch": 0.24, + "learning_rate": 4.851286173633441e-05, + "loss": 0.703, + "step": 893 + }, + { + "epoch": 0.24, + "learning_rate": 4.851118703108253e-05, + "loss": 0.4058, + "step": 894 + }, + { + "epoch": 0.24, + "learning_rate": 4.850951232583066e-05, + "loss": 0.4954, + "step": 895 + }, + { + "epoch": 0.24, + "learning_rate": 4.850783762057878e-05, + "loss": 0.4851, + "step": 896 + }, + { + "epoch": 0.24, + "learning_rate": 4.8506162915326906e-05, + "loss": 0.5883, + "step": 897 + }, + { + "epoch": 0.24, + "learning_rate": 4.850448821007503e-05, + "loss": 0.469, + "step": 898 + }, + { + "epoch": 0.24, + "learning_rate": 4.8502813504823154e-05, + "loss": 0.5422, + "step": 899 + }, + { + "epoch": 0.24, + "learning_rate": 4.850113879957128e-05, + "loss": 0.8019, + "step": 900 + }, + { + "epoch": 0.24, + "learning_rate": 4.84994640943194e-05, + "loss": 0.6637, + "step": 901 + }, + { + "epoch": 0.24, + "learning_rate": 4.849778938906753e-05, + "loss": 0.4593, + "step": 902 + }, + { + "epoch": 0.24, + "learning_rate": 4.849611468381565e-05, + "loss": 0.4259, + "step": 903 + }, + { + "epoch": 0.24, + "learning_rate": 4.8494439978563775e-05, + "loss": 0.6069, + "step": 904 + }, + { + "epoch": 0.24, + "learning_rate": 4.84927652733119e-05, + "loss": 0.4008, + "step": 905 + }, + { + "epoch": 0.24, + "learning_rate": 4.8491090568060024e-05, + "loss": 0.5829, + "step": 906 + }, + { + "epoch": 0.24, + "learning_rate": 4.848941586280815e-05, + "loss": 0.5306, + "step": 907 + }, + { + "epoch": 0.24, + "learning_rate": 4.848774115755627e-05, + "loss": 0.4945, + "step": 908 + }, + { + "epoch": 0.24, + "learning_rate": 4.8486066452304396e-05, + "loss": 0.3998, + "step": 909 + }, + { + "epoch": 0.24, + "learning_rate": 4.848439174705252e-05, + "loss": 0.5048, + "step": 910 + }, + { + "epoch": 0.24, + "learning_rate": 4.8482717041800645e-05, + "loss": 0.403, + "step": 911 + }, + { + "epoch": 0.24, + "learning_rate": 4.848104233654877e-05, + "loss": 0.6041, + "step": 912 + }, + { + "epoch": 0.24, + "learning_rate": 4.847936763129689e-05, + "loss": 0.4674, + "step": 913 + }, + { + "epoch": 0.24, + "learning_rate": 4.8477692926045024e-05, + "loss": 0.5343, + "step": 914 + }, + { + "epoch": 0.25, + "learning_rate": 4.847601822079314e-05, + "loss": 0.5288, + "step": 915 + }, + { + "epoch": 0.25, + "learning_rate": 4.8474343515541266e-05, + "loss": 0.4602, + "step": 916 + }, + { + "epoch": 0.25, + "learning_rate": 4.847266881028939e-05, + "loss": 0.555, + "step": 917 + }, + { + "epoch": 0.25, + "learning_rate": 4.8470994105037514e-05, + "loss": 0.5056, + "step": 918 + }, + { + "epoch": 0.25, + "learning_rate": 4.846931939978564e-05, + "loss": 0.6213, + "step": 919 + }, + { + "epoch": 0.25, + "learning_rate": 4.846764469453376e-05, + "loss": 0.6171, + "step": 920 + }, + { + "epoch": 0.25, + "learning_rate": 4.846596998928189e-05, + "loss": 0.426, + "step": 921 + }, + { + "epoch": 0.25, + "learning_rate": 4.846429528403002e-05, + "loss": 0.6773, + "step": 922 + }, + { + "epoch": 0.25, + "learning_rate": 4.846262057877814e-05, + "loss": 0.5696, + "step": 923 + }, + { + "epoch": 0.25, + "learning_rate": 4.8460945873526266e-05, + "loss": 0.4793, + "step": 924 + }, + { + "epoch": 0.25, + "learning_rate": 4.8459271168274384e-05, + "loss": 0.5332, + "step": 925 + }, + { + "epoch": 0.25, + "learning_rate": 4.845759646302251e-05, + "loss": 0.5276, + "step": 926 + }, + { + "epoch": 0.25, + "learning_rate": 4.845592175777063e-05, + "loss": 0.5117, + "step": 927 + }, + { + "epoch": 0.25, + "learning_rate": 4.8454247052518756e-05, + "loss": 0.6729, + "step": 928 + }, + { + "epoch": 0.25, + "learning_rate": 4.845257234726688e-05, + "loss": 0.5065, + "step": 929 + }, + { + "epoch": 0.25, + "learning_rate": 4.845089764201501e-05, + "loss": 0.4515, + "step": 930 + }, + { + "epoch": 0.25, + "learning_rate": 4.8449222936763136e-05, + "loss": 0.6484, + "step": 931 + }, + { + "epoch": 0.25, + "learning_rate": 4.844754823151126e-05, + "loss": 0.4856, + "step": 932 + }, + { + "epoch": 0.25, + "learning_rate": 4.8445873526259384e-05, + "loss": 0.6715, + "step": 933 + }, + { + "epoch": 0.25, + "learning_rate": 4.84441988210075e-05, + "loss": 0.4616, + "step": 934 + }, + { + "epoch": 0.25, + "learning_rate": 4.8442524115755626e-05, + "loss": 0.4206, + "step": 935 + }, + { + "epoch": 0.25, + "learning_rate": 4.844084941050375e-05, + "loss": 0.4216, + "step": 936 + }, + { + "epoch": 0.25, + "learning_rate": 4.8439174705251874e-05, + "loss": 0.5587, + "step": 937 + }, + { + "epoch": 0.25, + "learning_rate": 4.8437500000000005e-05, + "loss": 0.5826, + "step": 938 + }, + { + "epoch": 0.25, + "learning_rate": 4.843582529474813e-05, + "loss": 0.4582, + "step": 939 + }, + { + "epoch": 0.25, + "learning_rate": 4.8434150589496253e-05, + "loss": 0.5035, + "step": 940 + }, + { + "epoch": 0.25, + "learning_rate": 4.843247588424438e-05, + "loss": 0.4956, + "step": 941 + }, + { + "epoch": 0.25, + "learning_rate": 4.84308011789925e-05, + "loss": 0.5232, + "step": 942 + }, + { + "epoch": 0.25, + "learning_rate": 4.842912647374062e-05, + "loss": 0.7575, + "step": 943 + }, + { + "epoch": 0.25, + "learning_rate": 4.8427451768488744e-05, + "loss": 0.4187, + "step": 944 + }, + { + "epoch": 0.25, + "learning_rate": 4.8425777063236875e-05, + "loss": 0.7081, + "step": 945 + }, + { + "epoch": 0.25, + "learning_rate": 4.8424102357985e-05, + "loss": 0.4946, + "step": 946 + }, + { + "epoch": 0.25, + "learning_rate": 4.842242765273312e-05, + "loss": 0.4067, + "step": 947 + }, + { + "epoch": 0.25, + "learning_rate": 4.842075294748125e-05, + "loss": 0.3502, + "step": 948 + }, + { + "epoch": 0.25, + "learning_rate": 4.841907824222937e-05, + "loss": 0.5394, + "step": 949 + }, + { + "epoch": 0.25, + "learning_rate": 4.8417403536977496e-05, + "loss": 0.488, + "step": 950 + }, + { + "epoch": 0.25, + "learning_rate": 4.841572883172562e-05, + "loss": 0.4344, + "step": 951 + }, + { + "epoch": 0.26, + "learning_rate": 4.841405412647374e-05, + "loss": 0.504, + "step": 952 + }, + { + "epoch": 0.26, + "learning_rate": 4.841237942122187e-05, + "loss": 0.7941, + "step": 953 + }, + { + "epoch": 0.26, + "learning_rate": 4.841070471596999e-05, + "loss": 0.46, + "step": 954 + }, + { + "epoch": 0.26, + "learning_rate": 4.840903001071812e-05, + "loss": 0.7818, + "step": 955 + }, + { + "epoch": 0.26, + "learning_rate": 4.840735530546624e-05, + "loss": 0.3113, + "step": 956 + }, + { + "epoch": 0.26, + "learning_rate": 4.8405680600214365e-05, + "loss": 0.4894, + "step": 957 + }, + { + "epoch": 0.26, + "learning_rate": 4.840400589496249e-05, + "loss": 0.6446, + "step": 958 + }, + { + "epoch": 0.26, + "learning_rate": 4.8402331189710613e-05, + "loss": 0.5707, + "step": 959 + }, + { + "epoch": 0.26, + "learning_rate": 4.840065648445874e-05, + "loss": 0.4412, + "step": 960 + }, + { + "epoch": 0.26, + "learning_rate": 4.839898177920686e-05, + "loss": 0.4511, + "step": 961 + }, + { + "epoch": 0.26, + "learning_rate": 4.8397307073954986e-05, + "loss": 0.4868, + "step": 962 + }, + { + "epoch": 0.26, + "learning_rate": 4.839563236870311e-05, + "loss": 0.5199, + "step": 963 + }, + { + "epoch": 0.26, + "learning_rate": 4.8393957663451235e-05, + "loss": 0.5764, + "step": 964 + }, + { + "epoch": 0.26, + "learning_rate": 4.839228295819936e-05, + "loss": 0.7825, + "step": 965 + }, + { + "epoch": 0.26, + "learning_rate": 4.839060825294748e-05, + "loss": 0.542, + "step": 966 + }, + { + "epoch": 0.26, + "learning_rate": 4.838893354769561e-05, + "loss": 0.6215, + "step": 967 + }, + { + "epoch": 0.26, + "learning_rate": 4.838725884244373e-05, + "loss": 0.3242, + "step": 968 + }, + { + "epoch": 0.26, + "learning_rate": 4.8385584137191856e-05, + "loss": 0.6276, + "step": 969 + }, + { + "epoch": 0.26, + "learning_rate": 4.8383909431939987e-05, + "loss": 0.5278, + "step": 970 + }, + { + "epoch": 0.26, + "learning_rate": 4.8382234726688104e-05, + "loss": 0.5721, + "step": 971 + }, + { + "epoch": 0.26, + "learning_rate": 4.838056002143623e-05, + "loss": 0.3957, + "step": 972 + }, + { + "epoch": 0.26, + "learning_rate": 4.837888531618435e-05, + "loss": 0.5172, + "step": 973 + }, + { + "epoch": 0.26, + "learning_rate": 4.837721061093248e-05, + "loss": 0.4393, + "step": 974 + }, + { + "epoch": 0.26, + "learning_rate": 4.83755359056806e-05, + "loss": 0.4675, + "step": 975 + }, + { + "epoch": 0.26, + "learning_rate": 4.8373861200428725e-05, + "loss": 0.6345, + "step": 976 + }, + { + "epoch": 0.26, + "learning_rate": 4.837218649517685e-05, + "loss": 0.371, + "step": 977 + }, + { + "epoch": 0.26, + "learning_rate": 4.837051178992498e-05, + "loss": 0.4059, + "step": 978 + }, + { + "epoch": 0.26, + "learning_rate": 4.8368837084673104e-05, + "loss": 0.384, + "step": 979 + }, + { + "epoch": 0.26, + "learning_rate": 4.836716237942122e-05, + "loss": 0.5362, + "step": 980 + }, + { + "epoch": 0.26, + "learning_rate": 4.8365487674169346e-05, + "loss": 0.5123, + "step": 981 + }, + { + "epoch": 0.26, + "learning_rate": 4.836381296891747e-05, + "loss": 0.5257, + "step": 982 + }, + { + "epoch": 0.26, + "learning_rate": 4.8362138263665595e-05, + "loss": 0.6835, + "step": 983 + }, + { + "epoch": 0.26, + "learning_rate": 4.836046355841372e-05, + "loss": 0.4261, + "step": 984 + }, + { + "epoch": 0.26, + "learning_rate": 4.835878885316184e-05, + "loss": 0.5065, + "step": 985 + }, + { + "epoch": 0.26, + "learning_rate": 4.8357114147909974e-05, + "loss": 0.3967, + "step": 986 + }, + { + "epoch": 0.26, + "learning_rate": 4.83554394426581e-05, + "loss": 0.7072, + "step": 987 + }, + { + "epoch": 0.26, + "learning_rate": 4.835376473740622e-05, + "loss": 0.5566, + "step": 988 + }, + { + "epoch": 0.26, + "learning_rate": 4.8352090032154347e-05, + "loss": 0.5507, + "step": 989 + }, + { + "epoch": 0.27, + "learning_rate": 4.8350415326902464e-05, + "loss": 0.5782, + "step": 990 + }, + { + "epoch": 0.27, + "learning_rate": 4.834874062165059e-05, + "loss": 0.5821, + "step": 991 + }, + { + "epoch": 0.27, + "learning_rate": 4.834706591639871e-05, + "loss": 0.7934, + "step": 992 + }, + { + "epoch": 0.27, + "learning_rate": 4.8345391211146837e-05, + "loss": 0.3892, + "step": 993 + }, + { + "epoch": 0.27, + "learning_rate": 4.834371650589497e-05, + "loss": 0.5343, + "step": 994 + }, + { + "epoch": 0.27, + "learning_rate": 4.834204180064309e-05, + "loss": 0.4467, + "step": 995 + }, + { + "epoch": 0.27, + "learning_rate": 4.8340367095391216e-05, + "loss": 0.4802, + "step": 996 + }, + { + "epoch": 0.27, + "learning_rate": 4.833869239013934e-05, + "loss": 0.3701, + "step": 997 + }, + { + "epoch": 0.27, + "learning_rate": 4.8337017684887464e-05, + "loss": 0.8213, + "step": 998 + }, + { + "epoch": 0.27, + "learning_rate": 4.833534297963558e-05, + "loss": 0.354, + "step": 999 + }, + { + "epoch": 0.27, + "learning_rate": 4.8333668274383706e-05, + "loss": 0.5714, + "step": 1000 + }, + { + "epoch": 0.27, + "learning_rate": 4.833199356913184e-05, + "loss": 0.4534, + "step": 1001 + }, + { + "epoch": 0.27, + "learning_rate": 4.833031886387996e-05, + "loss": 0.4542, + "step": 1002 + }, + { + "epoch": 0.27, + "learning_rate": 4.8328644158628085e-05, + "loss": 0.3979, + "step": 1003 + }, + { + "epoch": 0.27, + "learning_rate": 4.832696945337621e-05, + "loss": 0.4599, + "step": 1004 + }, + { + "epoch": 0.27, + "learning_rate": 4.8325294748124334e-05, + "loss": 0.4254, + "step": 1005 + }, + { + "epoch": 0.27, + "learning_rate": 4.832362004287246e-05, + "loss": 0.5145, + "step": 1006 + }, + { + "epoch": 0.27, + "learning_rate": 4.832194533762058e-05, + "loss": 0.3472, + "step": 1007 + }, + { + "epoch": 0.27, + "learning_rate": 4.83202706323687e-05, + "loss": 0.5129, + "step": 1008 + }, + { + "epoch": 0.27, + "learning_rate": 4.831859592711683e-05, + "loss": 0.4347, + "step": 1009 + }, + { + "epoch": 0.27, + "learning_rate": 4.8316921221864955e-05, + "loss": 0.3978, + "step": 1010 + }, + { + "epoch": 0.27, + "learning_rate": 4.831524651661308e-05, + "loss": 0.4726, + "step": 1011 + }, + { + "epoch": 0.27, + "learning_rate": 4.83135718113612e-05, + "loss": 0.4989, + "step": 1012 + }, + { + "epoch": 0.27, + "learning_rate": 4.831189710610933e-05, + "loss": 0.3085, + "step": 1013 + }, + { + "epoch": 0.27, + "learning_rate": 4.831022240085745e-05, + "loss": 0.3875, + "step": 1014 + }, + { + "epoch": 0.27, + "learning_rate": 4.8308547695605576e-05, + "loss": 0.4797, + "step": 1015 + }, + { + "epoch": 0.27, + "learning_rate": 4.83068729903537e-05, + "loss": 0.434, + "step": 1016 + }, + { + "epoch": 0.27, + "learning_rate": 4.8305198285101824e-05, + "loss": 0.3722, + "step": 1017 + }, + { + "epoch": 0.27, + "learning_rate": 4.830352357984995e-05, + "loss": 0.4032, + "step": 1018 + }, + { + "epoch": 0.27, + "learning_rate": 4.830184887459807e-05, + "loss": 0.5203, + "step": 1019 + }, + { + "epoch": 0.27, + "learning_rate": 4.83001741693462e-05, + "loss": 0.479, + "step": 1020 + }, + { + "epoch": 0.27, + "learning_rate": 4.829849946409432e-05, + "loss": 0.5829, + "step": 1021 + }, + { + "epoch": 0.27, + "learning_rate": 4.8296824758842445e-05, + "loss": 0.4758, + "step": 1022 + }, + { + "epoch": 0.27, + "learning_rate": 4.829515005359057e-05, + "loss": 0.4744, + "step": 1023 + }, + { + "epoch": 0.27, + "learning_rate": 4.8293475348338694e-05, + "loss": 0.5298, + "step": 1024 + }, + { + "epoch": 0.27, + "learning_rate": 4.829180064308682e-05, + "loss": 0.4614, + "step": 1025 + }, + { + "epoch": 0.27, + "learning_rate": 4.829012593783495e-05, + "loss": 0.4619, + "step": 1026 + }, + { + "epoch": 0.28, + "learning_rate": 4.8288451232583066e-05, + "loss": 0.3503, + "step": 1027 + }, + { + "epoch": 0.28, + "learning_rate": 4.828677652733119e-05, + "loss": 0.762, + "step": 1028 + }, + { + "epoch": 0.28, + "learning_rate": 4.8285101822079315e-05, + "loss": 0.5944, + "step": 1029 + }, + { + "epoch": 0.28, + "learning_rate": 4.828342711682744e-05, + "loss": 0.654, + "step": 1030 + }, + { + "epoch": 0.28, + "learning_rate": 4.828175241157556e-05, + "loss": 0.531, + "step": 1031 + }, + { + "epoch": 0.28, + "learning_rate": 4.828007770632369e-05, + "loss": 0.5073, + "step": 1032 + }, + { + "epoch": 0.28, + "learning_rate": 4.827840300107181e-05, + "loss": 0.528, + "step": 1033 + }, + { + "epoch": 0.28, + "learning_rate": 4.827672829581994e-05, + "loss": 0.4321, + "step": 1034 + }, + { + "epoch": 0.28, + "learning_rate": 4.827505359056807e-05, + "loss": 0.3654, + "step": 1035 + }, + { + "epoch": 0.28, + "learning_rate": 4.8273378885316184e-05, + "loss": 0.5186, + "step": 1036 + }, + { + "epoch": 0.28, + "learning_rate": 4.827170418006431e-05, + "loss": 0.6571, + "step": 1037 + }, + { + "epoch": 0.28, + "learning_rate": 4.827002947481243e-05, + "loss": 0.5014, + "step": 1038 + }, + { + "epoch": 0.28, + "learning_rate": 4.826835476956056e-05, + "loss": 0.754, + "step": 1039 + }, + { + "epoch": 0.28, + "learning_rate": 4.826668006430868e-05, + "loss": 0.5432, + "step": 1040 + }, + { + "epoch": 0.28, + "learning_rate": 4.8265005359056805e-05, + "loss": 0.3296, + "step": 1041 + }, + { + "epoch": 0.28, + "learning_rate": 4.8263330653804936e-05, + "loss": 0.6721, + "step": 1042 + }, + { + "epoch": 0.28, + "learning_rate": 4.826165594855306e-05, + "loss": 0.4706, + "step": 1043 + }, + { + "epoch": 0.28, + "learning_rate": 4.8259981243301185e-05, + "loss": 0.4585, + "step": 1044 + }, + { + "epoch": 0.28, + "learning_rate": 4.825830653804931e-05, + "loss": 0.4679, + "step": 1045 + }, + { + "epoch": 0.28, + "learning_rate": 4.8256631832797426e-05, + "loss": 0.4139, + "step": 1046 + }, + { + "epoch": 0.28, + "learning_rate": 4.825495712754555e-05, + "loss": 0.3923, + "step": 1047 + }, + { + "epoch": 0.28, + "learning_rate": 4.8253282422293675e-05, + "loss": 0.4015, + "step": 1048 + }, + { + "epoch": 0.28, + "learning_rate": 4.82516077170418e-05, + "loss": 0.5723, + "step": 1049 + }, + { + "epoch": 0.28, + "learning_rate": 4.824993301178993e-05, + "loss": 0.4932, + "step": 1050 + }, + { + "epoch": 0.28, + "learning_rate": 4.8248258306538054e-05, + "loss": 0.5309, + "step": 1051 + }, + { + "epoch": 0.28, + "learning_rate": 4.824658360128618e-05, + "loss": 0.534, + "step": 1052 + }, + { + "epoch": 0.28, + "learning_rate": 4.82449088960343e-05, + "loss": 0.5114, + "step": 1053 + }, + { + "epoch": 0.28, + "learning_rate": 4.824323419078243e-05, + "loss": 0.4255, + "step": 1054 + }, + { + "epoch": 0.28, + "learning_rate": 4.8241559485530544e-05, + "loss": 0.5274, + "step": 1055 + }, + { + "epoch": 0.28, + "learning_rate": 4.823988478027867e-05, + "loss": 0.4633, + "step": 1056 + }, + { + "epoch": 0.28, + "learning_rate": 4.82382100750268e-05, + "loss": 0.3927, + "step": 1057 + }, + { + "epoch": 0.28, + "learning_rate": 4.8236535369774924e-05, + "loss": 0.7678, + "step": 1058 + }, + { + "epoch": 0.28, + "learning_rate": 4.823486066452305e-05, + "loss": 0.6244, + "step": 1059 + }, + { + "epoch": 0.28, + "learning_rate": 4.823318595927117e-05, + "loss": 0.4786, + "step": 1060 + }, + { + "epoch": 0.28, + "learning_rate": 4.8231511254019296e-05, + "loss": 0.5823, + "step": 1061 + }, + { + "epoch": 0.28, + "learning_rate": 4.822983654876742e-05, + "loss": 0.3649, + "step": 1062 + }, + { + "epoch": 0.28, + "learning_rate": 4.8228161843515545e-05, + "loss": 0.5894, + "step": 1063 + }, + { + "epoch": 0.29, + "learning_rate": 4.822648713826366e-05, + "loss": 0.4501, + "step": 1064 + }, + { + "epoch": 0.29, + "learning_rate": 4.822481243301179e-05, + "loss": 0.5679, + "step": 1065 + }, + { + "epoch": 0.29, + "learning_rate": 4.822313772775992e-05, + "loss": 0.4393, + "step": 1066 + }, + { + "epoch": 0.29, + "learning_rate": 4.822146302250804e-05, + "loss": 0.4172, + "step": 1067 + }, + { + "epoch": 0.29, + "learning_rate": 4.8219788317256166e-05, + "loss": 0.4845, + "step": 1068 + }, + { + "epoch": 0.29, + "learning_rate": 4.821811361200429e-05, + "loss": 0.3994, + "step": 1069 + }, + { + "epoch": 0.29, + "learning_rate": 4.8216438906752414e-05, + "loss": 0.7136, + "step": 1070 + }, + { + "epoch": 0.29, + "learning_rate": 4.821476420150054e-05, + "loss": 0.6165, + "step": 1071 + }, + { + "epoch": 0.29, + "learning_rate": 4.821308949624866e-05, + "loss": 0.5621, + "step": 1072 + }, + { + "epoch": 0.29, + "learning_rate": 4.821141479099679e-05, + "loss": 0.4665, + "step": 1073 + }, + { + "epoch": 0.29, + "learning_rate": 4.820974008574491e-05, + "loss": 0.5498, + "step": 1074 + }, + { + "epoch": 0.29, + "learning_rate": 4.8208065380493035e-05, + "loss": 0.4965, + "step": 1075 + }, + { + "epoch": 0.29, + "learning_rate": 4.820639067524116e-05, + "loss": 0.581, + "step": 1076 + }, + { + "epoch": 0.29, + "learning_rate": 4.8204715969989284e-05, + "loss": 0.6506, + "step": 1077 + }, + { + "epoch": 0.29, + "learning_rate": 4.820304126473741e-05, + "loss": 0.5399, + "step": 1078 + }, + { + "epoch": 0.29, + "learning_rate": 4.820136655948553e-05, + "loss": 0.6262, + "step": 1079 + }, + { + "epoch": 0.29, + "learning_rate": 4.8199691854233656e-05, + "loss": 0.7049, + "step": 1080 + }, + { + "epoch": 0.29, + "learning_rate": 4.819801714898178e-05, + "loss": 0.5837, + "step": 1081 + }, + { + "epoch": 0.29, + "learning_rate": 4.819634244372991e-05, + "loss": 0.4753, + "step": 1082 + }, + { + "epoch": 0.29, + "learning_rate": 4.819466773847803e-05, + "loss": 0.4868, + "step": 1083 + }, + { + "epoch": 0.29, + "learning_rate": 4.819299303322615e-05, + "loss": 0.4362, + "step": 1084 + }, + { + "epoch": 0.29, + "learning_rate": 4.819131832797428e-05, + "loss": 0.5566, + "step": 1085 + }, + { + "epoch": 0.29, + "learning_rate": 4.81896436227224e-05, + "loss": 0.5102, + "step": 1086 + }, + { + "epoch": 0.29, + "learning_rate": 4.8187968917470526e-05, + "loss": 0.4421, + "step": 1087 + }, + { + "epoch": 0.29, + "learning_rate": 4.818629421221865e-05, + "loss": 0.8233, + "step": 1088 + }, + { + "epoch": 0.29, + "learning_rate": 4.8184619506966774e-05, + "loss": 0.5337, + "step": 1089 + }, + { + "epoch": 0.29, + "learning_rate": 4.8182944801714905e-05, + "loss": 0.5718, + "step": 1090 + }, + { + "epoch": 0.29, + "learning_rate": 4.818127009646303e-05, + "loss": 0.4525, + "step": 1091 + }, + { + "epoch": 0.29, + "learning_rate": 4.817959539121115e-05, + "loss": 0.3845, + "step": 1092 + }, + { + "epoch": 0.29, + "learning_rate": 4.817792068595927e-05, + "loss": 0.4783, + "step": 1093 + }, + { + "epoch": 0.29, + "learning_rate": 4.8176245980707395e-05, + "loss": 0.4668, + "step": 1094 + }, + { + "epoch": 0.29, + "learning_rate": 4.817457127545552e-05, + "loss": 0.349, + "step": 1095 + }, + { + "epoch": 0.29, + "learning_rate": 4.8172896570203644e-05, + "loss": 0.2732, + "step": 1096 + }, + { + "epoch": 0.29, + "learning_rate": 4.817122186495177e-05, + "loss": 0.5146, + "step": 1097 + }, + { + "epoch": 0.29, + "learning_rate": 4.81695471596999e-05, + "loss": 0.6323, + "step": 1098 + }, + { + "epoch": 0.29, + "learning_rate": 4.816787245444802e-05, + "loss": 0.523, + "step": 1099 + }, + { + "epoch": 0.29, + "learning_rate": 4.816619774919615e-05, + "loss": 0.4971, + "step": 1100 + }, + { + "epoch": 0.3, + "learning_rate": 4.816452304394427e-05, + "loss": 0.4149, + "step": 1101 + }, + { + "epoch": 0.3, + "learning_rate": 4.816284833869239e-05, + "loss": 0.4486, + "step": 1102 + }, + { + "epoch": 0.3, + "learning_rate": 4.816117363344051e-05, + "loss": 0.5465, + "step": 1103 + }, + { + "epoch": 0.3, + "learning_rate": 4.815949892818864e-05, + "loss": 0.48, + "step": 1104 + }, + { + "epoch": 0.3, + "learning_rate": 4.815782422293676e-05, + "loss": 0.5086, + "step": 1105 + }, + { + "epoch": 0.3, + "learning_rate": 4.815614951768489e-05, + "loss": 0.3051, + "step": 1106 + }, + { + "epoch": 0.3, + "learning_rate": 4.815447481243302e-05, + "loss": 0.5085, + "step": 1107 + }, + { + "epoch": 0.3, + "learning_rate": 4.815280010718114e-05, + "loss": 0.4634, + "step": 1108 + }, + { + "epoch": 0.3, + "learning_rate": 4.8151125401929265e-05, + "loss": 0.3898, + "step": 1109 + }, + { + "epoch": 0.3, + "learning_rate": 4.814945069667739e-05, + "loss": 0.3626, + "step": 1110 + }, + { + "epoch": 0.3, + "learning_rate": 4.814777599142551e-05, + "loss": 0.6474, + "step": 1111 + }, + { + "epoch": 0.3, + "learning_rate": 4.814610128617363e-05, + "loss": 0.3687, + "step": 1112 + }, + { + "epoch": 0.3, + "learning_rate": 4.814442658092176e-05, + "loss": 0.3378, + "step": 1113 + }, + { + "epoch": 0.3, + "learning_rate": 4.8142751875669886e-05, + "loss": 0.3604, + "step": 1114 + }, + { + "epoch": 0.3, + "learning_rate": 4.814107717041801e-05, + "loss": 0.4856, + "step": 1115 + }, + { + "epoch": 0.3, + "learning_rate": 4.8139402465166135e-05, + "loss": 0.6231, + "step": 1116 + }, + { + "epoch": 0.3, + "learning_rate": 4.813772775991426e-05, + "loss": 0.5231, + "step": 1117 + }, + { + "epoch": 0.3, + "learning_rate": 4.813605305466238e-05, + "loss": 0.55, + "step": 1118 + }, + { + "epoch": 0.3, + "learning_rate": 4.813437834941051e-05, + "loss": 0.5888, + "step": 1119 + }, + { + "epoch": 0.3, + "learning_rate": 4.8132703644158625e-05, + "loss": 0.4768, + "step": 1120 + }, + { + "epoch": 0.3, + "learning_rate": 4.8131028938906756e-05, + "loss": 0.4496, + "step": 1121 + }, + { + "epoch": 0.3, + "learning_rate": 4.812935423365488e-05, + "loss": 0.3644, + "step": 1122 + }, + { + "epoch": 0.3, + "learning_rate": 4.8127679528403004e-05, + "loss": 0.5377, + "step": 1123 + }, + { + "epoch": 0.3, + "learning_rate": 4.812600482315113e-05, + "loss": 0.3102, + "step": 1124 + }, + { + "epoch": 0.3, + "learning_rate": 4.812433011789925e-05, + "loss": 0.5877, + "step": 1125 + }, + { + "epoch": 0.3, + "learning_rate": 4.812265541264738e-05, + "loss": 0.4168, + "step": 1126 + }, + { + "epoch": 0.3, + "learning_rate": 4.81209807073955e-05, + "loss": 0.4366, + "step": 1127 + }, + { + "epoch": 0.3, + "learning_rate": 4.8119306002143625e-05, + "loss": 0.5442, + "step": 1128 + }, + { + "epoch": 0.3, + "learning_rate": 4.811763129689175e-05, + "loss": 0.5363, + "step": 1129 + }, + { + "epoch": 0.3, + "learning_rate": 4.8115956591639874e-05, + "loss": 0.5207, + "step": 1130 + }, + { + "epoch": 0.3, + "learning_rate": 4.8114281886388e-05, + "loss": 0.5852, + "step": 1131 + }, + { + "epoch": 0.3, + "learning_rate": 4.811260718113612e-05, + "loss": 1.1342, + "step": 1132 + }, + { + "epoch": 0.3, + "learning_rate": 4.8110932475884246e-05, + "loss": 0.6427, + "step": 1133 + }, + { + "epoch": 0.3, + "learning_rate": 4.810925777063237e-05, + "loss": 0.5754, + "step": 1134 + }, + { + "epoch": 0.3, + "learning_rate": 4.8107583065380495e-05, + "loss": 0.4995, + "step": 1135 + }, + { + "epoch": 0.3, + "learning_rate": 4.810590836012862e-05, + "loss": 0.4613, + "step": 1136 + }, + { + "epoch": 0.3, + "learning_rate": 4.810423365487674e-05, + "loss": 0.3072, + "step": 1137 + }, + { + "epoch": 0.3, + "learning_rate": 4.8102558949624874e-05, + "loss": 0.3375, + "step": 1138 + }, + { + "epoch": 0.31, + "learning_rate": 4.810088424437299e-05, + "loss": 0.3767, + "step": 1139 + }, + { + "epoch": 0.31, + "learning_rate": 4.8099209539121116e-05, + "loss": 0.4302, + "step": 1140 + }, + { + "epoch": 0.31, + "learning_rate": 4.809753483386924e-05, + "loss": 0.6856, + "step": 1141 + }, + { + "epoch": 0.31, + "learning_rate": 4.8095860128617364e-05, + "loss": 0.4368, + "step": 1142 + }, + { + "epoch": 0.31, + "learning_rate": 4.809418542336549e-05, + "loss": 0.4633, + "step": 1143 + }, + { + "epoch": 0.31, + "learning_rate": 4.809251071811361e-05, + "loss": 0.5486, + "step": 1144 + }, + { + "epoch": 0.31, + "learning_rate": 4.809083601286174e-05, + "loss": 0.7023, + "step": 1145 + }, + { + "epoch": 0.31, + "learning_rate": 4.808916130760987e-05, + "loss": 0.4103, + "step": 1146 + }, + { + "epoch": 0.31, + "learning_rate": 4.808748660235799e-05, + "loss": 0.7724, + "step": 1147 + }, + { + "epoch": 0.31, + "learning_rate": 4.808581189710611e-05, + "loss": 0.616, + "step": 1148 + }, + { + "epoch": 0.31, + "learning_rate": 4.8084137191854234e-05, + "loss": 0.4072, + "step": 1149 + }, + { + "epoch": 0.31, + "learning_rate": 4.808246248660236e-05, + "loss": 0.4011, + "step": 1150 + }, + { + "epoch": 0.31, + "learning_rate": 4.808078778135048e-05, + "loss": 0.4145, + "step": 1151 + }, + { + "epoch": 0.31, + "learning_rate": 4.8079113076098606e-05, + "loss": 0.4635, + "step": 1152 + }, + { + "epoch": 0.31, + "learning_rate": 4.807743837084673e-05, + "loss": 0.4797, + "step": 1153 + }, + { + "epoch": 0.31, + "learning_rate": 4.807576366559486e-05, + "loss": 0.4662, + "step": 1154 + }, + { + "epoch": 0.31, + "learning_rate": 4.8074088960342986e-05, + "loss": 0.4571, + "step": 1155 + }, + { + "epoch": 0.31, + "learning_rate": 4.807241425509111e-05, + "loss": 0.4205, + "step": 1156 + }, + { + "epoch": 0.31, + "learning_rate": 4.8070739549839234e-05, + "loss": 0.5888, + "step": 1157 + }, + { + "epoch": 0.31, + "learning_rate": 4.806906484458735e-05, + "loss": 0.711, + "step": 1158 + }, + { + "epoch": 0.31, + "learning_rate": 4.8067390139335476e-05, + "loss": 0.6301, + "step": 1159 + }, + { + "epoch": 0.31, + "learning_rate": 4.80657154340836e-05, + "loss": 0.5838, + "step": 1160 + }, + { + "epoch": 0.31, + "learning_rate": 4.8064040728831724e-05, + "loss": 0.4579, + "step": 1161 + }, + { + "epoch": 0.31, + "learning_rate": 4.8062366023579855e-05, + "loss": 0.5941, + "step": 1162 + }, + { + "epoch": 0.31, + "learning_rate": 4.806069131832798e-05, + "loss": 0.3159, + "step": 1163 + }, + { + "epoch": 0.31, + "learning_rate": 4.8059016613076104e-05, + "loss": 0.3711, + "step": 1164 + }, + { + "epoch": 0.31, + "learning_rate": 4.805734190782423e-05, + "loss": 0.3781, + "step": 1165 + }, + { + "epoch": 0.31, + "learning_rate": 4.805566720257235e-05, + "loss": 0.4098, + "step": 1166 + }, + { + "epoch": 0.31, + "learning_rate": 4.805399249732047e-05, + "loss": 0.3625, + "step": 1167 + }, + { + "epoch": 0.31, + "learning_rate": 4.8052317792068594e-05, + "loss": 0.3477, + "step": 1168 + }, + { + "epoch": 0.31, + "learning_rate": 4.8050643086816725e-05, + "loss": 0.4868, + "step": 1169 + }, + { + "epoch": 0.31, + "learning_rate": 4.804896838156485e-05, + "loss": 0.5629, + "step": 1170 + }, + { + "epoch": 0.31, + "learning_rate": 4.804729367631297e-05, + "loss": 0.5106, + "step": 1171 + }, + { + "epoch": 0.31, + "learning_rate": 4.80456189710611e-05, + "loss": 0.4262, + "step": 1172 + }, + { + "epoch": 0.31, + "learning_rate": 4.804394426580922e-05, + "loss": 0.4853, + "step": 1173 + }, + { + "epoch": 0.31, + "learning_rate": 4.8042269560557346e-05, + "loss": 0.4096, + "step": 1174 + }, + { + "epoch": 0.31, + "learning_rate": 4.804059485530547e-05, + "loss": 0.4199, + "step": 1175 + }, + { + "epoch": 0.32, + "learning_rate": 4.803892015005359e-05, + "loss": 0.4991, + "step": 1176 + }, + { + "epoch": 0.32, + "learning_rate": 4.803724544480172e-05, + "loss": 0.3656, + "step": 1177 + }, + { + "epoch": 0.32, + "learning_rate": 4.803557073954984e-05, + "loss": 0.4515, + "step": 1178 + }, + { + "epoch": 0.32, + "learning_rate": 4.803389603429797e-05, + "loss": 0.6569, + "step": 1179 + }, + { + "epoch": 0.32, + "learning_rate": 4.803222132904609e-05, + "loss": 0.5365, + "step": 1180 + }, + { + "epoch": 0.32, + "learning_rate": 4.8030546623794215e-05, + "loss": 0.648, + "step": 1181 + }, + { + "epoch": 0.32, + "learning_rate": 4.802887191854234e-05, + "loss": 0.356, + "step": 1182 + }, + { + "epoch": 0.32, + "learning_rate": 4.8027197213290464e-05, + "loss": 0.4614, + "step": 1183 + }, + { + "epoch": 0.32, + "learning_rate": 4.802552250803859e-05, + "loss": 0.2743, + "step": 1184 + }, + { + "epoch": 0.32, + "learning_rate": 4.802384780278671e-05, + "loss": 0.509, + "step": 1185 + }, + { + "epoch": 0.32, + "learning_rate": 4.8022173097534836e-05, + "loss": 0.3626, + "step": 1186 + }, + { + "epoch": 0.32, + "learning_rate": 4.802049839228296e-05, + "loss": 0.4769, + "step": 1187 + }, + { + "epoch": 0.32, + "learning_rate": 4.8018823687031085e-05, + "loss": 0.4709, + "step": 1188 + }, + { + "epoch": 0.32, + "learning_rate": 4.801714898177921e-05, + "loss": 0.4413, + "step": 1189 + }, + { + "epoch": 0.32, + "learning_rate": 4.801547427652733e-05, + "loss": 0.5571, + "step": 1190 + }, + { + "epoch": 0.32, + "learning_rate": 4.801379957127546e-05, + "loss": 0.3219, + "step": 1191 + }, + { + "epoch": 0.32, + "learning_rate": 4.801212486602358e-05, + "loss": 0.6329, + "step": 1192 + }, + { + "epoch": 0.32, + "learning_rate": 4.8010450160771706e-05, + "loss": 0.3881, + "step": 1193 + }, + { + "epoch": 0.32, + "learning_rate": 4.8008775455519837e-05, + "loss": 0.6305, + "step": 1194 + }, + { + "epoch": 0.32, + "learning_rate": 4.8007100750267954e-05, + "loss": 0.4631, + "step": 1195 + }, + { + "epoch": 0.32, + "learning_rate": 4.800542604501608e-05, + "loss": 0.3298, + "step": 1196 + }, + { + "epoch": 0.32, + "learning_rate": 4.80037513397642e-05, + "loss": 0.6595, + "step": 1197 + }, + { + "epoch": 0.32, + "learning_rate": 4.800207663451233e-05, + "loss": 0.3041, + "step": 1198 + }, + { + "epoch": 0.32, + "learning_rate": 4.800040192926045e-05, + "loss": 0.4332, + "step": 1199 + }, + { + "epoch": 0.32, + "learning_rate": 4.7998727224008575e-05, + "loss": 0.3923, + "step": 1200 + }, + { + "epoch": 0.32, + "learning_rate": 4.79970525187567e-05, + "loss": 0.3268, + "step": 1201 + }, + { + "epoch": 0.32, + "learning_rate": 4.799537781350483e-05, + "loss": 0.7526, + "step": 1202 + }, + { + "epoch": 0.32, + "learning_rate": 4.7993703108252954e-05, + "loss": 0.6435, + "step": 1203 + }, + { + "epoch": 0.32, + "learning_rate": 4.799202840300107e-05, + "loss": 0.4399, + "step": 1204 + }, + { + "epoch": 0.32, + "learning_rate": 4.7990353697749196e-05, + "loss": 0.444, + "step": 1205 + }, + { + "epoch": 0.32, + "learning_rate": 4.798867899249732e-05, + "loss": 0.4256, + "step": 1206 + }, + { + "epoch": 0.32, + "learning_rate": 4.7987004287245445e-05, + "loss": 0.4782, + "step": 1207 + }, + { + "epoch": 0.32, + "learning_rate": 4.798532958199357e-05, + "loss": 0.4261, + "step": 1208 + }, + { + "epoch": 0.32, + "learning_rate": 4.798365487674169e-05, + "loss": 0.4646, + "step": 1209 + }, + { + "epoch": 0.32, + "learning_rate": 4.7981980171489824e-05, + "loss": 0.4613, + "step": 1210 + }, + { + "epoch": 0.32, + "learning_rate": 4.798030546623795e-05, + "loss": 0.6128, + "step": 1211 + }, + { + "epoch": 0.32, + "learning_rate": 4.797863076098607e-05, + "loss": 0.34, + "step": 1212 + }, + { + "epoch": 0.33, + "learning_rate": 4.7976956055734197e-05, + "loss": 0.4826, + "step": 1213 + }, + { + "epoch": 0.33, + "learning_rate": 4.7975281350482314e-05, + "loss": 0.4206, + "step": 1214 + }, + { + "epoch": 0.33, + "learning_rate": 4.797360664523044e-05, + "loss": 0.7267, + "step": 1215 + }, + { + "epoch": 0.33, + "learning_rate": 4.797193193997856e-05, + "loss": 0.4867, + "step": 1216 + }, + { + "epoch": 0.33, + "learning_rate": 4.797025723472669e-05, + "loss": 0.3086, + "step": 1217 + }, + { + "epoch": 0.33, + "learning_rate": 4.796858252947482e-05, + "loss": 0.4885, + "step": 1218 + }, + { + "epoch": 0.33, + "learning_rate": 4.796690782422294e-05, + "loss": 0.4467, + "step": 1219 + }, + { + "epoch": 0.33, + "learning_rate": 4.7965233118971066e-05, + "loss": 0.5662, + "step": 1220 + }, + { + "epoch": 0.33, + "learning_rate": 4.796355841371919e-05, + "loss": 0.3947, + "step": 1221 + }, + { + "epoch": 0.33, + "learning_rate": 4.7961883708467314e-05, + "loss": 0.5573, + "step": 1222 + }, + { + "epoch": 0.33, + "learning_rate": 4.796020900321543e-05, + "loss": 0.3816, + "step": 1223 + }, + { + "epoch": 0.33, + "learning_rate": 4.7958534297963556e-05, + "loss": 0.3603, + "step": 1224 + }, + { + "epoch": 0.33, + "learning_rate": 4.795685959271169e-05, + "loss": 0.5138, + "step": 1225 + }, + { + "epoch": 0.33, + "learning_rate": 4.795518488745981e-05, + "loss": 0.4924, + "step": 1226 + }, + { + "epoch": 0.33, + "learning_rate": 4.7953510182207936e-05, + "loss": 0.5579, + "step": 1227 + }, + { + "epoch": 0.33, + "learning_rate": 4.795183547695606e-05, + "loss": 0.4343, + "step": 1228 + }, + { + "epoch": 0.33, + "learning_rate": 4.7950160771704184e-05, + "loss": 0.6082, + "step": 1229 + }, + { + "epoch": 0.33, + "learning_rate": 4.794848606645231e-05, + "loss": 0.3983, + "step": 1230 + }, + { + "epoch": 0.33, + "learning_rate": 4.794681136120043e-05, + "loss": 0.428, + "step": 1231 + }, + { + "epoch": 0.33, + "learning_rate": 4.794513665594855e-05, + "loss": 0.6144, + "step": 1232 + }, + { + "epoch": 0.33, + "learning_rate": 4.794346195069668e-05, + "loss": 0.5546, + "step": 1233 + }, + { + "epoch": 0.33, + "learning_rate": 4.7941787245444805e-05, + "loss": 0.5602, + "step": 1234 + }, + { + "epoch": 0.33, + "learning_rate": 4.794011254019293e-05, + "loss": 0.3742, + "step": 1235 + }, + { + "epoch": 0.33, + "learning_rate": 4.7938437834941053e-05, + "loss": 0.6174, + "step": 1236 + }, + { + "epoch": 0.33, + "learning_rate": 4.793676312968918e-05, + "loss": 0.342, + "step": 1237 + }, + { + "epoch": 0.33, + "learning_rate": 4.79350884244373e-05, + "loss": 0.5411, + "step": 1238 + }, + { + "epoch": 0.33, + "learning_rate": 4.7933413719185426e-05, + "loss": 0.4389, + "step": 1239 + }, + { + "epoch": 0.33, + "learning_rate": 4.793173901393355e-05, + "loss": 0.4202, + "step": 1240 + }, + { + "epoch": 0.33, + "learning_rate": 4.7930064308681674e-05, + "loss": 0.5494, + "step": 1241 + }, + { + "epoch": 0.33, + "learning_rate": 4.79283896034298e-05, + "loss": 0.4925, + "step": 1242 + }, + { + "epoch": 0.33, + "learning_rate": 4.792671489817792e-05, + "loss": 0.334, + "step": 1243 + }, + { + "epoch": 0.33, + "learning_rate": 4.792504019292605e-05, + "loss": 0.3749, + "step": 1244 + }, + { + "epoch": 0.33, + "learning_rate": 4.792336548767417e-05, + "loss": 0.4167, + "step": 1245 + }, + { + "epoch": 0.33, + "learning_rate": 4.7921690782422295e-05, + "loss": 0.5983, + "step": 1246 + }, + { + "epoch": 0.33, + "learning_rate": 4.792001607717042e-05, + "loss": 0.3664, + "step": 1247 + }, + { + "epoch": 0.33, + "learning_rate": 4.7918341371918544e-05, + "loss": 0.3893, + "step": 1248 + }, + { + "epoch": 0.33, + "learning_rate": 4.791666666666667e-05, + "loss": 0.3515, + "step": 1249 + }, + { + "epoch": 0.33, + "learning_rate": 4.79149919614148e-05, + "loss": 0.5639, + "step": 1250 + }, + { + "epoch": 0.34, + "learning_rate": 4.7913317256162917e-05, + "loss": 0.7304, + "step": 1251 + }, + { + "epoch": 0.34, + "learning_rate": 4.791164255091104e-05, + "loss": 0.4009, + "step": 1252 + }, + { + "epoch": 0.34, + "learning_rate": 4.7909967845659165e-05, + "loss": 0.406, + "step": 1253 + }, + { + "epoch": 0.34, + "learning_rate": 4.790829314040729e-05, + "loss": 0.5813, + "step": 1254 + }, + { + "epoch": 0.34, + "learning_rate": 4.790661843515541e-05, + "loss": 0.3059, + "step": 1255 + }, + { + "epoch": 0.34, + "learning_rate": 4.790494372990354e-05, + "loss": 0.476, + "step": 1256 + }, + { + "epoch": 0.34, + "learning_rate": 4.790326902465166e-05, + "loss": 0.4433, + "step": 1257 + }, + { + "epoch": 0.34, + "learning_rate": 4.790159431939979e-05, + "loss": 0.4865, + "step": 1258 + }, + { + "epoch": 0.34, + "learning_rate": 4.789991961414792e-05, + "loss": 0.3837, + "step": 1259 + }, + { + "epoch": 0.34, + "learning_rate": 4.7898244908896034e-05, + "loss": 0.4293, + "step": 1260 + }, + { + "epoch": 0.34, + "learning_rate": 4.789657020364416e-05, + "loss": 0.2847, + "step": 1261 + }, + { + "epoch": 0.34, + "learning_rate": 4.789489549839228e-05, + "loss": 0.5089, + "step": 1262 + }, + { + "epoch": 0.34, + "learning_rate": 4.789322079314041e-05, + "loss": 0.8416, + "step": 1263 + }, + { + "epoch": 0.34, + "learning_rate": 4.789154608788853e-05, + "loss": 0.4538, + "step": 1264 + }, + { + "epoch": 0.34, + "learning_rate": 4.7889871382636655e-05, + "loss": 0.7332, + "step": 1265 + }, + { + "epoch": 0.34, + "learning_rate": 4.7888196677384786e-05, + "loss": 0.8659, + "step": 1266 + }, + { + "epoch": 0.34, + "learning_rate": 4.788652197213291e-05, + "loss": 0.7373, + "step": 1267 + }, + { + "epoch": 0.34, + "learning_rate": 4.7884847266881035e-05, + "loss": 0.3191, + "step": 1268 + }, + { + "epoch": 0.34, + "learning_rate": 4.788317256162916e-05, + "loss": 0.6293, + "step": 1269 + }, + { + "epoch": 0.34, + "learning_rate": 4.7881497856377277e-05, + "loss": 0.5067, + "step": 1270 + }, + { + "epoch": 0.34, + "learning_rate": 4.78798231511254e-05, + "loss": 0.3911, + "step": 1271 + }, + { + "epoch": 0.34, + "learning_rate": 4.7878148445873525e-05, + "loss": 0.394, + "step": 1272 + }, + { + "epoch": 0.34, + "learning_rate": 4.787647374062165e-05, + "loss": 0.4538, + "step": 1273 + }, + { + "epoch": 0.34, + "learning_rate": 4.787479903536978e-05, + "loss": 0.421, + "step": 1274 + }, + { + "epoch": 0.34, + "learning_rate": 4.7873124330117904e-05, + "loss": 0.3402, + "step": 1275 + }, + { + "epoch": 0.34, + "learning_rate": 4.787144962486603e-05, + "loss": 0.3631, + "step": 1276 + }, + { + "epoch": 0.34, + "learning_rate": 4.786977491961415e-05, + "loss": 0.6128, + "step": 1277 + }, + { + "epoch": 0.34, + "learning_rate": 4.786810021436228e-05, + "loss": 0.6183, + "step": 1278 + }, + { + "epoch": 0.34, + "learning_rate": 4.7866425509110394e-05, + "loss": 0.4959, + "step": 1279 + }, + { + "epoch": 0.34, + "learning_rate": 4.786475080385852e-05, + "loss": 0.7535, + "step": 1280 + }, + { + "epoch": 0.34, + "learning_rate": 4.786307609860665e-05, + "loss": 0.4727, + "step": 1281 + }, + { + "epoch": 0.34, + "learning_rate": 4.7861401393354774e-05, + "loss": 0.7213, + "step": 1282 + }, + { + "epoch": 0.34, + "learning_rate": 4.78597266881029e-05, + "loss": 0.716, + "step": 1283 + }, + { + "epoch": 0.34, + "learning_rate": 4.785805198285102e-05, + "loss": 0.6485, + "step": 1284 + }, + { + "epoch": 0.34, + "learning_rate": 4.7856377277599146e-05, + "loss": 0.4317, + "step": 1285 + }, + { + "epoch": 0.34, + "learning_rate": 4.785470257234727e-05, + "loss": 0.3948, + "step": 1286 + }, + { + "epoch": 0.34, + "learning_rate": 4.7853027867095395e-05, + "loss": 0.4737, + "step": 1287 + }, + { + "epoch": 0.35, + "learning_rate": 4.785135316184351e-05, + "loss": 0.6327, + "step": 1288 + }, + { + "epoch": 0.35, + "learning_rate": 4.784967845659164e-05, + "loss": 0.6743, + "step": 1289 + }, + { + "epoch": 0.35, + "learning_rate": 4.784800375133977e-05, + "loss": 0.6237, + "step": 1290 + }, + { + "epoch": 0.35, + "learning_rate": 4.784632904608789e-05, + "loss": 0.289, + "step": 1291 + }, + { + "epoch": 0.35, + "learning_rate": 4.7844654340836016e-05, + "loss": 0.4042, + "step": 1292 + }, + { + "epoch": 0.35, + "learning_rate": 4.784297963558414e-05, + "loss": 0.5209, + "step": 1293 + }, + { + "epoch": 0.35, + "learning_rate": 4.7841304930332264e-05, + "loss": 0.469, + "step": 1294 + }, + { + "epoch": 0.35, + "learning_rate": 4.783963022508039e-05, + "loss": 0.5865, + "step": 1295 + }, + { + "epoch": 0.35, + "learning_rate": 4.783795551982851e-05, + "loss": 0.504, + "step": 1296 + }, + { + "epoch": 0.35, + "learning_rate": 4.783628081457664e-05, + "loss": 0.6217, + "step": 1297 + }, + { + "epoch": 0.35, + "learning_rate": 4.783460610932476e-05, + "loss": 0.308, + "step": 1298 + }, + { + "epoch": 0.35, + "learning_rate": 4.7832931404072885e-05, + "loss": 0.444, + "step": 1299 + }, + { + "epoch": 0.35, + "learning_rate": 4.783125669882101e-05, + "loss": 0.4239, + "step": 1300 + }, + { + "epoch": 0.35, + "learning_rate": 4.7829581993569134e-05, + "loss": 0.3883, + "step": 1301 + }, + { + "epoch": 0.35, + "learning_rate": 4.782790728831726e-05, + "loss": 0.3559, + "step": 1302 + }, + { + "epoch": 0.35, + "learning_rate": 4.782623258306538e-05, + "loss": 0.4609, + "step": 1303 + }, + { + "epoch": 0.35, + "learning_rate": 4.7824557877813506e-05, + "loss": 0.4022, + "step": 1304 + }, + { + "epoch": 0.35, + "learning_rate": 4.782288317256163e-05, + "loss": 0.3819, + "step": 1305 + }, + { + "epoch": 0.35, + "learning_rate": 4.782120846730976e-05, + "loss": 0.5702, + "step": 1306 + }, + { + "epoch": 0.35, + "learning_rate": 4.781953376205788e-05, + "loss": 0.9536, + "step": 1307 + }, + { + "epoch": 0.35, + "learning_rate": 4.7817859056806e-05, + "loss": 0.4642, + "step": 1308 + }, + { + "epoch": 0.35, + "learning_rate": 4.781618435155413e-05, + "loss": 0.3998, + "step": 1309 + }, + { + "epoch": 0.35, + "learning_rate": 4.781450964630225e-05, + "loss": 0.4143, + "step": 1310 + }, + { + "epoch": 0.35, + "learning_rate": 4.7812834941050376e-05, + "loss": 0.3262, + "step": 1311 + }, + { + "epoch": 0.35, + "learning_rate": 4.78111602357985e-05, + "loss": 0.6393, + "step": 1312 + }, + { + "epoch": 0.35, + "learning_rate": 4.7809485530546624e-05, + "loss": 0.3588, + "step": 1313 + }, + { + "epoch": 0.35, + "learning_rate": 4.7807810825294755e-05, + "loss": 0.3272, + "step": 1314 + }, + { + "epoch": 0.35, + "learning_rate": 4.780613612004288e-05, + "loss": 0.3922, + "step": 1315 + }, + { + "epoch": 0.35, + "learning_rate": 4.7804461414791e-05, + "loss": 0.7254, + "step": 1316 + }, + { + "epoch": 0.35, + "learning_rate": 4.780278670953912e-05, + "loss": 0.3962, + "step": 1317 + }, + { + "epoch": 0.35, + "learning_rate": 4.7801112004287245e-05, + "loss": 0.3388, + "step": 1318 + }, + { + "epoch": 0.35, + "learning_rate": 4.779943729903537e-05, + "loss": 0.3118, + "step": 1319 + }, + { + "epoch": 0.35, + "learning_rate": 4.7797762593783494e-05, + "loss": 0.3906, + "step": 1320 + }, + { + "epoch": 0.35, + "learning_rate": 4.779608788853162e-05, + "loss": 0.4657, + "step": 1321 + }, + { + "epoch": 0.35, + "learning_rate": 4.779441318327975e-05, + "loss": 0.7081, + "step": 1322 + }, + { + "epoch": 0.35, + "learning_rate": 4.779273847802787e-05, + "loss": 0.3859, + "step": 1323 + }, + { + "epoch": 0.35, + "learning_rate": 4.7791063772776e-05, + "loss": 0.4616, + "step": 1324 + }, + { + "epoch": 0.36, + "learning_rate": 4.778938906752412e-05, + "loss": 0.3666, + "step": 1325 + }, + { + "epoch": 0.36, + "learning_rate": 4.778771436227224e-05, + "loss": 0.3456, + "step": 1326 + }, + { + "epoch": 0.36, + "learning_rate": 4.778603965702036e-05, + "loss": 0.3908, + "step": 1327 + }, + { + "epoch": 0.36, + "learning_rate": 4.778436495176849e-05, + "loss": 0.5245, + "step": 1328 + }, + { + "epoch": 0.36, + "learning_rate": 4.778269024651661e-05, + "loss": 0.5888, + "step": 1329 + }, + { + "epoch": 0.36, + "learning_rate": 4.778101554126474e-05, + "loss": 0.5485, + "step": 1330 + }, + { + "epoch": 0.36, + "learning_rate": 4.777934083601287e-05, + "loss": 0.4168, + "step": 1331 + }, + { + "epoch": 0.36, + "learning_rate": 4.777766613076099e-05, + "loss": 0.595, + "step": 1332 + }, + { + "epoch": 0.36, + "learning_rate": 4.7775991425509115e-05, + "loss": 0.3444, + "step": 1333 + }, + { + "epoch": 0.36, + "learning_rate": 4.777431672025724e-05, + "loss": 0.4528, + "step": 1334 + }, + { + "epoch": 0.36, + "learning_rate": 4.777264201500536e-05, + "loss": 0.358, + "step": 1335 + }, + { + "epoch": 0.36, + "learning_rate": 4.777096730975348e-05, + "loss": 0.6133, + "step": 1336 + }, + { + "epoch": 0.36, + "learning_rate": 4.7769292604501605e-05, + "loss": 0.412, + "step": 1337 + }, + { + "epoch": 0.36, + "learning_rate": 4.7767617899249736e-05, + "loss": 0.3817, + "step": 1338 + }, + { + "epoch": 0.36, + "learning_rate": 4.776594319399786e-05, + "loss": 0.4902, + "step": 1339 + }, + { + "epoch": 0.36, + "learning_rate": 4.7764268488745985e-05, + "loss": 0.4152, + "step": 1340 + }, + { + "epoch": 0.36, + "learning_rate": 4.776259378349411e-05, + "loss": 0.3978, + "step": 1341 + }, + { + "epoch": 0.36, + "learning_rate": 4.776091907824223e-05, + "loss": 0.7061, + "step": 1342 + }, + { + "epoch": 0.36, + "learning_rate": 4.775924437299036e-05, + "loss": 0.4723, + "step": 1343 + }, + { + "epoch": 0.36, + "learning_rate": 4.7757569667738475e-05, + "loss": 0.4009, + "step": 1344 + }, + { + "epoch": 0.36, + "learning_rate": 4.7755894962486606e-05, + "loss": 0.5598, + "step": 1345 + }, + { + "epoch": 0.36, + "learning_rate": 4.775422025723473e-05, + "loss": 0.357, + "step": 1346 + }, + { + "epoch": 0.36, + "learning_rate": 4.7752545551982854e-05, + "loss": 0.3759, + "step": 1347 + }, + { + "epoch": 0.36, + "learning_rate": 4.775087084673098e-05, + "loss": 0.6217, + "step": 1348 + }, + { + "epoch": 0.36, + "learning_rate": 4.77491961414791e-05, + "loss": 0.3208, + "step": 1349 + }, + { + "epoch": 0.36, + "learning_rate": 4.774752143622723e-05, + "loss": 0.6169, + "step": 1350 + }, + { + "epoch": 0.36, + "learning_rate": 4.774584673097535e-05, + "loss": 0.5109, + "step": 1351 + }, + { + "epoch": 0.36, + "learning_rate": 4.7744172025723475e-05, + "loss": 0.5287, + "step": 1352 + }, + { + "epoch": 0.36, + "learning_rate": 4.77424973204716e-05, + "loss": 0.7862, + "step": 1353 + }, + { + "epoch": 0.36, + "learning_rate": 4.7740822615219724e-05, + "loss": 0.3747, + "step": 1354 + }, + { + "epoch": 0.36, + "learning_rate": 4.773914790996785e-05, + "loss": 0.525, + "step": 1355 + }, + { + "epoch": 0.36, + "learning_rate": 4.773747320471597e-05, + "loss": 0.537, + "step": 1356 + }, + { + "epoch": 0.36, + "learning_rate": 4.7735798499464096e-05, + "loss": 0.393, + "step": 1357 + }, + { + "epoch": 0.36, + "learning_rate": 4.773412379421222e-05, + "loss": 0.2898, + "step": 1358 + }, + { + "epoch": 0.36, + "learning_rate": 4.7732449088960345e-05, + "loss": 0.6586, + "step": 1359 + }, + { + "epoch": 0.36, + "learning_rate": 4.773077438370847e-05, + "loss": 0.376, + "step": 1360 + }, + { + "epoch": 0.36, + "learning_rate": 4.772909967845659e-05, + "loss": 0.9452, + "step": 1361 + }, + { + "epoch": 0.36, + "learning_rate": 4.772742497320472e-05, + "loss": 0.3113, + "step": 1362 + }, + { + "epoch": 0.37, + "learning_rate": 4.772575026795284e-05, + "loss": 0.2599, + "step": 1363 + }, + { + "epoch": 0.37, + "learning_rate": 4.7724075562700966e-05, + "loss": 0.5187, + "step": 1364 + }, + { + "epoch": 0.37, + "learning_rate": 4.772240085744909e-05, + "loss": 0.3502, + "step": 1365 + }, + { + "epoch": 0.37, + "learning_rate": 4.7720726152197214e-05, + "loss": 0.3477, + "step": 1366 + }, + { + "epoch": 0.37, + "learning_rate": 4.771905144694534e-05, + "loss": 0.5283, + "step": 1367 + }, + { + "epoch": 0.37, + "learning_rate": 4.771737674169346e-05, + "loss": 0.4069, + "step": 1368 + }, + { + "epoch": 0.37, + "learning_rate": 4.771570203644159e-05, + "loss": 0.6289, + "step": 1369 + }, + { + "epoch": 0.37, + "learning_rate": 4.771402733118972e-05, + "loss": 0.3908, + "step": 1370 + }, + { + "epoch": 0.37, + "learning_rate": 4.771235262593784e-05, + "loss": 0.3752, + "step": 1371 + }, + { + "epoch": 0.37, + "learning_rate": 4.771067792068596e-05, + "loss": 0.9133, + "step": 1372 + }, + { + "epoch": 0.37, + "learning_rate": 4.7709003215434084e-05, + "loss": 0.3553, + "step": 1373 + }, + { + "epoch": 0.37, + "learning_rate": 4.770732851018221e-05, + "loss": 0.4432, + "step": 1374 + }, + { + "epoch": 0.37, + "learning_rate": 4.770565380493033e-05, + "loss": 0.3838, + "step": 1375 + }, + { + "epoch": 0.37, + "learning_rate": 4.7703979099678456e-05, + "loss": 0.3365, + "step": 1376 + }, + { + "epoch": 0.37, + "learning_rate": 4.770230439442658e-05, + "loss": 0.2861, + "step": 1377 + }, + { + "epoch": 0.37, + "learning_rate": 4.770062968917471e-05, + "loss": 0.2865, + "step": 1378 + }, + { + "epoch": 0.37, + "learning_rate": 4.7698954983922836e-05, + "loss": 0.4684, + "step": 1379 + }, + { + "epoch": 0.37, + "learning_rate": 4.769728027867096e-05, + "loss": 0.6493, + "step": 1380 + }, + { + "epoch": 0.37, + "learning_rate": 4.7695605573419084e-05, + "loss": 0.6153, + "step": 1381 + }, + { + "epoch": 0.37, + "learning_rate": 4.76939308681672e-05, + "loss": 0.5634, + "step": 1382 + }, + { + "epoch": 0.37, + "learning_rate": 4.7692256162915326e-05, + "loss": 0.3955, + "step": 1383 + }, + { + "epoch": 0.37, + "learning_rate": 4.769058145766345e-05, + "loss": 0.3365, + "step": 1384 + }, + { + "epoch": 0.37, + "learning_rate": 4.7688906752411574e-05, + "loss": 0.4804, + "step": 1385 + }, + { + "epoch": 0.37, + "learning_rate": 4.7687232047159705e-05, + "loss": 0.3531, + "step": 1386 + }, + { + "epoch": 0.37, + "learning_rate": 4.768555734190783e-05, + "loss": 0.3808, + "step": 1387 + }, + { + "epoch": 0.37, + "learning_rate": 4.7683882636655954e-05, + "loss": 0.3759, + "step": 1388 + }, + { + "epoch": 0.37, + "learning_rate": 4.768220793140408e-05, + "loss": 0.3649, + "step": 1389 + }, + { + "epoch": 0.37, + "learning_rate": 4.76805332261522e-05, + "loss": 0.4703, + "step": 1390 + }, + { + "epoch": 0.37, + "learning_rate": 4.767885852090032e-05, + "loss": 0.4614, + "step": 1391 + }, + { + "epoch": 0.37, + "learning_rate": 4.7677183815648444e-05, + "loss": 0.5656, + "step": 1392 + }, + { + "epoch": 0.37, + "learning_rate": 4.767550911039657e-05, + "loss": 0.4249, + "step": 1393 + }, + { + "epoch": 0.37, + "learning_rate": 4.76738344051447e-05, + "loss": 0.5627, + "step": 1394 + }, + { + "epoch": 0.37, + "learning_rate": 4.767215969989282e-05, + "loss": 0.397, + "step": 1395 + }, + { + "epoch": 0.37, + "learning_rate": 4.767048499464095e-05, + "loss": 0.5581, + "step": 1396 + }, + { + "epoch": 0.37, + "learning_rate": 4.766881028938907e-05, + "loss": 0.3047, + "step": 1397 + }, + { + "epoch": 0.37, + "learning_rate": 4.7667135584137196e-05, + "loss": 0.4764, + "step": 1398 + }, + { + "epoch": 0.37, + "learning_rate": 4.766546087888532e-05, + "loss": 0.3698, + "step": 1399 + }, + { + "epoch": 0.38, + "learning_rate": 4.766378617363344e-05, + "loss": 0.3058, + "step": 1400 + }, + { + "epoch": 0.38, + "learning_rate": 4.766211146838157e-05, + "loss": 0.4712, + "step": 1401 + }, + { + "epoch": 0.38, + "learning_rate": 4.766043676312969e-05, + "loss": 0.2503, + "step": 1402 + }, + { + "epoch": 0.38, + "learning_rate": 4.765876205787782e-05, + "loss": 0.3231, + "step": 1403 + }, + { + "epoch": 0.38, + "learning_rate": 4.765708735262594e-05, + "loss": 0.366, + "step": 1404 + }, + { + "epoch": 0.38, + "learning_rate": 4.7655412647374065e-05, + "loss": 0.4432, + "step": 1405 + }, + { + "epoch": 0.38, + "learning_rate": 4.765373794212219e-05, + "loss": 0.5037, + "step": 1406 + }, + { + "epoch": 0.38, + "learning_rate": 4.7652063236870314e-05, + "loss": 0.4902, + "step": 1407 + }, + { + "epoch": 0.38, + "learning_rate": 4.765038853161844e-05, + "loss": 0.4419, + "step": 1408 + }, + { + "epoch": 0.38, + "learning_rate": 4.764871382636656e-05, + "loss": 0.38, + "step": 1409 + }, + { + "epoch": 0.38, + "learning_rate": 4.7647039121114686e-05, + "loss": 0.4292, + "step": 1410 + }, + { + "epoch": 0.38, + "learning_rate": 4.764536441586281e-05, + "loss": 0.4018, + "step": 1411 + }, + { + "epoch": 0.38, + "learning_rate": 4.7643689710610935e-05, + "loss": 0.3234, + "step": 1412 + }, + { + "epoch": 0.38, + "learning_rate": 4.764201500535906e-05, + "loss": 0.6352, + "step": 1413 + }, + { + "epoch": 0.38, + "learning_rate": 4.764034030010718e-05, + "loss": 0.3614, + "step": 1414 + }, + { + "epoch": 0.38, + "learning_rate": 4.763866559485531e-05, + "loss": 0.3348, + "step": 1415 + }, + { + "epoch": 0.38, + "learning_rate": 4.763699088960343e-05, + "loss": 0.3743, + "step": 1416 + }, + { + "epoch": 0.38, + "learning_rate": 4.7635316184351556e-05, + "loss": 0.3746, + "step": 1417 + }, + { + "epoch": 0.38, + "learning_rate": 4.763364147909968e-05, + "loss": 0.4093, + "step": 1418 + }, + { + "epoch": 0.38, + "learning_rate": 4.7631966773847804e-05, + "loss": 0.7868, + "step": 1419 + }, + { + "epoch": 0.38, + "learning_rate": 4.763029206859593e-05, + "loss": 0.5247, + "step": 1420 + }, + { + "epoch": 0.38, + "learning_rate": 4.762861736334405e-05, + "loss": 0.4911, + "step": 1421 + }, + { + "epoch": 0.38, + "learning_rate": 4.762694265809218e-05, + "loss": 0.6331, + "step": 1422 + }, + { + "epoch": 0.38, + "learning_rate": 4.76252679528403e-05, + "loss": 0.533, + "step": 1423 + }, + { + "epoch": 0.38, + "learning_rate": 4.7623593247588425e-05, + "loss": 0.5281, + "step": 1424 + }, + { + "epoch": 0.38, + "learning_rate": 4.762191854233655e-05, + "loss": 0.5243, + "step": 1425 + }, + { + "epoch": 0.38, + "learning_rate": 4.762024383708468e-05, + "loss": 0.4326, + "step": 1426 + }, + { + "epoch": 0.38, + "learning_rate": 4.7618569131832805e-05, + "loss": 0.3096, + "step": 1427 + }, + { + "epoch": 0.38, + "learning_rate": 4.761689442658092e-05, + "loss": 0.3051, + "step": 1428 + }, + { + "epoch": 0.38, + "learning_rate": 4.7615219721329046e-05, + "loss": 0.4746, + "step": 1429 + }, + { + "epoch": 0.38, + "learning_rate": 4.761354501607717e-05, + "loss": 0.5569, + "step": 1430 + }, + { + "epoch": 0.38, + "learning_rate": 4.7611870310825295e-05, + "loss": 0.5593, + "step": 1431 + }, + { + "epoch": 0.38, + "learning_rate": 4.761019560557342e-05, + "loss": 0.3534, + "step": 1432 + }, + { + "epoch": 0.38, + "learning_rate": 4.760852090032154e-05, + "loss": 0.3205, + "step": 1433 + }, + { + "epoch": 0.38, + "learning_rate": 4.7606846195069674e-05, + "loss": 0.595, + "step": 1434 + }, + { + "epoch": 0.38, + "learning_rate": 4.76051714898178e-05, + "loss": 0.4554, + "step": 1435 + }, + { + "epoch": 0.38, + "learning_rate": 4.760349678456592e-05, + "loss": 0.3194, + "step": 1436 + }, + { + "epoch": 0.39, + "learning_rate": 4.7601822079314047e-05, + "loss": 0.3894, + "step": 1437 + }, + { + "epoch": 0.39, + "learning_rate": 4.7600147374062164e-05, + "loss": 0.3657, + "step": 1438 + }, + { + "epoch": 0.39, + "learning_rate": 4.759847266881029e-05, + "loss": 0.3618, + "step": 1439 + }, + { + "epoch": 0.39, + "learning_rate": 4.759679796355841e-05, + "loss": 0.8606, + "step": 1440 + }, + { + "epoch": 0.39, + "learning_rate": 4.759512325830654e-05, + "loss": 0.3096, + "step": 1441 + }, + { + "epoch": 0.39, + "learning_rate": 4.759344855305467e-05, + "loss": 0.3708, + "step": 1442 + }, + { + "epoch": 0.39, + "learning_rate": 4.759177384780279e-05, + "loss": 0.424, + "step": 1443 + }, + { + "epoch": 0.39, + "learning_rate": 4.7590099142550916e-05, + "loss": 0.8528, + "step": 1444 + }, + { + "epoch": 0.39, + "learning_rate": 4.758842443729904e-05, + "loss": 0.3271, + "step": 1445 + }, + { + "epoch": 0.39, + "learning_rate": 4.7586749732047164e-05, + "loss": 0.3292, + "step": 1446 + }, + { + "epoch": 0.39, + "learning_rate": 4.758507502679528e-05, + "loss": 0.3704, + "step": 1447 + }, + { + "epoch": 0.39, + "learning_rate": 4.7583400321543406e-05, + "loss": 0.4642, + "step": 1448 + }, + { + "epoch": 0.39, + "learning_rate": 4.758172561629153e-05, + "loss": 0.5774, + "step": 1449 + }, + { + "epoch": 0.39, + "learning_rate": 4.758005091103966e-05, + "loss": 0.3166, + "step": 1450 + }, + { + "epoch": 0.39, + "learning_rate": 4.7578376205787786e-05, + "loss": 0.467, + "step": 1451 + }, + { + "epoch": 0.39, + "learning_rate": 4.757670150053591e-05, + "loss": 0.6342, + "step": 1452 + }, + { + "epoch": 0.39, + "learning_rate": 4.7575026795284034e-05, + "loss": 0.6713, + "step": 1453 + }, + { + "epoch": 0.39, + "learning_rate": 4.757335209003216e-05, + "loss": 0.355, + "step": 1454 + }, + { + "epoch": 0.39, + "learning_rate": 4.757167738478028e-05, + "loss": 0.3777, + "step": 1455 + }, + { + "epoch": 0.39, + "learning_rate": 4.75700026795284e-05, + "loss": 0.4761, + "step": 1456 + }, + { + "epoch": 0.39, + "learning_rate": 4.756832797427653e-05, + "loss": 0.6365, + "step": 1457 + }, + { + "epoch": 0.39, + "learning_rate": 4.7566653269024655e-05, + "loss": 0.3974, + "step": 1458 + }, + { + "epoch": 0.39, + "learning_rate": 4.756497856377278e-05, + "loss": 0.4834, + "step": 1459 + }, + { + "epoch": 0.39, + "learning_rate": 4.7563303858520903e-05, + "loss": 0.6154, + "step": 1460 + }, + { + "epoch": 0.39, + "learning_rate": 4.756162915326903e-05, + "loss": 0.3004, + "step": 1461 + }, + { + "epoch": 0.39, + "learning_rate": 4.755995444801715e-05, + "loss": 0.4903, + "step": 1462 + }, + { + "epoch": 0.39, + "learning_rate": 4.7558279742765276e-05, + "loss": 0.3762, + "step": 1463 + }, + { + "epoch": 0.39, + "learning_rate": 4.75566050375134e-05, + "loss": 0.3473, + "step": 1464 + }, + { + "epoch": 0.39, + "learning_rate": 4.7554930332261524e-05, + "loss": 0.4983, + "step": 1465 + }, + { + "epoch": 0.39, + "learning_rate": 4.755325562700965e-05, + "loss": 0.2887, + "step": 1466 + }, + { + "epoch": 0.39, + "learning_rate": 4.755158092175777e-05, + "loss": 0.7218, + "step": 1467 + }, + { + "epoch": 0.39, + "learning_rate": 4.75499062165059e-05, + "loss": 0.2945, + "step": 1468 + }, + { + "epoch": 0.39, + "learning_rate": 4.754823151125402e-05, + "loss": 0.2969, + "step": 1469 + }, + { + "epoch": 0.39, + "learning_rate": 4.7546556806002146e-05, + "loss": 0.3939, + "step": 1470 + }, + { + "epoch": 0.39, + "learning_rate": 4.754488210075027e-05, + "loss": 0.262, + "step": 1471 + }, + { + "epoch": 0.39, + "learning_rate": 4.7543207395498394e-05, + "loss": 0.7303, + "step": 1472 + }, + { + "epoch": 0.39, + "learning_rate": 4.754153269024652e-05, + "loss": 0.5093, + "step": 1473 + }, + { + "epoch": 0.39, + "learning_rate": 4.753985798499464e-05, + "loss": 0.5613, + "step": 1474 + }, + { + "epoch": 0.4, + "learning_rate": 4.7538183279742767e-05, + "loss": 0.9547, + "step": 1475 + }, + { + "epoch": 0.4, + "learning_rate": 4.753650857449089e-05, + "loss": 0.3738, + "step": 1476 + }, + { + "epoch": 0.4, + "learning_rate": 4.7534833869239015e-05, + "loss": 0.4663, + "step": 1477 + }, + { + "epoch": 0.4, + "learning_rate": 4.753315916398714e-05, + "loss": 0.4092, + "step": 1478 + }, + { + "epoch": 0.4, + "learning_rate": 4.7531484458735263e-05, + "loss": 0.3016, + "step": 1479 + }, + { + "epoch": 0.4, + "learning_rate": 4.752980975348339e-05, + "loss": 0.5396, + "step": 1480 + }, + { + "epoch": 0.4, + "learning_rate": 4.752813504823151e-05, + "loss": 0.4523, + "step": 1481 + }, + { + "epoch": 0.4, + "learning_rate": 4.752646034297964e-05, + "loss": 0.4113, + "step": 1482 + }, + { + "epoch": 0.4, + "learning_rate": 4.752478563772777e-05, + "loss": 0.5383, + "step": 1483 + }, + { + "epoch": 0.4, + "learning_rate": 4.7523110932475884e-05, + "loss": 0.4363, + "step": 1484 + }, + { + "epoch": 0.4, + "learning_rate": 4.752143622722401e-05, + "loss": 0.493, + "step": 1485 + }, + { + "epoch": 0.4, + "learning_rate": 4.751976152197213e-05, + "loss": 0.3487, + "step": 1486 + }, + { + "epoch": 0.4, + "learning_rate": 4.751808681672026e-05, + "loss": 0.3806, + "step": 1487 + }, + { + "epoch": 0.4, + "learning_rate": 4.751641211146838e-05, + "loss": 0.5417, + "step": 1488 + }, + { + "epoch": 0.4, + "learning_rate": 4.7514737406216506e-05, + "loss": 0.5042, + "step": 1489 + }, + { + "epoch": 0.4, + "learning_rate": 4.7513062700964636e-05, + "loss": 0.3549, + "step": 1490 + }, + { + "epoch": 0.4, + "learning_rate": 4.751138799571276e-05, + "loss": 0.259, + "step": 1491 + }, + { + "epoch": 0.4, + "learning_rate": 4.7509713290460885e-05, + "loss": 0.4379, + "step": 1492 + }, + { + "epoch": 0.4, + "learning_rate": 4.750803858520901e-05, + "loss": 0.4724, + "step": 1493 + }, + { + "epoch": 0.4, + "learning_rate": 4.7506363879957127e-05, + "loss": 0.3132, + "step": 1494 + }, + { + "epoch": 0.4, + "learning_rate": 4.750468917470525e-05, + "loss": 0.4685, + "step": 1495 + }, + { + "epoch": 0.4, + "learning_rate": 4.7503014469453375e-05, + "loss": 0.3915, + "step": 1496 + }, + { + "epoch": 0.4, + "learning_rate": 4.75013397642015e-05, + "loss": 0.4758, + "step": 1497 + }, + { + "epoch": 0.4, + "learning_rate": 4.749966505894963e-05, + "loss": 0.307, + "step": 1498 + }, + { + "epoch": 0.4, + "learning_rate": 4.7497990353697754e-05, + "loss": 0.4142, + "step": 1499 + }, + { + "epoch": 0.4, + "learning_rate": 4.749631564844588e-05, + "loss": 0.3148, + "step": 1500 + }, + { + "epoch": 0.4, + "learning_rate": 4.7494640943194e-05, + "loss": 0.2883, + "step": 1501 + }, + { + "epoch": 0.4, + "learning_rate": 4.749296623794213e-05, + "loss": 0.4077, + "step": 1502 + }, + { + "epoch": 0.4, + "learning_rate": 4.7491291532690244e-05, + "loss": 0.5432, + "step": 1503 + }, + { + "epoch": 0.4, + "learning_rate": 4.748961682743837e-05, + "loss": 0.3308, + "step": 1504 + }, + { + "epoch": 0.4, + "learning_rate": 4.748794212218649e-05, + "loss": 0.6588, + "step": 1505 + }, + { + "epoch": 0.4, + "learning_rate": 4.7486267416934624e-05, + "loss": 0.5541, + "step": 1506 + }, + { + "epoch": 0.4, + "learning_rate": 4.748459271168275e-05, + "loss": 0.3414, + "step": 1507 + }, + { + "epoch": 0.4, + "learning_rate": 4.748291800643087e-05, + "loss": 0.356, + "step": 1508 + }, + { + "epoch": 0.4, + "learning_rate": 4.7481243301178996e-05, + "loss": 0.3004, + "step": 1509 + }, + { + "epoch": 0.4, + "learning_rate": 4.747956859592712e-05, + "loss": 0.4372, + "step": 1510 + }, + { + "epoch": 0.4, + "learning_rate": 4.7477893890675245e-05, + "loss": 0.369, + "step": 1511 + }, + { + "epoch": 0.41, + "learning_rate": 4.747621918542336e-05, + "loss": 0.3832, + "step": 1512 + }, + { + "epoch": 0.41, + "learning_rate": 4.747454448017149e-05, + "loss": 0.3076, + "step": 1513 + }, + { + "epoch": 0.41, + "learning_rate": 4.747286977491962e-05, + "loss": 0.3918, + "step": 1514 + }, + { + "epoch": 0.41, + "learning_rate": 4.747119506966774e-05, + "loss": 0.3544, + "step": 1515 + }, + { + "epoch": 0.41, + "learning_rate": 4.7469520364415866e-05, + "loss": 0.3684, + "step": 1516 + }, + { + "epoch": 0.41, + "learning_rate": 4.746784565916399e-05, + "loss": 0.3577, + "step": 1517 + }, + { + "epoch": 0.41, + "learning_rate": 4.7466170953912114e-05, + "loss": 0.4244, + "step": 1518 + }, + { + "epoch": 0.41, + "learning_rate": 4.746449624866024e-05, + "loss": 0.2846, + "step": 1519 + }, + { + "epoch": 0.41, + "learning_rate": 4.746282154340836e-05, + "loss": 0.4074, + "step": 1520 + }, + { + "epoch": 0.41, + "learning_rate": 4.746114683815649e-05, + "loss": 0.3639, + "step": 1521 + }, + { + "epoch": 0.41, + "learning_rate": 4.745947213290461e-05, + "loss": 0.3937, + "step": 1522 + }, + { + "epoch": 0.41, + "learning_rate": 4.7457797427652735e-05, + "loss": 0.3791, + "step": 1523 + }, + { + "epoch": 0.41, + "learning_rate": 4.745612272240086e-05, + "loss": 0.3237, + "step": 1524 + }, + { + "epoch": 0.41, + "learning_rate": 4.7454448017148984e-05, + "loss": 0.3433, + "step": 1525 + }, + { + "epoch": 0.41, + "learning_rate": 4.745277331189711e-05, + "loss": 0.6899, + "step": 1526 + }, + { + "epoch": 0.41, + "learning_rate": 4.745109860664523e-05, + "loss": 0.5339, + "step": 1527 + }, + { + "epoch": 0.41, + "learning_rate": 4.7449423901393356e-05, + "loss": 0.2967, + "step": 1528 + }, + { + "epoch": 0.41, + "learning_rate": 4.744774919614148e-05, + "loss": 0.4622, + "step": 1529 + }, + { + "epoch": 0.41, + "learning_rate": 4.7446074490889605e-05, + "loss": 0.3112, + "step": 1530 + }, + { + "epoch": 0.41, + "learning_rate": 4.744439978563773e-05, + "loss": 0.3435, + "step": 1531 + }, + { + "epoch": 0.41, + "learning_rate": 4.744272508038585e-05, + "loss": 0.5079, + "step": 1532 + }, + { + "epoch": 0.41, + "learning_rate": 4.744105037513398e-05, + "loss": 0.6446, + "step": 1533 + }, + { + "epoch": 0.41, + "learning_rate": 4.74393756698821e-05, + "loss": 0.3917, + "step": 1534 + }, + { + "epoch": 0.41, + "learning_rate": 4.7437700964630226e-05, + "loss": 0.3756, + "step": 1535 + }, + { + "epoch": 0.41, + "learning_rate": 4.743602625937835e-05, + "loss": 0.5277, + "step": 1536 + }, + { + "epoch": 0.41, + "learning_rate": 4.7434351554126474e-05, + "loss": 0.4207, + "step": 1537 + }, + { + "epoch": 0.41, + "learning_rate": 4.7432676848874605e-05, + "loss": 0.2386, + "step": 1538 + }, + { + "epoch": 0.41, + "learning_rate": 4.743100214362273e-05, + "loss": 0.3119, + "step": 1539 + }, + { + "epoch": 0.41, + "learning_rate": 4.742932743837085e-05, + "loss": 0.5523, + "step": 1540 + }, + { + "epoch": 0.41, + "learning_rate": 4.742765273311897e-05, + "loss": 0.4091, + "step": 1541 + }, + { + "epoch": 0.41, + "learning_rate": 4.7425978027867095e-05, + "loss": 0.4043, + "step": 1542 + }, + { + "epoch": 0.41, + "learning_rate": 4.742430332261522e-05, + "loss": 0.264, + "step": 1543 + }, + { + "epoch": 0.41, + "learning_rate": 4.7422628617363344e-05, + "loss": 0.3695, + "step": 1544 + }, + { + "epoch": 0.41, + "learning_rate": 4.742095391211147e-05, + "loss": 0.3776, + "step": 1545 + }, + { + "epoch": 0.41, + "learning_rate": 4.74192792068596e-05, + "loss": 0.4284, + "step": 1546 + }, + { + "epoch": 0.41, + "learning_rate": 4.741760450160772e-05, + "loss": 0.5956, + "step": 1547 + }, + { + "epoch": 0.41, + "learning_rate": 4.741592979635585e-05, + "loss": 0.4281, + "step": 1548 + }, + { + "epoch": 0.42, + "learning_rate": 4.741425509110397e-05, + "loss": 0.3888, + "step": 1549 + }, + { + "epoch": 0.42, + "learning_rate": 4.741258038585209e-05, + "loss": 0.4753, + "step": 1550 + }, + { + "epoch": 0.42, + "learning_rate": 4.741090568060021e-05, + "loss": 0.5651, + "step": 1551 + }, + { + "epoch": 0.42, + "learning_rate": 4.740923097534834e-05, + "loss": 0.3075, + "step": 1552 + }, + { + "epoch": 0.42, + "learning_rate": 4.740755627009646e-05, + "loss": 0.4033, + "step": 1553 + }, + { + "epoch": 0.42, + "learning_rate": 4.740588156484459e-05, + "loss": 0.2715, + "step": 1554 + }, + { + "epoch": 0.42, + "learning_rate": 4.740420685959272e-05, + "loss": 0.3859, + "step": 1555 + }, + { + "epoch": 0.42, + "learning_rate": 4.740253215434084e-05, + "loss": 0.3297, + "step": 1556 + }, + { + "epoch": 0.42, + "learning_rate": 4.7400857449088965e-05, + "loss": 0.5546, + "step": 1557 + }, + { + "epoch": 0.42, + "learning_rate": 4.739918274383709e-05, + "loss": 0.3431, + "step": 1558 + }, + { + "epoch": 0.42, + "learning_rate": 4.739750803858521e-05, + "loss": 0.4908, + "step": 1559 + }, + { + "epoch": 0.42, + "learning_rate": 4.739583333333333e-05, + "loss": 0.4104, + "step": 1560 + }, + { + "epoch": 0.42, + "learning_rate": 4.7394158628081455e-05, + "loss": 0.3471, + "step": 1561 + }, + { + "epoch": 0.42, + "learning_rate": 4.7392483922829586e-05, + "loss": 0.4947, + "step": 1562 + }, + { + "epoch": 0.42, + "learning_rate": 4.739080921757771e-05, + "loss": 0.5799, + "step": 1563 + }, + { + "epoch": 0.42, + "learning_rate": 4.7389134512325835e-05, + "loss": 0.4422, + "step": 1564 + }, + { + "epoch": 0.42, + "learning_rate": 4.738745980707396e-05, + "loss": 0.3515, + "step": 1565 + }, + { + "epoch": 0.42, + "learning_rate": 4.738578510182208e-05, + "loss": 0.4597, + "step": 1566 + }, + { + "epoch": 0.42, + "learning_rate": 4.738411039657021e-05, + "loss": 0.5496, + "step": 1567 + }, + { + "epoch": 0.42, + "learning_rate": 4.7382435691318325e-05, + "loss": 0.4774, + "step": 1568 + }, + { + "epoch": 0.42, + "learning_rate": 4.7380760986066456e-05, + "loss": 0.45, + "step": 1569 + }, + { + "epoch": 0.42, + "learning_rate": 4.737908628081458e-05, + "loss": 0.5679, + "step": 1570 + }, + { + "epoch": 0.42, + "learning_rate": 4.7377411575562704e-05, + "loss": 0.2713, + "step": 1571 + }, + { + "epoch": 0.42, + "learning_rate": 4.737573687031083e-05, + "loss": 0.5754, + "step": 1572 + }, + { + "epoch": 0.42, + "learning_rate": 4.737406216505895e-05, + "loss": 0.4738, + "step": 1573 + }, + { + "epoch": 0.42, + "learning_rate": 4.737238745980708e-05, + "loss": 0.3368, + "step": 1574 + }, + { + "epoch": 0.42, + "learning_rate": 4.73707127545552e-05, + "loss": 0.387, + "step": 1575 + }, + { + "epoch": 0.42, + "learning_rate": 4.7369038049303325e-05, + "loss": 0.3386, + "step": 1576 + }, + { + "epoch": 0.42, + "learning_rate": 4.736736334405145e-05, + "loss": 0.5844, + "step": 1577 + }, + { + "epoch": 0.42, + "learning_rate": 4.7365688638799574e-05, + "loss": 0.3927, + "step": 1578 + }, + { + "epoch": 0.42, + "learning_rate": 4.73640139335477e-05, + "loss": 0.2743, + "step": 1579 + }, + { + "epoch": 0.42, + "learning_rate": 4.736233922829582e-05, + "loss": 0.47, + "step": 1580 + }, + { + "epoch": 0.42, + "learning_rate": 4.7360664523043946e-05, + "loss": 0.4096, + "step": 1581 + }, + { + "epoch": 0.42, + "learning_rate": 4.735898981779207e-05, + "loss": 0.448, + "step": 1582 + }, + { + "epoch": 0.42, + "learning_rate": 4.7357315112540195e-05, + "loss": 0.4131, + "step": 1583 + }, + { + "epoch": 0.42, + "learning_rate": 4.735564040728832e-05, + "loss": 0.3804, + "step": 1584 + }, + { + "epoch": 0.42, + "learning_rate": 4.735396570203644e-05, + "loss": 0.3235, + "step": 1585 + }, + { + "epoch": 0.42, + "learning_rate": 4.735229099678457e-05, + "loss": 0.3158, + "step": 1586 + }, + { + "epoch": 0.43, + "learning_rate": 4.735061629153269e-05, + "loss": 0.3285, + "step": 1587 + }, + { + "epoch": 0.43, + "learning_rate": 4.7348941586280816e-05, + "loss": 0.6648, + "step": 1588 + }, + { + "epoch": 0.43, + "learning_rate": 4.734726688102894e-05, + "loss": 0.3147, + "step": 1589 + }, + { + "epoch": 0.43, + "learning_rate": 4.7345592175777064e-05, + "loss": 0.2873, + "step": 1590 + }, + { + "epoch": 0.43, + "learning_rate": 4.734391747052519e-05, + "loss": 0.3993, + "step": 1591 + }, + { + "epoch": 0.43, + "learning_rate": 4.734224276527331e-05, + "loss": 0.4614, + "step": 1592 + }, + { + "epoch": 0.43, + "learning_rate": 4.734056806002144e-05, + "loss": 0.3439, + "step": 1593 + }, + { + "epoch": 0.43, + "learning_rate": 4.733889335476957e-05, + "loss": 0.5656, + "step": 1594 + }, + { + "epoch": 0.43, + "learning_rate": 4.733721864951769e-05, + "loss": 0.5629, + "step": 1595 + }, + { + "epoch": 0.43, + "learning_rate": 4.733554394426581e-05, + "loss": 0.2886, + "step": 1596 + }, + { + "epoch": 0.43, + "learning_rate": 4.7333869239013934e-05, + "loss": 0.2647, + "step": 1597 + }, + { + "epoch": 0.43, + "learning_rate": 4.733219453376206e-05, + "loss": 0.2745, + "step": 1598 + }, + { + "epoch": 0.43, + "learning_rate": 4.733051982851018e-05, + "loss": 0.6064, + "step": 1599 + }, + { + "epoch": 0.43, + "learning_rate": 4.7328845123258306e-05, + "loss": 0.2937, + "step": 1600 + }, + { + "epoch": 0.43, + "learning_rate": 4.732717041800643e-05, + "loss": 0.3789, + "step": 1601 + }, + { + "epoch": 0.43, + "learning_rate": 4.732549571275456e-05, + "loss": 0.6507, + "step": 1602 + }, + { + "epoch": 0.43, + "learning_rate": 4.7323821007502686e-05, + "loss": 0.2857, + "step": 1603 + }, + { + "epoch": 0.43, + "learning_rate": 4.732214630225081e-05, + "loss": 0.5295, + "step": 1604 + }, + { + "epoch": 0.43, + "learning_rate": 4.7320471596998934e-05, + "loss": 0.5014, + "step": 1605 + }, + { + "epoch": 0.43, + "learning_rate": 4.731879689174705e-05, + "loss": 0.3036, + "step": 1606 + }, + { + "epoch": 0.43, + "learning_rate": 4.7317122186495176e-05, + "loss": 0.4563, + "step": 1607 + }, + { + "epoch": 0.43, + "learning_rate": 4.73154474812433e-05, + "loss": 0.2736, + "step": 1608 + }, + { + "epoch": 0.43, + "learning_rate": 4.7313772775991424e-05, + "loss": 0.308, + "step": 1609 + }, + { + "epoch": 0.43, + "learning_rate": 4.7312098070739555e-05, + "loss": 0.5182, + "step": 1610 + }, + { + "epoch": 0.43, + "learning_rate": 4.731042336548768e-05, + "loss": 0.4082, + "step": 1611 + }, + { + "epoch": 0.43, + "learning_rate": 4.7308748660235804e-05, + "loss": 0.3684, + "step": 1612 + }, + { + "epoch": 0.43, + "learning_rate": 4.730707395498393e-05, + "loss": 0.4508, + "step": 1613 + }, + { + "epoch": 0.43, + "learning_rate": 4.730539924973205e-05, + "loss": 0.564, + "step": 1614 + }, + { + "epoch": 0.43, + "learning_rate": 4.730372454448017e-05, + "loss": 0.7052, + "step": 1615 + }, + { + "epoch": 0.43, + "learning_rate": 4.7302049839228294e-05, + "loss": 0.431, + "step": 1616 + }, + { + "epoch": 0.43, + "learning_rate": 4.730037513397642e-05, + "loss": 0.3312, + "step": 1617 + }, + { + "epoch": 0.43, + "learning_rate": 4.729870042872455e-05, + "loss": 0.3263, + "step": 1618 + }, + { + "epoch": 0.43, + "learning_rate": 4.729702572347267e-05, + "loss": 0.288, + "step": 1619 + }, + { + "epoch": 0.43, + "learning_rate": 4.72953510182208e-05, + "loss": 0.3234, + "step": 1620 + }, + { + "epoch": 0.43, + "learning_rate": 4.729367631296892e-05, + "loss": 0.5542, + "step": 1621 + }, + { + "epoch": 0.43, + "learning_rate": 4.7292001607717046e-05, + "loss": 0.5772, + "step": 1622 + }, + { + "epoch": 0.43, + "learning_rate": 4.729032690246517e-05, + "loss": 0.4139, + "step": 1623 + }, + { + "epoch": 0.44, + "learning_rate": 4.728865219721329e-05, + "loss": 0.3816, + "step": 1624 + }, + { + "epoch": 0.44, + "learning_rate": 4.728697749196142e-05, + "loss": 0.3479, + "step": 1625 + }, + { + "epoch": 0.44, + "learning_rate": 4.728530278670954e-05, + "loss": 0.3359, + "step": 1626 + }, + { + "epoch": 0.44, + "learning_rate": 4.728362808145767e-05, + "loss": 0.3749, + "step": 1627 + }, + { + "epoch": 0.44, + "learning_rate": 4.728195337620579e-05, + "loss": 0.2869, + "step": 1628 + }, + { + "epoch": 0.44, + "learning_rate": 4.7280278670953915e-05, + "loss": 0.4385, + "step": 1629 + }, + { + "epoch": 0.44, + "learning_rate": 4.727860396570204e-05, + "loss": 0.4031, + "step": 1630 + }, + { + "epoch": 0.44, + "learning_rate": 4.7276929260450164e-05, + "loss": 0.3336, + "step": 1631 + }, + { + "epoch": 0.44, + "learning_rate": 4.727525455519829e-05, + "loss": 0.3577, + "step": 1632 + }, + { + "epoch": 0.44, + "learning_rate": 4.727357984994641e-05, + "loss": 0.2827, + "step": 1633 + }, + { + "epoch": 0.44, + "learning_rate": 4.7271905144694536e-05, + "loss": 0.3778, + "step": 1634 + }, + { + "epoch": 0.44, + "learning_rate": 4.727023043944266e-05, + "loss": 0.3729, + "step": 1635 + }, + { + "epoch": 0.44, + "learning_rate": 4.7268555734190785e-05, + "loss": 0.3742, + "step": 1636 + }, + { + "epoch": 0.44, + "learning_rate": 4.726688102893891e-05, + "loss": 0.438, + "step": 1637 + }, + { + "epoch": 0.44, + "learning_rate": 4.726520632368703e-05, + "loss": 0.3845, + "step": 1638 + }, + { + "epoch": 0.44, + "learning_rate": 4.726353161843516e-05, + "loss": 0.5107, + "step": 1639 + }, + { + "epoch": 0.44, + "learning_rate": 4.726185691318328e-05, + "loss": 0.5321, + "step": 1640 + }, + { + "epoch": 0.44, + "learning_rate": 4.7260182207931406e-05, + "loss": 0.264, + "step": 1641 + }, + { + "epoch": 0.44, + "learning_rate": 4.725850750267953e-05, + "loss": 0.3309, + "step": 1642 + }, + { + "epoch": 0.44, + "learning_rate": 4.7256832797427654e-05, + "loss": 0.5144, + "step": 1643 + }, + { + "epoch": 0.44, + "learning_rate": 4.725515809217578e-05, + "loss": 0.4448, + "step": 1644 + }, + { + "epoch": 0.44, + "learning_rate": 4.72534833869239e-05, + "loss": 0.4203, + "step": 1645 + }, + { + "epoch": 0.44, + "learning_rate": 4.725180868167203e-05, + "loss": 0.4316, + "step": 1646 + }, + { + "epoch": 0.44, + "learning_rate": 4.725013397642015e-05, + "loss": 0.6509, + "step": 1647 + }, + { + "epoch": 0.44, + "learning_rate": 4.7248459271168275e-05, + "loss": 0.3697, + "step": 1648 + }, + { + "epoch": 0.44, + "learning_rate": 4.72467845659164e-05, + "loss": 0.5392, + "step": 1649 + }, + { + "epoch": 0.44, + "learning_rate": 4.724510986066453e-05, + "loss": 0.2817, + "step": 1650 + }, + { + "epoch": 0.44, + "learning_rate": 4.7243435155412655e-05, + "loss": 0.4513, + "step": 1651 + }, + { + "epoch": 0.44, + "learning_rate": 4.724176045016077e-05, + "loss": 0.4677, + "step": 1652 + }, + { + "epoch": 0.44, + "learning_rate": 4.7240085744908896e-05, + "loss": 0.616, + "step": 1653 + }, + { + "epoch": 0.44, + "learning_rate": 4.723841103965702e-05, + "loss": 0.4649, + "step": 1654 + }, + { + "epoch": 0.44, + "learning_rate": 4.7236736334405145e-05, + "loss": 0.2757, + "step": 1655 + }, + { + "epoch": 0.44, + "learning_rate": 4.723506162915327e-05, + "loss": 0.3354, + "step": 1656 + }, + { + "epoch": 0.44, + "learning_rate": 4.723338692390139e-05, + "loss": 0.4749, + "step": 1657 + }, + { + "epoch": 0.44, + "learning_rate": 4.7231712218649524e-05, + "loss": 0.3728, + "step": 1658 + }, + { + "epoch": 0.44, + "learning_rate": 4.723003751339765e-05, + "loss": 0.414, + "step": 1659 + }, + { + "epoch": 0.44, + "learning_rate": 4.722836280814577e-05, + "loss": 0.2955, + "step": 1660 + }, + { + "epoch": 0.45, + "learning_rate": 4.72266881028939e-05, + "loss": 0.3602, + "step": 1661 + }, + { + "epoch": 0.45, + "learning_rate": 4.7225013397642014e-05, + "loss": 0.3332, + "step": 1662 + }, + { + "epoch": 0.45, + "learning_rate": 4.722333869239014e-05, + "loss": 0.2699, + "step": 1663 + }, + { + "epoch": 0.45, + "learning_rate": 4.722166398713826e-05, + "loss": 0.3745, + "step": 1664 + }, + { + "epoch": 0.45, + "learning_rate": 4.721998928188639e-05, + "loss": 0.5292, + "step": 1665 + }, + { + "epoch": 0.45, + "learning_rate": 4.721831457663452e-05, + "loss": 0.2821, + "step": 1666 + }, + { + "epoch": 0.45, + "learning_rate": 4.721663987138264e-05, + "loss": 0.4491, + "step": 1667 + }, + { + "epoch": 0.45, + "learning_rate": 4.7214965166130766e-05, + "loss": 0.4006, + "step": 1668 + }, + { + "epoch": 0.45, + "learning_rate": 4.721329046087889e-05, + "loss": 0.3061, + "step": 1669 + }, + { + "epoch": 0.45, + "learning_rate": 4.7211615755627015e-05, + "loss": 0.5538, + "step": 1670 + }, + { + "epoch": 0.45, + "learning_rate": 4.720994105037513e-05, + "loss": 0.387, + "step": 1671 + }, + { + "epoch": 0.45, + "learning_rate": 4.7208266345123256e-05, + "loss": 0.4007, + "step": 1672 + }, + { + "epoch": 0.45, + "learning_rate": 4.720659163987138e-05, + "loss": 0.401, + "step": 1673 + }, + { + "epoch": 0.45, + "learning_rate": 4.720491693461951e-05, + "loss": 0.7366, + "step": 1674 + }, + { + "epoch": 0.45, + "learning_rate": 4.7203242229367636e-05, + "loss": 0.5811, + "step": 1675 + }, + { + "epoch": 0.45, + "learning_rate": 4.720156752411576e-05, + "loss": 0.3663, + "step": 1676 + }, + { + "epoch": 0.45, + "learning_rate": 4.7199892818863884e-05, + "loss": 0.5428, + "step": 1677 + }, + { + "epoch": 0.45, + "learning_rate": 4.719821811361201e-05, + "loss": 0.3801, + "step": 1678 + }, + { + "epoch": 0.45, + "learning_rate": 4.719654340836013e-05, + "loss": 0.4255, + "step": 1679 + }, + { + "epoch": 0.45, + "learning_rate": 4.719486870310825e-05, + "loss": 0.3061, + "step": 1680 + }, + { + "epoch": 0.45, + "learning_rate": 4.719319399785638e-05, + "loss": 0.2291, + "step": 1681 + }, + { + "epoch": 0.45, + "learning_rate": 4.7191519292604505e-05, + "loss": 0.7736, + "step": 1682 + }, + { + "epoch": 0.45, + "learning_rate": 4.718984458735263e-05, + "loss": 0.4721, + "step": 1683 + }, + { + "epoch": 0.45, + "learning_rate": 4.7188169882100753e-05, + "loss": 0.4759, + "step": 1684 + }, + { + "epoch": 0.45, + "learning_rate": 4.718649517684888e-05, + "loss": 0.3735, + "step": 1685 + }, + { + "epoch": 0.45, + "learning_rate": 4.7184820471597e-05, + "loss": 0.5718, + "step": 1686 + }, + { + "epoch": 0.45, + "learning_rate": 4.7183145766345126e-05, + "loss": 0.3089, + "step": 1687 + }, + { + "epoch": 0.45, + "learning_rate": 4.718147106109325e-05, + "loss": 0.4513, + "step": 1688 + }, + { + "epoch": 0.45, + "learning_rate": 4.7179796355841375e-05, + "loss": 0.3793, + "step": 1689 + }, + { + "epoch": 0.45, + "learning_rate": 4.71781216505895e-05, + "loss": 0.3121, + "step": 1690 + }, + { + "epoch": 0.45, + "learning_rate": 4.717644694533762e-05, + "loss": 0.3352, + "step": 1691 + }, + { + "epoch": 0.45, + "learning_rate": 4.717477224008575e-05, + "loss": 0.3191, + "step": 1692 + }, + { + "epoch": 0.45, + "learning_rate": 4.717309753483387e-05, + "loss": 0.404, + "step": 1693 + }, + { + "epoch": 0.45, + "learning_rate": 4.7171422829581996e-05, + "loss": 0.5756, + "step": 1694 + }, + { + "epoch": 0.45, + "learning_rate": 4.716974812433012e-05, + "loss": 0.3495, + "step": 1695 + }, + { + "epoch": 0.45, + "learning_rate": 4.7168073419078244e-05, + "loss": 0.4414, + "step": 1696 + }, + { + "epoch": 0.45, + "learning_rate": 4.716639871382637e-05, + "loss": 0.3371, + "step": 1697 + }, + { + "epoch": 0.45, + "learning_rate": 4.716472400857449e-05, + "loss": 0.3034, + "step": 1698 + }, + { + "epoch": 0.46, + "learning_rate": 4.7163049303322617e-05, + "loss": 0.4836, + "step": 1699 + }, + { + "epoch": 0.46, + "learning_rate": 4.716137459807074e-05, + "loss": 0.3225, + "step": 1700 + }, + { + "epoch": 0.46, + "learning_rate": 4.7159699892818865e-05, + "loss": 0.3271, + "step": 1701 + }, + { + "epoch": 0.46, + "learning_rate": 4.715802518756699e-05, + "loss": 0.3142, + "step": 1702 + }, + { + "epoch": 0.46, + "learning_rate": 4.7156350482315113e-05, + "loss": 0.4794, + "step": 1703 + }, + { + "epoch": 0.46, + "learning_rate": 4.715467577706324e-05, + "loss": 0.3624, + "step": 1704 + }, + { + "epoch": 0.46, + "learning_rate": 4.715300107181136e-05, + "loss": 0.3447, + "step": 1705 + }, + { + "epoch": 0.46, + "learning_rate": 4.715132636655949e-05, + "loss": 0.3748, + "step": 1706 + }, + { + "epoch": 0.46, + "learning_rate": 4.714965166130762e-05, + "loss": 0.5236, + "step": 1707 + }, + { + "epoch": 0.46, + "learning_rate": 4.7147976956055734e-05, + "loss": 0.3918, + "step": 1708 + }, + { + "epoch": 0.46, + "learning_rate": 4.714630225080386e-05, + "loss": 0.7156, + "step": 1709 + }, + { + "epoch": 0.46, + "learning_rate": 4.714462754555198e-05, + "loss": 0.4691, + "step": 1710 + }, + { + "epoch": 0.46, + "learning_rate": 4.714295284030011e-05, + "loss": 0.3628, + "step": 1711 + }, + { + "epoch": 0.46, + "learning_rate": 4.714127813504823e-05, + "loss": 0.3656, + "step": 1712 + }, + { + "epoch": 0.46, + "learning_rate": 4.7139603429796356e-05, + "loss": 0.3814, + "step": 1713 + }, + { + "epoch": 0.46, + "learning_rate": 4.7137928724544487e-05, + "loss": 0.3253, + "step": 1714 + }, + { + "epoch": 0.46, + "learning_rate": 4.713625401929261e-05, + "loss": 0.3898, + "step": 1715 + }, + { + "epoch": 0.46, + "learning_rate": 4.7134579314040735e-05, + "loss": 0.4648, + "step": 1716 + }, + { + "epoch": 0.46, + "learning_rate": 4.713290460878886e-05, + "loss": 0.4702, + "step": 1717 + }, + { + "epoch": 0.46, + "learning_rate": 4.7131229903536977e-05, + "loss": 0.276, + "step": 1718 + }, + { + "epoch": 0.46, + "learning_rate": 4.71295551982851e-05, + "loss": 0.314, + "step": 1719 + }, + { + "epoch": 0.46, + "learning_rate": 4.7127880493033225e-05, + "loss": 0.5727, + "step": 1720 + }, + { + "epoch": 0.46, + "learning_rate": 4.712620578778135e-05, + "loss": 0.3182, + "step": 1721 + }, + { + "epoch": 0.46, + "learning_rate": 4.712453108252948e-05, + "loss": 0.3677, + "step": 1722 + }, + { + "epoch": 0.46, + "learning_rate": 4.7122856377277604e-05, + "loss": 0.2751, + "step": 1723 + }, + { + "epoch": 0.46, + "learning_rate": 4.712118167202573e-05, + "loss": 0.3751, + "step": 1724 + }, + { + "epoch": 0.46, + "learning_rate": 4.711950696677385e-05, + "loss": 0.3125, + "step": 1725 + }, + { + "epoch": 0.46, + "learning_rate": 4.711783226152198e-05, + "loss": 0.3137, + "step": 1726 + }, + { + "epoch": 0.46, + "learning_rate": 4.7116157556270094e-05, + "loss": 0.3695, + "step": 1727 + }, + { + "epoch": 0.46, + "learning_rate": 4.711448285101822e-05, + "loss": 0.6473, + "step": 1728 + }, + { + "epoch": 0.46, + "learning_rate": 4.711280814576634e-05, + "loss": 0.3241, + "step": 1729 + }, + { + "epoch": 0.46, + "learning_rate": 4.7111133440514474e-05, + "loss": 0.4776, + "step": 1730 + }, + { + "epoch": 0.46, + "learning_rate": 4.71094587352626e-05, + "loss": 0.6382, + "step": 1731 + }, + { + "epoch": 0.46, + "learning_rate": 4.710778403001072e-05, + "loss": 0.5415, + "step": 1732 + }, + { + "epoch": 0.46, + "learning_rate": 4.7106109324758847e-05, + "loss": 0.596, + "step": 1733 + }, + { + "epoch": 0.46, + "learning_rate": 4.710443461950697e-05, + "loss": 0.2461, + "step": 1734 + }, + { + "epoch": 0.46, + "learning_rate": 4.7102759914255095e-05, + "loss": 0.3526, + "step": 1735 + }, + { + "epoch": 0.47, + "learning_rate": 4.710108520900321e-05, + "loss": 0.351, + "step": 1736 + }, + { + "epoch": 0.47, + "learning_rate": 4.709941050375134e-05, + "loss": 0.2919, + "step": 1737 + }, + { + "epoch": 0.47, + "learning_rate": 4.709773579849947e-05, + "loss": 0.3106, + "step": 1738 + }, + { + "epoch": 0.47, + "learning_rate": 4.709606109324759e-05, + "loss": 0.3123, + "step": 1739 + }, + { + "epoch": 0.47, + "learning_rate": 4.7094386387995716e-05, + "loss": 0.4044, + "step": 1740 + }, + { + "epoch": 0.47, + "learning_rate": 4.709271168274384e-05, + "loss": 0.4564, + "step": 1741 + }, + { + "epoch": 0.47, + "learning_rate": 4.7091036977491964e-05, + "loss": 0.4157, + "step": 1742 + }, + { + "epoch": 0.47, + "learning_rate": 4.708936227224009e-05, + "loss": 0.5049, + "step": 1743 + }, + { + "epoch": 0.47, + "learning_rate": 4.708768756698821e-05, + "loss": 0.4045, + "step": 1744 + }, + { + "epoch": 0.47, + "learning_rate": 4.708601286173634e-05, + "loss": 0.3513, + "step": 1745 + }, + { + "epoch": 0.47, + "learning_rate": 4.708433815648446e-05, + "loss": 0.4769, + "step": 1746 + }, + { + "epoch": 0.47, + "learning_rate": 4.7082663451232585e-05, + "loss": 0.3812, + "step": 1747 + }, + { + "epoch": 0.47, + "learning_rate": 4.708098874598071e-05, + "loss": 0.3878, + "step": 1748 + }, + { + "epoch": 0.47, + "learning_rate": 4.7079314040728834e-05, + "loss": 0.3935, + "step": 1749 + }, + { + "epoch": 0.47, + "learning_rate": 4.707763933547696e-05, + "loss": 0.3272, + "step": 1750 + }, + { + "epoch": 0.47, + "learning_rate": 4.707596463022508e-05, + "loss": 0.5333, + "step": 1751 + }, + { + "epoch": 0.47, + "learning_rate": 4.7074289924973206e-05, + "loss": 0.4138, + "step": 1752 + }, + { + "epoch": 0.47, + "learning_rate": 4.707261521972133e-05, + "loss": 0.2662, + "step": 1753 + }, + { + "epoch": 0.47, + "learning_rate": 4.7070940514469455e-05, + "loss": 0.2493, + "step": 1754 + }, + { + "epoch": 0.47, + "learning_rate": 4.706926580921758e-05, + "loss": 0.4483, + "step": 1755 + }, + { + "epoch": 0.47, + "learning_rate": 4.70675911039657e-05, + "loss": 0.3288, + "step": 1756 + }, + { + "epoch": 0.47, + "learning_rate": 4.706591639871383e-05, + "loss": 0.6409, + "step": 1757 + }, + { + "epoch": 0.47, + "learning_rate": 4.706424169346195e-05, + "loss": 0.3296, + "step": 1758 + }, + { + "epoch": 0.47, + "learning_rate": 4.7062566988210076e-05, + "loss": 0.2634, + "step": 1759 + }, + { + "epoch": 0.47, + "learning_rate": 4.70608922829582e-05, + "loss": 0.6523, + "step": 1760 + }, + { + "epoch": 0.47, + "learning_rate": 4.7059217577706324e-05, + "loss": 0.3718, + "step": 1761 + }, + { + "epoch": 0.47, + "learning_rate": 4.7057542872454455e-05, + "loss": 0.3175, + "step": 1762 + }, + { + "epoch": 0.47, + "learning_rate": 4.705586816720258e-05, + "loss": 0.4089, + "step": 1763 + }, + { + "epoch": 0.47, + "learning_rate": 4.70541934619507e-05, + "loss": 0.3113, + "step": 1764 + }, + { + "epoch": 0.47, + "learning_rate": 4.705251875669882e-05, + "loss": 0.2824, + "step": 1765 + }, + { + "epoch": 0.47, + "learning_rate": 4.7050844051446945e-05, + "loss": 0.2797, + "step": 1766 + }, + { + "epoch": 0.47, + "learning_rate": 4.704916934619507e-05, + "loss": 0.4741, + "step": 1767 + }, + { + "epoch": 0.47, + "learning_rate": 4.7047494640943194e-05, + "loss": 0.4781, + "step": 1768 + }, + { + "epoch": 0.47, + "learning_rate": 4.704581993569132e-05, + "loss": 0.3617, + "step": 1769 + }, + { + "epoch": 0.47, + "learning_rate": 4.704414523043945e-05, + "loss": 0.5056, + "step": 1770 + }, + { + "epoch": 0.47, + "learning_rate": 4.704247052518757e-05, + "loss": 0.3519, + "step": 1771 + }, + { + "epoch": 0.47, + "learning_rate": 4.70407958199357e-05, + "loss": 0.3732, + "step": 1772 + }, + { + "epoch": 0.48, + "learning_rate": 4.703912111468382e-05, + "loss": 0.3568, + "step": 1773 + }, + { + "epoch": 0.48, + "learning_rate": 4.703744640943194e-05, + "loss": 0.6148, + "step": 1774 + }, + { + "epoch": 0.48, + "learning_rate": 4.703577170418006e-05, + "loss": 0.3512, + "step": 1775 + }, + { + "epoch": 0.48, + "learning_rate": 4.703409699892819e-05, + "loss": 0.3564, + "step": 1776 + }, + { + "epoch": 0.48, + "learning_rate": 4.703242229367631e-05, + "loss": 0.705, + "step": 1777 + }, + { + "epoch": 0.48, + "learning_rate": 4.703074758842444e-05, + "loss": 0.4804, + "step": 1778 + }, + { + "epoch": 0.48, + "learning_rate": 4.702907288317257e-05, + "loss": 0.7573, + "step": 1779 + }, + { + "epoch": 0.48, + "learning_rate": 4.702739817792069e-05, + "loss": 0.3721, + "step": 1780 + }, + { + "epoch": 0.48, + "learning_rate": 4.7025723472668815e-05, + "loss": 0.2173, + "step": 1781 + }, + { + "epoch": 0.48, + "learning_rate": 4.702404876741694e-05, + "loss": 0.2931, + "step": 1782 + }, + { + "epoch": 0.48, + "learning_rate": 4.702237406216506e-05, + "loss": 0.4436, + "step": 1783 + }, + { + "epoch": 0.48, + "learning_rate": 4.702069935691318e-05, + "loss": 0.3061, + "step": 1784 + }, + { + "epoch": 0.48, + "learning_rate": 4.7019024651661305e-05, + "loss": 0.3953, + "step": 1785 + }, + { + "epoch": 0.48, + "learning_rate": 4.7017349946409436e-05, + "loss": 0.3574, + "step": 1786 + }, + { + "epoch": 0.48, + "learning_rate": 4.701567524115756e-05, + "loss": 0.491, + "step": 1787 + }, + { + "epoch": 0.48, + "learning_rate": 4.7014000535905685e-05, + "loss": 0.3153, + "step": 1788 + }, + { + "epoch": 0.48, + "learning_rate": 4.701232583065381e-05, + "loss": 0.3102, + "step": 1789 + }, + { + "epoch": 0.48, + "learning_rate": 4.701065112540193e-05, + "loss": 0.4052, + "step": 1790 + }, + { + "epoch": 0.48, + "learning_rate": 4.700897642015006e-05, + "loss": 0.3689, + "step": 1791 + }, + { + "epoch": 0.48, + "learning_rate": 4.7007301714898175e-05, + "loss": 0.2379, + "step": 1792 + }, + { + "epoch": 0.48, + "learning_rate": 4.7005627009646306e-05, + "loss": 0.4511, + "step": 1793 + }, + { + "epoch": 0.48, + "learning_rate": 4.700395230439443e-05, + "loss": 0.4804, + "step": 1794 + }, + { + "epoch": 0.48, + "learning_rate": 4.7002277599142554e-05, + "loss": 0.3566, + "step": 1795 + }, + { + "epoch": 0.48, + "learning_rate": 4.700060289389068e-05, + "loss": 0.5476, + "step": 1796 + }, + { + "epoch": 0.48, + "learning_rate": 4.69989281886388e-05, + "loss": 0.4017, + "step": 1797 + }, + { + "epoch": 0.48, + "learning_rate": 4.699725348338693e-05, + "loss": 0.445, + "step": 1798 + }, + { + "epoch": 0.48, + "learning_rate": 4.699557877813505e-05, + "loss": 0.6067, + "step": 1799 + }, + { + "epoch": 0.48, + "learning_rate": 4.6993904072883175e-05, + "loss": 0.2736, + "step": 1800 + }, + { + "epoch": 0.48, + "learning_rate": 4.69922293676313e-05, + "loss": 0.2644, + "step": 1801 + }, + { + "epoch": 0.48, + "learning_rate": 4.6990554662379424e-05, + "loss": 0.5112, + "step": 1802 + }, + { + "epoch": 0.48, + "learning_rate": 4.698887995712755e-05, + "loss": 0.7355, + "step": 1803 + }, + { + "epoch": 0.48, + "learning_rate": 4.698720525187567e-05, + "loss": 0.569, + "step": 1804 + }, + { + "epoch": 0.48, + "learning_rate": 4.6985530546623796e-05, + "loss": 0.3598, + "step": 1805 + }, + { + "epoch": 0.48, + "learning_rate": 4.698385584137192e-05, + "loss": 0.3436, + "step": 1806 + }, + { + "epoch": 0.48, + "learning_rate": 4.6982181136120045e-05, + "loss": 0.3405, + "step": 1807 + }, + { + "epoch": 0.48, + "learning_rate": 4.698050643086817e-05, + "loss": 0.4061, + "step": 1808 + }, + { + "epoch": 0.48, + "learning_rate": 4.697883172561629e-05, + "loss": 0.3482, + "step": 1809 + }, + { + "epoch": 0.48, + "learning_rate": 4.697715702036442e-05, + "loss": 0.461, + "step": 1810 + }, + { + "epoch": 0.49, + "learning_rate": 4.697548231511254e-05, + "loss": 0.2878, + "step": 1811 + }, + { + "epoch": 0.49, + "learning_rate": 4.6973807609860666e-05, + "loss": 0.3458, + "step": 1812 + }, + { + "epoch": 0.49, + "learning_rate": 4.697213290460879e-05, + "loss": 0.2395, + "step": 1813 + }, + { + "epoch": 0.49, + "learning_rate": 4.6970458199356914e-05, + "loss": 0.3932, + "step": 1814 + }, + { + "epoch": 0.49, + "learning_rate": 4.696878349410504e-05, + "loss": 0.2727, + "step": 1815 + }, + { + "epoch": 0.49, + "learning_rate": 4.696710878885316e-05, + "loss": 0.4889, + "step": 1816 + }, + { + "epoch": 0.49, + "learning_rate": 4.696543408360129e-05, + "loss": 0.5112, + "step": 1817 + }, + { + "epoch": 0.49, + "learning_rate": 4.696375937834942e-05, + "loss": 0.5405, + "step": 1818 + }, + { + "epoch": 0.49, + "learning_rate": 4.696208467309754e-05, + "loss": 0.4896, + "step": 1819 + }, + { + "epoch": 0.49, + "learning_rate": 4.696040996784566e-05, + "loss": 0.2987, + "step": 1820 + }, + { + "epoch": 0.49, + "learning_rate": 4.6958735262593784e-05, + "loss": 0.5305, + "step": 1821 + }, + { + "epoch": 0.49, + "learning_rate": 4.695706055734191e-05, + "loss": 0.6473, + "step": 1822 + }, + { + "epoch": 0.49, + "learning_rate": 4.695538585209003e-05, + "loss": 0.6035, + "step": 1823 + }, + { + "epoch": 0.49, + "learning_rate": 4.6953711146838156e-05, + "loss": 0.4814, + "step": 1824 + }, + { + "epoch": 0.49, + "learning_rate": 4.695203644158628e-05, + "loss": 0.2376, + "step": 1825 + }, + { + "epoch": 0.49, + "learning_rate": 4.695036173633441e-05, + "loss": 0.5202, + "step": 1826 + }, + { + "epoch": 0.49, + "learning_rate": 4.6948687031082536e-05, + "loss": 0.2275, + "step": 1827 + }, + { + "epoch": 0.49, + "learning_rate": 4.694701232583066e-05, + "loss": 0.3358, + "step": 1828 + }, + { + "epoch": 0.49, + "learning_rate": 4.6945337620578784e-05, + "loss": 0.3166, + "step": 1829 + }, + { + "epoch": 0.49, + "learning_rate": 4.69436629153269e-05, + "loss": 0.3212, + "step": 1830 + }, + { + "epoch": 0.49, + "learning_rate": 4.6941988210075026e-05, + "loss": 0.272, + "step": 1831 + }, + { + "epoch": 0.49, + "learning_rate": 4.694031350482315e-05, + "loss": 0.2742, + "step": 1832 + }, + { + "epoch": 0.49, + "learning_rate": 4.6938638799571274e-05, + "loss": 0.2778, + "step": 1833 + }, + { + "epoch": 0.49, + "learning_rate": 4.6936964094319405e-05, + "loss": 0.5236, + "step": 1834 + }, + { + "epoch": 0.49, + "learning_rate": 4.693528938906753e-05, + "loss": 0.3395, + "step": 1835 + }, + { + "epoch": 0.49, + "learning_rate": 4.6933614683815654e-05, + "loss": 0.3013, + "step": 1836 + }, + { + "epoch": 0.49, + "learning_rate": 4.693193997856378e-05, + "loss": 0.4563, + "step": 1837 + }, + { + "epoch": 0.49, + "learning_rate": 4.69302652733119e-05, + "loss": 0.2207, + "step": 1838 + }, + { + "epoch": 0.49, + "learning_rate": 4.692859056806002e-05, + "loss": 0.3947, + "step": 1839 + }, + { + "epoch": 0.49, + "learning_rate": 4.6926915862808144e-05, + "loss": 0.3735, + "step": 1840 + }, + { + "epoch": 0.49, + "learning_rate": 4.692524115755627e-05, + "loss": 0.4248, + "step": 1841 + }, + { + "epoch": 0.49, + "learning_rate": 4.69235664523044e-05, + "loss": 0.2815, + "step": 1842 + }, + { + "epoch": 0.49, + "learning_rate": 4.692189174705252e-05, + "loss": 0.2428, + "step": 1843 + }, + { + "epoch": 0.49, + "learning_rate": 4.692021704180065e-05, + "loss": 0.6195, + "step": 1844 + }, + { + "epoch": 0.49, + "learning_rate": 4.691854233654877e-05, + "loss": 0.3523, + "step": 1845 + }, + { + "epoch": 0.49, + "learning_rate": 4.6916867631296896e-05, + "loss": 0.3653, + "step": 1846 + }, + { + "epoch": 0.49, + "learning_rate": 4.691519292604502e-05, + "loss": 0.1978, + "step": 1847 + }, + { + "epoch": 0.5, + "learning_rate": 4.691351822079314e-05, + "loss": 0.3568, + "step": 1848 + }, + { + "epoch": 0.5, + "learning_rate": 4.691184351554127e-05, + "loss": 0.4228, + "step": 1849 + }, + { + "epoch": 0.5, + "learning_rate": 4.691016881028939e-05, + "loss": 0.4929, + "step": 1850 + }, + { + "epoch": 0.5, + "learning_rate": 4.690849410503752e-05, + "loss": 0.2536, + "step": 1851 + }, + { + "epoch": 0.5, + "learning_rate": 4.690681939978564e-05, + "loss": 0.2482, + "step": 1852 + }, + { + "epoch": 0.5, + "learning_rate": 4.6905144694533765e-05, + "loss": 0.2879, + "step": 1853 + }, + { + "epoch": 0.5, + "learning_rate": 4.690346998928189e-05, + "loss": 0.2491, + "step": 1854 + }, + { + "epoch": 0.5, + "learning_rate": 4.6901795284030014e-05, + "loss": 0.2959, + "step": 1855 + }, + { + "epoch": 0.5, + "learning_rate": 4.690012057877814e-05, + "loss": 0.3165, + "step": 1856 + }, + { + "epoch": 0.5, + "learning_rate": 4.689844587352626e-05, + "loss": 0.2674, + "step": 1857 + }, + { + "epoch": 0.5, + "learning_rate": 4.6896771168274386e-05, + "loss": 0.5369, + "step": 1858 + }, + { + "epoch": 0.5, + "learning_rate": 4.689509646302251e-05, + "loss": 0.3413, + "step": 1859 + }, + { + "epoch": 0.5, + "learning_rate": 4.6893421757770635e-05, + "loss": 0.6186, + "step": 1860 + }, + { + "epoch": 0.5, + "learning_rate": 4.689174705251876e-05, + "loss": 0.5941, + "step": 1861 + }, + { + "epoch": 0.5, + "learning_rate": 4.689007234726688e-05, + "loss": 0.6223, + "step": 1862 + }, + { + "epoch": 0.5, + "learning_rate": 4.688839764201501e-05, + "loss": 0.3524, + "step": 1863 + }, + { + "epoch": 0.5, + "learning_rate": 4.688672293676313e-05, + "loss": 0.3062, + "step": 1864 + }, + { + "epoch": 0.5, + "learning_rate": 4.6885048231511256e-05, + "loss": 0.2929, + "step": 1865 + }, + { + "epoch": 0.5, + "learning_rate": 4.688337352625938e-05, + "loss": 0.4601, + "step": 1866 + }, + { + "epoch": 0.5, + "learning_rate": 4.6881698821007504e-05, + "loss": 0.2405, + "step": 1867 + }, + { + "epoch": 0.5, + "learning_rate": 4.688002411575563e-05, + "loss": 0.409, + "step": 1868 + }, + { + "epoch": 0.5, + "learning_rate": 4.687834941050375e-05, + "loss": 0.3945, + "step": 1869 + }, + { + "epoch": 0.5, + "learning_rate": 4.687667470525188e-05, + "loss": 0.4457, + "step": 1870 + }, + { + "epoch": 0.5, + "learning_rate": 4.6875e-05, + "loss": 0.3507, + "step": 1871 + }, + { + "epoch": 0.5, + "learning_rate": 4.6873325294748125e-05, + "loss": 0.509, + "step": 1872 + }, + { + "epoch": 0.5, + "learning_rate": 4.687165058949625e-05, + "loss": 0.3512, + "step": 1873 + }, + { + "epoch": 0.5, + "learning_rate": 4.686997588424438e-05, + "loss": 0.2153, + "step": 1874 + }, + { + "epoch": 0.5, + "learning_rate": 4.6868301178992505e-05, + "loss": 0.2597, + "step": 1875 + }, + { + "epoch": 0.5, + "learning_rate": 4.686662647374062e-05, + "loss": 0.2879, + "step": 1876 + }, + { + "epoch": 0.5, + "learning_rate": 4.6864951768488746e-05, + "loss": 0.2951, + "step": 1877 + }, + { + "epoch": 0.5, + "learning_rate": 4.686327706323687e-05, + "loss": 0.4047, + "step": 1878 + }, + { + "epoch": 0.5, + "learning_rate": 4.6861602357984995e-05, + "loss": 0.3089, + "step": 1879 + }, + { + "epoch": 0.5, + "learning_rate": 4.685992765273312e-05, + "loss": 0.4418, + "step": 1880 + }, + { + "epoch": 0.5, + "learning_rate": 4.685825294748124e-05, + "loss": 0.2697, + "step": 1881 + }, + { + "epoch": 0.5, + "learning_rate": 4.6856578242229374e-05, + "loss": 0.3285, + "step": 1882 + }, + { + "epoch": 0.5, + "learning_rate": 4.68549035369775e-05, + "loss": 0.2646, + "step": 1883 + }, + { + "epoch": 0.5, + "learning_rate": 4.685322883172562e-05, + "loss": 0.2321, + "step": 1884 + }, + { + "epoch": 0.51, + "learning_rate": 4.685155412647374e-05, + "loss": 0.35, + "step": 1885 + }, + { + "epoch": 0.51, + "learning_rate": 4.6849879421221864e-05, + "loss": 0.391, + "step": 1886 + }, + { + "epoch": 0.51, + "learning_rate": 4.684820471596999e-05, + "loss": 0.4945, + "step": 1887 + }, + { + "epoch": 0.51, + "learning_rate": 4.684653001071811e-05, + "loss": 0.2617, + "step": 1888 + }, + { + "epoch": 0.51, + "learning_rate": 4.684485530546624e-05, + "loss": 0.4167, + "step": 1889 + }, + { + "epoch": 0.51, + "learning_rate": 4.684318060021437e-05, + "loss": 0.3725, + "step": 1890 + }, + { + "epoch": 0.51, + "learning_rate": 4.684150589496249e-05, + "loss": 0.477, + "step": 1891 + }, + { + "epoch": 0.51, + "learning_rate": 4.6839831189710616e-05, + "loss": 0.2802, + "step": 1892 + }, + { + "epoch": 0.51, + "learning_rate": 4.683815648445874e-05, + "loss": 0.2319, + "step": 1893 + }, + { + "epoch": 0.51, + "learning_rate": 4.6836481779206865e-05, + "loss": 0.2735, + "step": 1894 + }, + { + "epoch": 0.51, + "learning_rate": 4.683480707395498e-05, + "loss": 0.5017, + "step": 1895 + }, + { + "epoch": 0.51, + "learning_rate": 4.6833132368703106e-05, + "loss": 0.2948, + "step": 1896 + }, + { + "epoch": 0.51, + "learning_rate": 4.683145766345123e-05, + "loss": 0.5598, + "step": 1897 + }, + { + "epoch": 0.51, + "learning_rate": 4.682978295819936e-05, + "loss": 0.6854, + "step": 1898 + }, + { + "epoch": 0.51, + "learning_rate": 4.6828108252947486e-05, + "loss": 0.3608, + "step": 1899 + }, + { + "epoch": 0.51, + "learning_rate": 4.682643354769561e-05, + "loss": 0.423, + "step": 1900 + }, + { + "epoch": 0.51, + "learning_rate": 4.6824758842443734e-05, + "loss": 0.4839, + "step": 1901 + }, + { + "epoch": 0.51, + "learning_rate": 4.682308413719186e-05, + "loss": 0.2915, + "step": 1902 + }, + { + "epoch": 0.51, + "learning_rate": 4.682140943193998e-05, + "loss": 0.253, + "step": 1903 + }, + { + "epoch": 0.51, + "learning_rate": 4.68197347266881e-05, + "loss": 0.4179, + "step": 1904 + }, + { + "epoch": 0.51, + "learning_rate": 4.681806002143623e-05, + "loss": 0.3434, + "step": 1905 + }, + { + "epoch": 0.51, + "learning_rate": 4.6816385316184355e-05, + "loss": 0.4794, + "step": 1906 + }, + { + "epoch": 0.51, + "learning_rate": 4.681471061093248e-05, + "loss": 0.3264, + "step": 1907 + }, + { + "epoch": 0.51, + "learning_rate": 4.6813035905680604e-05, + "loss": 0.498, + "step": 1908 + }, + { + "epoch": 0.51, + "learning_rate": 4.681136120042873e-05, + "loss": 0.3183, + "step": 1909 + }, + { + "epoch": 0.51, + "learning_rate": 4.680968649517685e-05, + "loss": 0.3748, + "step": 1910 + }, + { + "epoch": 0.51, + "learning_rate": 4.6808011789924976e-05, + "loss": 0.2848, + "step": 1911 + }, + { + "epoch": 0.51, + "learning_rate": 4.68063370846731e-05, + "loss": 0.2491, + "step": 1912 + }, + { + "epoch": 0.51, + "learning_rate": 4.6804662379421225e-05, + "loss": 0.2736, + "step": 1913 + }, + { + "epoch": 0.51, + "learning_rate": 4.680298767416935e-05, + "loss": 0.4025, + "step": 1914 + }, + { + "epoch": 0.51, + "learning_rate": 4.680131296891747e-05, + "loss": 0.3619, + "step": 1915 + }, + { + "epoch": 0.51, + "learning_rate": 4.67996382636656e-05, + "loss": 0.414, + "step": 1916 + }, + { + "epoch": 0.51, + "learning_rate": 4.679796355841372e-05, + "loss": 0.2824, + "step": 1917 + }, + { + "epoch": 0.51, + "learning_rate": 4.6796288853161846e-05, + "loss": 0.3475, + "step": 1918 + }, + { + "epoch": 0.51, + "learning_rate": 4.679461414790997e-05, + "loss": 0.2782, + "step": 1919 + }, + { + "epoch": 0.51, + "learning_rate": 4.6792939442658094e-05, + "loss": 0.4945, + "step": 1920 + }, + { + "epoch": 0.51, + "learning_rate": 4.679126473740622e-05, + "loss": 0.2741, + "step": 1921 + }, + { + "epoch": 0.51, + "learning_rate": 4.678959003215434e-05, + "loss": 0.2274, + "step": 1922 + }, + { + "epoch": 0.52, + "learning_rate": 4.678791532690247e-05, + "loss": 0.484, + "step": 1923 + }, + { + "epoch": 0.52, + "learning_rate": 4.678624062165059e-05, + "loss": 0.3838, + "step": 1924 + }, + { + "epoch": 0.52, + "learning_rate": 4.6784565916398715e-05, + "loss": 0.4341, + "step": 1925 + }, + { + "epoch": 0.52, + "learning_rate": 4.678289121114684e-05, + "loss": 0.4779, + "step": 1926 + }, + { + "epoch": 0.52, + "learning_rate": 4.6781216505894963e-05, + "loss": 0.2952, + "step": 1927 + }, + { + "epoch": 0.52, + "learning_rate": 4.677954180064309e-05, + "loss": 0.4143, + "step": 1928 + }, + { + "epoch": 0.52, + "learning_rate": 4.677786709539121e-05, + "loss": 0.5768, + "step": 1929 + }, + { + "epoch": 0.52, + "learning_rate": 4.677619239013934e-05, + "loss": 0.3106, + "step": 1930 + }, + { + "epoch": 0.52, + "learning_rate": 4.677451768488747e-05, + "loss": 0.3396, + "step": 1931 + }, + { + "epoch": 0.52, + "learning_rate": 4.6772842979635585e-05, + "loss": 0.3711, + "step": 1932 + }, + { + "epoch": 0.52, + "learning_rate": 4.677116827438371e-05, + "loss": 0.6913, + "step": 1933 + }, + { + "epoch": 0.52, + "learning_rate": 4.676949356913183e-05, + "loss": 0.2951, + "step": 1934 + }, + { + "epoch": 0.52, + "learning_rate": 4.676781886387996e-05, + "loss": 0.2746, + "step": 1935 + }, + { + "epoch": 0.52, + "learning_rate": 4.676614415862808e-05, + "loss": 0.2866, + "step": 1936 + }, + { + "epoch": 0.52, + "learning_rate": 4.6764469453376206e-05, + "loss": 0.4411, + "step": 1937 + }, + { + "epoch": 0.52, + "learning_rate": 4.6762794748124337e-05, + "loss": 0.3581, + "step": 1938 + }, + { + "epoch": 0.52, + "learning_rate": 4.676112004287246e-05, + "loss": 0.4961, + "step": 1939 + }, + { + "epoch": 0.52, + "learning_rate": 4.6759445337620585e-05, + "loss": 0.4967, + "step": 1940 + }, + { + "epoch": 0.52, + "learning_rate": 4.67577706323687e-05, + "loss": 0.3217, + "step": 1941 + }, + { + "epoch": 0.52, + "learning_rate": 4.675609592711683e-05, + "loss": 0.3877, + "step": 1942 + }, + { + "epoch": 0.52, + "learning_rate": 4.675442122186495e-05, + "loss": 0.4344, + "step": 1943 + }, + { + "epoch": 0.52, + "learning_rate": 4.6752746516613075e-05, + "loss": 0.3304, + "step": 1944 + }, + { + "epoch": 0.52, + "learning_rate": 4.67510718113612e-05, + "loss": 0.5631, + "step": 1945 + }, + { + "epoch": 0.52, + "learning_rate": 4.674939710610933e-05, + "loss": 0.4649, + "step": 1946 + }, + { + "epoch": 0.52, + "learning_rate": 4.6747722400857454e-05, + "loss": 0.3137, + "step": 1947 + }, + { + "epoch": 0.52, + "learning_rate": 4.674604769560558e-05, + "loss": 0.2895, + "step": 1948 + }, + { + "epoch": 0.52, + "learning_rate": 4.67443729903537e-05, + "loss": 0.4034, + "step": 1949 + }, + { + "epoch": 0.52, + "learning_rate": 4.674269828510183e-05, + "loss": 0.621, + "step": 1950 + }, + { + "epoch": 0.52, + "learning_rate": 4.6741023579849945e-05, + "loss": 0.2619, + "step": 1951 + }, + { + "epoch": 0.52, + "learning_rate": 4.673934887459807e-05, + "loss": 0.2687, + "step": 1952 + }, + { + "epoch": 0.52, + "learning_rate": 4.673767416934619e-05, + "loss": 0.3932, + "step": 1953 + }, + { + "epoch": 0.52, + "learning_rate": 4.6735999464094324e-05, + "loss": 0.212, + "step": 1954 + }, + { + "epoch": 0.52, + "learning_rate": 4.673432475884245e-05, + "loss": 0.6047, + "step": 1955 + }, + { + "epoch": 0.52, + "learning_rate": 4.673265005359057e-05, + "loss": 0.3529, + "step": 1956 + }, + { + "epoch": 0.52, + "learning_rate": 4.6730975348338697e-05, + "loss": 0.2838, + "step": 1957 + }, + { + "epoch": 0.52, + "learning_rate": 4.672930064308682e-05, + "loss": 0.6027, + "step": 1958 + }, + { + "epoch": 0.52, + "learning_rate": 4.6727625937834945e-05, + "loss": 0.3613, + "step": 1959 + }, + { + "epoch": 0.53, + "learning_rate": 4.672595123258306e-05, + "loss": 0.4761, + "step": 1960 + }, + { + "epoch": 0.53, + "learning_rate": 4.6724276527331187e-05, + "loss": 0.5326, + "step": 1961 + }, + { + "epoch": 0.53, + "learning_rate": 4.672260182207932e-05, + "loss": 0.28, + "step": 1962 + }, + { + "epoch": 0.53, + "learning_rate": 4.672092711682744e-05, + "loss": 0.3971, + "step": 1963 + }, + { + "epoch": 0.53, + "learning_rate": 4.6719252411575566e-05, + "loss": 0.2522, + "step": 1964 + }, + { + "epoch": 0.53, + "learning_rate": 4.671757770632369e-05, + "loss": 0.4464, + "step": 1965 + }, + { + "epoch": 0.53, + "learning_rate": 4.6715903001071814e-05, + "loss": 0.2856, + "step": 1966 + }, + { + "epoch": 0.53, + "learning_rate": 4.671422829581994e-05, + "loss": 0.2409, + "step": 1967 + }, + { + "epoch": 0.53, + "learning_rate": 4.671255359056806e-05, + "loss": 0.276, + "step": 1968 + }, + { + "epoch": 0.53, + "learning_rate": 4.671087888531619e-05, + "loss": 0.4962, + "step": 1969 + }, + { + "epoch": 0.53, + "learning_rate": 4.670920418006431e-05, + "loss": 0.2568, + "step": 1970 + }, + { + "epoch": 0.53, + "learning_rate": 4.6707529474812435e-05, + "loss": 0.4392, + "step": 1971 + }, + { + "epoch": 0.53, + "learning_rate": 4.670585476956056e-05, + "loss": 0.6223, + "step": 1972 + }, + { + "epoch": 0.53, + "learning_rate": 4.6704180064308684e-05, + "loss": 0.5057, + "step": 1973 + }, + { + "epoch": 0.53, + "learning_rate": 4.670250535905681e-05, + "loss": 0.3573, + "step": 1974 + }, + { + "epoch": 0.53, + "learning_rate": 4.670083065380493e-05, + "loss": 1.7444, + "step": 1975 + }, + { + "epoch": 0.53, + "learning_rate": 4.6699155948553057e-05, + "loss": 0.3917, + "step": 1976 + }, + { + "epoch": 0.53, + "learning_rate": 4.669748124330118e-05, + "loss": 0.3335, + "step": 1977 + }, + { + "epoch": 0.53, + "learning_rate": 4.6695806538049305e-05, + "loss": 0.3829, + "step": 1978 + }, + { + "epoch": 0.53, + "learning_rate": 4.669413183279743e-05, + "loss": 0.306, + "step": 1979 + }, + { + "epoch": 0.53, + "learning_rate": 4.669245712754555e-05, + "loss": 0.3125, + "step": 1980 + }, + { + "epoch": 0.53, + "learning_rate": 4.669078242229368e-05, + "loss": 0.3117, + "step": 1981 + }, + { + "epoch": 0.53, + "learning_rate": 4.66891077170418e-05, + "loss": 0.3413, + "step": 1982 + }, + { + "epoch": 0.53, + "learning_rate": 4.6687433011789926e-05, + "loss": 0.4294, + "step": 1983 + }, + { + "epoch": 0.53, + "learning_rate": 4.668575830653805e-05, + "loss": 0.4926, + "step": 1984 + }, + { + "epoch": 0.53, + "learning_rate": 4.6684083601286174e-05, + "loss": 0.2921, + "step": 1985 + }, + { + "epoch": 0.53, + "learning_rate": 4.66824088960343e-05, + "loss": 0.3047, + "step": 1986 + }, + { + "epoch": 0.53, + "learning_rate": 4.668073419078243e-05, + "loss": 0.2948, + "step": 1987 + }, + { + "epoch": 0.53, + "learning_rate": 4.667905948553055e-05, + "loss": 0.3746, + "step": 1988 + }, + { + "epoch": 0.53, + "learning_rate": 4.667738478027867e-05, + "loss": 0.2333, + "step": 1989 + }, + { + "epoch": 0.53, + "learning_rate": 4.6675710075026795e-05, + "loss": 0.2471, + "step": 1990 + }, + { + "epoch": 0.53, + "learning_rate": 4.667403536977492e-05, + "loss": 0.277, + "step": 1991 + }, + { + "epoch": 0.53, + "learning_rate": 4.6672360664523044e-05, + "loss": 0.5004, + "step": 1992 + }, + { + "epoch": 0.53, + "learning_rate": 4.667068595927117e-05, + "loss": 0.4162, + "step": 1993 + }, + { + "epoch": 0.53, + "learning_rate": 4.66690112540193e-05, + "loss": 0.2807, + "step": 1994 + }, + { + "epoch": 0.53, + "learning_rate": 4.666733654876742e-05, + "loss": 0.5448, + "step": 1995 + }, + { + "epoch": 0.53, + "learning_rate": 4.666566184351555e-05, + "loss": 0.4487, + "step": 1996 + }, + { + "epoch": 0.54, + "learning_rate": 4.6663987138263665e-05, + "loss": 0.4201, + "step": 1997 + }, + { + "epoch": 0.54, + "learning_rate": 4.666231243301179e-05, + "loss": 0.4372, + "step": 1998 + }, + { + "epoch": 0.54, + "learning_rate": 4.666063772775991e-05, + "loss": 0.6957, + "step": 1999 + }, + { + "epoch": 0.54, + "learning_rate": 4.665896302250804e-05, + "loss": 0.378, + "step": 2000 + }, + { + "epoch": 0.54, + "learning_rate": 4.665728831725616e-05, + "loss": 0.3118, + "step": 2001 + }, + { + "epoch": 0.54, + "learning_rate": 4.665561361200429e-05, + "loss": 0.4972, + "step": 2002 + }, + { + "epoch": 0.54, + "learning_rate": 4.665393890675242e-05, + "loss": 0.4181, + "step": 2003 + }, + { + "epoch": 0.54, + "learning_rate": 4.665226420150054e-05, + "loss": 0.2804, + "step": 2004 + }, + { + "epoch": 0.54, + "learning_rate": 4.6650589496248665e-05, + "loss": 0.4148, + "step": 2005 + }, + { + "epoch": 0.54, + "learning_rate": 4.664891479099679e-05, + "loss": 0.4311, + "step": 2006 + }, + { + "epoch": 0.54, + "learning_rate": 4.664724008574491e-05, + "loss": 0.3829, + "step": 2007 + }, + { + "epoch": 0.54, + "learning_rate": 4.664556538049303e-05, + "loss": 0.324, + "step": 2008 + }, + { + "epoch": 0.54, + "learning_rate": 4.6643890675241155e-05, + "loss": 0.4916, + "step": 2009 + }, + { + "epoch": 0.54, + "learning_rate": 4.6642215969989286e-05, + "loss": 0.3064, + "step": 2010 + }, + { + "epoch": 0.54, + "learning_rate": 4.664054126473741e-05, + "loss": 0.3393, + "step": 2011 + }, + { + "epoch": 0.54, + "learning_rate": 4.6638866559485535e-05, + "loss": 0.2945, + "step": 2012 + }, + { + "epoch": 0.54, + "learning_rate": 4.663719185423366e-05, + "loss": 0.3204, + "step": 2013 + }, + { + "epoch": 0.54, + "learning_rate": 4.663551714898178e-05, + "loss": 0.5365, + "step": 2014 + }, + { + "epoch": 0.54, + "learning_rate": 4.663384244372991e-05, + "loss": 0.2936, + "step": 2015 + }, + { + "epoch": 0.54, + "learning_rate": 4.6632167738478025e-05, + "loss": 0.248, + "step": 2016 + }, + { + "epoch": 0.54, + "learning_rate": 4.663049303322615e-05, + "loss": 0.3227, + "step": 2017 + }, + { + "epoch": 0.54, + "learning_rate": 4.662881832797428e-05, + "loss": 0.3614, + "step": 2018 + }, + { + "epoch": 0.54, + "learning_rate": 4.6627143622722404e-05, + "loss": 0.5263, + "step": 2019 + }, + { + "epoch": 0.54, + "learning_rate": 4.662546891747053e-05, + "loss": 0.4576, + "step": 2020 + }, + { + "epoch": 0.54, + "learning_rate": 4.662379421221865e-05, + "loss": 0.596, + "step": 2021 + }, + { + "epoch": 0.54, + "learning_rate": 4.662211950696678e-05, + "loss": 0.5977, + "step": 2022 + }, + { + "epoch": 0.54, + "learning_rate": 4.66204448017149e-05, + "loss": 0.5256, + "step": 2023 + }, + { + "epoch": 0.54, + "learning_rate": 4.6618770096463025e-05, + "loss": 0.4335, + "step": 2024 + }, + { + "epoch": 0.54, + "learning_rate": 4.661709539121115e-05, + "loss": 0.4516, + "step": 2025 + }, + { + "epoch": 0.54, + "learning_rate": 4.6615420685959274e-05, + "loss": 0.3242, + "step": 2026 + }, + { + "epoch": 0.54, + "learning_rate": 4.66137459807074e-05, + "loss": 0.3877, + "step": 2027 + }, + { + "epoch": 0.54, + "learning_rate": 4.661207127545552e-05, + "loss": 0.2889, + "step": 2028 + }, + { + "epoch": 0.54, + "learning_rate": 4.6610396570203646e-05, + "loss": 0.3985, + "step": 2029 + }, + { + "epoch": 0.54, + "learning_rate": 4.660872186495177e-05, + "loss": 0.227, + "step": 2030 + }, + { + "epoch": 0.54, + "learning_rate": 4.6607047159699895e-05, + "loss": 0.5159, + "step": 2031 + }, + { + "epoch": 0.54, + "learning_rate": 4.660537245444802e-05, + "loss": 0.3207, + "step": 2032 + }, + { + "epoch": 0.54, + "learning_rate": 4.660369774919614e-05, + "loss": 0.3393, + "step": 2033 + }, + { + "epoch": 0.54, + "learning_rate": 4.660202304394427e-05, + "loss": 0.3094, + "step": 2034 + }, + { + "epoch": 0.55, + "learning_rate": 4.660034833869239e-05, + "loss": 0.3408, + "step": 2035 + }, + { + "epoch": 0.55, + "learning_rate": 4.6598673633440516e-05, + "loss": 0.337, + "step": 2036 + }, + { + "epoch": 0.55, + "learning_rate": 4.659699892818864e-05, + "loss": 0.4144, + "step": 2037 + }, + { + "epoch": 0.55, + "learning_rate": 4.6595324222936764e-05, + "loss": 0.2594, + "step": 2038 + }, + { + "epoch": 0.55, + "learning_rate": 4.659364951768489e-05, + "loss": 0.2785, + "step": 2039 + }, + { + "epoch": 0.55, + "learning_rate": 4.659197481243301e-05, + "loss": 0.3688, + "step": 2040 + }, + { + "epoch": 0.55, + "learning_rate": 4.659030010718114e-05, + "loss": 0.3505, + "step": 2041 + }, + { + "epoch": 0.55, + "learning_rate": 4.658862540192926e-05, + "loss": 0.564, + "step": 2042 + }, + { + "epoch": 0.55, + "learning_rate": 4.658695069667739e-05, + "loss": 0.3435, + "step": 2043 + }, + { + "epoch": 0.55, + "learning_rate": 4.658527599142551e-05, + "loss": 0.2857, + "step": 2044 + }, + { + "epoch": 0.55, + "learning_rate": 4.6583601286173634e-05, + "loss": 0.5541, + "step": 2045 + }, + { + "epoch": 0.55, + "learning_rate": 4.658192658092176e-05, + "loss": 0.5434, + "step": 2046 + }, + { + "epoch": 0.55, + "learning_rate": 4.658025187566988e-05, + "loss": 0.2962, + "step": 2047 + }, + { + "epoch": 0.55, + "learning_rate": 4.6578577170418006e-05, + "loss": 0.3813, + "step": 2048 + }, + { + "epoch": 0.55, + "learning_rate": 4.657690246516613e-05, + "loss": 0.4268, + "step": 2049 + }, + { + "epoch": 0.55, + "learning_rate": 4.657522775991426e-05, + "loss": 0.3211, + "step": 2050 + }, + { + "epoch": 0.55, + "learning_rate": 4.6573553054662386e-05, + "loss": 0.2543, + "step": 2051 + }, + { + "epoch": 0.55, + "learning_rate": 4.657187834941051e-05, + "loss": 0.2312, + "step": 2052 + }, + { + "epoch": 0.55, + "learning_rate": 4.657020364415863e-05, + "loss": 0.482, + "step": 2053 + }, + { + "epoch": 0.55, + "learning_rate": 4.656852893890675e-05, + "loss": 0.5978, + "step": 2054 + }, + { + "epoch": 0.55, + "learning_rate": 4.6566854233654876e-05, + "loss": 0.4658, + "step": 2055 + }, + { + "epoch": 0.55, + "learning_rate": 4.6565179528403e-05, + "loss": 0.3977, + "step": 2056 + }, + { + "epoch": 0.55, + "learning_rate": 4.6563504823151124e-05, + "loss": 0.302, + "step": 2057 + }, + { + "epoch": 0.55, + "learning_rate": 4.6561830117899255e-05, + "loss": 0.2374, + "step": 2058 + }, + { + "epoch": 0.55, + "learning_rate": 4.656015541264738e-05, + "loss": 0.3149, + "step": 2059 + }, + { + "epoch": 0.55, + "learning_rate": 4.6558480707395504e-05, + "loss": 0.4016, + "step": 2060 + }, + { + "epoch": 0.55, + "learning_rate": 4.655680600214363e-05, + "loss": 0.3904, + "step": 2061 + }, + { + "epoch": 0.55, + "learning_rate": 4.655513129689175e-05, + "loss": 0.2895, + "step": 2062 + }, + { + "epoch": 0.55, + "learning_rate": 4.655345659163987e-05, + "loss": 0.305, + "step": 2063 + }, + { + "epoch": 0.55, + "learning_rate": 4.6551781886387994e-05, + "loss": 0.3446, + "step": 2064 + }, + { + "epoch": 0.55, + "learning_rate": 4.655010718113612e-05, + "loss": 0.2684, + "step": 2065 + }, + { + "epoch": 0.55, + "learning_rate": 4.654843247588425e-05, + "loss": 0.4261, + "step": 2066 + }, + { + "epoch": 0.55, + "learning_rate": 4.654675777063237e-05, + "loss": 0.2538, + "step": 2067 + }, + { + "epoch": 0.55, + "learning_rate": 4.65450830653805e-05, + "loss": 0.4965, + "step": 2068 + }, + { + "epoch": 0.55, + "learning_rate": 4.654340836012862e-05, + "loss": 0.2249, + "step": 2069 + }, + { + "epoch": 0.55, + "learning_rate": 4.6541733654876746e-05, + "loss": 0.344, + "step": 2070 + }, + { + "epoch": 0.55, + "learning_rate": 4.654005894962487e-05, + "loss": 0.2591, + "step": 2071 + }, + { + "epoch": 0.56, + "learning_rate": 4.653838424437299e-05, + "loss": 0.5158, + "step": 2072 + }, + { + "epoch": 0.56, + "learning_rate": 4.653670953912111e-05, + "loss": 0.2587, + "step": 2073 + }, + { + "epoch": 0.56, + "learning_rate": 4.653503483386924e-05, + "loss": 0.4324, + "step": 2074 + }, + { + "epoch": 0.56, + "learning_rate": 4.653336012861737e-05, + "loss": 0.2771, + "step": 2075 + }, + { + "epoch": 0.56, + "learning_rate": 4.653168542336549e-05, + "loss": 0.3414, + "step": 2076 + }, + { + "epoch": 0.56, + "learning_rate": 4.6530010718113615e-05, + "loss": 0.2808, + "step": 2077 + }, + { + "epoch": 0.56, + "learning_rate": 4.652833601286174e-05, + "loss": 0.2315, + "step": 2078 + }, + { + "epoch": 0.56, + "learning_rate": 4.6526661307609864e-05, + "loss": 0.2825, + "step": 2079 + }, + { + "epoch": 0.56, + "learning_rate": 4.652498660235799e-05, + "loss": 0.4448, + "step": 2080 + }, + { + "epoch": 0.56, + "learning_rate": 4.652331189710611e-05, + "loss": 0.3261, + "step": 2081 + }, + { + "epoch": 0.56, + "learning_rate": 4.6521637191854236e-05, + "loss": 0.5951, + "step": 2082 + }, + { + "epoch": 0.56, + "learning_rate": 4.651996248660236e-05, + "loss": 0.2896, + "step": 2083 + }, + { + "epoch": 0.56, + "learning_rate": 4.6518287781350485e-05, + "loss": 0.4784, + "step": 2084 + }, + { + "epoch": 0.56, + "learning_rate": 4.651661307609861e-05, + "loss": 0.2362, + "step": 2085 + }, + { + "epoch": 0.56, + "learning_rate": 4.651493837084673e-05, + "loss": 0.3621, + "step": 2086 + }, + { + "epoch": 0.56, + "learning_rate": 4.651326366559486e-05, + "loss": 0.2551, + "step": 2087 + }, + { + "epoch": 0.56, + "learning_rate": 4.651158896034298e-05, + "loss": 0.2693, + "step": 2088 + }, + { + "epoch": 0.56, + "learning_rate": 4.6509914255091106e-05, + "loss": 0.3678, + "step": 2089 + }, + { + "epoch": 0.56, + "learning_rate": 4.650823954983923e-05, + "loss": 0.3406, + "step": 2090 + }, + { + "epoch": 0.56, + "learning_rate": 4.6506564844587354e-05, + "loss": 0.3516, + "step": 2091 + }, + { + "epoch": 0.56, + "learning_rate": 4.650489013933548e-05, + "loss": 0.6603, + "step": 2092 + }, + { + "epoch": 0.56, + "learning_rate": 4.65032154340836e-05, + "loss": 0.2777, + "step": 2093 + }, + { + "epoch": 0.56, + "learning_rate": 4.650154072883173e-05, + "loss": 0.2148, + "step": 2094 + }, + { + "epoch": 0.56, + "learning_rate": 4.649986602357985e-05, + "loss": 0.3537, + "step": 2095 + }, + { + "epoch": 0.56, + "learning_rate": 4.6498191318327975e-05, + "loss": 0.2489, + "step": 2096 + }, + { + "epoch": 0.56, + "learning_rate": 4.64965166130761e-05, + "loss": 0.374, + "step": 2097 + }, + { + "epoch": 0.56, + "learning_rate": 4.6494841907824224e-05, + "loss": 0.3123, + "step": 2098 + }, + { + "epoch": 0.56, + "learning_rate": 4.6493167202572355e-05, + "loss": 0.4605, + "step": 2099 + }, + { + "epoch": 0.56, + "learning_rate": 4.649149249732047e-05, + "loss": 0.2115, + "step": 2100 + }, + { + "epoch": 0.56, + "learning_rate": 4.6489817792068596e-05, + "loss": 0.6176, + "step": 2101 + }, + { + "epoch": 0.56, + "learning_rate": 4.648814308681672e-05, + "loss": 0.4371, + "step": 2102 + }, + { + "epoch": 0.56, + "learning_rate": 4.6486468381564845e-05, + "loss": 0.335, + "step": 2103 + }, + { + "epoch": 0.56, + "learning_rate": 4.648479367631297e-05, + "loss": 0.3463, + "step": 2104 + }, + { + "epoch": 0.56, + "learning_rate": 4.648311897106109e-05, + "loss": 0.4245, + "step": 2105 + }, + { + "epoch": 0.56, + "learning_rate": 4.6481444265809224e-05, + "loss": 0.348, + "step": 2106 + }, + { + "epoch": 0.56, + "learning_rate": 4.647976956055735e-05, + "loss": 0.2817, + "step": 2107 + }, + { + "epoch": 0.56, + "learning_rate": 4.647809485530547e-05, + "loss": 0.4523, + "step": 2108 + }, + { + "epoch": 0.57, + "learning_rate": 4.647642015005359e-05, + "loss": 0.2894, + "step": 2109 + }, + { + "epoch": 0.57, + "learning_rate": 4.6474745444801714e-05, + "loss": 0.4929, + "step": 2110 + }, + { + "epoch": 0.57, + "learning_rate": 4.647307073954984e-05, + "loss": 0.3835, + "step": 2111 + }, + { + "epoch": 0.57, + "learning_rate": 4.647139603429796e-05, + "loss": 0.2433, + "step": 2112 + }, + { + "epoch": 0.57, + "learning_rate": 4.646972132904609e-05, + "loss": 0.3891, + "step": 2113 + }, + { + "epoch": 0.57, + "learning_rate": 4.646804662379422e-05, + "loss": 0.3342, + "step": 2114 + }, + { + "epoch": 0.57, + "learning_rate": 4.646637191854234e-05, + "loss": 0.3732, + "step": 2115 + }, + { + "epoch": 0.57, + "learning_rate": 4.6464697213290466e-05, + "loss": 0.3438, + "step": 2116 + }, + { + "epoch": 0.57, + "learning_rate": 4.646302250803859e-05, + "loss": 0.3389, + "step": 2117 + }, + { + "epoch": 0.57, + "learning_rate": 4.6461347802786715e-05, + "loss": 0.3743, + "step": 2118 + }, + { + "epoch": 0.57, + "learning_rate": 4.645967309753483e-05, + "loss": 0.3377, + "step": 2119 + }, + { + "epoch": 0.57, + "learning_rate": 4.6457998392282956e-05, + "loss": 0.3746, + "step": 2120 + }, + { + "epoch": 0.57, + "learning_rate": 4.645632368703108e-05, + "loss": 0.3371, + "step": 2121 + }, + { + "epoch": 0.57, + "learning_rate": 4.645464898177921e-05, + "loss": 0.2883, + "step": 2122 + }, + { + "epoch": 0.57, + "learning_rate": 4.6452974276527336e-05, + "loss": 0.3663, + "step": 2123 + }, + { + "epoch": 0.57, + "learning_rate": 4.645129957127546e-05, + "loss": 0.7113, + "step": 2124 + }, + { + "epoch": 0.57, + "learning_rate": 4.6449624866023584e-05, + "loss": 0.4889, + "step": 2125 + }, + { + "epoch": 0.57, + "learning_rate": 4.644795016077171e-05, + "loss": 0.23, + "step": 2126 + }, + { + "epoch": 0.57, + "learning_rate": 4.644627545551983e-05, + "loss": 0.6056, + "step": 2127 + }, + { + "epoch": 0.57, + "learning_rate": 4.644460075026795e-05, + "loss": 0.5941, + "step": 2128 + }, + { + "epoch": 0.57, + "learning_rate": 4.6442926045016074e-05, + "loss": 0.3281, + "step": 2129 + }, + { + "epoch": 0.57, + "learning_rate": 4.6441251339764205e-05, + "loss": 0.3197, + "step": 2130 + }, + { + "epoch": 0.57, + "learning_rate": 4.643957663451233e-05, + "loss": 0.2801, + "step": 2131 + }, + { + "epoch": 0.57, + "learning_rate": 4.6437901929260454e-05, + "loss": 0.301, + "step": 2132 + }, + { + "epoch": 0.57, + "learning_rate": 4.643622722400858e-05, + "loss": 0.3213, + "step": 2133 + }, + { + "epoch": 0.57, + "learning_rate": 4.64345525187567e-05, + "loss": 0.1977, + "step": 2134 + }, + { + "epoch": 0.57, + "learning_rate": 4.6432877813504826e-05, + "loss": 0.4045, + "step": 2135 + }, + { + "epoch": 0.57, + "learning_rate": 4.643120310825295e-05, + "loss": 0.3129, + "step": 2136 + }, + { + "epoch": 0.57, + "learning_rate": 4.6429528403001075e-05, + "loss": 0.3371, + "step": 2137 + }, + { + "epoch": 0.57, + "learning_rate": 4.64278536977492e-05, + "loss": 0.621, + "step": 2138 + }, + { + "epoch": 0.57, + "learning_rate": 4.642617899249732e-05, + "loss": 0.2652, + "step": 2139 + }, + { + "epoch": 0.57, + "learning_rate": 4.642450428724545e-05, + "loss": 0.3543, + "step": 2140 + }, + { + "epoch": 0.57, + "learning_rate": 4.642282958199357e-05, + "loss": 0.2834, + "step": 2141 + }, + { + "epoch": 0.57, + "learning_rate": 4.6421154876741696e-05, + "loss": 0.3276, + "step": 2142 + }, + { + "epoch": 0.57, + "learning_rate": 4.641948017148982e-05, + "loss": 0.7317, + "step": 2143 + }, + { + "epoch": 0.57, + "learning_rate": 4.6417805466237944e-05, + "loss": 0.5443, + "step": 2144 + }, + { + "epoch": 0.57, + "learning_rate": 4.641613076098607e-05, + "loss": 0.2901, + "step": 2145 + }, + { + "epoch": 0.58, + "learning_rate": 4.641445605573419e-05, + "loss": 0.315, + "step": 2146 + }, + { + "epoch": 0.58, + "learning_rate": 4.641278135048232e-05, + "loss": 0.2284, + "step": 2147 + }, + { + "epoch": 0.58, + "learning_rate": 4.641110664523044e-05, + "loss": 0.4134, + "step": 2148 + }, + { + "epoch": 0.58, + "learning_rate": 4.6409431939978565e-05, + "loss": 0.2695, + "step": 2149 + }, + { + "epoch": 0.58, + "learning_rate": 4.640775723472669e-05, + "loss": 0.3747, + "step": 2150 + }, + { + "epoch": 0.58, + "learning_rate": 4.6406082529474814e-05, + "loss": 0.3379, + "step": 2151 + }, + { + "epoch": 0.58, + "learning_rate": 4.640440782422294e-05, + "loss": 0.3452, + "step": 2152 + }, + { + "epoch": 0.58, + "learning_rate": 4.640273311897106e-05, + "loss": 0.2672, + "step": 2153 + }, + { + "epoch": 0.58, + "learning_rate": 4.6401058413719186e-05, + "loss": 0.3504, + "step": 2154 + }, + { + "epoch": 0.58, + "learning_rate": 4.639938370846732e-05, + "loss": 0.2561, + "step": 2155 + }, + { + "epoch": 0.58, + "learning_rate": 4.6397709003215435e-05, + "loss": 0.5252, + "step": 2156 + }, + { + "epoch": 0.58, + "learning_rate": 4.639603429796356e-05, + "loss": 0.4219, + "step": 2157 + }, + { + "epoch": 0.58, + "learning_rate": 4.639435959271168e-05, + "loss": 0.2394, + "step": 2158 + }, + { + "epoch": 0.58, + "learning_rate": 4.639268488745981e-05, + "loss": 0.3118, + "step": 2159 + }, + { + "epoch": 0.58, + "learning_rate": 4.639101018220793e-05, + "loss": 0.5567, + "step": 2160 + }, + { + "epoch": 0.58, + "learning_rate": 4.6389335476956056e-05, + "loss": 0.2739, + "step": 2161 + }, + { + "epoch": 0.58, + "learning_rate": 4.6387660771704187e-05, + "loss": 0.2278, + "step": 2162 + }, + { + "epoch": 0.58, + "learning_rate": 4.638598606645231e-05, + "loss": 0.2763, + "step": 2163 + }, + { + "epoch": 0.58, + "learning_rate": 4.6384311361200435e-05, + "loss": 0.2413, + "step": 2164 + }, + { + "epoch": 0.58, + "learning_rate": 4.638263665594855e-05, + "loss": 0.2803, + "step": 2165 + }, + { + "epoch": 0.58, + "learning_rate": 4.638096195069668e-05, + "loss": 0.2345, + "step": 2166 + }, + { + "epoch": 0.58, + "learning_rate": 4.63792872454448e-05, + "loss": 0.3308, + "step": 2167 + }, + { + "epoch": 0.58, + "learning_rate": 4.6377612540192925e-05, + "loss": 0.2793, + "step": 2168 + }, + { + "epoch": 0.58, + "learning_rate": 4.637593783494105e-05, + "loss": 0.2445, + "step": 2169 + }, + { + "epoch": 0.58, + "learning_rate": 4.637426312968918e-05, + "loss": 0.4227, + "step": 2170 + }, + { + "epoch": 0.58, + "learning_rate": 4.6372588424437304e-05, + "loss": 0.2614, + "step": 2171 + }, + { + "epoch": 0.58, + "learning_rate": 4.637091371918543e-05, + "loss": 0.2323, + "step": 2172 + }, + { + "epoch": 0.58, + "learning_rate": 4.636923901393355e-05, + "loss": 0.5715, + "step": 2173 + }, + { + "epoch": 0.58, + "learning_rate": 4.636756430868168e-05, + "loss": 0.7111, + "step": 2174 + }, + { + "epoch": 0.58, + "learning_rate": 4.6365889603429795e-05, + "loss": 0.55, + "step": 2175 + }, + { + "epoch": 0.58, + "learning_rate": 4.636421489817792e-05, + "loss": 0.7089, + "step": 2176 + }, + { + "epoch": 0.58, + "learning_rate": 4.636254019292604e-05, + "loss": 0.2867, + "step": 2177 + }, + { + "epoch": 0.58, + "learning_rate": 4.6360865487674174e-05, + "loss": 0.3507, + "step": 2178 + }, + { + "epoch": 0.58, + "learning_rate": 4.63591907824223e-05, + "loss": 0.3407, + "step": 2179 + }, + { + "epoch": 0.58, + "learning_rate": 4.635751607717042e-05, + "loss": 0.2953, + "step": 2180 + }, + { + "epoch": 0.58, + "learning_rate": 4.6355841371918547e-05, + "loss": 0.2756, + "step": 2181 + }, + { + "epoch": 0.58, + "learning_rate": 4.635416666666667e-05, + "loss": 0.2507, + "step": 2182 + }, + { + "epoch": 0.58, + "learning_rate": 4.6352491961414795e-05, + "loss": 0.4567, + "step": 2183 + }, + { + "epoch": 0.59, + "learning_rate": 4.635081725616291e-05, + "loss": 0.2688, + "step": 2184 + }, + { + "epoch": 0.59, + "learning_rate": 4.634914255091104e-05, + "loss": 0.245, + "step": 2185 + }, + { + "epoch": 0.59, + "learning_rate": 4.634746784565917e-05, + "loss": 0.2871, + "step": 2186 + }, + { + "epoch": 0.59, + "learning_rate": 4.634579314040729e-05, + "loss": 0.3715, + "step": 2187 + }, + { + "epoch": 0.59, + "learning_rate": 4.6344118435155416e-05, + "loss": 0.2687, + "step": 2188 + }, + { + "epoch": 0.59, + "learning_rate": 4.634244372990354e-05, + "loss": 0.3055, + "step": 2189 + }, + { + "epoch": 0.59, + "learning_rate": 4.6340769024651664e-05, + "loss": 0.4772, + "step": 2190 + }, + { + "epoch": 0.59, + "learning_rate": 4.633909431939979e-05, + "loss": 0.2376, + "step": 2191 + }, + { + "epoch": 0.59, + "learning_rate": 4.633741961414791e-05, + "loss": 0.2728, + "step": 2192 + }, + { + "epoch": 0.59, + "learning_rate": 4.633574490889604e-05, + "loss": 0.4352, + "step": 2193 + }, + { + "epoch": 0.59, + "learning_rate": 4.633407020364416e-05, + "loss": 0.3839, + "step": 2194 + }, + { + "epoch": 0.59, + "learning_rate": 4.6332395498392286e-05, + "loss": 0.5803, + "step": 2195 + }, + { + "epoch": 0.59, + "learning_rate": 4.633072079314041e-05, + "loss": 0.6643, + "step": 2196 + }, + { + "epoch": 0.59, + "learning_rate": 4.6329046087888534e-05, + "loss": 0.2539, + "step": 2197 + }, + { + "epoch": 0.59, + "learning_rate": 4.632737138263666e-05, + "loss": 0.4251, + "step": 2198 + }, + { + "epoch": 0.59, + "learning_rate": 4.632569667738478e-05, + "loss": 0.4488, + "step": 2199 + }, + { + "epoch": 0.59, + "learning_rate": 4.6324021972132907e-05, + "loss": 0.3214, + "step": 2200 + }, + { + "epoch": 0.59, + "learning_rate": 4.632234726688103e-05, + "loss": 0.5559, + "step": 2201 + }, + { + "epoch": 0.59, + "learning_rate": 4.6320672561629155e-05, + "loss": 0.2107, + "step": 2202 + }, + { + "epoch": 0.59, + "learning_rate": 4.631899785637728e-05, + "loss": 0.2856, + "step": 2203 + }, + { + "epoch": 0.59, + "learning_rate": 4.6317323151125403e-05, + "loss": 0.2565, + "step": 2204 + }, + { + "epoch": 0.59, + "learning_rate": 4.631564844587353e-05, + "loss": 0.6104, + "step": 2205 + }, + { + "epoch": 0.59, + "learning_rate": 4.631397374062165e-05, + "loss": 0.4981, + "step": 2206 + }, + { + "epoch": 0.59, + "learning_rate": 4.6312299035369776e-05, + "loss": 0.2606, + "step": 2207 + }, + { + "epoch": 0.59, + "learning_rate": 4.63106243301179e-05, + "loss": 0.3013, + "step": 2208 + }, + { + "epoch": 0.59, + "learning_rate": 4.6308949624866024e-05, + "loss": 0.4347, + "step": 2209 + }, + { + "epoch": 0.59, + "learning_rate": 4.630727491961415e-05, + "loss": 0.2558, + "step": 2210 + }, + { + "epoch": 0.59, + "learning_rate": 4.630560021436228e-05, + "loss": 0.2477, + "step": 2211 + }, + { + "epoch": 0.59, + "learning_rate": 4.63039255091104e-05, + "loss": 0.197, + "step": 2212 + }, + { + "epoch": 0.59, + "learning_rate": 4.630225080385852e-05, + "loss": 0.2886, + "step": 2213 + }, + { + "epoch": 0.59, + "learning_rate": 4.6300576098606645e-05, + "loss": 0.2323, + "step": 2214 + }, + { + "epoch": 0.59, + "learning_rate": 4.629890139335477e-05, + "loss": 0.3503, + "step": 2215 + }, + { + "epoch": 0.59, + "learning_rate": 4.6297226688102894e-05, + "loss": 0.5429, + "step": 2216 + }, + { + "epoch": 0.59, + "learning_rate": 4.629555198285102e-05, + "loss": 0.2977, + "step": 2217 + }, + { + "epoch": 0.59, + "learning_rate": 4.629387727759915e-05, + "loss": 0.3467, + "step": 2218 + }, + { + "epoch": 0.59, + "learning_rate": 4.629220257234727e-05, + "loss": 0.525, + "step": 2219 + }, + { + "epoch": 0.59, + "learning_rate": 4.62905278670954e-05, + "loss": 0.2738, + "step": 2220 + }, + { + "epoch": 0.6, + "learning_rate": 4.6288853161843515e-05, + "loss": 0.9795, + "step": 2221 + }, + { + "epoch": 0.6, + "learning_rate": 4.628717845659164e-05, + "loss": 0.4563, + "step": 2222 + }, + { + "epoch": 0.6, + "learning_rate": 4.628550375133976e-05, + "loss": 0.4951, + "step": 2223 + }, + { + "epoch": 0.6, + "learning_rate": 4.628382904608789e-05, + "loss": 0.2436, + "step": 2224 + }, + { + "epoch": 0.6, + "learning_rate": 4.628215434083601e-05, + "loss": 0.2374, + "step": 2225 + }, + { + "epoch": 0.6, + "learning_rate": 4.628047963558414e-05, + "loss": 0.344, + "step": 2226 + }, + { + "epoch": 0.6, + "learning_rate": 4.627880493033227e-05, + "loss": 0.5227, + "step": 2227 + }, + { + "epoch": 0.6, + "learning_rate": 4.627713022508039e-05, + "loss": 0.3327, + "step": 2228 + }, + { + "epoch": 0.6, + "learning_rate": 4.6275455519828515e-05, + "loss": 0.2868, + "step": 2229 + }, + { + "epoch": 0.6, + "learning_rate": 4.627378081457664e-05, + "loss": 0.4423, + "step": 2230 + }, + { + "epoch": 0.6, + "learning_rate": 4.627210610932476e-05, + "loss": 0.5511, + "step": 2231 + }, + { + "epoch": 0.6, + "learning_rate": 4.627043140407288e-05, + "loss": 0.2902, + "step": 2232 + }, + { + "epoch": 0.6, + "learning_rate": 4.6268756698821005e-05, + "loss": 0.5033, + "step": 2233 + }, + { + "epoch": 0.6, + "learning_rate": 4.6267081993569136e-05, + "loss": 0.3914, + "step": 2234 + }, + { + "epoch": 0.6, + "learning_rate": 4.626540728831726e-05, + "loss": 0.2809, + "step": 2235 + }, + { + "epoch": 0.6, + "learning_rate": 4.6263732583065385e-05, + "loss": 0.4339, + "step": 2236 + }, + { + "epoch": 0.6, + "learning_rate": 4.626205787781351e-05, + "loss": 0.3286, + "step": 2237 + }, + { + "epoch": 0.6, + "learning_rate": 4.626038317256163e-05, + "loss": 0.3613, + "step": 2238 + }, + { + "epoch": 0.6, + "learning_rate": 4.625870846730976e-05, + "loss": 0.2461, + "step": 2239 + }, + { + "epoch": 0.6, + "learning_rate": 4.6257033762057875e-05, + "loss": 0.308, + "step": 2240 + }, + { + "epoch": 0.6, + "learning_rate": 4.6255359056806e-05, + "loss": 0.229, + "step": 2241 + }, + { + "epoch": 0.6, + "learning_rate": 4.625368435155413e-05, + "loss": 0.2492, + "step": 2242 + }, + { + "epoch": 0.6, + "learning_rate": 4.6252009646302254e-05, + "loss": 0.4651, + "step": 2243 + }, + { + "epoch": 0.6, + "learning_rate": 4.625033494105038e-05, + "loss": 0.4385, + "step": 2244 + }, + { + "epoch": 0.6, + "learning_rate": 4.62486602357985e-05, + "loss": 0.3455, + "step": 2245 + }, + { + "epoch": 0.6, + "learning_rate": 4.624698553054663e-05, + "loss": 0.225, + "step": 2246 + }, + { + "epoch": 0.6, + "learning_rate": 4.624531082529475e-05, + "loss": 0.2951, + "step": 2247 + }, + { + "epoch": 0.6, + "learning_rate": 4.6243636120042875e-05, + "loss": 0.5389, + "step": 2248 + }, + { + "epoch": 0.6, + "learning_rate": 4.6241961414791e-05, + "loss": 0.3506, + "step": 2249 + }, + { + "epoch": 0.6, + "learning_rate": 4.6240286709539124e-05, + "loss": 0.3304, + "step": 2250 + }, + { + "epoch": 0.6, + "learning_rate": 4.623861200428725e-05, + "loss": 0.2126, + "step": 2251 + }, + { + "epoch": 0.6, + "learning_rate": 4.623693729903537e-05, + "loss": 0.254, + "step": 2252 + }, + { + "epoch": 0.6, + "learning_rate": 4.6235262593783496e-05, + "loss": 0.6119, + "step": 2253 + }, + { + "epoch": 0.6, + "learning_rate": 4.623358788853162e-05, + "loss": 0.3954, + "step": 2254 + }, + { + "epoch": 0.6, + "learning_rate": 4.6231913183279745e-05, + "loss": 0.33, + "step": 2255 + }, + { + "epoch": 0.6, + "learning_rate": 4.623023847802787e-05, + "loss": 0.3862, + "step": 2256 + }, + { + "epoch": 0.6, + "learning_rate": 4.622856377277599e-05, + "loss": 0.5253, + "step": 2257 + }, + { + "epoch": 0.61, + "learning_rate": 4.622688906752412e-05, + "loss": 0.287, + "step": 2258 + }, + { + "epoch": 0.61, + "learning_rate": 4.622521436227224e-05, + "loss": 0.5021, + "step": 2259 + }, + { + "epoch": 0.61, + "learning_rate": 4.6223539657020366e-05, + "loss": 0.2782, + "step": 2260 + }, + { + "epoch": 0.61, + "learning_rate": 4.622186495176849e-05, + "loss": 0.4318, + "step": 2261 + }, + { + "epoch": 0.61, + "learning_rate": 4.6220190246516614e-05, + "loss": 0.2445, + "step": 2262 + }, + { + "epoch": 0.61, + "learning_rate": 4.621851554126474e-05, + "loss": 0.2723, + "step": 2263 + }, + { + "epoch": 0.61, + "learning_rate": 4.621684083601286e-05, + "loss": 0.5292, + "step": 2264 + }, + { + "epoch": 0.61, + "learning_rate": 4.621516613076099e-05, + "loss": 0.3733, + "step": 2265 + }, + { + "epoch": 0.61, + "learning_rate": 4.621349142550911e-05, + "loss": 0.212, + "step": 2266 + }, + { + "epoch": 0.61, + "learning_rate": 4.621181672025724e-05, + "loss": 0.513, + "step": 2267 + }, + { + "epoch": 0.61, + "learning_rate": 4.621014201500536e-05, + "loss": 0.4008, + "step": 2268 + }, + { + "epoch": 0.61, + "learning_rate": 4.6208467309753484e-05, + "loss": 0.5834, + "step": 2269 + }, + { + "epoch": 0.61, + "learning_rate": 4.620679260450161e-05, + "loss": 0.3282, + "step": 2270 + }, + { + "epoch": 0.61, + "learning_rate": 4.620511789924973e-05, + "loss": 0.3116, + "step": 2271 + }, + { + "epoch": 0.61, + "learning_rate": 4.6203443193997856e-05, + "loss": 0.4448, + "step": 2272 + }, + { + "epoch": 0.61, + "learning_rate": 4.620176848874598e-05, + "loss": 0.2528, + "step": 2273 + }, + { + "epoch": 0.61, + "learning_rate": 4.620009378349411e-05, + "loss": 0.3045, + "step": 2274 + }, + { + "epoch": 0.61, + "learning_rate": 4.6198419078242236e-05, + "loss": 0.3142, + "step": 2275 + }, + { + "epoch": 0.61, + "learning_rate": 4.619674437299036e-05, + "loss": 0.2092, + "step": 2276 + }, + { + "epoch": 0.61, + "learning_rate": 4.619506966773848e-05, + "loss": 0.2117, + "step": 2277 + }, + { + "epoch": 0.61, + "learning_rate": 4.61933949624866e-05, + "loss": 0.3122, + "step": 2278 + }, + { + "epoch": 0.61, + "learning_rate": 4.6191720257234726e-05, + "loss": 0.2098, + "step": 2279 + }, + { + "epoch": 0.61, + "learning_rate": 4.619004555198285e-05, + "loss": 0.2679, + "step": 2280 + }, + { + "epoch": 0.61, + "learning_rate": 4.6188370846730974e-05, + "loss": 0.2796, + "step": 2281 + }, + { + "epoch": 0.61, + "learning_rate": 4.6186696141479105e-05, + "loss": 0.2362, + "step": 2282 + }, + { + "epoch": 0.61, + "learning_rate": 4.618502143622723e-05, + "loss": 0.4297, + "step": 2283 + }, + { + "epoch": 0.61, + "learning_rate": 4.6183346730975354e-05, + "loss": 0.2592, + "step": 2284 + }, + { + "epoch": 0.61, + "learning_rate": 4.618167202572348e-05, + "loss": 0.4216, + "step": 2285 + }, + { + "epoch": 0.61, + "learning_rate": 4.61799973204716e-05, + "loss": 0.2267, + "step": 2286 + }, + { + "epoch": 0.61, + "learning_rate": 4.617832261521972e-05, + "loss": 0.3613, + "step": 2287 + }, + { + "epoch": 0.61, + "learning_rate": 4.6176647909967844e-05, + "loss": 0.2374, + "step": 2288 + }, + { + "epoch": 0.61, + "learning_rate": 4.617497320471597e-05, + "loss": 0.2869, + "step": 2289 + }, + { + "epoch": 0.61, + "learning_rate": 4.61732984994641e-05, + "loss": 0.2579, + "step": 2290 + }, + { + "epoch": 0.61, + "learning_rate": 4.617162379421222e-05, + "loss": 0.3054, + "step": 2291 + }, + { + "epoch": 0.61, + "learning_rate": 4.616994908896035e-05, + "loss": 0.2222, + "step": 2292 + }, + { + "epoch": 0.61, + "learning_rate": 4.616827438370847e-05, + "loss": 0.2909, + "step": 2293 + }, + { + "epoch": 0.61, + "learning_rate": 4.6166599678456596e-05, + "loss": 0.4101, + "step": 2294 + }, + { + "epoch": 0.61, + "learning_rate": 4.616492497320472e-05, + "loss": 0.5564, + "step": 2295 + }, + { + "epoch": 0.62, + "learning_rate": 4.616325026795284e-05, + "loss": 0.4815, + "step": 2296 + }, + { + "epoch": 0.62, + "learning_rate": 4.616157556270096e-05, + "loss": 0.288, + "step": 2297 + }, + { + "epoch": 0.62, + "learning_rate": 4.615990085744909e-05, + "loss": 0.3936, + "step": 2298 + }, + { + "epoch": 0.62, + "learning_rate": 4.615822615219722e-05, + "loss": 0.3086, + "step": 2299 + }, + { + "epoch": 0.62, + "learning_rate": 4.615655144694534e-05, + "loss": 0.339, + "step": 2300 + }, + { + "epoch": 0.62, + "learning_rate": 4.6154876741693465e-05, + "loss": 0.2315, + "step": 2301 + }, + { + "epoch": 0.62, + "learning_rate": 4.615320203644159e-05, + "loss": 0.3954, + "step": 2302 + }, + { + "epoch": 0.62, + "learning_rate": 4.6151527331189714e-05, + "loss": 0.4816, + "step": 2303 + }, + { + "epoch": 0.62, + "learning_rate": 4.614985262593784e-05, + "loss": 0.3043, + "step": 2304 + }, + { + "epoch": 0.62, + "learning_rate": 4.614817792068596e-05, + "loss": 0.2191, + "step": 2305 + }, + { + "epoch": 0.62, + "learning_rate": 4.6146503215434086e-05, + "loss": 0.3815, + "step": 2306 + }, + { + "epoch": 0.62, + "learning_rate": 4.614482851018221e-05, + "loss": 0.2733, + "step": 2307 + }, + { + "epoch": 0.62, + "learning_rate": 4.6143153804930335e-05, + "loss": 0.2144, + "step": 2308 + }, + { + "epoch": 0.62, + "learning_rate": 4.614147909967846e-05, + "loss": 0.4787, + "step": 2309 + }, + { + "epoch": 0.62, + "learning_rate": 4.613980439442658e-05, + "loss": 0.2531, + "step": 2310 + }, + { + "epoch": 0.62, + "learning_rate": 4.613812968917471e-05, + "loss": 0.451, + "step": 2311 + }, + { + "epoch": 0.62, + "learning_rate": 4.613645498392283e-05, + "loss": 0.25, + "step": 2312 + }, + { + "epoch": 0.62, + "learning_rate": 4.6134780278670956e-05, + "loss": 0.3498, + "step": 2313 + }, + { + "epoch": 0.62, + "learning_rate": 4.613310557341908e-05, + "loss": 0.2857, + "step": 2314 + }, + { + "epoch": 0.62, + "learning_rate": 4.6131430868167204e-05, + "loss": 0.4198, + "step": 2315 + }, + { + "epoch": 0.62, + "learning_rate": 4.612975616291533e-05, + "loss": 0.253, + "step": 2316 + }, + { + "epoch": 0.62, + "learning_rate": 4.612808145766345e-05, + "loss": 0.2898, + "step": 2317 + }, + { + "epoch": 0.62, + "learning_rate": 4.612640675241158e-05, + "loss": 0.2611, + "step": 2318 + }, + { + "epoch": 0.62, + "learning_rate": 4.61247320471597e-05, + "loss": 0.3101, + "step": 2319 + }, + { + "epoch": 0.62, + "learning_rate": 4.6123057341907825e-05, + "loss": 0.3269, + "step": 2320 + }, + { + "epoch": 0.62, + "learning_rate": 4.612138263665595e-05, + "loss": 0.3272, + "step": 2321 + }, + { + "epoch": 0.62, + "learning_rate": 4.6119707931404074e-05, + "loss": 0.6007, + "step": 2322 + }, + { + "epoch": 0.62, + "learning_rate": 4.6118033226152205e-05, + "loss": 0.2449, + "step": 2323 + }, + { + "epoch": 0.62, + "learning_rate": 4.611635852090032e-05, + "loss": 0.3885, + "step": 2324 + }, + { + "epoch": 0.62, + "learning_rate": 4.6114683815648446e-05, + "loss": 0.3719, + "step": 2325 + }, + { + "epoch": 0.62, + "learning_rate": 4.611300911039657e-05, + "loss": 0.4426, + "step": 2326 + }, + { + "epoch": 0.62, + "learning_rate": 4.6111334405144695e-05, + "loss": 0.3104, + "step": 2327 + }, + { + "epoch": 0.62, + "learning_rate": 4.610965969989282e-05, + "loss": 0.4794, + "step": 2328 + }, + { + "epoch": 0.62, + "learning_rate": 4.610798499464094e-05, + "loss": 0.3531, + "step": 2329 + }, + { + "epoch": 0.62, + "learning_rate": 4.6106310289389074e-05, + "loss": 0.2332, + "step": 2330 + }, + { + "epoch": 0.62, + "learning_rate": 4.61046355841372e-05, + "loss": 0.3671, + "step": 2331 + }, + { + "epoch": 0.62, + "learning_rate": 4.610296087888532e-05, + "loss": 0.3025, + "step": 2332 + }, + { + "epoch": 0.63, + "learning_rate": 4.610128617363344e-05, + "loss": 0.4581, + "step": 2333 + }, + { + "epoch": 0.63, + "learning_rate": 4.6099611468381564e-05, + "loss": 0.3124, + "step": 2334 + }, + { + "epoch": 0.63, + "learning_rate": 4.609793676312969e-05, + "loss": 0.4297, + "step": 2335 + }, + { + "epoch": 0.63, + "learning_rate": 4.609626205787781e-05, + "loss": 0.2376, + "step": 2336 + }, + { + "epoch": 0.63, + "learning_rate": 4.609458735262594e-05, + "loss": 0.4735, + "step": 2337 + }, + { + "epoch": 0.63, + "learning_rate": 4.609291264737407e-05, + "loss": 0.2606, + "step": 2338 + }, + { + "epoch": 0.63, + "learning_rate": 4.609123794212219e-05, + "loss": 0.3189, + "step": 2339 + }, + { + "epoch": 0.63, + "learning_rate": 4.6089563236870316e-05, + "loss": 0.2371, + "step": 2340 + }, + { + "epoch": 0.63, + "learning_rate": 4.608788853161844e-05, + "loss": 0.2099, + "step": 2341 + }, + { + "epoch": 0.63, + "learning_rate": 4.6086213826366565e-05, + "loss": 0.337, + "step": 2342 + }, + { + "epoch": 0.63, + "learning_rate": 4.608453912111468e-05, + "loss": 0.2644, + "step": 2343 + }, + { + "epoch": 0.63, + "learning_rate": 4.6082864415862806e-05, + "loss": 0.2229, + "step": 2344 + }, + { + "epoch": 0.63, + "learning_rate": 4.608118971061093e-05, + "loss": 0.3333, + "step": 2345 + }, + { + "epoch": 0.63, + "learning_rate": 4.607951500535906e-05, + "loss": 0.3963, + "step": 2346 + }, + { + "epoch": 0.63, + "learning_rate": 4.6077840300107186e-05, + "loss": 0.2194, + "step": 2347 + }, + { + "epoch": 0.63, + "learning_rate": 4.607616559485531e-05, + "loss": 0.4317, + "step": 2348 + }, + { + "epoch": 0.63, + "learning_rate": 4.6074490889603434e-05, + "loss": 0.2989, + "step": 2349 + }, + { + "epoch": 0.63, + "learning_rate": 4.607281618435156e-05, + "loss": 0.2536, + "step": 2350 + }, + { + "epoch": 0.63, + "learning_rate": 4.607114147909968e-05, + "loss": 0.2039, + "step": 2351 + }, + { + "epoch": 0.63, + "learning_rate": 4.60694667738478e-05, + "loss": 0.6608, + "step": 2352 + }, + { + "epoch": 0.63, + "learning_rate": 4.6067792068595924e-05, + "loss": 0.244, + "step": 2353 + }, + { + "epoch": 0.63, + "learning_rate": 4.6066117363344055e-05, + "loss": 0.2941, + "step": 2354 + }, + { + "epoch": 0.63, + "learning_rate": 4.606444265809218e-05, + "loss": 0.2069, + "step": 2355 + }, + { + "epoch": 0.63, + "learning_rate": 4.6062767952840304e-05, + "loss": 0.3928, + "step": 2356 + }, + { + "epoch": 0.63, + "learning_rate": 4.606109324758843e-05, + "loss": 0.2664, + "step": 2357 + }, + { + "epoch": 0.63, + "learning_rate": 4.605941854233655e-05, + "loss": 0.3085, + "step": 2358 + }, + { + "epoch": 0.63, + "learning_rate": 4.6057743837084676e-05, + "loss": 0.3331, + "step": 2359 + }, + { + "epoch": 0.63, + "learning_rate": 4.60560691318328e-05, + "loss": 0.3895, + "step": 2360 + }, + { + "epoch": 0.63, + "learning_rate": 4.6054394426580925e-05, + "loss": 0.3152, + "step": 2361 + }, + { + "epoch": 0.63, + "learning_rate": 4.605271972132905e-05, + "loss": 0.5069, + "step": 2362 + }, + { + "epoch": 0.63, + "learning_rate": 4.605104501607717e-05, + "loss": 0.2115, + "step": 2363 + }, + { + "epoch": 0.63, + "learning_rate": 4.60493703108253e-05, + "loss": 0.2737, + "step": 2364 + }, + { + "epoch": 0.63, + "learning_rate": 4.604769560557342e-05, + "loss": 0.3509, + "step": 2365 + }, + { + "epoch": 0.63, + "learning_rate": 4.6046020900321546e-05, + "loss": 0.4231, + "step": 2366 + }, + { + "epoch": 0.63, + "learning_rate": 4.604434619506967e-05, + "loss": 0.1976, + "step": 2367 + }, + { + "epoch": 0.63, + "learning_rate": 4.6042671489817794e-05, + "loss": 0.2162, + "step": 2368 + }, + { + "epoch": 0.63, + "learning_rate": 4.604099678456592e-05, + "loss": 0.5174, + "step": 2369 + }, + { + "epoch": 0.64, + "learning_rate": 4.603932207931404e-05, + "loss": 0.4244, + "step": 2370 + }, + { + "epoch": 0.64, + "learning_rate": 4.603764737406217e-05, + "loss": 0.3703, + "step": 2371 + }, + { + "epoch": 0.64, + "learning_rate": 4.603597266881029e-05, + "loss": 0.3126, + "step": 2372 + }, + { + "epoch": 0.64, + "learning_rate": 4.6034297963558415e-05, + "loss": 0.2961, + "step": 2373 + }, + { + "epoch": 0.64, + "learning_rate": 4.603262325830654e-05, + "loss": 0.3988, + "step": 2374 + }, + { + "epoch": 0.64, + "learning_rate": 4.6030948553054664e-05, + "loss": 0.3334, + "step": 2375 + }, + { + "epoch": 0.64, + "learning_rate": 4.602927384780279e-05, + "loss": 0.4051, + "step": 2376 + }, + { + "epoch": 0.64, + "learning_rate": 4.602759914255091e-05, + "loss": 0.3387, + "step": 2377 + }, + { + "epoch": 0.64, + "learning_rate": 4.6025924437299036e-05, + "loss": 0.3755, + "step": 2378 + }, + { + "epoch": 0.64, + "learning_rate": 4.602424973204717e-05, + "loss": 0.2748, + "step": 2379 + }, + { + "epoch": 0.64, + "learning_rate": 4.6022575026795285e-05, + "loss": 0.2556, + "step": 2380 + }, + { + "epoch": 0.64, + "learning_rate": 4.602090032154341e-05, + "loss": 0.3818, + "step": 2381 + }, + { + "epoch": 0.64, + "learning_rate": 4.601922561629153e-05, + "loss": 0.2917, + "step": 2382 + }, + { + "epoch": 0.64, + "learning_rate": 4.601755091103966e-05, + "loss": 0.2825, + "step": 2383 + }, + { + "epoch": 0.64, + "learning_rate": 4.601587620578778e-05, + "loss": 0.425, + "step": 2384 + }, + { + "epoch": 0.64, + "learning_rate": 4.6014201500535906e-05, + "loss": 0.2561, + "step": 2385 + }, + { + "epoch": 0.64, + "learning_rate": 4.601252679528404e-05, + "loss": 0.5152, + "step": 2386 + }, + { + "epoch": 0.64, + "learning_rate": 4.601085209003216e-05, + "loss": 0.2311, + "step": 2387 + }, + { + "epoch": 0.64, + "learning_rate": 4.6009177384780285e-05, + "loss": 0.4792, + "step": 2388 + }, + { + "epoch": 0.64, + "learning_rate": 4.60075026795284e-05, + "loss": 0.3016, + "step": 2389 + }, + { + "epoch": 0.64, + "learning_rate": 4.600582797427653e-05, + "loss": 0.367, + "step": 2390 + }, + { + "epoch": 0.64, + "learning_rate": 4.600415326902465e-05, + "loss": 0.3641, + "step": 2391 + }, + { + "epoch": 0.64, + "learning_rate": 4.6002478563772775e-05, + "loss": 0.2766, + "step": 2392 + }, + { + "epoch": 0.64, + "learning_rate": 4.60008038585209e-05, + "loss": 0.2183, + "step": 2393 + }, + { + "epoch": 0.64, + "learning_rate": 4.599912915326903e-05, + "loss": 0.3974, + "step": 2394 + }, + { + "epoch": 0.64, + "learning_rate": 4.5997454448017155e-05, + "loss": 0.5415, + "step": 2395 + }, + { + "epoch": 0.64, + "learning_rate": 4.599577974276528e-05, + "loss": 0.2876, + "step": 2396 + }, + { + "epoch": 0.64, + "learning_rate": 4.59941050375134e-05, + "loss": 0.2701, + "step": 2397 + }, + { + "epoch": 0.64, + "learning_rate": 4.599243033226153e-05, + "loss": 0.2625, + "step": 2398 + }, + { + "epoch": 0.64, + "learning_rate": 4.5990755627009645e-05, + "loss": 0.4743, + "step": 2399 + }, + { + "epoch": 0.64, + "learning_rate": 4.598908092175777e-05, + "loss": 0.278, + "step": 2400 + }, + { + "epoch": 0.64, + "learning_rate": 4.598740621650589e-05, + "loss": 0.5101, + "step": 2401 + }, + { + "epoch": 0.64, + "learning_rate": 4.5985731511254024e-05, + "loss": 0.2818, + "step": 2402 + }, + { + "epoch": 0.64, + "learning_rate": 4.598405680600215e-05, + "loss": 0.3106, + "step": 2403 + }, + { + "epoch": 0.64, + "learning_rate": 4.598238210075027e-05, + "loss": 0.262, + "step": 2404 + }, + { + "epoch": 0.64, + "learning_rate": 4.59807073954984e-05, + "loss": 0.2549, + "step": 2405 + }, + { + "epoch": 0.64, + "learning_rate": 4.597903269024652e-05, + "loss": 0.4603, + "step": 2406 + }, + { + "epoch": 0.64, + "learning_rate": 4.5977357984994645e-05, + "loss": 0.416, + "step": 2407 + }, + { + "epoch": 0.65, + "learning_rate": 4.597568327974276e-05, + "loss": 0.3537, + "step": 2408 + }, + { + "epoch": 0.65, + "learning_rate": 4.597400857449089e-05, + "loss": 0.2823, + "step": 2409 + }, + { + "epoch": 0.65, + "learning_rate": 4.597233386923902e-05, + "loss": 0.3921, + "step": 2410 + }, + { + "epoch": 0.65, + "learning_rate": 4.597065916398714e-05, + "loss": 0.2153, + "step": 2411 + }, + { + "epoch": 0.65, + "learning_rate": 4.5968984458735266e-05, + "loss": 0.2262, + "step": 2412 + }, + { + "epoch": 0.65, + "learning_rate": 4.596730975348339e-05, + "loss": 0.5434, + "step": 2413 + }, + { + "epoch": 0.65, + "learning_rate": 4.5965635048231515e-05, + "loss": 0.5958, + "step": 2414 + }, + { + "epoch": 0.65, + "learning_rate": 4.596396034297964e-05, + "loss": 0.3891, + "step": 2415 + }, + { + "epoch": 0.65, + "learning_rate": 4.596228563772776e-05, + "loss": 0.2882, + "step": 2416 + }, + { + "epoch": 0.65, + "learning_rate": 4.596061093247589e-05, + "loss": 0.2731, + "step": 2417 + }, + { + "epoch": 0.65, + "learning_rate": 4.595893622722401e-05, + "loss": 0.2004, + "step": 2418 + }, + { + "epoch": 0.65, + "learning_rate": 4.5957261521972136e-05, + "loss": 0.2891, + "step": 2419 + }, + { + "epoch": 0.65, + "learning_rate": 4.595558681672026e-05, + "loss": 0.2636, + "step": 2420 + }, + { + "epoch": 0.65, + "learning_rate": 4.5953912111468384e-05, + "loss": 0.3499, + "step": 2421 + }, + { + "epoch": 0.65, + "learning_rate": 4.595223740621651e-05, + "loss": 0.5334, + "step": 2422 + }, + { + "epoch": 0.65, + "learning_rate": 4.595056270096463e-05, + "loss": 0.2781, + "step": 2423 + }, + { + "epoch": 0.65, + "learning_rate": 4.5948887995712757e-05, + "loss": 0.3784, + "step": 2424 + }, + { + "epoch": 0.65, + "learning_rate": 4.594721329046088e-05, + "loss": 0.2654, + "step": 2425 + }, + { + "epoch": 0.65, + "learning_rate": 4.5945538585209005e-05, + "loss": 0.5421, + "step": 2426 + }, + { + "epoch": 0.65, + "learning_rate": 4.594386387995713e-05, + "loss": 0.4583, + "step": 2427 + }, + { + "epoch": 0.65, + "learning_rate": 4.5942189174705253e-05, + "loss": 0.5407, + "step": 2428 + }, + { + "epoch": 0.65, + "learning_rate": 4.594051446945338e-05, + "loss": 0.2383, + "step": 2429 + }, + { + "epoch": 0.65, + "learning_rate": 4.59388397642015e-05, + "loss": 0.1977, + "step": 2430 + }, + { + "epoch": 0.65, + "learning_rate": 4.5937165058949626e-05, + "loss": 0.197, + "step": 2431 + }, + { + "epoch": 0.65, + "learning_rate": 4.593549035369775e-05, + "loss": 0.3881, + "step": 2432 + }, + { + "epoch": 0.65, + "learning_rate": 4.5933815648445874e-05, + "loss": 0.2256, + "step": 2433 + }, + { + "epoch": 0.65, + "learning_rate": 4.5932140943194e-05, + "loss": 0.2791, + "step": 2434 + }, + { + "epoch": 0.65, + "learning_rate": 4.593046623794213e-05, + "loss": 0.5171, + "step": 2435 + }, + { + "epoch": 0.65, + "learning_rate": 4.592879153269025e-05, + "loss": 0.2666, + "step": 2436 + }, + { + "epoch": 0.65, + "learning_rate": 4.592711682743837e-05, + "loss": 0.3147, + "step": 2437 + }, + { + "epoch": 0.65, + "learning_rate": 4.5925442122186496e-05, + "loss": 0.2777, + "step": 2438 + }, + { + "epoch": 0.65, + "learning_rate": 4.592376741693462e-05, + "loss": 0.4177, + "step": 2439 + }, + { + "epoch": 0.65, + "learning_rate": 4.5922092711682744e-05, + "loss": 0.7226, + "step": 2440 + }, + { + "epoch": 0.65, + "learning_rate": 4.592041800643087e-05, + "loss": 0.4616, + "step": 2441 + }, + { + "epoch": 0.65, + "learning_rate": 4.5918743301179e-05, + "loss": 0.42, + "step": 2442 + }, + { + "epoch": 0.65, + "learning_rate": 4.591706859592712e-05, + "loss": 0.263, + "step": 2443 + }, + { + "epoch": 0.65, + "learning_rate": 4.591539389067525e-05, + "loss": 0.2896, + "step": 2444 + }, + { + "epoch": 0.66, + "learning_rate": 4.5913719185423365e-05, + "loss": 0.5333, + "step": 2445 + }, + { + "epoch": 0.66, + "learning_rate": 4.591204448017149e-05, + "loss": 0.2597, + "step": 2446 + }, + { + "epoch": 0.66, + "learning_rate": 4.5910369774919613e-05, + "loss": 0.3518, + "step": 2447 + }, + { + "epoch": 0.66, + "learning_rate": 4.590869506966774e-05, + "loss": 0.2767, + "step": 2448 + }, + { + "epoch": 0.66, + "learning_rate": 4.590702036441586e-05, + "loss": 0.2222, + "step": 2449 + }, + { + "epoch": 0.66, + "learning_rate": 4.590534565916399e-05, + "loss": 0.4978, + "step": 2450 + }, + { + "epoch": 0.66, + "learning_rate": 4.590367095391212e-05, + "loss": 0.1905, + "step": 2451 + }, + { + "epoch": 0.66, + "learning_rate": 4.590199624866024e-05, + "loss": 0.2795, + "step": 2452 + }, + { + "epoch": 0.66, + "learning_rate": 4.5900321543408365e-05, + "loss": 0.2654, + "step": 2453 + }, + { + "epoch": 0.66, + "learning_rate": 4.589864683815649e-05, + "loss": 0.2613, + "step": 2454 + }, + { + "epoch": 0.66, + "learning_rate": 4.589697213290461e-05, + "loss": 0.3564, + "step": 2455 + }, + { + "epoch": 0.66, + "learning_rate": 4.589529742765273e-05, + "loss": 0.5761, + "step": 2456 + }, + { + "epoch": 0.66, + "learning_rate": 4.5893622722400856e-05, + "loss": 0.2058, + "step": 2457 + }, + { + "epoch": 0.66, + "learning_rate": 4.5891948017148987e-05, + "loss": 0.5028, + "step": 2458 + }, + { + "epoch": 0.66, + "learning_rate": 4.589027331189711e-05, + "loss": 0.6846, + "step": 2459 + }, + { + "epoch": 0.66, + "learning_rate": 4.5888598606645235e-05, + "loss": 0.2482, + "step": 2460 + }, + { + "epoch": 0.66, + "learning_rate": 4.588692390139336e-05, + "loss": 0.196, + "step": 2461 + }, + { + "epoch": 0.66, + "learning_rate": 4.588524919614148e-05, + "loss": 0.2568, + "step": 2462 + }, + { + "epoch": 0.66, + "learning_rate": 4.588357449088961e-05, + "loss": 0.4216, + "step": 2463 + }, + { + "epoch": 0.66, + "learning_rate": 4.5881899785637725e-05, + "loss": 0.2538, + "step": 2464 + }, + { + "epoch": 0.66, + "learning_rate": 4.588022508038585e-05, + "loss": 0.2438, + "step": 2465 + }, + { + "epoch": 0.66, + "learning_rate": 4.587855037513398e-05, + "loss": 0.3795, + "step": 2466 + }, + { + "epoch": 0.66, + "learning_rate": 4.5876875669882104e-05, + "loss": 0.2403, + "step": 2467 + }, + { + "epoch": 0.66, + "learning_rate": 4.587520096463023e-05, + "loss": 0.2247, + "step": 2468 + }, + { + "epoch": 0.66, + "learning_rate": 4.587352625937835e-05, + "loss": 0.2094, + "step": 2469 + }, + { + "epoch": 0.66, + "learning_rate": 4.587185155412648e-05, + "loss": 0.2633, + "step": 2470 + }, + { + "epoch": 0.66, + "learning_rate": 4.58701768488746e-05, + "loss": 0.245, + "step": 2471 + }, + { + "epoch": 0.66, + "learning_rate": 4.5868502143622725e-05, + "loss": 0.3864, + "step": 2472 + }, + { + "epoch": 0.66, + "learning_rate": 4.586682743837085e-05, + "loss": 0.4472, + "step": 2473 + }, + { + "epoch": 0.66, + "learning_rate": 4.5865152733118974e-05, + "loss": 0.3276, + "step": 2474 + }, + { + "epoch": 0.66, + "learning_rate": 4.58634780278671e-05, + "loss": 0.2116, + "step": 2475 + }, + { + "epoch": 0.66, + "learning_rate": 4.586180332261522e-05, + "loss": 0.5621, + "step": 2476 + }, + { + "epoch": 0.66, + "learning_rate": 4.5860128617363346e-05, + "loss": 0.2053, + "step": 2477 + }, + { + "epoch": 0.66, + "learning_rate": 4.585845391211147e-05, + "loss": 0.232, + "step": 2478 + }, + { + "epoch": 0.66, + "learning_rate": 4.5856779206859595e-05, + "loss": 0.2329, + "step": 2479 + }, + { + "epoch": 0.66, + "learning_rate": 4.585510450160772e-05, + "loss": 0.4734, + "step": 2480 + }, + { + "epoch": 0.66, + "learning_rate": 4.585342979635584e-05, + "loss": 0.6547, + "step": 2481 + }, + { + "epoch": 0.67, + "learning_rate": 4.585175509110397e-05, + "loss": 0.3025, + "step": 2482 + }, + { + "epoch": 0.67, + "learning_rate": 4.585008038585209e-05, + "loss": 0.2865, + "step": 2483 + }, + { + "epoch": 0.67, + "learning_rate": 4.5848405680600216e-05, + "loss": 0.5509, + "step": 2484 + }, + { + "epoch": 0.67, + "learning_rate": 4.584673097534834e-05, + "loss": 0.2898, + "step": 2485 + }, + { + "epoch": 0.67, + "learning_rate": 4.5845056270096464e-05, + "loss": 0.5547, + "step": 2486 + }, + { + "epoch": 0.67, + "learning_rate": 4.584338156484459e-05, + "loss": 0.4947, + "step": 2487 + }, + { + "epoch": 0.67, + "learning_rate": 4.584170685959271e-05, + "loss": 0.387, + "step": 2488 + }, + { + "epoch": 0.67, + "learning_rate": 4.584003215434084e-05, + "loss": 0.348, + "step": 2489 + }, + { + "epoch": 0.67, + "learning_rate": 4.583835744908896e-05, + "loss": 0.2977, + "step": 2490 + }, + { + "epoch": 0.67, + "learning_rate": 4.583668274383709e-05, + "loss": 0.4839, + "step": 2491 + }, + { + "epoch": 0.67, + "learning_rate": 4.583500803858521e-05, + "loss": 0.4104, + "step": 2492 + }, + { + "epoch": 0.67, + "learning_rate": 4.5833333333333334e-05, + "loss": 0.563, + "step": 2493 + }, + { + "epoch": 0.67, + "learning_rate": 4.583165862808146e-05, + "loss": 0.2972, + "step": 2494 + }, + { + "epoch": 0.67, + "learning_rate": 4.582998392282958e-05, + "loss": 0.4032, + "step": 2495 + }, + { + "epoch": 0.67, + "learning_rate": 4.5828309217577706e-05, + "loss": 0.4718, + "step": 2496 + }, + { + "epoch": 0.67, + "learning_rate": 4.582663451232583e-05, + "loss": 0.4841, + "step": 2497 + }, + { + "epoch": 0.67, + "learning_rate": 4.582495980707396e-05, + "loss": 0.2123, + "step": 2498 + }, + { + "epoch": 0.67, + "learning_rate": 4.5823285101822086e-05, + "loss": 0.2601, + "step": 2499 + }, + { + "epoch": 0.67, + "learning_rate": 4.582161039657021e-05, + "loss": 0.297, + "step": 2500 + }, + { + "epoch": 0.67, + "learning_rate": 4.581993569131833e-05, + "loss": 0.2106, + "step": 2501 + }, + { + "epoch": 0.67, + "learning_rate": 4.581826098606645e-05, + "loss": 0.2416, + "step": 2502 + }, + { + "epoch": 0.67, + "learning_rate": 4.5816586280814576e-05, + "loss": 0.2893, + "step": 2503 + }, + { + "epoch": 0.67, + "learning_rate": 4.58149115755627e-05, + "loss": 0.3226, + "step": 2504 + }, + { + "epoch": 0.67, + "learning_rate": 4.5813236870310824e-05, + "loss": 0.2311, + "step": 2505 + }, + { + "epoch": 0.67, + "learning_rate": 4.5811562165058955e-05, + "loss": 0.2563, + "step": 2506 + }, + { + "epoch": 0.67, + "learning_rate": 4.580988745980708e-05, + "loss": 0.2166, + "step": 2507 + }, + { + "epoch": 0.67, + "learning_rate": 4.5808212754555204e-05, + "loss": 0.3154, + "step": 2508 + }, + { + "epoch": 0.67, + "learning_rate": 4.580653804930333e-05, + "loss": 0.5414, + "step": 2509 + }, + { + "epoch": 0.67, + "learning_rate": 4.580486334405145e-05, + "loss": 0.2485, + "step": 2510 + }, + { + "epoch": 0.67, + "learning_rate": 4.580318863879957e-05, + "loss": 0.2835, + "step": 2511 + }, + { + "epoch": 0.67, + "learning_rate": 4.5801513933547694e-05, + "loss": 0.2272, + "step": 2512 + }, + { + "epoch": 0.67, + "learning_rate": 4.579983922829582e-05, + "loss": 0.2179, + "step": 2513 + }, + { + "epoch": 0.67, + "learning_rate": 4.579816452304395e-05, + "loss": 0.7328, + "step": 2514 + }, + { + "epoch": 0.67, + "learning_rate": 4.579648981779207e-05, + "loss": 0.2775, + "step": 2515 + }, + { + "epoch": 0.67, + "learning_rate": 4.57948151125402e-05, + "loss": 0.2668, + "step": 2516 + }, + { + "epoch": 0.67, + "learning_rate": 4.579314040728832e-05, + "loss": 0.6031, + "step": 2517 + }, + { + "epoch": 0.67, + "learning_rate": 4.5791465702036446e-05, + "loss": 0.5003, + "step": 2518 + }, + { + "epoch": 0.67, + "learning_rate": 4.578979099678457e-05, + "loss": 0.3251, + "step": 2519 + }, + { + "epoch": 0.68, + "learning_rate": 4.578811629153269e-05, + "loss": 0.2426, + "step": 2520 + }, + { + "epoch": 0.68, + "learning_rate": 4.578644158628081e-05, + "loss": 0.2242, + "step": 2521 + }, + { + "epoch": 0.68, + "learning_rate": 4.578476688102894e-05, + "loss": 0.2619, + "step": 2522 + }, + { + "epoch": 0.68, + "learning_rate": 4.578309217577707e-05, + "loss": 0.4188, + "step": 2523 + }, + { + "epoch": 0.68, + "learning_rate": 4.578141747052519e-05, + "loss": 0.2614, + "step": 2524 + }, + { + "epoch": 0.68, + "learning_rate": 4.5779742765273315e-05, + "loss": 0.2369, + "step": 2525 + }, + { + "epoch": 0.68, + "learning_rate": 4.577806806002144e-05, + "loss": 0.3607, + "step": 2526 + }, + { + "epoch": 0.68, + "learning_rate": 4.5776393354769564e-05, + "loss": 0.2975, + "step": 2527 + }, + { + "epoch": 0.68, + "learning_rate": 4.577471864951769e-05, + "loss": 0.5516, + "step": 2528 + }, + { + "epoch": 0.68, + "learning_rate": 4.577304394426581e-05, + "loss": 0.42, + "step": 2529 + }, + { + "epoch": 0.68, + "learning_rate": 4.5771369239013936e-05, + "loss": 0.4624, + "step": 2530 + }, + { + "epoch": 0.68, + "learning_rate": 4.576969453376206e-05, + "loss": 0.1903, + "step": 2531 + }, + { + "epoch": 0.68, + "learning_rate": 4.5768019828510185e-05, + "loss": 0.2472, + "step": 2532 + }, + { + "epoch": 0.68, + "learning_rate": 4.576634512325831e-05, + "loss": 0.4166, + "step": 2533 + }, + { + "epoch": 0.68, + "learning_rate": 4.576467041800643e-05, + "loss": 0.622, + "step": 2534 + }, + { + "epoch": 0.68, + "learning_rate": 4.576299571275456e-05, + "loss": 0.202, + "step": 2535 + }, + { + "epoch": 0.68, + "learning_rate": 4.576132100750268e-05, + "loss": 0.4041, + "step": 2536 + }, + { + "epoch": 0.68, + "learning_rate": 4.5759646302250806e-05, + "loss": 0.3931, + "step": 2537 + }, + { + "epoch": 0.68, + "learning_rate": 4.575797159699893e-05, + "loss": 0.2309, + "step": 2538 + }, + { + "epoch": 0.68, + "learning_rate": 4.5756296891747054e-05, + "loss": 0.3862, + "step": 2539 + }, + { + "epoch": 0.68, + "learning_rate": 4.575462218649518e-05, + "loss": 0.4366, + "step": 2540 + }, + { + "epoch": 0.68, + "learning_rate": 4.57529474812433e-05, + "loss": 0.4053, + "step": 2541 + }, + { + "epoch": 0.68, + "learning_rate": 4.575127277599143e-05, + "loss": 0.3582, + "step": 2542 + }, + { + "epoch": 0.68, + "learning_rate": 4.574959807073955e-05, + "loss": 0.1828, + "step": 2543 + }, + { + "epoch": 0.68, + "learning_rate": 4.5747923365487675e-05, + "loss": 0.4431, + "step": 2544 + }, + { + "epoch": 0.68, + "learning_rate": 4.57462486602358e-05, + "loss": 0.2171, + "step": 2545 + }, + { + "epoch": 0.68, + "learning_rate": 4.5744573954983924e-05, + "loss": 0.2787, + "step": 2546 + }, + { + "epoch": 0.68, + "learning_rate": 4.5742899249732055e-05, + "loss": 0.2762, + "step": 2547 + }, + { + "epoch": 0.68, + "learning_rate": 4.574122454448017e-05, + "loss": 0.2926, + "step": 2548 + }, + { + "epoch": 0.68, + "learning_rate": 4.5739549839228296e-05, + "loss": 0.273, + "step": 2549 + }, + { + "epoch": 0.68, + "learning_rate": 4.573787513397642e-05, + "loss": 0.4184, + "step": 2550 + }, + { + "epoch": 0.68, + "learning_rate": 4.5736200428724545e-05, + "loss": 0.5117, + "step": 2551 + }, + { + "epoch": 0.68, + "learning_rate": 4.573452572347267e-05, + "loss": 0.296, + "step": 2552 + }, + { + "epoch": 0.68, + "learning_rate": 4.573285101822079e-05, + "loss": 0.2219, + "step": 2553 + }, + { + "epoch": 0.68, + "learning_rate": 4.5731176312968924e-05, + "loss": 0.3087, + "step": 2554 + }, + { + "epoch": 0.68, + "learning_rate": 4.572950160771705e-05, + "loss": 0.2499, + "step": 2555 + }, + { + "epoch": 0.68, + "learning_rate": 4.572782690246517e-05, + "loss": 0.2156, + "step": 2556 + }, + { + "epoch": 0.69, + "learning_rate": 4.572615219721329e-05, + "loss": 0.2415, + "step": 2557 + }, + { + "epoch": 0.69, + "learning_rate": 4.5724477491961414e-05, + "loss": 0.4384, + "step": 2558 + }, + { + "epoch": 0.69, + "learning_rate": 4.572280278670954e-05, + "loss": 0.398, + "step": 2559 + }, + { + "epoch": 0.69, + "learning_rate": 4.572112808145766e-05, + "loss": 0.2354, + "step": 2560 + }, + { + "epoch": 0.69, + "learning_rate": 4.571945337620579e-05, + "loss": 0.2319, + "step": 2561 + }, + { + "epoch": 0.69, + "learning_rate": 4.571777867095392e-05, + "loss": 0.3789, + "step": 2562 + }, + { + "epoch": 0.69, + "learning_rate": 4.571610396570204e-05, + "loss": 0.3911, + "step": 2563 + }, + { + "epoch": 0.69, + "learning_rate": 4.5714429260450166e-05, + "loss": 0.227, + "step": 2564 + }, + { + "epoch": 0.69, + "learning_rate": 4.571275455519829e-05, + "loss": 0.3227, + "step": 2565 + }, + { + "epoch": 0.69, + "learning_rate": 4.5711079849946415e-05, + "loss": 0.2626, + "step": 2566 + }, + { + "epoch": 0.69, + "learning_rate": 4.570940514469453e-05, + "loss": 0.3785, + "step": 2567 + }, + { + "epoch": 0.69, + "learning_rate": 4.5707730439442656e-05, + "loss": 0.3439, + "step": 2568 + }, + { + "epoch": 0.69, + "learning_rate": 4.570605573419078e-05, + "loss": 0.3276, + "step": 2569 + }, + { + "epoch": 0.69, + "learning_rate": 4.570438102893891e-05, + "loss": 0.3058, + "step": 2570 + }, + { + "epoch": 0.69, + "learning_rate": 4.5702706323687036e-05, + "loss": 0.2013, + "step": 2571 + }, + { + "epoch": 0.69, + "learning_rate": 4.570103161843516e-05, + "loss": 0.265, + "step": 2572 + }, + { + "epoch": 0.69, + "learning_rate": 4.5699356913183284e-05, + "loss": 0.223, + "step": 2573 + }, + { + "epoch": 0.69, + "learning_rate": 4.569768220793141e-05, + "loss": 0.3465, + "step": 2574 + }, + { + "epoch": 0.69, + "learning_rate": 4.569600750267953e-05, + "loss": 0.4901, + "step": 2575 + }, + { + "epoch": 0.69, + "learning_rate": 4.569433279742765e-05, + "loss": 0.2451, + "step": 2576 + }, + { + "epoch": 0.69, + "learning_rate": 4.5692658092175774e-05, + "loss": 0.2162, + "step": 2577 + }, + { + "epoch": 0.69, + "learning_rate": 4.5690983386923905e-05, + "loss": 0.221, + "step": 2578 + }, + { + "epoch": 0.69, + "learning_rate": 4.568930868167203e-05, + "loss": 0.5015, + "step": 2579 + }, + { + "epoch": 0.69, + "learning_rate": 4.5687633976420154e-05, + "loss": 0.3025, + "step": 2580 + }, + { + "epoch": 0.69, + "learning_rate": 4.568595927116828e-05, + "loss": 0.3319, + "step": 2581 + }, + { + "epoch": 0.69, + "learning_rate": 4.56842845659164e-05, + "loss": 0.4647, + "step": 2582 + }, + { + "epoch": 0.69, + "learning_rate": 4.5682609860664526e-05, + "loss": 0.2262, + "step": 2583 + }, + { + "epoch": 0.69, + "learning_rate": 4.568093515541265e-05, + "loss": 0.4097, + "step": 2584 + }, + { + "epoch": 0.69, + "learning_rate": 4.567926045016077e-05, + "loss": 0.2034, + "step": 2585 + }, + { + "epoch": 0.69, + "learning_rate": 4.56775857449089e-05, + "loss": 0.438, + "step": 2586 + }, + { + "epoch": 0.69, + "learning_rate": 4.567591103965702e-05, + "loss": 0.5529, + "step": 2587 + }, + { + "epoch": 0.69, + "learning_rate": 4.567423633440515e-05, + "loss": 0.3293, + "step": 2588 + }, + { + "epoch": 0.69, + "learning_rate": 4.567256162915327e-05, + "loss": 0.5334, + "step": 2589 + }, + { + "epoch": 0.69, + "learning_rate": 4.5670886923901396e-05, + "loss": 0.3499, + "step": 2590 + }, + { + "epoch": 0.69, + "learning_rate": 4.566921221864952e-05, + "loss": 0.3913, + "step": 2591 + }, + { + "epoch": 0.69, + "learning_rate": 4.5667537513397644e-05, + "loss": 0.1944, + "step": 2592 + }, + { + "epoch": 0.69, + "learning_rate": 4.566586280814577e-05, + "loss": 0.449, + "step": 2593 + }, + { + "epoch": 0.7, + "learning_rate": 4.566418810289389e-05, + "loss": 0.3416, + "step": 2594 + }, + { + "epoch": 0.7, + "learning_rate": 4.566251339764202e-05, + "loss": 0.2572, + "step": 2595 + }, + { + "epoch": 0.7, + "learning_rate": 4.566083869239014e-05, + "loss": 0.3912, + "step": 2596 + }, + { + "epoch": 0.7, + "learning_rate": 4.5659163987138265e-05, + "loss": 0.3054, + "step": 2597 + }, + { + "epoch": 0.7, + "learning_rate": 4.565748928188639e-05, + "loss": 0.4969, + "step": 2598 + }, + { + "epoch": 0.7, + "learning_rate": 4.5655814576634514e-05, + "loss": 0.2356, + "step": 2599 + }, + { + "epoch": 0.7, + "learning_rate": 4.565413987138264e-05, + "loss": 0.2479, + "step": 2600 + }, + { + "epoch": 0.7, + "learning_rate": 4.565246516613076e-05, + "loss": 0.334, + "step": 2601 + }, + { + "epoch": 0.7, + "learning_rate": 4.5650790460878886e-05, + "loss": 0.2192, + "step": 2602 + }, + { + "epoch": 0.7, + "learning_rate": 4.564911575562702e-05, + "loss": 0.357, + "step": 2603 + }, + { + "epoch": 0.7, + "learning_rate": 4.5647441050375135e-05, + "loss": 0.4737, + "step": 2604 + }, + { + "epoch": 0.7, + "learning_rate": 4.564576634512326e-05, + "loss": 0.2654, + "step": 2605 + }, + { + "epoch": 0.7, + "learning_rate": 4.564409163987138e-05, + "loss": 0.1976, + "step": 2606 + }, + { + "epoch": 0.7, + "learning_rate": 4.564241693461951e-05, + "loss": 0.2302, + "step": 2607 + }, + { + "epoch": 0.7, + "learning_rate": 4.564074222936763e-05, + "loss": 0.216, + "step": 2608 + }, + { + "epoch": 0.7, + "learning_rate": 4.5639067524115756e-05, + "loss": 0.2245, + "step": 2609 + }, + { + "epoch": 0.7, + "learning_rate": 4.563739281886389e-05, + "loss": 0.2448, + "step": 2610 + }, + { + "epoch": 0.7, + "learning_rate": 4.563571811361201e-05, + "loss": 0.4727, + "step": 2611 + }, + { + "epoch": 0.7, + "learning_rate": 4.5634043408360135e-05, + "loss": 0.3719, + "step": 2612 + }, + { + "epoch": 0.7, + "learning_rate": 4.563236870310825e-05, + "loss": 0.1981, + "step": 2613 + }, + { + "epoch": 0.7, + "learning_rate": 4.563069399785638e-05, + "loss": 0.3521, + "step": 2614 + }, + { + "epoch": 0.7, + "learning_rate": 4.56290192926045e-05, + "loss": 0.3066, + "step": 2615 + }, + { + "epoch": 0.7, + "learning_rate": 4.5627344587352625e-05, + "loss": 0.2604, + "step": 2616 + }, + { + "epoch": 0.7, + "learning_rate": 4.562566988210075e-05, + "loss": 0.4102, + "step": 2617 + }, + { + "epoch": 0.7, + "learning_rate": 4.562399517684888e-05, + "loss": 0.2917, + "step": 2618 + }, + { + "epoch": 0.7, + "learning_rate": 4.5622320471597005e-05, + "loss": 0.3115, + "step": 2619 + }, + { + "epoch": 0.7, + "learning_rate": 4.562064576634513e-05, + "loss": 0.2605, + "step": 2620 + }, + { + "epoch": 0.7, + "learning_rate": 4.561897106109325e-05, + "loss": 0.4791, + "step": 2621 + }, + { + "epoch": 0.7, + "learning_rate": 4.561729635584138e-05, + "loss": 0.2594, + "step": 2622 + }, + { + "epoch": 0.7, + "learning_rate": 4.5615621650589495e-05, + "loss": 0.2127, + "step": 2623 + }, + { + "epoch": 0.7, + "learning_rate": 4.561394694533762e-05, + "loss": 0.6047, + "step": 2624 + }, + { + "epoch": 0.7, + "learning_rate": 4.561227224008574e-05, + "loss": 0.2498, + "step": 2625 + }, + { + "epoch": 0.7, + "learning_rate": 4.5610597534833874e-05, + "loss": 0.2293, + "step": 2626 + }, + { + "epoch": 0.7, + "learning_rate": 4.5608922829582e-05, + "loss": 0.2244, + "step": 2627 + }, + { + "epoch": 0.7, + "learning_rate": 4.560724812433012e-05, + "loss": 0.4419, + "step": 2628 + }, + { + "epoch": 0.7, + "learning_rate": 4.560557341907825e-05, + "loss": 0.3061, + "step": 2629 + }, + { + "epoch": 0.7, + "learning_rate": 4.560389871382637e-05, + "loss": 0.2443, + "step": 2630 + }, + { + "epoch": 0.7, + "learning_rate": 4.5602224008574495e-05, + "loss": 0.2139, + "step": 2631 + }, + { + "epoch": 0.71, + "learning_rate": 4.560054930332261e-05, + "loss": 0.4151, + "step": 2632 + }, + { + "epoch": 0.71, + "learning_rate": 4.559887459807074e-05, + "loss": 0.205, + "step": 2633 + }, + { + "epoch": 0.71, + "learning_rate": 4.559719989281887e-05, + "loss": 0.2244, + "step": 2634 + }, + { + "epoch": 0.71, + "learning_rate": 4.559552518756699e-05, + "loss": 0.8106, + "step": 2635 + }, + { + "epoch": 0.71, + "learning_rate": 4.5593850482315116e-05, + "loss": 0.4597, + "step": 2636 + }, + { + "epoch": 0.71, + "learning_rate": 4.559217577706324e-05, + "loss": 0.1948, + "step": 2637 + }, + { + "epoch": 0.71, + "learning_rate": 4.5590501071811365e-05, + "loss": 0.256, + "step": 2638 + }, + { + "epoch": 0.71, + "learning_rate": 4.558882636655949e-05, + "loss": 0.577, + "step": 2639 + }, + { + "epoch": 0.71, + "learning_rate": 4.558715166130761e-05, + "loss": 0.2397, + "step": 2640 + }, + { + "epoch": 0.71, + "learning_rate": 4.558547695605573e-05, + "loss": 0.2515, + "step": 2641 + }, + { + "epoch": 0.71, + "learning_rate": 4.558380225080386e-05, + "loss": 0.3387, + "step": 2642 + }, + { + "epoch": 0.71, + "learning_rate": 4.5582127545551986e-05, + "loss": 0.2178, + "step": 2643 + }, + { + "epoch": 0.71, + "learning_rate": 4.558045284030011e-05, + "loss": 0.2573, + "step": 2644 + }, + { + "epoch": 0.71, + "learning_rate": 4.5578778135048234e-05, + "loss": 0.2286, + "step": 2645 + }, + { + "epoch": 0.71, + "learning_rate": 4.557710342979636e-05, + "loss": 0.2525, + "step": 2646 + }, + { + "epoch": 0.71, + "learning_rate": 4.557542872454448e-05, + "loss": 0.2331, + "step": 2647 + }, + { + "epoch": 0.71, + "learning_rate": 4.557375401929261e-05, + "loss": 0.2064, + "step": 2648 + }, + { + "epoch": 0.71, + "learning_rate": 4.557207931404073e-05, + "loss": 0.2407, + "step": 2649 + }, + { + "epoch": 0.71, + "learning_rate": 4.5570404608788855e-05, + "loss": 0.2917, + "step": 2650 + }, + { + "epoch": 0.71, + "learning_rate": 4.556872990353698e-05, + "loss": 0.1812, + "step": 2651 + }, + { + "epoch": 0.71, + "learning_rate": 4.5567055198285103e-05, + "loss": 0.2474, + "step": 2652 + }, + { + "epoch": 0.71, + "learning_rate": 4.556538049303323e-05, + "loss": 1.0264, + "step": 2653 + }, + { + "epoch": 0.71, + "learning_rate": 4.556370578778135e-05, + "loss": 0.3701, + "step": 2654 + }, + { + "epoch": 0.71, + "learning_rate": 4.5562031082529476e-05, + "loss": 0.3297, + "step": 2655 + }, + { + "epoch": 0.71, + "learning_rate": 4.55603563772776e-05, + "loss": 0.2558, + "step": 2656 + }, + { + "epoch": 0.71, + "learning_rate": 4.5558681672025725e-05, + "loss": 0.4033, + "step": 2657 + }, + { + "epoch": 0.71, + "learning_rate": 4.555700696677385e-05, + "loss": 0.1757, + "step": 2658 + }, + { + "epoch": 0.71, + "learning_rate": 4.555533226152198e-05, + "loss": 0.4422, + "step": 2659 + }, + { + "epoch": 0.71, + "learning_rate": 4.55536575562701e-05, + "loss": 0.2101, + "step": 2660 + }, + { + "epoch": 0.71, + "learning_rate": 4.555198285101822e-05, + "loss": 0.2412, + "step": 2661 + }, + { + "epoch": 0.71, + "learning_rate": 4.5550308145766346e-05, + "loss": 0.4162, + "step": 2662 + }, + { + "epoch": 0.71, + "learning_rate": 4.554863344051447e-05, + "loss": 0.2576, + "step": 2663 + }, + { + "epoch": 0.71, + "learning_rate": 4.5546958735262594e-05, + "loss": 0.2365, + "step": 2664 + }, + { + "epoch": 0.71, + "learning_rate": 4.554528403001072e-05, + "loss": 0.2357, + "step": 2665 + }, + { + "epoch": 0.71, + "learning_rate": 4.554360932475884e-05, + "loss": 0.2915, + "step": 2666 + }, + { + "epoch": 0.71, + "learning_rate": 4.5541934619506973e-05, + "loss": 0.3027, + "step": 2667 + }, + { + "epoch": 0.71, + "learning_rate": 4.55402599142551e-05, + "loss": 0.4776, + "step": 2668 + }, + { + "epoch": 0.72, + "learning_rate": 4.5538585209003215e-05, + "loss": 0.5312, + "step": 2669 + }, + { + "epoch": 0.72, + "learning_rate": 4.553691050375134e-05, + "loss": 0.2486, + "step": 2670 + }, + { + "epoch": 0.72, + "learning_rate": 4.5535235798499463e-05, + "loss": 0.4861, + "step": 2671 + }, + { + "epoch": 0.72, + "learning_rate": 4.553356109324759e-05, + "loss": 0.5447, + "step": 2672 + }, + { + "epoch": 0.72, + "learning_rate": 4.553188638799571e-05, + "loss": 0.2045, + "step": 2673 + }, + { + "epoch": 0.72, + "learning_rate": 4.553021168274384e-05, + "loss": 0.1865, + "step": 2674 + }, + { + "epoch": 0.72, + "learning_rate": 4.552853697749197e-05, + "loss": 0.8902, + "step": 2675 + }, + { + "epoch": 0.72, + "learning_rate": 4.552686227224009e-05, + "loss": 0.2496, + "step": 2676 + }, + { + "epoch": 0.72, + "learning_rate": 4.5525187566988215e-05, + "loss": 0.2092, + "step": 2677 + }, + { + "epoch": 0.72, + "learning_rate": 4.552351286173634e-05, + "loss": 0.3084, + "step": 2678 + }, + { + "epoch": 0.72, + "learning_rate": 4.552183815648446e-05, + "loss": 0.3458, + "step": 2679 + }, + { + "epoch": 0.72, + "learning_rate": 4.552016345123258e-05, + "loss": 0.2398, + "step": 2680 + }, + { + "epoch": 0.72, + "learning_rate": 4.5518488745980706e-05, + "loss": 0.2212, + "step": 2681 + }, + { + "epoch": 0.72, + "learning_rate": 4.5516814040728837e-05, + "loss": 0.4599, + "step": 2682 + }, + { + "epoch": 0.72, + "learning_rate": 4.551513933547696e-05, + "loss": 0.2578, + "step": 2683 + }, + { + "epoch": 0.72, + "learning_rate": 4.5513464630225085e-05, + "loss": 0.2859, + "step": 2684 + }, + { + "epoch": 0.72, + "learning_rate": 4.551178992497321e-05, + "loss": 0.3884, + "step": 2685 + }, + { + "epoch": 0.72, + "learning_rate": 4.551011521972133e-05, + "loss": 0.2284, + "step": 2686 + }, + { + "epoch": 0.72, + "learning_rate": 4.550844051446946e-05, + "loss": 0.3912, + "step": 2687 + }, + { + "epoch": 0.72, + "learning_rate": 4.5506765809217575e-05, + "loss": 0.2328, + "step": 2688 + }, + { + "epoch": 0.72, + "learning_rate": 4.55050911039657e-05, + "loss": 0.3451, + "step": 2689 + }, + { + "epoch": 0.72, + "learning_rate": 4.550341639871383e-05, + "loss": 0.4523, + "step": 2690 + }, + { + "epoch": 0.72, + "learning_rate": 4.5501741693461954e-05, + "loss": 0.5179, + "step": 2691 + }, + { + "epoch": 0.72, + "learning_rate": 4.550006698821008e-05, + "loss": 0.3832, + "step": 2692 + }, + { + "epoch": 0.72, + "learning_rate": 4.54983922829582e-05, + "loss": 0.3179, + "step": 2693 + }, + { + "epoch": 0.72, + "learning_rate": 4.549671757770633e-05, + "loss": 0.537, + "step": 2694 + }, + { + "epoch": 0.72, + "learning_rate": 4.549504287245445e-05, + "loss": 0.1742, + "step": 2695 + }, + { + "epoch": 0.72, + "learning_rate": 4.5493368167202575e-05, + "loss": 0.3299, + "step": 2696 + }, + { + "epoch": 0.72, + "learning_rate": 4.549169346195069e-05, + "loss": 0.4282, + "step": 2697 + }, + { + "epoch": 0.72, + "learning_rate": 4.5490018756698824e-05, + "loss": 0.3817, + "step": 2698 + }, + { + "epoch": 0.72, + "learning_rate": 4.548834405144695e-05, + "loss": 0.2147, + "step": 2699 + }, + { + "epoch": 0.72, + "learning_rate": 4.548666934619507e-05, + "loss": 0.2713, + "step": 2700 + }, + { + "epoch": 0.72, + "learning_rate": 4.5484994640943197e-05, + "loss": 0.2778, + "step": 2701 + }, + { + "epoch": 0.72, + "learning_rate": 4.548331993569132e-05, + "loss": 0.2533, + "step": 2702 + }, + { + "epoch": 0.72, + "learning_rate": 4.5481645230439445e-05, + "loss": 0.2447, + "step": 2703 + }, + { + "epoch": 0.72, + "learning_rate": 4.547997052518757e-05, + "loss": 0.1881, + "step": 2704 + }, + { + "epoch": 0.72, + "learning_rate": 4.547829581993569e-05, + "loss": 0.3142, + "step": 2705 + }, + { + "epoch": 0.73, + "learning_rate": 4.547662111468382e-05, + "loss": 0.3086, + "step": 2706 + }, + { + "epoch": 0.73, + "learning_rate": 4.547494640943194e-05, + "loss": 0.2133, + "step": 2707 + }, + { + "epoch": 0.73, + "learning_rate": 4.5473271704180066e-05, + "loss": 0.2983, + "step": 2708 + }, + { + "epoch": 0.73, + "learning_rate": 4.547159699892819e-05, + "loss": 0.2253, + "step": 2709 + }, + { + "epoch": 0.73, + "learning_rate": 4.5469922293676314e-05, + "loss": 0.2105, + "step": 2710 + }, + { + "epoch": 0.73, + "learning_rate": 4.546824758842444e-05, + "loss": 0.2672, + "step": 2711 + }, + { + "epoch": 0.73, + "learning_rate": 4.546657288317256e-05, + "loss": 0.192, + "step": 2712 + }, + { + "epoch": 0.73, + "learning_rate": 4.546489817792069e-05, + "loss": 0.1888, + "step": 2713 + }, + { + "epoch": 0.73, + "learning_rate": 4.546322347266881e-05, + "loss": 0.2814, + "step": 2714 + }, + { + "epoch": 0.73, + "learning_rate": 4.546154876741694e-05, + "loss": 0.3082, + "step": 2715 + }, + { + "epoch": 0.73, + "learning_rate": 4.545987406216506e-05, + "loss": 0.1805, + "step": 2716 + }, + { + "epoch": 0.73, + "learning_rate": 4.5458199356913184e-05, + "loss": 0.3248, + "step": 2717 + }, + { + "epoch": 0.73, + "learning_rate": 4.545652465166131e-05, + "loss": 0.5379, + "step": 2718 + }, + { + "epoch": 0.73, + "learning_rate": 4.545484994640943e-05, + "loss": 0.4037, + "step": 2719 + }, + { + "epoch": 0.73, + "learning_rate": 4.5453175241157557e-05, + "loss": 0.2267, + "step": 2720 + }, + { + "epoch": 0.73, + "learning_rate": 4.545150053590568e-05, + "loss": 0.3789, + "step": 2721 + }, + { + "epoch": 0.73, + "learning_rate": 4.5449825830653805e-05, + "loss": 0.2121, + "step": 2722 + }, + { + "epoch": 0.73, + "learning_rate": 4.5448151125401936e-05, + "loss": 0.2781, + "step": 2723 + }, + { + "epoch": 0.73, + "learning_rate": 4.544647642015006e-05, + "loss": 0.256, + "step": 2724 + }, + { + "epoch": 0.73, + "learning_rate": 4.544480171489818e-05, + "loss": 0.22, + "step": 2725 + }, + { + "epoch": 0.73, + "learning_rate": 4.54431270096463e-05, + "loss": 0.4766, + "step": 2726 + }, + { + "epoch": 0.73, + "learning_rate": 4.5441452304394426e-05, + "loss": 0.2717, + "step": 2727 + }, + { + "epoch": 0.73, + "learning_rate": 4.543977759914255e-05, + "loss": 0.4086, + "step": 2728 + }, + { + "epoch": 0.73, + "learning_rate": 4.5438102893890674e-05, + "loss": 0.3438, + "step": 2729 + }, + { + "epoch": 0.73, + "learning_rate": 4.5436428188638805e-05, + "loss": 0.581, + "step": 2730 + }, + { + "epoch": 0.73, + "learning_rate": 4.543475348338693e-05, + "loss": 0.4403, + "step": 2731 + }, + { + "epoch": 0.73, + "learning_rate": 4.5433078778135054e-05, + "loss": 0.3496, + "step": 2732 + }, + { + "epoch": 0.73, + "learning_rate": 4.543140407288318e-05, + "loss": 0.2125, + "step": 2733 + }, + { + "epoch": 0.73, + "learning_rate": 4.54297293676313e-05, + "loss": 0.2075, + "step": 2734 + }, + { + "epoch": 0.73, + "learning_rate": 4.542805466237942e-05, + "loss": 0.3981, + "step": 2735 + }, + { + "epoch": 0.73, + "learning_rate": 4.5426379957127544e-05, + "loss": 0.316, + "step": 2736 + }, + { + "epoch": 0.73, + "learning_rate": 4.542470525187567e-05, + "loss": 0.3492, + "step": 2737 + }, + { + "epoch": 0.73, + "learning_rate": 4.54230305466238e-05, + "loss": 0.2227, + "step": 2738 + }, + { + "epoch": 0.73, + "learning_rate": 4.542135584137192e-05, + "loss": 0.3347, + "step": 2739 + }, + { + "epoch": 0.73, + "learning_rate": 4.541968113612005e-05, + "loss": 0.2277, + "step": 2740 + }, + { + "epoch": 0.73, + "learning_rate": 4.541800643086817e-05, + "loss": 0.5124, + "step": 2741 + }, + { + "epoch": 0.73, + "learning_rate": 4.5416331725616296e-05, + "loss": 0.2368, + "step": 2742 + }, + { + "epoch": 0.73, + "learning_rate": 4.541465702036442e-05, + "loss": 0.2664, + "step": 2743 + }, + { + "epoch": 0.74, + "learning_rate": 4.541298231511254e-05, + "loss": 0.2903, + "step": 2744 + }, + { + "epoch": 0.74, + "learning_rate": 4.541130760986066e-05, + "loss": 0.3942, + "step": 2745 + }, + { + "epoch": 0.74, + "learning_rate": 4.540963290460879e-05, + "loss": 0.2847, + "step": 2746 + }, + { + "epoch": 0.74, + "learning_rate": 4.540795819935692e-05, + "loss": 0.1903, + "step": 2747 + }, + { + "epoch": 0.74, + "learning_rate": 4.540628349410504e-05, + "loss": 0.3399, + "step": 2748 + }, + { + "epoch": 0.74, + "learning_rate": 4.5404608788853165e-05, + "loss": 0.1888, + "step": 2749 + }, + { + "epoch": 0.74, + "learning_rate": 4.540293408360129e-05, + "loss": 0.2187, + "step": 2750 + }, + { + "epoch": 0.74, + "learning_rate": 4.5401259378349414e-05, + "loss": 0.2575, + "step": 2751 + }, + { + "epoch": 0.74, + "learning_rate": 4.539958467309754e-05, + "loss": 0.2736, + "step": 2752 + }, + { + "epoch": 0.74, + "learning_rate": 4.5397909967845655e-05, + "loss": 0.3258, + "step": 2753 + }, + { + "epoch": 0.74, + "learning_rate": 4.5396235262593786e-05, + "loss": 0.2471, + "step": 2754 + }, + { + "epoch": 0.74, + "learning_rate": 4.539456055734191e-05, + "loss": 0.3702, + "step": 2755 + }, + { + "epoch": 0.74, + "learning_rate": 4.5392885852090035e-05, + "loss": 0.4768, + "step": 2756 + }, + { + "epoch": 0.74, + "learning_rate": 4.539121114683816e-05, + "loss": 0.3285, + "step": 2757 + }, + { + "epoch": 0.74, + "learning_rate": 4.538953644158628e-05, + "loss": 0.7377, + "step": 2758 + }, + { + "epoch": 0.74, + "learning_rate": 4.538786173633441e-05, + "loss": 0.2316, + "step": 2759 + }, + { + "epoch": 0.74, + "learning_rate": 4.538618703108253e-05, + "loss": 0.3014, + "step": 2760 + }, + { + "epoch": 0.74, + "learning_rate": 4.5384512325830656e-05, + "loss": 0.4618, + "step": 2761 + }, + { + "epoch": 0.74, + "learning_rate": 4.538283762057878e-05, + "loss": 0.3494, + "step": 2762 + }, + { + "epoch": 0.74, + "learning_rate": 4.5381162915326904e-05, + "loss": 0.2462, + "step": 2763 + }, + { + "epoch": 0.74, + "learning_rate": 4.537948821007503e-05, + "loss": 0.3064, + "step": 2764 + }, + { + "epoch": 0.74, + "learning_rate": 4.537781350482315e-05, + "loss": 0.3785, + "step": 2765 + }, + { + "epoch": 0.74, + "learning_rate": 4.537613879957128e-05, + "loss": 0.3732, + "step": 2766 + }, + { + "epoch": 0.74, + "learning_rate": 4.53744640943194e-05, + "loss": 0.2125, + "step": 2767 + }, + { + "epoch": 0.74, + "learning_rate": 4.5372789389067525e-05, + "loss": 0.2953, + "step": 2768 + }, + { + "epoch": 0.74, + "learning_rate": 4.537111468381565e-05, + "loss": 0.3309, + "step": 2769 + }, + { + "epoch": 0.74, + "learning_rate": 4.5369439978563774e-05, + "loss": 0.2, + "step": 2770 + }, + { + "epoch": 0.74, + "learning_rate": 4.5367765273311905e-05, + "loss": 0.3309, + "step": 2771 + }, + { + "epoch": 0.74, + "learning_rate": 4.536609056806002e-05, + "loss": 0.3578, + "step": 2772 + }, + { + "epoch": 0.74, + "learning_rate": 4.5364415862808146e-05, + "loss": 0.3596, + "step": 2773 + }, + { + "epoch": 0.74, + "learning_rate": 4.536274115755627e-05, + "loss": 0.4532, + "step": 2774 + }, + { + "epoch": 0.74, + "learning_rate": 4.5361066452304395e-05, + "loss": 0.2025, + "step": 2775 + }, + { + "epoch": 0.74, + "learning_rate": 4.535939174705252e-05, + "loss": 0.3488, + "step": 2776 + }, + { + "epoch": 0.74, + "learning_rate": 4.535771704180064e-05, + "loss": 0.2028, + "step": 2777 + }, + { + "epoch": 0.74, + "learning_rate": 4.535604233654877e-05, + "loss": 0.3769, + "step": 2778 + }, + { + "epoch": 0.74, + "learning_rate": 4.53543676312969e-05, + "loss": 0.2156, + "step": 2779 + }, + { + "epoch": 0.74, + "learning_rate": 4.535269292604502e-05, + "loss": 0.3831, + "step": 2780 + }, + { + "epoch": 0.75, + "learning_rate": 4.535101822079314e-05, + "loss": 0.273, + "step": 2781 + }, + { + "epoch": 0.75, + "learning_rate": 4.5349343515541264e-05, + "loss": 0.3638, + "step": 2782 + }, + { + "epoch": 0.75, + "learning_rate": 4.534766881028939e-05, + "loss": 0.3803, + "step": 2783 + }, + { + "epoch": 0.75, + "learning_rate": 4.534599410503751e-05, + "loss": 0.2566, + "step": 2784 + }, + { + "epoch": 0.75, + "learning_rate": 4.534431939978564e-05, + "loss": 0.6349, + "step": 2785 + }, + { + "epoch": 0.75, + "learning_rate": 4.534264469453377e-05, + "loss": 0.3739, + "step": 2786 + }, + { + "epoch": 0.75, + "learning_rate": 4.534096998928189e-05, + "loss": 0.2281, + "step": 2787 + }, + { + "epoch": 0.75, + "learning_rate": 4.5339295284030016e-05, + "loss": 0.2309, + "step": 2788 + }, + { + "epoch": 0.75, + "learning_rate": 4.533762057877814e-05, + "loss": 0.3251, + "step": 2789 + }, + { + "epoch": 0.75, + "learning_rate": 4.5335945873526265e-05, + "loss": 0.2318, + "step": 2790 + }, + { + "epoch": 0.75, + "learning_rate": 4.533427116827438e-05, + "loss": 0.3435, + "step": 2791 + }, + { + "epoch": 0.75, + "learning_rate": 4.5332596463022506e-05, + "loss": 0.4519, + "step": 2792 + }, + { + "epoch": 0.75, + "learning_rate": 4.533092175777063e-05, + "loss": 0.352, + "step": 2793 + }, + { + "epoch": 0.75, + "learning_rate": 4.532924705251876e-05, + "loss": 0.3487, + "step": 2794 + }, + { + "epoch": 0.75, + "learning_rate": 4.5327572347266886e-05, + "loss": 0.2796, + "step": 2795 + }, + { + "epoch": 0.75, + "learning_rate": 4.532589764201501e-05, + "loss": 0.372, + "step": 2796 + }, + { + "epoch": 0.75, + "learning_rate": 4.5324222936763134e-05, + "loss": 0.1932, + "step": 2797 + }, + { + "epoch": 0.75, + "learning_rate": 4.532254823151126e-05, + "loss": 0.2056, + "step": 2798 + }, + { + "epoch": 0.75, + "learning_rate": 4.532087352625938e-05, + "loss": 0.3334, + "step": 2799 + }, + { + "epoch": 0.75, + "learning_rate": 4.53191988210075e-05, + "loss": 0.3273, + "step": 2800 + }, + { + "epoch": 0.75, + "learning_rate": 4.5317524115755624e-05, + "loss": 0.2533, + "step": 2801 + }, + { + "epoch": 0.75, + "learning_rate": 4.5315849410503755e-05, + "loss": 0.251, + "step": 2802 + }, + { + "epoch": 0.75, + "learning_rate": 4.531417470525188e-05, + "loss": 0.1675, + "step": 2803 + }, + { + "epoch": 0.75, + "learning_rate": 4.5312500000000004e-05, + "loss": 0.422, + "step": 2804 + }, + { + "epoch": 0.75, + "learning_rate": 4.531082529474813e-05, + "loss": 0.348, + "step": 2805 + }, + { + "epoch": 0.75, + "learning_rate": 4.530915058949625e-05, + "loss": 0.2059, + "step": 2806 + }, + { + "epoch": 0.75, + "learning_rate": 4.5307475884244376e-05, + "loss": 0.4723, + "step": 2807 + }, + { + "epoch": 0.75, + "learning_rate": 4.53058011789925e-05, + "loss": 0.2616, + "step": 2808 + }, + { + "epoch": 0.75, + "learning_rate": 4.530412647374062e-05, + "loss": 0.1747, + "step": 2809 + }, + { + "epoch": 0.75, + "learning_rate": 4.530245176848875e-05, + "loss": 0.4894, + "step": 2810 + }, + { + "epoch": 0.75, + "learning_rate": 4.530077706323687e-05, + "loss": 0.5, + "step": 2811 + }, + { + "epoch": 0.75, + "learning_rate": 4.5299102357985e-05, + "loss": 0.2431, + "step": 2812 + }, + { + "epoch": 0.75, + "learning_rate": 4.529742765273312e-05, + "loss": 0.5412, + "step": 2813 + }, + { + "epoch": 0.75, + "learning_rate": 4.5295752947481246e-05, + "loss": 0.2203, + "step": 2814 + }, + { + "epoch": 0.75, + "learning_rate": 4.529407824222937e-05, + "loss": 0.2885, + "step": 2815 + }, + { + "epoch": 0.75, + "learning_rate": 4.5292403536977494e-05, + "loss": 0.3759, + "step": 2816 + }, + { + "epoch": 0.75, + "learning_rate": 4.529072883172562e-05, + "loss": 0.3501, + "step": 2817 + }, + { + "epoch": 0.76, + "learning_rate": 4.528905412647374e-05, + "loss": 0.4588, + "step": 2818 + }, + { + "epoch": 0.76, + "learning_rate": 4.528737942122187e-05, + "loss": 0.2657, + "step": 2819 + }, + { + "epoch": 0.76, + "learning_rate": 4.528570471596999e-05, + "loss": 0.3151, + "step": 2820 + }, + { + "epoch": 0.76, + "learning_rate": 4.5284030010718115e-05, + "loss": 0.3059, + "step": 2821 + }, + { + "epoch": 0.76, + "learning_rate": 4.528235530546624e-05, + "loss": 0.2191, + "step": 2822 + }, + { + "epoch": 0.76, + "learning_rate": 4.5280680600214364e-05, + "loss": 0.2259, + "step": 2823 + }, + { + "epoch": 0.76, + "learning_rate": 4.527900589496249e-05, + "loss": 0.43, + "step": 2824 + }, + { + "epoch": 0.76, + "learning_rate": 4.527733118971061e-05, + "loss": 0.5397, + "step": 2825 + }, + { + "epoch": 0.76, + "learning_rate": 4.5275656484458736e-05, + "loss": 0.1951, + "step": 2826 + }, + { + "epoch": 0.76, + "learning_rate": 4.527398177920687e-05, + "loss": 0.2714, + "step": 2827 + }, + { + "epoch": 0.76, + "learning_rate": 4.5272307073954985e-05, + "loss": 0.2449, + "step": 2828 + }, + { + "epoch": 0.76, + "learning_rate": 4.527063236870311e-05, + "loss": 0.4352, + "step": 2829 + }, + { + "epoch": 0.76, + "learning_rate": 4.526895766345123e-05, + "loss": 0.6191, + "step": 2830 + }, + { + "epoch": 0.76, + "learning_rate": 4.526728295819936e-05, + "loss": 0.3937, + "step": 2831 + }, + { + "epoch": 0.76, + "learning_rate": 4.526560825294748e-05, + "loss": 0.216, + "step": 2832 + }, + { + "epoch": 0.76, + "learning_rate": 4.5263933547695606e-05, + "loss": 0.554, + "step": 2833 + }, + { + "epoch": 0.76, + "learning_rate": 4.526225884244373e-05, + "loss": 0.5186, + "step": 2834 + }, + { + "epoch": 0.76, + "learning_rate": 4.526058413719186e-05, + "loss": 0.3785, + "step": 2835 + }, + { + "epoch": 0.76, + "learning_rate": 4.5258909431939985e-05, + "loss": 0.319, + "step": 2836 + }, + { + "epoch": 0.76, + "learning_rate": 4.52572347266881e-05, + "loss": 0.4789, + "step": 2837 + }, + { + "epoch": 0.76, + "learning_rate": 4.525556002143623e-05, + "loss": 0.3706, + "step": 2838 + }, + { + "epoch": 0.76, + "learning_rate": 4.525388531618435e-05, + "loss": 0.463, + "step": 2839 + }, + { + "epoch": 0.76, + "learning_rate": 4.5252210610932475e-05, + "loss": 0.2048, + "step": 2840 + }, + { + "epoch": 0.76, + "learning_rate": 4.52505359056806e-05, + "loss": 0.4279, + "step": 2841 + }, + { + "epoch": 0.76, + "learning_rate": 4.524886120042873e-05, + "loss": 0.4351, + "step": 2842 + }, + { + "epoch": 0.76, + "learning_rate": 4.5247186495176855e-05, + "loss": 0.227, + "step": 2843 + }, + { + "epoch": 0.76, + "learning_rate": 4.524551178992498e-05, + "loss": 0.2057, + "step": 2844 + }, + { + "epoch": 0.76, + "learning_rate": 4.52438370846731e-05, + "loss": 0.48, + "step": 2845 + }, + { + "epoch": 0.76, + "learning_rate": 4.524216237942122e-05, + "loss": 0.2175, + "step": 2846 + }, + { + "epoch": 0.76, + "learning_rate": 4.5240487674169345e-05, + "loss": 0.2314, + "step": 2847 + }, + { + "epoch": 0.76, + "learning_rate": 4.523881296891747e-05, + "loss": 0.4484, + "step": 2848 + }, + { + "epoch": 0.76, + "learning_rate": 4.523713826366559e-05, + "loss": 0.2563, + "step": 2849 + }, + { + "epoch": 0.76, + "learning_rate": 4.5235463558413724e-05, + "loss": 0.2808, + "step": 2850 + }, + { + "epoch": 0.76, + "learning_rate": 4.523378885316185e-05, + "loss": 0.2776, + "step": 2851 + }, + { + "epoch": 0.76, + "learning_rate": 4.523211414790997e-05, + "loss": 0.1834, + "step": 2852 + }, + { + "epoch": 0.76, + "learning_rate": 4.52304394426581e-05, + "loss": 0.401, + "step": 2853 + }, + { + "epoch": 0.76, + "learning_rate": 4.522876473740622e-05, + "loss": 0.4017, + "step": 2854 + }, + { + "epoch": 0.76, + "learning_rate": 4.5227090032154345e-05, + "loss": 0.3413, + "step": 2855 + }, + { + "epoch": 0.77, + "learning_rate": 4.522541532690246e-05, + "loss": 0.317, + "step": 2856 + }, + { + "epoch": 0.77, + "learning_rate": 4.522374062165059e-05, + "loss": 0.4362, + "step": 2857 + }, + { + "epoch": 0.77, + "learning_rate": 4.522206591639872e-05, + "loss": 0.2168, + "step": 2858 + }, + { + "epoch": 0.77, + "learning_rate": 4.522039121114684e-05, + "loss": 0.4666, + "step": 2859 + }, + { + "epoch": 0.77, + "learning_rate": 4.5218716505894966e-05, + "loss": 0.2253, + "step": 2860 + }, + { + "epoch": 0.77, + "learning_rate": 4.521704180064309e-05, + "loss": 0.211, + "step": 2861 + }, + { + "epoch": 0.77, + "learning_rate": 4.5215367095391215e-05, + "loss": 0.1735, + "step": 2862 + }, + { + "epoch": 0.77, + "learning_rate": 4.521369239013934e-05, + "loss": 0.3989, + "step": 2863 + }, + { + "epoch": 0.77, + "learning_rate": 4.521201768488746e-05, + "loss": 0.2011, + "step": 2864 + }, + { + "epoch": 0.77, + "learning_rate": 4.521034297963558e-05, + "loss": 0.354, + "step": 2865 + }, + { + "epoch": 0.77, + "learning_rate": 4.520866827438371e-05, + "loss": 0.2212, + "step": 2866 + }, + { + "epoch": 0.77, + "learning_rate": 4.5206993569131836e-05, + "loss": 0.2124, + "step": 2867 + }, + { + "epoch": 0.77, + "learning_rate": 4.520531886387996e-05, + "loss": 0.4456, + "step": 2868 + }, + { + "epoch": 0.77, + "learning_rate": 4.5203644158628084e-05, + "loss": 0.3057, + "step": 2869 + }, + { + "epoch": 0.77, + "learning_rate": 4.520196945337621e-05, + "loss": 0.3605, + "step": 2870 + }, + { + "epoch": 0.77, + "learning_rate": 4.520029474812433e-05, + "loss": 0.2545, + "step": 2871 + }, + { + "epoch": 0.77, + "learning_rate": 4.519862004287246e-05, + "loss": 0.2269, + "step": 2872 + }, + { + "epoch": 0.77, + "learning_rate": 4.519694533762058e-05, + "loss": 0.395, + "step": 2873 + }, + { + "epoch": 0.77, + "learning_rate": 4.5195270632368705e-05, + "loss": 0.311, + "step": 2874 + }, + { + "epoch": 0.77, + "learning_rate": 4.519359592711683e-05, + "loss": 0.2514, + "step": 2875 + }, + { + "epoch": 0.77, + "learning_rate": 4.5191921221864954e-05, + "loss": 0.2312, + "step": 2876 + }, + { + "epoch": 0.77, + "learning_rate": 4.519024651661308e-05, + "loss": 0.2392, + "step": 2877 + }, + { + "epoch": 0.77, + "learning_rate": 4.51885718113612e-05, + "loss": 0.3176, + "step": 2878 + }, + { + "epoch": 0.77, + "learning_rate": 4.5186897106109326e-05, + "loss": 0.4238, + "step": 2879 + }, + { + "epoch": 0.77, + "learning_rate": 4.518522240085745e-05, + "loss": 0.398, + "step": 2880 + }, + { + "epoch": 0.77, + "learning_rate": 4.5183547695605575e-05, + "loss": 0.1883, + "step": 2881 + }, + { + "epoch": 0.77, + "learning_rate": 4.51818729903537e-05, + "loss": 0.4564, + "step": 2882 + }, + { + "epoch": 0.77, + "learning_rate": 4.518019828510183e-05, + "loss": 0.2224, + "step": 2883 + }, + { + "epoch": 0.77, + "learning_rate": 4.517852357984995e-05, + "loss": 0.1653, + "step": 2884 + }, + { + "epoch": 0.77, + "learning_rate": 4.517684887459807e-05, + "loss": 0.2501, + "step": 2885 + }, + { + "epoch": 0.77, + "learning_rate": 4.5175174169346196e-05, + "loss": 0.2456, + "step": 2886 + }, + { + "epoch": 0.77, + "learning_rate": 4.517349946409432e-05, + "loss": 0.181, + "step": 2887 + }, + { + "epoch": 0.77, + "learning_rate": 4.5171824758842444e-05, + "loss": 0.2248, + "step": 2888 + }, + { + "epoch": 0.77, + "learning_rate": 4.517015005359057e-05, + "loss": 0.7485, + "step": 2889 + }, + { + "epoch": 0.77, + "learning_rate": 4.516847534833869e-05, + "loss": 0.4345, + "step": 2890 + }, + { + "epoch": 0.77, + "learning_rate": 4.5166800643086823e-05, + "loss": 0.2351, + "step": 2891 + }, + { + "epoch": 0.77, + "learning_rate": 4.516512593783495e-05, + "loss": 0.515, + "step": 2892 + }, + { + "epoch": 0.78, + "learning_rate": 4.5163451232583065e-05, + "loss": 0.3067, + "step": 2893 + }, + { + "epoch": 0.78, + "learning_rate": 4.516177652733119e-05, + "loss": 0.2957, + "step": 2894 + }, + { + "epoch": 0.78, + "learning_rate": 4.5160101822079313e-05, + "loss": 0.3518, + "step": 2895 + }, + { + "epoch": 0.78, + "learning_rate": 4.515842711682744e-05, + "loss": 0.33, + "step": 2896 + }, + { + "epoch": 0.78, + "learning_rate": 4.515675241157556e-05, + "loss": 0.5819, + "step": 2897 + }, + { + "epoch": 0.78, + "learning_rate": 4.515507770632369e-05, + "loss": 0.24, + "step": 2898 + }, + { + "epoch": 0.78, + "learning_rate": 4.515340300107182e-05, + "loss": 0.2506, + "step": 2899 + }, + { + "epoch": 0.78, + "learning_rate": 4.515172829581994e-05, + "loss": 0.257, + "step": 2900 + }, + { + "epoch": 0.78, + "learning_rate": 4.5150053590568066e-05, + "loss": 0.2468, + "step": 2901 + }, + { + "epoch": 0.78, + "learning_rate": 4.514837888531618e-05, + "loss": 0.268, + "step": 2902 + }, + { + "epoch": 0.78, + "learning_rate": 4.514670418006431e-05, + "loss": 0.4786, + "step": 2903 + }, + { + "epoch": 0.78, + "learning_rate": 4.514502947481243e-05, + "loss": 0.2108, + "step": 2904 + }, + { + "epoch": 0.78, + "learning_rate": 4.5143354769560556e-05, + "loss": 0.2068, + "step": 2905 + }, + { + "epoch": 0.78, + "learning_rate": 4.5141680064308687e-05, + "loss": 0.3818, + "step": 2906 + }, + { + "epoch": 0.78, + "learning_rate": 4.514000535905681e-05, + "loss": 0.2177, + "step": 2907 + }, + { + "epoch": 0.78, + "learning_rate": 4.5138330653804935e-05, + "loss": 0.2967, + "step": 2908 + }, + { + "epoch": 0.78, + "learning_rate": 4.513665594855306e-05, + "loss": 0.1926, + "step": 2909 + }, + { + "epoch": 0.78, + "learning_rate": 4.5134981243301183e-05, + "loss": 0.3559, + "step": 2910 + }, + { + "epoch": 0.78, + "learning_rate": 4.513330653804931e-05, + "loss": 0.5073, + "step": 2911 + }, + { + "epoch": 0.78, + "learning_rate": 4.5131631832797425e-05, + "loss": 0.3549, + "step": 2912 + }, + { + "epoch": 0.78, + "learning_rate": 4.512995712754555e-05, + "loss": 0.275, + "step": 2913 + }, + { + "epoch": 0.78, + "learning_rate": 4.512828242229368e-05, + "loss": 0.3706, + "step": 2914 + }, + { + "epoch": 0.78, + "learning_rate": 4.5126607717041804e-05, + "loss": 0.3276, + "step": 2915 + }, + { + "epoch": 0.78, + "learning_rate": 4.512493301178993e-05, + "loss": 0.2785, + "step": 2916 + }, + { + "epoch": 0.78, + "learning_rate": 4.512325830653805e-05, + "loss": 0.2168, + "step": 2917 + }, + { + "epoch": 0.78, + "learning_rate": 4.512158360128618e-05, + "loss": 0.2022, + "step": 2918 + }, + { + "epoch": 0.78, + "learning_rate": 4.51199088960343e-05, + "loss": 0.2368, + "step": 2919 + }, + { + "epoch": 0.78, + "learning_rate": 4.5118234190782426e-05, + "loss": 0.6083, + "step": 2920 + }, + { + "epoch": 0.78, + "learning_rate": 4.511655948553054e-05, + "loss": 0.3755, + "step": 2921 + }, + { + "epoch": 0.78, + "learning_rate": 4.5114884780278674e-05, + "loss": 0.2976, + "step": 2922 + }, + { + "epoch": 0.78, + "learning_rate": 4.51132100750268e-05, + "loss": 0.2766, + "step": 2923 + }, + { + "epoch": 0.78, + "learning_rate": 4.511153536977492e-05, + "loss": 0.3824, + "step": 2924 + }, + { + "epoch": 0.78, + "learning_rate": 4.5109860664523047e-05, + "loss": 0.1962, + "step": 2925 + }, + { + "epoch": 0.78, + "learning_rate": 4.510818595927117e-05, + "loss": 0.1962, + "step": 2926 + }, + { + "epoch": 0.78, + "learning_rate": 4.5106511254019295e-05, + "loss": 0.4362, + "step": 2927 + }, + { + "epoch": 0.78, + "learning_rate": 4.510483654876742e-05, + "loss": 0.2745, + "step": 2928 + }, + { + "epoch": 0.78, + "learning_rate": 4.5103161843515543e-05, + "loss": 0.1992, + "step": 2929 + }, + { + "epoch": 0.79, + "learning_rate": 4.510148713826367e-05, + "loss": 0.3077, + "step": 2930 + }, + { + "epoch": 0.79, + "learning_rate": 4.509981243301179e-05, + "loss": 0.2661, + "step": 2931 + }, + { + "epoch": 0.79, + "learning_rate": 4.5098137727759916e-05, + "loss": 0.3283, + "step": 2932 + }, + { + "epoch": 0.79, + "learning_rate": 4.509646302250804e-05, + "loss": 0.2429, + "step": 2933 + }, + { + "epoch": 0.79, + "learning_rate": 4.5094788317256164e-05, + "loss": 0.3967, + "step": 2934 + }, + { + "epoch": 0.79, + "learning_rate": 4.509311361200429e-05, + "loss": 0.3009, + "step": 2935 + }, + { + "epoch": 0.79, + "learning_rate": 4.509143890675241e-05, + "loss": 0.274, + "step": 2936 + }, + { + "epoch": 0.79, + "learning_rate": 4.508976420150054e-05, + "loss": 0.255, + "step": 2937 + }, + { + "epoch": 0.79, + "learning_rate": 4.508808949624866e-05, + "loss": 0.1602, + "step": 2938 + }, + { + "epoch": 0.79, + "learning_rate": 4.508641479099679e-05, + "loss": 0.2784, + "step": 2939 + }, + { + "epoch": 0.79, + "learning_rate": 4.508474008574491e-05, + "loss": 0.2304, + "step": 2940 + }, + { + "epoch": 0.79, + "learning_rate": 4.5083065380493034e-05, + "loss": 0.1752, + "step": 2941 + }, + { + "epoch": 0.79, + "learning_rate": 4.508139067524116e-05, + "loss": 0.4392, + "step": 2942 + }, + { + "epoch": 0.79, + "learning_rate": 4.507971596998928e-05, + "loss": 0.1906, + "step": 2943 + }, + { + "epoch": 0.79, + "learning_rate": 4.5078041264737407e-05, + "loss": 0.2089, + "step": 2944 + }, + { + "epoch": 0.79, + "learning_rate": 4.507636655948553e-05, + "loss": 0.2073, + "step": 2945 + }, + { + "epoch": 0.79, + "learning_rate": 4.5074691854233655e-05, + "loss": 0.2167, + "step": 2946 + }, + { + "epoch": 0.79, + "learning_rate": 4.5073017148981786e-05, + "loss": 0.2115, + "step": 2947 + }, + { + "epoch": 0.79, + "learning_rate": 4.507134244372991e-05, + "loss": 0.2028, + "step": 2948 + }, + { + "epoch": 0.79, + "learning_rate": 4.506966773847803e-05, + "loss": 0.2408, + "step": 2949 + }, + { + "epoch": 0.79, + "learning_rate": 4.506799303322615e-05, + "loss": 0.4431, + "step": 2950 + }, + { + "epoch": 0.79, + "learning_rate": 4.5066318327974276e-05, + "loss": 0.2971, + "step": 2951 + }, + { + "epoch": 0.79, + "learning_rate": 4.50646436227224e-05, + "loss": 0.2096, + "step": 2952 + }, + { + "epoch": 0.79, + "learning_rate": 4.5062968917470524e-05, + "loss": 0.5546, + "step": 2953 + }, + { + "epoch": 0.79, + "learning_rate": 4.5061294212218655e-05, + "loss": 0.2312, + "step": 2954 + }, + { + "epoch": 0.79, + "learning_rate": 4.505961950696678e-05, + "loss": 0.4846, + "step": 2955 + }, + { + "epoch": 0.79, + "learning_rate": 4.5057944801714904e-05, + "loss": 0.1981, + "step": 2956 + }, + { + "epoch": 0.79, + "learning_rate": 4.505627009646303e-05, + "loss": 0.3454, + "step": 2957 + }, + { + "epoch": 0.79, + "learning_rate": 4.5054595391211145e-05, + "loss": 0.1775, + "step": 2958 + }, + { + "epoch": 0.79, + "learning_rate": 4.505292068595927e-05, + "loss": 0.2459, + "step": 2959 + }, + { + "epoch": 0.79, + "learning_rate": 4.5051245980707394e-05, + "loss": 0.1918, + "step": 2960 + }, + { + "epoch": 0.79, + "learning_rate": 4.504957127545552e-05, + "loss": 0.2075, + "step": 2961 + }, + { + "epoch": 0.79, + "learning_rate": 4.504789657020365e-05, + "loss": 0.2962, + "step": 2962 + }, + { + "epoch": 0.79, + "learning_rate": 4.504622186495177e-05, + "loss": 0.2327, + "step": 2963 + }, + { + "epoch": 0.79, + "learning_rate": 4.50445471596999e-05, + "loss": 0.2566, + "step": 2964 + }, + { + "epoch": 0.79, + "learning_rate": 4.504287245444802e-05, + "loss": 0.2107, + "step": 2965 + }, + { + "epoch": 0.79, + "learning_rate": 4.5041197749196146e-05, + "loss": 0.2039, + "step": 2966 + }, + { + "epoch": 0.79, + "learning_rate": 4.503952304394427e-05, + "loss": 0.4404, + "step": 2967 + }, + { + "epoch": 0.8, + "learning_rate": 4.503784833869239e-05, + "loss": 0.5178, + "step": 2968 + }, + { + "epoch": 0.8, + "learning_rate": 4.503617363344051e-05, + "loss": 0.3676, + "step": 2969 + }, + { + "epoch": 0.8, + "learning_rate": 4.503449892818864e-05, + "loss": 0.3589, + "step": 2970 + }, + { + "epoch": 0.8, + "learning_rate": 4.503282422293677e-05, + "loss": 0.4976, + "step": 2971 + }, + { + "epoch": 0.8, + "learning_rate": 4.503114951768489e-05, + "loss": 0.1914, + "step": 2972 + }, + { + "epoch": 0.8, + "learning_rate": 4.5029474812433015e-05, + "loss": 0.5132, + "step": 2973 + }, + { + "epoch": 0.8, + "learning_rate": 4.502780010718114e-05, + "loss": 0.4631, + "step": 2974 + }, + { + "epoch": 0.8, + "learning_rate": 4.5026125401929264e-05, + "loss": 0.264, + "step": 2975 + }, + { + "epoch": 0.8, + "learning_rate": 4.502445069667739e-05, + "loss": 0.5044, + "step": 2976 + }, + { + "epoch": 0.8, + "learning_rate": 4.5022775991425505e-05, + "loss": 0.2044, + "step": 2977 + }, + { + "epoch": 0.8, + "learning_rate": 4.5021101286173636e-05, + "loss": 0.2604, + "step": 2978 + }, + { + "epoch": 0.8, + "learning_rate": 4.501942658092176e-05, + "loss": 0.1882, + "step": 2979 + }, + { + "epoch": 0.8, + "learning_rate": 4.5017751875669885e-05, + "loss": 0.1921, + "step": 2980 + }, + { + "epoch": 0.8, + "learning_rate": 4.501607717041801e-05, + "loss": 0.1799, + "step": 2981 + }, + { + "epoch": 0.8, + "learning_rate": 4.501440246516613e-05, + "loss": 0.5221, + "step": 2982 + }, + { + "epoch": 0.8, + "learning_rate": 4.501272775991426e-05, + "loss": 0.2249, + "step": 2983 + }, + { + "epoch": 0.8, + "learning_rate": 4.501105305466238e-05, + "loss": 0.3397, + "step": 2984 + }, + { + "epoch": 0.8, + "learning_rate": 4.5009378349410506e-05, + "loss": 0.2041, + "step": 2985 + }, + { + "epoch": 0.8, + "learning_rate": 4.500770364415863e-05, + "loss": 0.2587, + "step": 2986 + }, + { + "epoch": 0.8, + "learning_rate": 4.5006028938906754e-05, + "loss": 0.3508, + "step": 2987 + }, + { + "epoch": 0.8, + "learning_rate": 4.500435423365488e-05, + "loss": 0.2086, + "step": 2988 + }, + { + "epoch": 0.8, + "learning_rate": 4.5002679528403e-05, + "loss": 0.4064, + "step": 2989 + }, + { + "epoch": 0.8, + "learning_rate": 4.500100482315113e-05, + "loss": 0.3353, + "step": 2990 + }, + { + "epoch": 0.8, + "learning_rate": 4.499933011789925e-05, + "loss": 0.2242, + "step": 2991 + }, + { + "epoch": 0.8, + "learning_rate": 4.4997655412647375e-05, + "loss": 0.4679, + "step": 2992 + }, + { + "epoch": 0.8, + "learning_rate": 4.49959807073955e-05, + "loss": 0.5856, + "step": 2993 + }, + { + "epoch": 0.8, + "learning_rate": 4.4994306002143624e-05, + "loss": 0.3398, + "step": 2994 + }, + { + "epoch": 0.8, + "learning_rate": 4.4992631296891755e-05, + "loss": 0.4862, + "step": 2995 + }, + { + "epoch": 0.8, + "learning_rate": 4.499095659163987e-05, + "loss": 0.2418, + "step": 2996 + }, + { + "epoch": 0.8, + "learning_rate": 4.4989281886387996e-05, + "loss": 0.4361, + "step": 2997 + }, + { + "epoch": 0.8, + "learning_rate": 4.498760718113612e-05, + "loss": 0.2383, + "step": 2998 + }, + { + "epoch": 0.8, + "learning_rate": 4.4985932475884245e-05, + "loss": 0.5178, + "step": 2999 + }, + { + "epoch": 0.8, + "learning_rate": 4.498425777063237e-05, + "loss": 0.2539, + "step": 3000 + }, + { + "epoch": 0.8, + "learning_rate": 4.498258306538049e-05, + "loss": 0.2694, + "step": 3001 + }, + { + "epoch": 0.8, + "learning_rate": 4.498090836012862e-05, + "loss": 0.5303, + "step": 3002 + }, + { + "epoch": 0.8, + "learning_rate": 4.497923365487675e-05, + "loss": 0.2058, + "step": 3003 + }, + { + "epoch": 0.8, + "learning_rate": 4.497755894962487e-05, + "loss": 0.1879, + "step": 3004 + }, + { + "epoch": 0.81, + "learning_rate": 4.497588424437299e-05, + "loss": 0.3028, + "step": 3005 + }, + { + "epoch": 0.81, + "learning_rate": 4.4974209539121114e-05, + "loss": 0.1686, + "step": 3006 + }, + { + "epoch": 0.81, + "learning_rate": 4.497253483386924e-05, + "loss": 0.1714, + "step": 3007 + }, + { + "epoch": 0.81, + "learning_rate": 4.497086012861736e-05, + "loss": 0.1897, + "step": 3008 + }, + { + "epoch": 0.81, + "learning_rate": 4.496918542336549e-05, + "loss": 0.3722, + "step": 3009 + }, + { + "epoch": 0.81, + "learning_rate": 4.496751071811362e-05, + "loss": 0.3034, + "step": 3010 + }, + { + "epoch": 0.81, + "learning_rate": 4.496583601286174e-05, + "loss": 0.2635, + "step": 3011 + }, + { + "epoch": 0.81, + "learning_rate": 4.4964161307609866e-05, + "loss": 0.3194, + "step": 3012 + }, + { + "epoch": 0.81, + "learning_rate": 4.496248660235799e-05, + "loss": 0.3325, + "step": 3013 + }, + { + "epoch": 0.81, + "learning_rate": 4.496081189710611e-05, + "loss": 0.1858, + "step": 3014 + }, + { + "epoch": 0.81, + "learning_rate": 4.495913719185423e-05, + "loss": 0.3566, + "step": 3015 + }, + { + "epoch": 0.81, + "learning_rate": 4.4957462486602356e-05, + "loss": 0.2268, + "step": 3016 + }, + { + "epoch": 0.81, + "learning_rate": 4.495578778135048e-05, + "loss": 0.5492, + "step": 3017 + }, + { + "epoch": 0.81, + "learning_rate": 4.495411307609861e-05, + "loss": 0.4716, + "step": 3018 + }, + { + "epoch": 0.81, + "learning_rate": 4.4952438370846736e-05, + "loss": 0.2011, + "step": 3019 + }, + { + "epoch": 0.81, + "learning_rate": 4.495076366559486e-05, + "loss": 0.3896, + "step": 3020 + }, + { + "epoch": 0.81, + "learning_rate": 4.4949088960342984e-05, + "loss": 0.2089, + "step": 3021 + }, + { + "epoch": 0.81, + "learning_rate": 4.494741425509111e-05, + "loss": 0.2741, + "step": 3022 + }, + { + "epoch": 0.81, + "learning_rate": 4.494573954983923e-05, + "loss": 0.3769, + "step": 3023 + }, + { + "epoch": 0.81, + "learning_rate": 4.494406484458735e-05, + "loss": 0.2172, + "step": 3024 + }, + { + "epoch": 0.81, + "learning_rate": 4.4942390139335474e-05, + "loss": 0.6737, + "step": 3025 + }, + { + "epoch": 0.81, + "learning_rate": 4.4940715434083605e-05, + "loss": 0.3161, + "step": 3026 + }, + { + "epoch": 0.81, + "learning_rate": 4.493904072883173e-05, + "loss": 0.3838, + "step": 3027 + }, + { + "epoch": 0.81, + "learning_rate": 4.4937366023579854e-05, + "loss": 0.2515, + "step": 3028 + }, + { + "epoch": 0.81, + "learning_rate": 4.493569131832798e-05, + "loss": 0.1958, + "step": 3029 + }, + { + "epoch": 0.81, + "learning_rate": 4.49340166130761e-05, + "loss": 0.2439, + "step": 3030 + }, + { + "epoch": 0.81, + "learning_rate": 4.4932341907824226e-05, + "loss": 0.3755, + "step": 3031 + }, + { + "epoch": 0.81, + "learning_rate": 4.493066720257235e-05, + "loss": 0.2168, + "step": 3032 + }, + { + "epoch": 0.81, + "learning_rate": 4.492899249732047e-05, + "loss": 0.1987, + "step": 3033 + }, + { + "epoch": 0.81, + "learning_rate": 4.49273177920686e-05, + "loss": 0.3732, + "step": 3034 + }, + { + "epoch": 0.81, + "learning_rate": 4.492564308681672e-05, + "loss": 0.2032, + "step": 3035 + }, + { + "epoch": 0.81, + "learning_rate": 4.492396838156485e-05, + "loss": 0.3163, + "step": 3036 + }, + { + "epoch": 0.81, + "learning_rate": 4.492229367631297e-05, + "loss": 0.3688, + "step": 3037 + }, + { + "epoch": 0.81, + "learning_rate": 4.4920618971061096e-05, + "loss": 0.3595, + "step": 3038 + }, + { + "epoch": 0.81, + "learning_rate": 4.491894426580922e-05, + "loss": 0.2775, + "step": 3039 + }, + { + "epoch": 0.81, + "learning_rate": 4.4917269560557344e-05, + "loss": 0.2136, + "step": 3040 + }, + { + "epoch": 0.81, + "learning_rate": 4.491559485530547e-05, + "loss": 0.2692, + "step": 3041 + }, + { + "epoch": 0.82, + "learning_rate": 4.491392015005359e-05, + "loss": 0.3856, + "step": 3042 + }, + { + "epoch": 0.82, + "learning_rate": 4.491224544480172e-05, + "loss": 0.3669, + "step": 3043 + }, + { + "epoch": 0.82, + "learning_rate": 4.491057073954984e-05, + "loss": 0.3721, + "step": 3044 + }, + { + "epoch": 0.82, + "learning_rate": 4.4908896034297965e-05, + "loss": 0.3273, + "step": 3045 + }, + { + "epoch": 0.82, + "learning_rate": 4.490722132904609e-05, + "loss": 0.2787, + "step": 3046 + }, + { + "epoch": 0.82, + "learning_rate": 4.4905546623794214e-05, + "loss": 0.4989, + "step": 3047 + }, + { + "epoch": 0.82, + "learning_rate": 4.490387191854234e-05, + "loss": 0.3066, + "step": 3048 + }, + { + "epoch": 0.82, + "learning_rate": 4.490219721329046e-05, + "loss": 0.2194, + "step": 3049 + }, + { + "epoch": 0.82, + "learning_rate": 4.4900522508038586e-05, + "loss": 0.1908, + "step": 3050 + }, + { + "epoch": 0.82, + "learning_rate": 4.489884780278672e-05, + "loss": 0.369, + "step": 3051 + }, + { + "epoch": 0.82, + "learning_rate": 4.4897173097534835e-05, + "loss": 0.2126, + "step": 3052 + }, + { + "epoch": 0.82, + "learning_rate": 4.489549839228296e-05, + "loss": 0.2829, + "step": 3053 + }, + { + "epoch": 0.82, + "learning_rate": 4.489382368703108e-05, + "loss": 0.2402, + "step": 3054 + }, + { + "epoch": 0.82, + "learning_rate": 4.489214898177921e-05, + "loss": 0.2763, + "step": 3055 + }, + { + "epoch": 0.82, + "learning_rate": 4.489047427652733e-05, + "loss": 0.4453, + "step": 3056 + }, + { + "epoch": 0.82, + "learning_rate": 4.4888799571275456e-05, + "loss": 0.2986, + "step": 3057 + }, + { + "epoch": 0.82, + "learning_rate": 4.488712486602358e-05, + "loss": 0.3269, + "step": 3058 + }, + { + "epoch": 0.82, + "learning_rate": 4.488545016077171e-05, + "loss": 0.4679, + "step": 3059 + }, + { + "epoch": 0.82, + "learning_rate": 4.4883775455519835e-05, + "loss": 0.5858, + "step": 3060 + }, + { + "epoch": 0.82, + "learning_rate": 4.488210075026795e-05, + "loss": 0.2036, + "step": 3061 + }, + { + "epoch": 0.82, + "learning_rate": 4.488042604501608e-05, + "loss": 0.2198, + "step": 3062 + }, + { + "epoch": 0.82, + "learning_rate": 4.48787513397642e-05, + "loss": 0.1968, + "step": 3063 + }, + { + "epoch": 0.82, + "learning_rate": 4.4877076634512325e-05, + "loss": 0.2136, + "step": 3064 + }, + { + "epoch": 0.82, + "learning_rate": 4.487540192926045e-05, + "loss": 0.3869, + "step": 3065 + }, + { + "epoch": 0.82, + "learning_rate": 4.487372722400858e-05, + "loss": 0.405, + "step": 3066 + }, + { + "epoch": 0.82, + "learning_rate": 4.4872052518756705e-05, + "loss": 0.1878, + "step": 3067 + }, + { + "epoch": 0.82, + "learning_rate": 4.487037781350483e-05, + "loss": 0.2031, + "step": 3068 + }, + { + "epoch": 0.82, + "learning_rate": 4.486870310825295e-05, + "loss": 0.3807, + "step": 3069 + }, + { + "epoch": 0.82, + "learning_rate": 4.486702840300107e-05, + "loss": 0.3811, + "step": 3070 + }, + { + "epoch": 0.82, + "learning_rate": 4.4865353697749195e-05, + "loss": 0.2478, + "step": 3071 + }, + { + "epoch": 0.82, + "learning_rate": 4.486367899249732e-05, + "loss": 0.4546, + "step": 3072 + }, + { + "epoch": 0.82, + "learning_rate": 4.486200428724544e-05, + "loss": 0.2913, + "step": 3073 + }, + { + "epoch": 0.82, + "learning_rate": 4.4860329581993574e-05, + "loss": 0.3353, + "step": 3074 + }, + { + "epoch": 0.82, + "learning_rate": 4.48586548767417e-05, + "loss": 0.2643, + "step": 3075 + }, + { + "epoch": 0.82, + "learning_rate": 4.485698017148982e-05, + "loss": 0.1739, + "step": 3076 + }, + { + "epoch": 0.82, + "learning_rate": 4.485530546623795e-05, + "loss": 0.2057, + "step": 3077 + }, + { + "epoch": 0.82, + "learning_rate": 4.485363076098607e-05, + "loss": 0.3927, + "step": 3078 + }, + { + "epoch": 0.82, + "learning_rate": 4.4851956055734195e-05, + "loss": 0.3273, + "step": 3079 + }, + { + "epoch": 0.83, + "learning_rate": 4.485028135048231e-05, + "loss": 0.1979, + "step": 3080 + }, + { + "epoch": 0.83, + "learning_rate": 4.484860664523044e-05, + "loss": 0.2423, + "step": 3081 + }, + { + "epoch": 0.83, + "learning_rate": 4.484693193997857e-05, + "loss": 0.3209, + "step": 3082 + }, + { + "epoch": 0.83, + "learning_rate": 4.484525723472669e-05, + "loss": 0.2238, + "step": 3083 + }, + { + "epoch": 0.83, + "learning_rate": 4.4843582529474816e-05, + "loss": 0.1665, + "step": 3084 + }, + { + "epoch": 0.83, + "learning_rate": 4.484190782422294e-05, + "loss": 0.4061, + "step": 3085 + }, + { + "epoch": 0.83, + "learning_rate": 4.4840233118971065e-05, + "loss": 0.4442, + "step": 3086 + }, + { + "epoch": 0.83, + "learning_rate": 4.483855841371919e-05, + "loss": 0.3116, + "step": 3087 + }, + { + "epoch": 0.83, + "learning_rate": 4.483688370846731e-05, + "loss": 0.1816, + "step": 3088 + }, + { + "epoch": 0.83, + "learning_rate": 4.483520900321543e-05, + "loss": 0.2311, + "step": 3089 + }, + { + "epoch": 0.83, + "learning_rate": 4.483353429796356e-05, + "loss": 0.263, + "step": 3090 + }, + { + "epoch": 0.83, + "learning_rate": 4.4831859592711686e-05, + "loss": 0.1635, + "step": 3091 + }, + { + "epoch": 0.83, + "learning_rate": 4.483018488745981e-05, + "loss": 0.2123, + "step": 3092 + }, + { + "epoch": 0.83, + "learning_rate": 4.4828510182207934e-05, + "loss": 0.2981, + "step": 3093 + }, + { + "epoch": 0.83, + "learning_rate": 4.482683547695606e-05, + "loss": 0.4861, + "step": 3094 + }, + { + "epoch": 0.83, + "learning_rate": 4.482516077170418e-05, + "loss": 0.6064, + "step": 3095 + }, + { + "epoch": 0.83, + "learning_rate": 4.482348606645231e-05, + "loss": 0.2262, + "step": 3096 + }, + { + "epoch": 0.83, + "learning_rate": 4.482181136120043e-05, + "loss": 0.5192, + "step": 3097 + }, + { + "epoch": 0.83, + "learning_rate": 4.4820136655948555e-05, + "loss": 0.4048, + "step": 3098 + }, + { + "epoch": 0.83, + "learning_rate": 4.481846195069668e-05, + "loss": 0.2061, + "step": 3099 + }, + { + "epoch": 0.83, + "learning_rate": 4.4816787245444804e-05, + "loss": 0.2544, + "step": 3100 + }, + { + "epoch": 0.83, + "learning_rate": 4.481511254019293e-05, + "loss": 0.2247, + "step": 3101 + }, + { + "epoch": 0.83, + "learning_rate": 4.481343783494105e-05, + "loss": 0.1765, + "step": 3102 + }, + { + "epoch": 0.83, + "learning_rate": 4.4811763129689176e-05, + "loss": 0.2351, + "step": 3103 + }, + { + "epoch": 0.83, + "learning_rate": 4.48100884244373e-05, + "loss": 0.202, + "step": 3104 + }, + { + "epoch": 0.83, + "learning_rate": 4.4808413719185425e-05, + "loss": 0.2185, + "step": 3105 + }, + { + "epoch": 0.83, + "learning_rate": 4.480673901393355e-05, + "loss": 0.2212, + "step": 3106 + }, + { + "epoch": 0.83, + "learning_rate": 4.480506430868168e-05, + "loss": 0.281, + "step": 3107 + }, + { + "epoch": 0.83, + "learning_rate": 4.48033896034298e-05, + "loss": 0.399, + "step": 3108 + }, + { + "epoch": 0.83, + "learning_rate": 4.480171489817792e-05, + "loss": 0.3129, + "step": 3109 + }, + { + "epoch": 0.83, + "learning_rate": 4.4800040192926046e-05, + "loss": 0.2122, + "step": 3110 + }, + { + "epoch": 0.83, + "learning_rate": 4.479836548767417e-05, + "loss": 0.1904, + "step": 3111 + }, + { + "epoch": 0.83, + "learning_rate": 4.4796690782422294e-05, + "loss": 0.4355, + "step": 3112 + }, + { + "epoch": 0.83, + "learning_rate": 4.479501607717042e-05, + "loss": 0.5433, + "step": 3113 + }, + { + "epoch": 0.83, + "learning_rate": 4.479334137191854e-05, + "loss": 0.4102, + "step": 3114 + }, + { + "epoch": 0.83, + "learning_rate": 4.4791666666666673e-05, + "loss": 0.2788, + "step": 3115 + }, + { + "epoch": 0.83, + "learning_rate": 4.47899919614148e-05, + "loss": 0.4622, + "step": 3116 + }, + { + "epoch": 0.84, + "learning_rate": 4.4788317256162915e-05, + "loss": 0.1903, + "step": 3117 + }, + { + "epoch": 0.84, + "learning_rate": 4.478664255091104e-05, + "loss": 0.2686, + "step": 3118 + }, + { + "epoch": 0.84, + "learning_rate": 4.4784967845659164e-05, + "loss": 0.2129, + "step": 3119 + }, + { + "epoch": 0.84, + "learning_rate": 4.478329314040729e-05, + "loss": 0.1991, + "step": 3120 + }, + { + "epoch": 0.84, + "learning_rate": 4.478161843515541e-05, + "loss": 0.2485, + "step": 3121 + }, + { + "epoch": 0.84, + "learning_rate": 4.477994372990354e-05, + "loss": 0.324, + "step": 3122 + }, + { + "epoch": 0.84, + "learning_rate": 4.477826902465167e-05, + "loss": 0.1971, + "step": 3123 + }, + { + "epoch": 0.84, + "learning_rate": 4.477659431939979e-05, + "loss": 0.2005, + "step": 3124 + }, + { + "epoch": 0.84, + "learning_rate": 4.4774919614147916e-05, + "loss": 0.3447, + "step": 3125 + }, + { + "epoch": 0.84, + "learning_rate": 4.477324490889603e-05, + "loss": 0.2627, + "step": 3126 + }, + { + "epoch": 0.84, + "learning_rate": 4.477157020364416e-05, + "loss": 0.1904, + "step": 3127 + }, + { + "epoch": 0.84, + "learning_rate": 4.476989549839228e-05, + "loss": 0.2186, + "step": 3128 + }, + { + "epoch": 0.84, + "learning_rate": 4.4768220793140406e-05, + "loss": 0.2085, + "step": 3129 + }, + { + "epoch": 0.84, + "learning_rate": 4.476654608788854e-05, + "loss": 0.207, + "step": 3130 + }, + { + "epoch": 0.84, + "learning_rate": 4.476487138263666e-05, + "loss": 0.2089, + "step": 3131 + }, + { + "epoch": 0.84, + "learning_rate": 4.4763196677384785e-05, + "loss": 0.6099, + "step": 3132 + }, + { + "epoch": 0.84, + "learning_rate": 4.476152197213291e-05, + "loss": 0.3666, + "step": 3133 + }, + { + "epoch": 0.84, + "learning_rate": 4.4759847266881033e-05, + "loss": 0.2548, + "step": 3134 + }, + { + "epoch": 0.84, + "learning_rate": 4.475817256162916e-05, + "loss": 0.3856, + "step": 3135 + }, + { + "epoch": 0.84, + "learning_rate": 4.4756497856377275e-05, + "loss": 0.1867, + "step": 3136 + }, + { + "epoch": 0.84, + "learning_rate": 4.47548231511254e-05, + "loss": 0.2083, + "step": 3137 + }, + { + "epoch": 0.84, + "learning_rate": 4.475314844587353e-05, + "loss": 0.5298, + "step": 3138 + }, + { + "epoch": 0.84, + "learning_rate": 4.4751473740621655e-05, + "loss": 0.1538, + "step": 3139 + }, + { + "epoch": 0.84, + "learning_rate": 4.474979903536978e-05, + "loss": 0.2119, + "step": 3140 + }, + { + "epoch": 0.84, + "learning_rate": 4.47481243301179e-05, + "loss": 0.2638, + "step": 3141 + }, + { + "epoch": 0.84, + "learning_rate": 4.474644962486603e-05, + "loss": 0.2226, + "step": 3142 + }, + { + "epoch": 0.84, + "learning_rate": 4.474477491961415e-05, + "loss": 0.2149, + "step": 3143 + }, + { + "epoch": 0.84, + "learning_rate": 4.4743100214362276e-05, + "loss": 0.5018, + "step": 3144 + }, + { + "epoch": 0.84, + "learning_rate": 4.474142550911039e-05, + "loss": 0.3599, + "step": 3145 + }, + { + "epoch": 0.84, + "learning_rate": 4.4739750803858524e-05, + "loss": 0.2131, + "step": 3146 + }, + { + "epoch": 0.84, + "learning_rate": 4.473807609860665e-05, + "loss": 0.2212, + "step": 3147 + }, + { + "epoch": 0.84, + "learning_rate": 4.473640139335477e-05, + "loss": 0.5956, + "step": 3148 + }, + { + "epoch": 0.84, + "learning_rate": 4.4734726688102897e-05, + "loss": 0.2901, + "step": 3149 + }, + { + "epoch": 0.84, + "learning_rate": 4.473305198285102e-05, + "loss": 0.6532, + "step": 3150 + }, + { + "epoch": 0.84, + "learning_rate": 4.4731377277599145e-05, + "loss": 0.2178, + "step": 3151 + }, + { + "epoch": 0.84, + "learning_rate": 4.472970257234727e-05, + "loss": 0.4409, + "step": 3152 + }, + { + "epoch": 0.84, + "learning_rate": 4.4728027867095393e-05, + "loss": 0.2086, + "step": 3153 + }, + { + "epoch": 0.85, + "learning_rate": 4.472635316184352e-05, + "loss": 0.2224, + "step": 3154 + }, + { + "epoch": 0.85, + "learning_rate": 4.472467845659164e-05, + "loss": 0.3492, + "step": 3155 + }, + { + "epoch": 0.85, + "learning_rate": 4.4723003751339766e-05, + "loss": 0.2631, + "step": 3156 + }, + { + "epoch": 0.85, + "learning_rate": 4.472132904608789e-05, + "loss": 0.1951, + "step": 3157 + }, + { + "epoch": 0.85, + "learning_rate": 4.4719654340836014e-05, + "loss": 0.1939, + "step": 3158 + }, + { + "epoch": 0.85, + "learning_rate": 4.471797963558414e-05, + "loss": 0.2022, + "step": 3159 + }, + { + "epoch": 0.85, + "learning_rate": 4.471630493033226e-05, + "loss": 0.1981, + "step": 3160 + }, + { + "epoch": 0.85, + "learning_rate": 4.471463022508039e-05, + "loss": 0.5093, + "step": 3161 + }, + { + "epoch": 0.85, + "learning_rate": 4.471295551982851e-05, + "loss": 0.3071, + "step": 3162 + }, + { + "epoch": 0.85, + "learning_rate": 4.471128081457664e-05, + "loss": 0.3951, + "step": 3163 + }, + { + "epoch": 0.85, + "learning_rate": 4.470960610932476e-05, + "loss": 0.2139, + "step": 3164 + }, + { + "epoch": 0.85, + "learning_rate": 4.4707931404072884e-05, + "loss": 0.3714, + "step": 3165 + }, + { + "epoch": 0.85, + "learning_rate": 4.470625669882101e-05, + "loss": 0.5449, + "step": 3166 + }, + { + "epoch": 0.85, + "learning_rate": 4.470458199356913e-05, + "loss": 0.3058, + "step": 3167 + }, + { + "epoch": 0.85, + "learning_rate": 4.4702907288317257e-05, + "loss": 0.2433, + "step": 3168 + }, + { + "epoch": 0.85, + "learning_rate": 4.470123258306538e-05, + "loss": 0.4647, + "step": 3169 + }, + { + "epoch": 0.85, + "learning_rate": 4.4699557877813505e-05, + "loss": 0.205, + "step": 3170 + }, + { + "epoch": 0.85, + "learning_rate": 4.4697883172561636e-05, + "loss": 0.2317, + "step": 3171 + }, + { + "epoch": 0.85, + "learning_rate": 4.469620846730976e-05, + "loss": 0.4873, + "step": 3172 + }, + { + "epoch": 0.85, + "learning_rate": 4.469453376205788e-05, + "loss": 0.2433, + "step": 3173 + }, + { + "epoch": 0.85, + "learning_rate": 4.4692859056806e-05, + "loss": 0.471, + "step": 3174 + }, + { + "epoch": 0.85, + "learning_rate": 4.4691184351554126e-05, + "loss": 0.2293, + "step": 3175 + }, + { + "epoch": 0.85, + "learning_rate": 4.468950964630225e-05, + "loss": 0.1976, + "step": 3176 + }, + { + "epoch": 0.85, + "learning_rate": 4.4687834941050374e-05, + "loss": 0.2025, + "step": 3177 + }, + { + "epoch": 0.85, + "learning_rate": 4.4686160235798505e-05, + "loss": 0.1742, + "step": 3178 + }, + { + "epoch": 0.85, + "learning_rate": 4.468448553054663e-05, + "loss": 0.2526, + "step": 3179 + }, + { + "epoch": 0.85, + "learning_rate": 4.4682810825294754e-05, + "loss": 0.3224, + "step": 3180 + }, + { + "epoch": 0.85, + "learning_rate": 4.468113612004288e-05, + "loss": 0.2183, + "step": 3181 + }, + { + "epoch": 0.85, + "learning_rate": 4.4679461414790996e-05, + "loss": 0.2859, + "step": 3182 + }, + { + "epoch": 0.85, + "learning_rate": 4.467778670953912e-05, + "loss": 0.4133, + "step": 3183 + }, + { + "epoch": 0.85, + "learning_rate": 4.4676112004287244e-05, + "loss": 0.2062, + "step": 3184 + }, + { + "epoch": 0.85, + "learning_rate": 4.467443729903537e-05, + "loss": 0.2705, + "step": 3185 + }, + { + "epoch": 0.85, + "learning_rate": 4.46727625937835e-05, + "loss": 0.2153, + "step": 3186 + }, + { + "epoch": 0.85, + "learning_rate": 4.467108788853162e-05, + "loss": 0.1948, + "step": 3187 + }, + { + "epoch": 0.85, + "learning_rate": 4.466941318327975e-05, + "loss": 0.4143, + "step": 3188 + }, + { + "epoch": 0.85, + "learning_rate": 4.466773847802787e-05, + "loss": 0.2364, + "step": 3189 + }, + { + "epoch": 0.85, + "learning_rate": 4.4666063772775996e-05, + "loss": 0.4074, + "step": 3190 + }, + { + "epoch": 0.86, + "learning_rate": 4.466438906752412e-05, + "loss": 0.3477, + "step": 3191 + }, + { + "epoch": 0.86, + "learning_rate": 4.466271436227224e-05, + "loss": 0.2054, + "step": 3192 + }, + { + "epoch": 0.86, + "learning_rate": 4.466103965702036e-05, + "loss": 0.2204, + "step": 3193 + }, + { + "epoch": 0.86, + "learning_rate": 4.465936495176849e-05, + "loss": 0.2299, + "step": 3194 + }, + { + "epoch": 0.86, + "learning_rate": 4.465769024651662e-05, + "loss": 0.1969, + "step": 3195 + }, + { + "epoch": 0.86, + "learning_rate": 4.465601554126474e-05, + "loss": 0.1822, + "step": 3196 + }, + { + "epoch": 0.86, + "learning_rate": 4.4654340836012865e-05, + "loss": 0.2089, + "step": 3197 + }, + { + "epoch": 0.86, + "learning_rate": 4.465266613076099e-05, + "loss": 0.1964, + "step": 3198 + }, + { + "epoch": 0.86, + "learning_rate": 4.4650991425509114e-05, + "loss": 0.2793, + "step": 3199 + }, + { + "epoch": 0.86, + "learning_rate": 4.464931672025724e-05, + "loss": 0.3369, + "step": 3200 + }, + { + "epoch": 0.86, + "learning_rate": 4.4647642015005355e-05, + "loss": 0.2352, + "step": 3201 + }, + { + "epoch": 0.86, + "learning_rate": 4.4645967309753486e-05, + "loss": 0.3013, + "step": 3202 + }, + { + "epoch": 0.86, + "learning_rate": 4.464429260450161e-05, + "loss": 0.3047, + "step": 3203 + }, + { + "epoch": 0.86, + "learning_rate": 4.4642617899249735e-05, + "loss": 0.2156, + "step": 3204 + }, + { + "epoch": 0.86, + "learning_rate": 4.464094319399786e-05, + "loss": 0.1674, + "step": 3205 + }, + { + "epoch": 0.86, + "learning_rate": 4.463926848874598e-05, + "loss": 0.2144, + "step": 3206 + }, + { + "epoch": 0.86, + "learning_rate": 4.463759378349411e-05, + "loss": 0.2825, + "step": 3207 + }, + { + "epoch": 0.86, + "learning_rate": 4.463591907824223e-05, + "loss": 0.1756, + "step": 3208 + }, + { + "epoch": 0.86, + "learning_rate": 4.4634244372990356e-05, + "loss": 0.2162, + "step": 3209 + }, + { + "epoch": 0.86, + "learning_rate": 4.463256966773848e-05, + "loss": 0.2908, + "step": 3210 + }, + { + "epoch": 0.86, + "learning_rate": 4.4630894962486604e-05, + "loss": 0.2964, + "step": 3211 + }, + { + "epoch": 0.86, + "learning_rate": 4.462922025723473e-05, + "loss": 0.3594, + "step": 3212 + }, + { + "epoch": 0.86, + "learning_rate": 4.462754555198285e-05, + "loss": 0.4231, + "step": 3213 + }, + { + "epoch": 0.86, + "learning_rate": 4.462587084673098e-05, + "loss": 0.207, + "step": 3214 + }, + { + "epoch": 0.86, + "learning_rate": 4.46241961414791e-05, + "loss": 0.276, + "step": 3215 + }, + { + "epoch": 0.86, + "learning_rate": 4.4622521436227225e-05, + "loss": 0.4278, + "step": 3216 + }, + { + "epoch": 0.86, + "learning_rate": 4.462084673097535e-05, + "loss": 0.2, + "step": 3217 + }, + { + "epoch": 0.86, + "learning_rate": 4.4619172025723474e-05, + "loss": 0.351, + "step": 3218 + }, + { + "epoch": 0.86, + "learning_rate": 4.4617497320471605e-05, + "loss": 0.1832, + "step": 3219 + }, + { + "epoch": 0.86, + "learning_rate": 4.461582261521972e-05, + "loss": 0.3079, + "step": 3220 + }, + { + "epoch": 0.86, + "learning_rate": 4.4614147909967846e-05, + "loss": 0.2325, + "step": 3221 + }, + { + "epoch": 0.86, + "learning_rate": 4.461247320471597e-05, + "loss": 0.6758, + "step": 3222 + }, + { + "epoch": 0.86, + "learning_rate": 4.4610798499464095e-05, + "loss": 0.413, + "step": 3223 + }, + { + "epoch": 0.86, + "learning_rate": 4.460912379421222e-05, + "loss": 0.3432, + "step": 3224 + }, + { + "epoch": 0.86, + "learning_rate": 4.460744908896034e-05, + "loss": 0.199, + "step": 3225 + }, + { + "epoch": 0.86, + "learning_rate": 4.460577438370847e-05, + "loss": 0.1627, + "step": 3226 + }, + { + "epoch": 0.86, + "learning_rate": 4.46040996784566e-05, + "loss": 0.2029, + "step": 3227 + }, + { + "epoch": 0.86, + "learning_rate": 4.460242497320472e-05, + "loss": 0.4918, + "step": 3228 + }, + { + "epoch": 0.87, + "learning_rate": 4.460075026795284e-05, + "loss": 0.5043, + "step": 3229 + }, + { + "epoch": 0.87, + "learning_rate": 4.4599075562700964e-05, + "loss": 0.4246, + "step": 3230 + }, + { + "epoch": 0.87, + "learning_rate": 4.459740085744909e-05, + "loss": 0.5447, + "step": 3231 + }, + { + "epoch": 0.87, + "learning_rate": 4.459572615219721e-05, + "loss": 0.2233, + "step": 3232 + }, + { + "epoch": 0.87, + "learning_rate": 4.459405144694534e-05, + "loss": 0.3408, + "step": 3233 + }, + { + "epoch": 0.87, + "learning_rate": 4.459237674169347e-05, + "loss": 0.388, + "step": 3234 + }, + { + "epoch": 0.87, + "learning_rate": 4.459070203644159e-05, + "loss": 0.3182, + "step": 3235 + }, + { + "epoch": 0.87, + "learning_rate": 4.4589027331189716e-05, + "loss": 0.3039, + "step": 3236 + }, + { + "epoch": 0.87, + "learning_rate": 4.458735262593784e-05, + "loss": 0.2883, + "step": 3237 + }, + { + "epoch": 0.87, + "learning_rate": 4.458567792068596e-05, + "loss": 0.21, + "step": 3238 + }, + { + "epoch": 0.87, + "learning_rate": 4.458400321543408e-05, + "loss": 0.2994, + "step": 3239 + }, + { + "epoch": 0.87, + "learning_rate": 4.4582328510182206e-05, + "loss": 0.5966, + "step": 3240 + }, + { + "epoch": 0.87, + "learning_rate": 4.458065380493033e-05, + "loss": 0.1895, + "step": 3241 + }, + { + "epoch": 0.87, + "learning_rate": 4.457897909967846e-05, + "loss": 0.2234, + "step": 3242 + }, + { + "epoch": 0.87, + "learning_rate": 4.4577304394426586e-05, + "loss": 0.2202, + "step": 3243 + }, + { + "epoch": 0.87, + "learning_rate": 4.457562968917471e-05, + "loss": 0.3551, + "step": 3244 + }, + { + "epoch": 0.87, + "learning_rate": 4.4573954983922834e-05, + "loss": 0.4975, + "step": 3245 + }, + { + "epoch": 0.87, + "learning_rate": 4.457228027867096e-05, + "loss": 0.2435, + "step": 3246 + }, + { + "epoch": 0.87, + "learning_rate": 4.457060557341908e-05, + "loss": 0.2307, + "step": 3247 + }, + { + "epoch": 0.87, + "learning_rate": 4.45689308681672e-05, + "loss": 0.1986, + "step": 3248 + }, + { + "epoch": 0.87, + "learning_rate": 4.4567256162915324e-05, + "loss": 0.3375, + "step": 3249 + }, + { + "epoch": 0.87, + "learning_rate": 4.4565581457663455e-05, + "loss": 0.2927, + "step": 3250 + }, + { + "epoch": 0.87, + "learning_rate": 4.456390675241158e-05, + "loss": 0.26, + "step": 3251 + }, + { + "epoch": 0.87, + "learning_rate": 4.4562232047159704e-05, + "loss": 0.4806, + "step": 3252 + }, + { + "epoch": 0.87, + "learning_rate": 4.456055734190783e-05, + "loss": 0.4696, + "step": 3253 + }, + { + "epoch": 0.87, + "learning_rate": 4.455888263665595e-05, + "loss": 0.3567, + "step": 3254 + }, + { + "epoch": 0.87, + "learning_rate": 4.4557207931404076e-05, + "loss": 0.3242, + "step": 3255 + }, + { + "epoch": 0.87, + "learning_rate": 4.45555332261522e-05, + "loss": 0.2275, + "step": 3256 + }, + { + "epoch": 0.87, + "learning_rate": 4.455385852090032e-05, + "loss": 0.4334, + "step": 3257 + }, + { + "epoch": 0.87, + "learning_rate": 4.455218381564845e-05, + "loss": 0.2231, + "step": 3258 + }, + { + "epoch": 0.87, + "learning_rate": 4.455050911039657e-05, + "loss": 0.2321, + "step": 3259 + }, + { + "epoch": 0.87, + "learning_rate": 4.45488344051447e-05, + "loss": 0.5486, + "step": 3260 + }, + { + "epoch": 0.87, + "learning_rate": 4.454715969989282e-05, + "loss": 0.4888, + "step": 3261 + }, + { + "epoch": 0.87, + "learning_rate": 4.4545484994640946e-05, + "loss": 0.5374, + "step": 3262 + }, + { + "epoch": 0.87, + "learning_rate": 4.454381028938907e-05, + "loss": 0.1883, + "step": 3263 + }, + { + "epoch": 0.87, + "learning_rate": 4.4542135584137194e-05, + "loss": 0.3203, + "step": 3264 + }, + { + "epoch": 0.87, + "learning_rate": 4.454046087888532e-05, + "loss": 0.212, + "step": 3265 + }, + { + "epoch": 0.88, + "learning_rate": 4.453878617363344e-05, + "loss": 0.2681, + "step": 3266 + }, + { + "epoch": 0.88, + "learning_rate": 4.453711146838157e-05, + "loss": 0.3704, + "step": 3267 + }, + { + "epoch": 0.88, + "learning_rate": 4.453543676312969e-05, + "loss": 0.2561, + "step": 3268 + }, + { + "epoch": 0.88, + "learning_rate": 4.4533762057877815e-05, + "loss": 0.2694, + "step": 3269 + }, + { + "epoch": 0.88, + "learning_rate": 4.453208735262594e-05, + "loss": 0.4975, + "step": 3270 + }, + { + "epoch": 0.88, + "learning_rate": 4.4530412647374064e-05, + "loss": 0.2344, + "step": 3271 + }, + { + "epoch": 0.88, + "learning_rate": 4.452873794212219e-05, + "loss": 0.2041, + "step": 3272 + }, + { + "epoch": 0.88, + "learning_rate": 4.452706323687031e-05, + "loss": 0.2075, + "step": 3273 + }, + { + "epoch": 0.88, + "learning_rate": 4.4525388531618436e-05, + "loss": 0.3601, + "step": 3274 + }, + { + "epoch": 0.88, + "learning_rate": 4.452371382636657e-05, + "loss": 0.2831, + "step": 3275 + }, + { + "epoch": 0.88, + "learning_rate": 4.4522039121114685e-05, + "loss": 0.4264, + "step": 3276 + }, + { + "epoch": 0.88, + "learning_rate": 4.452036441586281e-05, + "loss": 0.1892, + "step": 3277 + }, + { + "epoch": 0.88, + "learning_rate": 4.451868971061093e-05, + "loss": 0.4859, + "step": 3278 + }, + { + "epoch": 0.88, + "learning_rate": 4.451701500535906e-05, + "loss": 0.4484, + "step": 3279 + }, + { + "epoch": 0.88, + "learning_rate": 4.451534030010718e-05, + "loss": 0.6579, + "step": 3280 + }, + { + "epoch": 0.88, + "learning_rate": 4.4513665594855306e-05, + "loss": 0.1956, + "step": 3281 + }, + { + "epoch": 0.88, + "learning_rate": 4.451199088960343e-05, + "loss": 0.2126, + "step": 3282 + }, + { + "epoch": 0.88, + "learning_rate": 4.451031618435156e-05, + "loss": 0.2533, + "step": 3283 + }, + { + "epoch": 0.88, + "learning_rate": 4.4508641479099685e-05, + "loss": 0.3714, + "step": 3284 + }, + { + "epoch": 0.88, + "learning_rate": 4.45069667738478e-05, + "loss": 0.3843, + "step": 3285 + }, + { + "epoch": 0.88, + "learning_rate": 4.450529206859593e-05, + "loss": 0.3005, + "step": 3286 + }, + { + "epoch": 0.88, + "learning_rate": 4.450361736334405e-05, + "loss": 0.2115, + "step": 3287 + }, + { + "epoch": 0.88, + "learning_rate": 4.4501942658092175e-05, + "loss": 0.2376, + "step": 3288 + }, + { + "epoch": 0.88, + "learning_rate": 4.45002679528403e-05, + "loss": 0.1912, + "step": 3289 + }, + { + "epoch": 0.88, + "learning_rate": 4.4498593247588424e-05, + "loss": 0.2613, + "step": 3290 + }, + { + "epoch": 0.88, + "learning_rate": 4.4496918542336555e-05, + "loss": 0.2349, + "step": 3291 + }, + { + "epoch": 0.88, + "learning_rate": 4.449524383708468e-05, + "loss": 0.4765, + "step": 3292 + }, + { + "epoch": 0.88, + "learning_rate": 4.44935691318328e-05, + "loss": 0.3101, + "step": 3293 + }, + { + "epoch": 0.88, + "learning_rate": 4.449189442658092e-05, + "loss": 0.4895, + "step": 3294 + }, + { + "epoch": 0.88, + "learning_rate": 4.4490219721329045e-05, + "loss": 0.1896, + "step": 3295 + }, + { + "epoch": 0.88, + "learning_rate": 4.448854501607717e-05, + "loss": 0.4104, + "step": 3296 + }, + { + "epoch": 0.88, + "learning_rate": 4.448687031082529e-05, + "loss": 0.26, + "step": 3297 + }, + { + "epoch": 0.88, + "learning_rate": 4.4485195605573424e-05, + "loss": 0.3087, + "step": 3298 + }, + { + "epoch": 0.88, + "learning_rate": 4.448352090032155e-05, + "loss": 0.5275, + "step": 3299 + }, + { + "epoch": 0.88, + "learning_rate": 4.448184619506967e-05, + "loss": 0.1945, + "step": 3300 + }, + { + "epoch": 0.88, + "learning_rate": 4.44801714898178e-05, + "loss": 0.3435, + "step": 3301 + }, + { + "epoch": 0.88, + "learning_rate": 4.447849678456592e-05, + "loss": 0.2153, + "step": 3302 + }, + { + "epoch": 0.89, + "learning_rate": 4.4476822079314045e-05, + "loss": 0.2054, + "step": 3303 + }, + { + "epoch": 0.89, + "learning_rate": 4.447514737406216e-05, + "loss": 0.2705, + "step": 3304 + }, + { + "epoch": 0.89, + "learning_rate": 4.447347266881029e-05, + "loss": 0.4118, + "step": 3305 + }, + { + "epoch": 0.89, + "learning_rate": 4.447179796355842e-05, + "loss": 0.2382, + "step": 3306 + }, + { + "epoch": 0.89, + "learning_rate": 4.447012325830654e-05, + "loss": 0.4921, + "step": 3307 + }, + { + "epoch": 0.89, + "learning_rate": 4.4468448553054666e-05, + "loss": 0.2245, + "step": 3308 + }, + { + "epoch": 0.89, + "learning_rate": 4.446677384780279e-05, + "loss": 0.1764, + "step": 3309 + }, + { + "epoch": 0.89, + "learning_rate": 4.4465099142550915e-05, + "loss": 0.2152, + "step": 3310 + }, + { + "epoch": 0.89, + "learning_rate": 4.446342443729904e-05, + "loss": 0.5589, + "step": 3311 + }, + { + "epoch": 0.89, + "learning_rate": 4.446174973204716e-05, + "loss": 0.2166, + "step": 3312 + }, + { + "epoch": 0.89, + "learning_rate": 4.446007502679528e-05, + "loss": 0.2288, + "step": 3313 + }, + { + "epoch": 0.89, + "learning_rate": 4.445840032154341e-05, + "loss": 0.6101, + "step": 3314 + }, + { + "epoch": 0.89, + "learning_rate": 4.4456725616291536e-05, + "loss": 0.2525, + "step": 3315 + }, + { + "epoch": 0.89, + "learning_rate": 4.445505091103966e-05, + "loss": 0.2099, + "step": 3316 + }, + { + "epoch": 0.89, + "learning_rate": 4.4453376205787784e-05, + "loss": 0.2298, + "step": 3317 + }, + { + "epoch": 0.89, + "learning_rate": 4.445170150053591e-05, + "loss": 0.4951, + "step": 3318 + }, + { + "epoch": 0.89, + "learning_rate": 4.445002679528403e-05, + "loss": 0.4625, + "step": 3319 + }, + { + "epoch": 0.89, + "learning_rate": 4.444835209003216e-05, + "loss": 0.3253, + "step": 3320 + }, + { + "epoch": 0.89, + "learning_rate": 4.444667738478028e-05, + "loss": 0.2126, + "step": 3321 + }, + { + "epoch": 0.89, + "learning_rate": 4.4445002679528405e-05, + "loss": 0.2996, + "step": 3322 + }, + { + "epoch": 0.89, + "learning_rate": 4.444332797427653e-05, + "loss": 0.2181, + "step": 3323 + }, + { + "epoch": 0.89, + "learning_rate": 4.4441653269024654e-05, + "loss": 0.3207, + "step": 3324 + }, + { + "epoch": 0.89, + "learning_rate": 4.443997856377278e-05, + "loss": 0.1965, + "step": 3325 + }, + { + "epoch": 0.89, + "learning_rate": 4.44383038585209e-05, + "loss": 0.2773, + "step": 3326 + }, + { + "epoch": 0.89, + "learning_rate": 4.4436629153269026e-05, + "loss": 0.2707, + "step": 3327 + }, + { + "epoch": 0.89, + "learning_rate": 4.443495444801715e-05, + "loss": 0.3268, + "step": 3328 + }, + { + "epoch": 0.89, + "learning_rate": 4.4433279742765275e-05, + "loss": 0.2058, + "step": 3329 + }, + { + "epoch": 0.89, + "learning_rate": 4.44316050375134e-05, + "loss": 0.2139, + "step": 3330 + }, + { + "epoch": 0.89, + "learning_rate": 4.442993033226153e-05, + "loss": 0.3337, + "step": 3331 + }, + { + "epoch": 0.89, + "learning_rate": 4.442825562700965e-05, + "loss": 0.182, + "step": 3332 + }, + { + "epoch": 0.89, + "learning_rate": 4.442658092175777e-05, + "loss": 0.2715, + "step": 3333 + }, + { + "epoch": 0.89, + "learning_rate": 4.4424906216505896e-05, + "loss": 0.3539, + "step": 3334 + }, + { + "epoch": 0.89, + "learning_rate": 4.442323151125402e-05, + "loss": 0.2156, + "step": 3335 + }, + { + "epoch": 0.89, + "learning_rate": 4.4421556806002144e-05, + "loss": 0.1589, + "step": 3336 + }, + { + "epoch": 0.89, + "learning_rate": 4.441988210075027e-05, + "loss": 0.2664, + "step": 3337 + }, + { + "epoch": 0.89, + "learning_rate": 4.441820739549839e-05, + "loss": 0.2078, + "step": 3338 + }, + { + "epoch": 0.89, + "learning_rate": 4.4416532690246524e-05, + "loss": 0.7456, + "step": 3339 + }, + { + "epoch": 0.89, + "learning_rate": 4.441485798499465e-05, + "loss": 0.2112, + "step": 3340 + }, + { + "epoch": 0.9, + "learning_rate": 4.4413183279742765e-05, + "loss": 0.3957, + "step": 3341 + }, + { + "epoch": 0.9, + "learning_rate": 4.441150857449089e-05, + "loss": 0.2845, + "step": 3342 + }, + { + "epoch": 0.9, + "learning_rate": 4.4409833869239014e-05, + "loss": 0.3013, + "step": 3343 + }, + { + "epoch": 0.9, + "learning_rate": 4.440815916398714e-05, + "loss": 0.3106, + "step": 3344 + }, + { + "epoch": 0.9, + "learning_rate": 4.440648445873526e-05, + "loss": 0.1759, + "step": 3345 + }, + { + "epoch": 0.9, + "learning_rate": 4.4404809753483386e-05, + "loss": 0.2573, + "step": 3346 + }, + { + "epoch": 0.9, + "learning_rate": 4.440313504823152e-05, + "loss": 0.3241, + "step": 3347 + }, + { + "epoch": 0.9, + "learning_rate": 4.440146034297964e-05, + "loss": 0.3476, + "step": 3348 + }, + { + "epoch": 0.9, + "learning_rate": 4.4399785637727766e-05, + "loss": 0.2491, + "step": 3349 + }, + { + "epoch": 0.9, + "learning_rate": 4.439811093247588e-05, + "loss": 0.2575, + "step": 3350 + }, + { + "epoch": 0.9, + "learning_rate": 4.439643622722401e-05, + "loss": 0.1987, + "step": 3351 + }, + { + "epoch": 0.9, + "learning_rate": 4.439476152197213e-05, + "loss": 0.1949, + "step": 3352 + }, + { + "epoch": 0.9, + "learning_rate": 4.4393086816720256e-05, + "loss": 0.2016, + "step": 3353 + }, + { + "epoch": 0.9, + "learning_rate": 4.439141211146839e-05, + "loss": 0.3511, + "step": 3354 + }, + { + "epoch": 0.9, + "learning_rate": 4.438973740621651e-05, + "loss": 0.2073, + "step": 3355 + }, + { + "epoch": 0.9, + "learning_rate": 4.4388062700964635e-05, + "loss": 0.2375, + "step": 3356 + }, + { + "epoch": 0.9, + "learning_rate": 4.438638799571276e-05, + "loss": 0.2, + "step": 3357 + }, + { + "epoch": 0.9, + "learning_rate": 4.4384713290460883e-05, + "loss": 0.6308, + "step": 3358 + }, + { + "epoch": 0.9, + "learning_rate": 4.438303858520901e-05, + "loss": 0.4255, + "step": 3359 + }, + { + "epoch": 0.9, + "learning_rate": 4.4381363879957125e-05, + "loss": 0.1984, + "step": 3360 + }, + { + "epoch": 0.9, + "learning_rate": 4.437968917470525e-05, + "loss": 0.3722, + "step": 3361 + }, + { + "epoch": 0.9, + "learning_rate": 4.437801446945338e-05, + "loss": 0.2001, + "step": 3362 + }, + { + "epoch": 0.9, + "learning_rate": 4.4376339764201505e-05, + "loss": 0.226, + "step": 3363 + }, + { + "epoch": 0.9, + "learning_rate": 4.437466505894963e-05, + "loss": 0.2414, + "step": 3364 + }, + { + "epoch": 0.9, + "learning_rate": 4.437299035369775e-05, + "loss": 0.1728, + "step": 3365 + }, + { + "epoch": 0.9, + "learning_rate": 4.437131564844588e-05, + "loss": 0.1709, + "step": 3366 + }, + { + "epoch": 0.9, + "learning_rate": 4.4369640943194e-05, + "loss": 0.1828, + "step": 3367 + }, + { + "epoch": 0.9, + "learning_rate": 4.4367966237942126e-05, + "loss": 0.3087, + "step": 3368 + }, + { + "epoch": 0.9, + "learning_rate": 4.436629153269024e-05, + "loss": 0.3125, + "step": 3369 + }, + { + "epoch": 0.9, + "learning_rate": 4.4364616827438374e-05, + "loss": 0.1868, + "step": 3370 + }, + { + "epoch": 0.9, + "learning_rate": 4.43629421221865e-05, + "loss": 0.351, + "step": 3371 + }, + { + "epoch": 0.9, + "learning_rate": 4.436126741693462e-05, + "loss": 0.2584, + "step": 3372 + }, + { + "epoch": 0.9, + "learning_rate": 4.435959271168275e-05, + "loss": 0.2491, + "step": 3373 + }, + { + "epoch": 0.9, + "learning_rate": 4.435791800643087e-05, + "loss": 0.202, + "step": 3374 + }, + { + "epoch": 0.9, + "learning_rate": 4.4356243301178995e-05, + "loss": 0.4742, + "step": 3375 + }, + { + "epoch": 0.9, + "learning_rate": 4.435456859592712e-05, + "loss": 0.1993, + "step": 3376 + }, + { + "epoch": 0.9, + "learning_rate": 4.4352893890675243e-05, + "loss": 0.3099, + "step": 3377 + }, + { + "epoch": 0.91, + "learning_rate": 4.435121918542337e-05, + "loss": 0.332, + "step": 3378 + }, + { + "epoch": 0.91, + "learning_rate": 4.434954448017149e-05, + "loss": 0.5775, + "step": 3379 + }, + { + "epoch": 0.91, + "learning_rate": 4.4347869774919616e-05, + "loss": 0.2512, + "step": 3380 + }, + { + "epoch": 0.91, + "learning_rate": 4.434619506966774e-05, + "loss": 0.4756, + "step": 3381 + }, + { + "epoch": 0.91, + "learning_rate": 4.4344520364415865e-05, + "loss": 0.208, + "step": 3382 + }, + { + "epoch": 0.91, + "learning_rate": 4.434284565916399e-05, + "loss": 0.2586, + "step": 3383 + }, + { + "epoch": 0.91, + "learning_rate": 4.434117095391211e-05, + "loss": 0.3799, + "step": 3384 + }, + { + "epoch": 0.91, + "learning_rate": 4.433949624866024e-05, + "loss": 0.4064, + "step": 3385 + }, + { + "epoch": 0.91, + "learning_rate": 4.433782154340836e-05, + "loss": 0.387, + "step": 3386 + }, + { + "epoch": 0.91, + "learning_rate": 4.433614683815649e-05, + "loss": 0.3046, + "step": 3387 + }, + { + "epoch": 0.91, + "learning_rate": 4.433447213290461e-05, + "loss": 0.4295, + "step": 3388 + }, + { + "epoch": 0.91, + "learning_rate": 4.4332797427652734e-05, + "loss": 0.314, + "step": 3389 + }, + { + "epoch": 0.91, + "learning_rate": 4.433112272240086e-05, + "loss": 0.4928, + "step": 3390 + }, + { + "epoch": 0.91, + "learning_rate": 4.432944801714898e-05, + "loss": 0.193, + "step": 3391 + }, + { + "epoch": 0.91, + "learning_rate": 4.432777331189711e-05, + "loss": 0.1852, + "step": 3392 + }, + { + "epoch": 0.91, + "learning_rate": 4.432609860664523e-05, + "loss": 0.1892, + "step": 3393 + }, + { + "epoch": 0.91, + "learning_rate": 4.4324423901393355e-05, + "loss": 0.2316, + "step": 3394 + }, + { + "epoch": 0.91, + "learning_rate": 4.4322749196141486e-05, + "loss": 0.2296, + "step": 3395 + }, + { + "epoch": 0.91, + "learning_rate": 4.432107449088961e-05, + "loss": 0.1739, + "step": 3396 + }, + { + "epoch": 0.91, + "learning_rate": 4.431939978563773e-05, + "loss": 0.1563, + "step": 3397 + }, + { + "epoch": 0.91, + "learning_rate": 4.431772508038585e-05, + "loss": 0.1987, + "step": 3398 + }, + { + "epoch": 0.91, + "learning_rate": 4.4316050375133976e-05, + "loss": 0.1519, + "step": 3399 + }, + { + "epoch": 0.91, + "learning_rate": 4.43143756698821e-05, + "loss": 0.2204, + "step": 3400 + }, + { + "epoch": 0.91, + "learning_rate": 4.4312700964630224e-05, + "loss": 0.2167, + "step": 3401 + }, + { + "epoch": 0.91, + "learning_rate": 4.431102625937835e-05, + "loss": 0.3662, + "step": 3402 + }, + { + "epoch": 0.91, + "learning_rate": 4.430935155412648e-05, + "loss": 0.2693, + "step": 3403 + }, + { + "epoch": 0.91, + "learning_rate": 4.4307676848874604e-05, + "loss": 0.1672, + "step": 3404 + }, + { + "epoch": 0.91, + "learning_rate": 4.430600214362273e-05, + "loss": 0.1988, + "step": 3405 + }, + { + "epoch": 0.91, + "learning_rate": 4.4304327438370846e-05, + "loss": 0.2486, + "step": 3406 + }, + { + "epoch": 0.91, + "learning_rate": 4.430265273311897e-05, + "loss": 0.8469, + "step": 3407 + }, + { + "epoch": 0.91, + "learning_rate": 4.4300978027867094e-05, + "loss": 0.3878, + "step": 3408 + }, + { + "epoch": 0.91, + "learning_rate": 4.429930332261522e-05, + "loss": 0.2134, + "step": 3409 + }, + { + "epoch": 0.91, + "learning_rate": 4.429762861736335e-05, + "loss": 0.3836, + "step": 3410 + }, + { + "epoch": 0.91, + "learning_rate": 4.429595391211147e-05, + "loss": 0.2052, + "step": 3411 + }, + { + "epoch": 0.91, + "learning_rate": 4.42942792068596e-05, + "loss": 0.3077, + "step": 3412 + }, + { + "epoch": 0.91, + "learning_rate": 4.429260450160772e-05, + "loss": 0.2176, + "step": 3413 + }, + { + "epoch": 0.91, + "learning_rate": 4.4290929796355846e-05, + "loss": 0.4693, + "step": 3414 + }, + { + "epoch": 0.92, + "learning_rate": 4.428925509110397e-05, + "loss": 0.2186, + "step": 3415 + }, + { + "epoch": 0.92, + "learning_rate": 4.428758038585209e-05, + "loss": 0.3163, + "step": 3416 + }, + { + "epoch": 0.92, + "learning_rate": 4.428590568060021e-05, + "loss": 0.3413, + "step": 3417 + }, + { + "epoch": 0.92, + "learning_rate": 4.428423097534834e-05, + "loss": 0.2988, + "step": 3418 + }, + { + "epoch": 0.92, + "learning_rate": 4.428255627009647e-05, + "loss": 0.2183, + "step": 3419 + }, + { + "epoch": 0.92, + "learning_rate": 4.428088156484459e-05, + "loss": 0.2098, + "step": 3420 + }, + { + "epoch": 0.92, + "learning_rate": 4.4279206859592715e-05, + "loss": 0.4248, + "step": 3421 + }, + { + "epoch": 0.92, + "learning_rate": 4.427753215434084e-05, + "loss": 0.186, + "step": 3422 + }, + { + "epoch": 0.92, + "learning_rate": 4.4275857449088964e-05, + "loss": 0.4997, + "step": 3423 + }, + { + "epoch": 0.92, + "learning_rate": 4.427418274383709e-05, + "loss": 0.2271, + "step": 3424 + }, + { + "epoch": 0.92, + "learning_rate": 4.4272508038585206e-05, + "loss": 0.5037, + "step": 3425 + }, + { + "epoch": 0.92, + "learning_rate": 4.4270833333333337e-05, + "loss": 0.5798, + "step": 3426 + }, + { + "epoch": 0.92, + "learning_rate": 4.426915862808146e-05, + "loss": 0.3663, + "step": 3427 + }, + { + "epoch": 0.92, + "learning_rate": 4.4267483922829585e-05, + "loss": 0.3175, + "step": 3428 + }, + { + "epoch": 0.92, + "learning_rate": 4.426580921757771e-05, + "loss": 0.3554, + "step": 3429 + }, + { + "epoch": 0.92, + "learning_rate": 4.426413451232583e-05, + "loss": 0.1954, + "step": 3430 + }, + { + "epoch": 0.92, + "learning_rate": 4.426245980707396e-05, + "loss": 0.1844, + "step": 3431 + }, + { + "epoch": 0.92, + "learning_rate": 4.426078510182208e-05, + "loss": 0.18, + "step": 3432 + }, + { + "epoch": 0.92, + "learning_rate": 4.4259110396570206e-05, + "loss": 0.1829, + "step": 3433 + }, + { + "epoch": 0.92, + "learning_rate": 4.425743569131833e-05, + "loss": 0.1676, + "step": 3434 + }, + { + "epoch": 0.92, + "learning_rate": 4.4255760986066454e-05, + "loss": 0.2137, + "step": 3435 + }, + { + "epoch": 0.92, + "learning_rate": 4.425408628081458e-05, + "loss": 0.2394, + "step": 3436 + }, + { + "epoch": 0.92, + "learning_rate": 4.42524115755627e-05, + "loss": 0.3199, + "step": 3437 + }, + { + "epoch": 0.92, + "learning_rate": 4.425073687031083e-05, + "loss": 0.2821, + "step": 3438 + }, + { + "epoch": 0.92, + "learning_rate": 4.424906216505895e-05, + "loss": 0.2947, + "step": 3439 + }, + { + "epoch": 0.92, + "learning_rate": 4.4247387459807075e-05, + "loss": 0.2154, + "step": 3440 + }, + { + "epoch": 0.92, + "learning_rate": 4.42457127545552e-05, + "loss": 0.3539, + "step": 3441 + }, + { + "epoch": 0.92, + "learning_rate": 4.4244038049303324e-05, + "loss": 0.8738, + "step": 3442 + }, + { + "epoch": 0.92, + "learning_rate": 4.4242363344051455e-05, + "loss": 0.2027, + "step": 3443 + }, + { + "epoch": 0.92, + "learning_rate": 4.424068863879957e-05, + "loss": 0.4357, + "step": 3444 + }, + { + "epoch": 0.92, + "learning_rate": 4.4239013933547696e-05, + "loss": 0.3942, + "step": 3445 + }, + { + "epoch": 0.92, + "learning_rate": 4.423733922829582e-05, + "loss": 0.4387, + "step": 3446 + }, + { + "epoch": 0.92, + "learning_rate": 4.4235664523043945e-05, + "loss": 0.1959, + "step": 3447 + }, + { + "epoch": 0.92, + "learning_rate": 4.423398981779207e-05, + "loss": 0.458, + "step": 3448 + }, + { + "epoch": 0.92, + "learning_rate": 4.423231511254019e-05, + "loss": 0.3197, + "step": 3449 + }, + { + "epoch": 0.92, + "learning_rate": 4.423064040728832e-05, + "loss": 0.3899, + "step": 3450 + }, + { + "epoch": 0.92, + "learning_rate": 4.422896570203645e-05, + "loss": 0.2049, + "step": 3451 + }, + { + "epoch": 0.92, + "learning_rate": 4.422729099678457e-05, + "loss": 0.2512, + "step": 3452 + }, + { + "epoch": 0.93, + "learning_rate": 4.422561629153269e-05, + "loss": 0.1753, + "step": 3453 + }, + { + "epoch": 0.93, + "learning_rate": 4.4223941586280814e-05, + "loss": 0.1829, + "step": 3454 + }, + { + "epoch": 0.93, + "learning_rate": 4.422226688102894e-05, + "loss": 0.1926, + "step": 3455 + }, + { + "epoch": 0.93, + "learning_rate": 4.422059217577706e-05, + "loss": 0.2099, + "step": 3456 + }, + { + "epoch": 0.93, + "learning_rate": 4.421891747052519e-05, + "loss": 0.3012, + "step": 3457 + }, + { + "epoch": 0.93, + "learning_rate": 4.421724276527331e-05, + "loss": 0.3078, + "step": 3458 + }, + { + "epoch": 0.93, + "learning_rate": 4.421556806002144e-05, + "loss": 0.3672, + "step": 3459 + }, + { + "epoch": 0.93, + "learning_rate": 4.4213893354769566e-05, + "loss": 0.3943, + "step": 3460 + }, + { + "epoch": 0.93, + "learning_rate": 4.421221864951769e-05, + "loss": 0.1896, + "step": 3461 + }, + { + "epoch": 0.93, + "learning_rate": 4.421054394426581e-05, + "loss": 0.2558, + "step": 3462 + }, + { + "epoch": 0.93, + "learning_rate": 4.420886923901393e-05, + "loss": 0.533, + "step": 3463 + }, + { + "epoch": 0.93, + "learning_rate": 4.4207194533762056e-05, + "loss": 0.6064, + "step": 3464 + }, + { + "epoch": 0.93, + "learning_rate": 4.420551982851018e-05, + "loss": 0.1802, + "step": 3465 + }, + { + "epoch": 0.93, + "learning_rate": 4.420384512325831e-05, + "loss": 0.263, + "step": 3466 + }, + { + "epoch": 0.93, + "learning_rate": 4.4202170418006436e-05, + "loss": 0.2225, + "step": 3467 + }, + { + "epoch": 0.93, + "learning_rate": 4.420049571275456e-05, + "loss": 0.19, + "step": 3468 + }, + { + "epoch": 0.93, + "learning_rate": 4.4198821007502684e-05, + "loss": 0.1949, + "step": 3469 + }, + { + "epoch": 0.93, + "learning_rate": 4.419714630225081e-05, + "loss": 0.2388, + "step": 3470 + }, + { + "epoch": 0.93, + "learning_rate": 4.419547159699893e-05, + "loss": 0.3493, + "step": 3471 + }, + { + "epoch": 0.93, + "learning_rate": 4.419379689174705e-05, + "loss": 0.4527, + "step": 3472 + }, + { + "epoch": 0.93, + "learning_rate": 4.4192122186495174e-05, + "loss": 0.1877, + "step": 3473 + }, + { + "epoch": 0.93, + "learning_rate": 4.4190447481243305e-05, + "loss": 0.158, + "step": 3474 + }, + { + "epoch": 0.93, + "learning_rate": 4.418877277599143e-05, + "loss": 0.2104, + "step": 3475 + }, + { + "epoch": 0.93, + "learning_rate": 4.4187098070739554e-05, + "loss": 0.4953, + "step": 3476 + }, + { + "epoch": 0.93, + "learning_rate": 4.418542336548768e-05, + "loss": 0.2128, + "step": 3477 + }, + { + "epoch": 0.93, + "learning_rate": 4.41837486602358e-05, + "loss": 0.4868, + "step": 3478 + }, + { + "epoch": 0.93, + "learning_rate": 4.4182073954983926e-05, + "loss": 0.474, + "step": 3479 + }, + { + "epoch": 0.93, + "learning_rate": 4.418039924973205e-05, + "loss": 0.1973, + "step": 3480 + }, + { + "epoch": 0.93, + "learning_rate": 4.417872454448017e-05, + "loss": 0.322, + "step": 3481 + }, + { + "epoch": 0.93, + "learning_rate": 4.41770498392283e-05, + "loss": 0.1872, + "step": 3482 + }, + { + "epoch": 0.93, + "learning_rate": 4.417537513397642e-05, + "loss": 0.3332, + "step": 3483 + }, + { + "epoch": 0.93, + "learning_rate": 4.417370042872455e-05, + "loss": 0.5582, + "step": 3484 + }, + { + "epoch": 0.93, + "learning_rate": 4.417202572347267e-05, + "loss": 0.1704, + "step": 3485 + }, + { + "epoch": 0.93, + "learning_rate": 4.4170351018220796e-05, + "loss": 0.4128, + "step": 3486 + }, + { + "epoch": 0.93, + "learning_rate": 4.416867631296892e-05, + "loss": 0.1838, + "step": 3487 + }, + { + "epoch": 0.93, + "learning_rate": 4.4167001607717044e-05, + "loss": 0.311, + "step": 3488 + }, + { + "epoch": 0.93, + "learning_rate": 4.416532690246517e-05, + "loss": 0.1922, + "step": 3489 + }, + { + "epoch": 0.94, + "learning_rate": 4.416365219721329e-05, + "loss": 0.2403, + "step": 3490 + }, + { + "epoch": 0.94, + "learning_rate": 4.416197749196142e-05, + "loss": 0.1814, + "step": 3491 + }, + { + "epoch": 0.94, + "learning_rate": 4.416030278670954e-05, + "loss": 0.4101, + "step": 3492 + }, + { + "epoch": 0.94, + "learning_rate": 4.4158628081457665e-05, + "loss": 0.2092, + "step": 3493 + }, + { + "epoch": 0.94, + "learning_rate": 4.415695337620579e-05, + "loss": 0.1781, + "step": 3494 + }, + { + "epoch": 0.94, + "learning_rate": 4.4155278670953914e-05, + "loss": 0.5018, + "step": 3495 + }, + { + "epoch": 0.94, + "learning_rate": 4.415360396570204e-05, + "loss": 0.3931, + "step": 3496 + }, + { + "epoch": 0.94, + "learning_rate": 4.415192926045016e-05, + "loss": 0.2352, + "step": 3497 + }, + { + "epoch": 0.94, + "learning_rate": 4.4150254555198286e-05, + "loss": 0.1791, + "step": 3498 + }, + { + "epoch": 0.94, + "learning_rate": 4.414857984994642e-05, + "loss": 0.2765, + "step": 3499 + }, + { + "epoch": 0.94, + "learning_rate": 4.4146905144694535e-05, + "loss": 0.1902, + "step": 3500 + }, + { + "epoch": 0.94, + "learning_rate": 4.414523043944266e-05, + "loss": 0.1739, + "step": 3501 + }, + { + "epoch": 0.94, + "learning_rate": 4.414355573419078e-05, + "loss": 0.3142, + "step": 3502 + }, + { + "epoch": 0.94, + "learning_rate": 4.414188102893891e-05, + "loss": 0.3673, + "step": 3503 + }, + { + "epoch": 0.94, + "learning_rate": 4.414020632368703e-05, + "loss": 0.2441, + "step": 3504 + }, + { + "epoch": 0.94, + "learning_rate": 4.4138531618435156e-05, + "loss": 0.203, + "step": 3505 + }, + { + "epoch": 0.94, + "learning_rate": 4.413685691318328e-05, + "loss": 0.2233, + "step": 3506 + }, + { + "epoch": 0.94, + "learning_rate": 4.413518220793141e-05, + "loss": 0.1962, + "step": 3507 + }, + { + "epoch": 0.94, + "learning_rate": 4.4133507502679535e-05, + "loss": 0.258, + "step": 3508 + }, + { + "epoch": 0.94, + "learning_rate": 4.413183279742765e-05, + "loss": 0.1855, + "step": 3509 + }, + { + "epoch": 0.94, + "learning_rate": 4.413015809217578e-05, + "loss": 0.3826, + "step": 3510 + }, + { + "epoch": 0.94, + "learning_rate": 4.41284833869239e-05, + "loss": 0.482, + "step": 3511 + }, + { + "epoch": 0.94, + "learning_rate": 4.4126808681672025e-05, + "loss": 0.1945, + "step": 3512 + }, + { + "epoch": 0.94, + "learning_rate": 4.412513397642015e-05, + "loss": 0.5103, + "step": 3513 + }, + { + "epoch": 0.94, + "learning_rate": 4.4123459271168274e-05, + "loss": 0.1867, + "step": 3514 + }, + { + "epoch": 0.94, + "learning_rate": 4.4121784565916405e-05, + "loss": 0.4449, + "step": 3515 + }, + { + "epoch": 0.94, + "learning_rate": 4.412010986066453e-05, + "loss": 0.1874, + "step": 3516 + }, + { + "epoch": 0.94, + "learning_rate": 4.411843515541265e-05, + "loss": 0.3076, + "step": 3517 + }, + { + "epoch": 0.94, + "learning_rate": 4.411676045016077e-05, + "loss": 0.1927, + "step": 3518 + }, + { + "epoch": 0.94, + "learning_rate": 4.4115085744908895e-05, + "loss": 0.1557, + "step": 3519 + }, + { + "epoch": 0.94, + "learning_rate": 4.411341103965702e-05, + "loss": 0.2419, + "step": 3520 + }, + { + "epoch": 0.94, + "learning_rate": 4.411173633440514e-05, + "loss": 0.5023, + "step": 3521 + }, + { + "epoch": 0.94, + "learning_rate": 4.4110061629153274e-05, + "loss": 0.2879, + "step": 3522 + }, + { + "epoch": 0.94, + "learning_rate": 4.41083869239014e-05, + "loss": 0.2474, + "step": 3523 + }, + { + "epoch": 0.94, + "learning_rate": 4.410671221864952e-05, + "loss": 0.1759, + "step": 3524 + }, + { + "epoch": 0.94, + "learning_rate": 4.410503751339765e-05, + "loss": 0.2562, + "step": 3525 + }, + { + "epoch": 0.94, + "learning_rate": 4.410336280814577e-05, + "loss": 0.2407, + "step": 3526 + }, + { + "epoch": 0.95, + "learning_rate": 4.4101688102893895e-05, + "loss": 0.3504, + "step": 3527 + }, + { + "epoch": 0.95, + "learning_rate": 4.410001339764201e-05, + "loss": 0.3308, + "step": 3528 + }, + { + "epoch": 0.95, + "learning_rate": 4.409833869239014e-05, + "loss": 0.1854, + "step": 3529 + }, + { + "epoch": 0.95, + "learning_rate": 4.409666398713827e-05, + "loss": 0.3274, + "step": 3530 + }, + { + "epoch": 0.95, + "learning_rate": 4.409498928188639e-05, + "loss": 0.6351, + "step": 3531 + }, + { + "epoch": 0.95, + "learning_rate": 4.4093314576634516e-05, + "loss": 0.2035, + "step": 3532 + }, + { + "epoch": 0.95, + "learning_rate": 4.409163987138264e-05, + "loss": 0.271, + "step": 3533 + }, + { + "epoch": 0.95, + "learning_rate": 4.4089965166130765e-05, + "loss": 0.3127, + "step": 3534 + }, + { + "epoch": 0.95, + "learning_rate": 4.408829046087889e-05, + "loss": 0.29, + "step": 3535 + }, + { + "epoch": 0.95, + "learning_rate": 4.408661575562701e-05, + "loss": 0.387, + "step": 3536 + }, + { + "epoch": 0.95, + "learning_rate": 4.408494105037513e-05, + "loss": 0.3177, + "step": 3537 + }, + { + "epoch": 0.95, + "learning_rate": 4.408326634512326e-05, + "loss": 0.3396, + "step": 3538 + }, + { + "epoch": 0.95, + "learning_rate": 4.4081591639871386e-05, + "loss": 0.2007, + "step": 3539 + }, + { + "epoch": 0.95, + "learning_rate": 4.407991693461951e-05, + "loss": 0.2075, + "step": 3540 + }, + { + "epoch": 0.95, + "learning_rate": 4.4078242229367634e-05, + "loss": 0.1822, + "step": 3541 + }, + { + "epoch": 0.95, + "learning_rate": 4.407656752411576e-05, + "loss": 0.2601, + "step": 3542 + }, + { + "epoch": 0.95, + "learning_rate": 4.407489281886388e-05, + "loss": 0.2924, + "step": 3543 + }, + { + "epoch": 0.95, + "learning_rate": 4.407321811361201e-05, + "loss": 0.3028, + "step": 3544 + }, + { + "epoch": 0.95, + "learning_rate": 4.407154340836013e-05, + "loss": 0.2079, + "step": 3545 + }, + { + "epoch": 0.95, + "learning_rate": 4.4069868703108255e-05, + "loss": 0.2155, + "step": 3546 + }, + { + "epoch": 0.95, + "learning_rate": 4.406819399785638e-05, + "loss": 0.171, + "step": 3547 + }, + { + "epoch": 0.95, + "learning_rate": 4.4066519292604504e-05, + "loss": 0.3186, + "step": 3548 + }, + { + "epoch": 0.95, + "learning_rate": 4.406484458735263e-05, + "loss": 0.6068, + "step": 3549 + }, + { + "epoch": 0.95, + "learning_rate": 4.406316988210075e-05, + "loss": 0.5442, + "step": 3550 + }, + { + "epoch": 0.95, + "learning_rate": 4.4061495176848876e-05, + "loss": 0.3732, + "step": 3551 + }, + { + "epoch": 0.95, + "learning_rate": 4.4059820471597e-05, + "loss": 0.1514, + "step": 3552 + }, + { + "epoch": 0.95, + "learning_rate": 4.4058145766345125e-05, + "loss": 0.3074, + "step": 3553 + }, + { + "epoch": 0.95, + "learning_rate": 4.405647106109325e-05, + "loss": 0.2016, + "step": 3554 + }, + { + "epoch": 0.95, + "learning_rate": 4.405479635584138e-05, + "loss": 0.1725, + "step": 3555 + }, + { + "epoch": 0.95, + "learning_rate": 4.40531216505895e-05, + "loss": 0.1862, + "step": 3556 + }, + { + "epoch": 0.95, + "learning_rate": 4.405144694533762e-05, + "loss": 0.3552, + "step": 3557 + }, + { + "epoch": 0.95, + "learning_rate": 4.4049772240085746e-05, + "loss": 0.4123, + "step": 3558 + }, + { + "epoch": 0.95, + "learning_rate": 4.404809753483387e-05, + "loss": 0.1644, + "step": 3559 + }, + { + "epoch": 0.95, + "learning_rate": 4.4046422829581994e-05, + "loss": 0.255, + "step": 3560 + }, + { + "epoch": 0.95, + "learning_rate": 4.404474812433012e-05, + "loss": 0.4368, + "step": 3561 + }, + { + "epoch": 0.95, + "learning_rate": 4.404307341907824e-05, + "loss": 0.5965, + "step": 3562 + }, + { + "epoch": 0.95, + "learning_rate": 4.4041398713826374e-05, + "loss": 0.202, + "step": 3563 + }, + { + "epoch": 0.95, + "learning_rate": 4.40397240085745e-05, + "loss": 0.1778, + "step": 3564 + }, + { + "epoch": 0.96, + "learning_rate": 4.4038049303322615e-05, + "loss": 0.2043, + "step": 3565 + }, + { + "epoch": 0.96, + "learning_rate": 4.403637459807074e-05, + "loss": 0.1794, + "step": 3566 + }, + { + "epoch": 0.96, + "learning_rate": 4.4034699892818864e-05, + "loss": 0.2174, + "step": 3567 + }, + { + "epoch": 0.96, + "learning_rate": 4.403302518756699e-05, + "loss": 0.3772, + "step": 3568 + }, + { + "epoch": 0.96, + "learning_rate": 4.403135048231511e-05, + "loss": 0.1723, + "step": 3569 + }, + { + "epoch": 0.96, + "learning_rate": 4.4029675777063236e-05, + "loss": 0.3311, + "step": 3570 + }, + { + "epoch": 0.96, + "learning_rate": 4.402800107181137e-05, + "loss": 0.2971, + "step": 3571 + }, + { + "epoch": 0.96, + "learning_rate": 4.402632636655949e-05, + "loss": 0.4016, + "step": 3572 + }, + { + "epoch": 0.96, + "learning_rate": 4.4024651661307616e-05, + "loss": 0.2025, + "step": 3573 + }, + { + "epoch": 0.96, + "learning_rate": 4.402297695605573e-05, + "loss": 0.4953, + "step": 3574 + }, + { + "epoch": 0.96, + "learning_rate": 4.402130225080386e-05, + "loss": 0.2839, + "step": 3575 + }, + { + "epoch": 0.96, + "learning_rate": 4.401962754555198e-05, + "loss": 0.2883, + "step": 3576 + }, + { + "epoch": 0.96, + "learning_rate": 4.4017952840300106e-05, + "loss": 0.6377, + "step": 3577 + }, + { + "epoch": 0.96, + "learning_rate": 4.401627813504824e-05, + "loss": 0.1966, + "step": 3578 + }, + { + "epoch": 0.96, + "learning_rate": 4.401460342979636e-05, + "loss": 0.202, + "step": 3579 + }, + { + "epoch": 0.96, + "learning_rate": 4.4012928724544485e-05, + "loss": 0.2881, + "step": 3580 + }, + { + "epoch": 0.96, + "learning_rate": 4.401125401929261e-05, + "loss": 0.2015, + "step": 3581 + }, + { + "epoch": 0.96, + "learning_rate": 4.4009579314040734e-05, + "loss": 0.3059, + "step": 3582 + }, + { + "epoch": 0.96, + "learning_rate": 4.400790460878886e-05, + "loss": 0.4938, + "step": 3583 + }, + { + "epoch": 0.96, + "learning_rate": 4.4006229903536975e-05, + "loss": 0.513, + "step": 3584 + }, + { + "epoch": 0.96, + "learning_rate": 4.40045551982851e-05, + "loss": 0.3782, + "step": 3585 + }, + { + "epoch": 0.96, + "learning_rate": 4.400288049303323e-05, + "loss": 0.2384, + "step": 3586 + }, + { + "epoch": 0.96, + "learning_rate": 4.4001205787781355e-05, + "loss": 0.1583, + "step": 3587 + }, + { + "epoch": 0.96, + "learning_rate": 4.399953108252948e-05, + "loss": 0.2144, + "step": 3588 + }, + { + "epoch": 0.96, + "learning_rate": 4.39978563772776e-05, + "loss": 0.1944, + "step": 3589 + }, + { + "epoch": 0.96, + "learning_rate": 4.399618167202573e-05, + "loss": 0.1751, + "step": 3590 + }, + { + "epoch": 0.96, + "learning_rate": 4.399450696677385e-05, + "loss": 0.4219, + "step": 3591 + }, + { + "epoch": 0.96, + "learning_rate": 4.3992832261521976e-05, + "loss": 0.1697, + "step": 3592 + }, + { + "epoch": 0.96, + "learning_rate": 4.399115755627009e-05, + "loss": 0.5271, + "step": 3593 + }, + { + "epoch": 0.96, + "learning_rate": 4.3989482851018224e-05, + "loss": 0.228, + "step": 3594 + }, + { + "epoch": 0.96, + "learning_rate": 4.398780814576635e-05, + "loss": 0.3078, + "step": 3595 + }, + { + "epoch": 0.96, + "learning_rate": 4.398613344051447e-05, + "loss": 0.2433, + "step": 3596 + }, + { + "epoch": 0.96, + "learning_rate": 4.39844587352626e-05, + "loss": 0.2123, + "step": 3597 + }, + { + "epoch": 0.96, + "learning_rate": 4.398278403001072e-05, + "loss": 0.1892, + "step": 3598 + }, + { + "epoch": 0.96, + "learning_rate": 4.3981109324758845e-05, + "loss": 0.1917, + "step": 3599 + }, + { + "epoch": 0.96, + "learning_rate": 4.397943461950697e-05, + "loss": 0.1878, + "step": 3600 + }, + { + "epoch": 0.96, + "learning_rate": 4.3977759914255094e-05, + "loss": 0.3768, + "step": 3601 + }, + { + "epoch": 0.97, + "learning_rate": 4.397608520900322e-05, + "loss": 0.1691, + "step": 3602 + }, + { + "epoch": 0.97, + "learning_rate": 4.397441050375134e-05, + "loss": 0.1923, + "step": 3603 + }, + { + "epoch": 0.97, + "learning_rate": 4.3972735798499466e-05, + "loss": 0.1857, + "step": 3604 + }, + { + "epoch": 0.97, + "learning_rate": 4.397106109324759e-05, + "loss": 0.1864, + "step": 3605 + }, + { + "epoch": 0.97, + "learning_rate": 4.3969386387995715e-05, + "loss": 0.1923, + "step": 3606 + }, + { + "epoch": 0.97, + "learning_rate": 4.396771168274384e-05, + "loss": 0.2231, + "step": 3607 + }, + { + "epoch": 0.97, + "learning_rate": 4.396603697749196e-05, + "loss": 0.2572, + "step": 3608 + }, + { + "epoch": 0.97, + "learning_rate": 4.396436227224009e-05, + "loss": 0.2189, + "step": 3609 + }, + { + "epoch": 0.97, + "learning_rate": 4.396268756698821e-05, + "loss": 0.2021, + "step": 3610 + }, + { + "epoch": 0.97, + "learning_rate": 4.396101286173634e-05, + "loss": 0.3435, + "step": 3611 + }, + { + "epoch": 0.97, + "learning_rate": 4.395933815648446e-05, + "loss": 0.1722, + "step": 3612 + }, + { + "epoch": 0.97, + "learning_rate": 4.3957663451232584e-05, + "loss": 0.21, + "step": 3613 + }, + { + "epoch": 0.97, + "learning_rate": 4.395598874598071e-05, + "loss": 0.2994, + "step": 3614 + }, + { + "epoch": 0.97, + "learning_rate": 4.395431404072883e-05, + "loss": 0.1876, + "step": 3615 + }, + { + "epoch": 0.97, + "learning_rate": 4.395263933547696e-05, + "loss": 0.2547, + "step": 3616 + }, + { + "epoch": 0.97, + "learning_rate": 4.395096463022508e-05, + "loss": 0.4869, + "step": 3617 + }, + { + "epoch": 0.97, + "learning_rate": 4.3949289924973205e-05, + "loss": 0.1717, + "step": 3618 + }, + { + "epoch": 0.97, + "learning_rate": 4.3947615219721336e-05, + "loss": 0.2377, + "step": 3619 + }, + { + "epoch": 0.97, + "learning_rate": 4.394594051446946e-05, + "loss": 0.2484, + "step": 3620 + }, + { + "epoch": 0.97, + "learning_rate": 4.394426580921758e-05, + "loss": 0.1742, + "step": 3621 + }, + { + "epoch": 0.97, + "learning_rate": 4.39425911039657e-05, + "loss": 0.2328, + "step": 3622 + }, + { + "epoch": 0.97, + "learning_rate": 4.3940916398713826e-05, + "loss": 0.2598, + "step": 3623 + }, + { + "epoch": 0.97, + "learning_rate": 4.393924169346195e-05, + "loss": 0.1837, + "step": 3624 + }, + { + "epoch": 0.97, + "learning_rate": 4.3937566988210075e-05, + "loss": 0.1975, + "step": 3625 + }, + { + "epoch": 0.97, + "learning_rate": 4.39358922829582e-05, + "loss": 0.2604, + "step": 3626 + }, + { + "epoch": 0.97, + "learning_rate": 4.393421757770633e-05, + "loss": 0.3418, + "step": 3627 + }, + { + "epoch": 0.97, + "learning_rate": 4.3932542872454454e-05, + "loss": 0.1887, + "step": 3628 + }, + { + "epoch": 0.97, + "learning_rate": 4.393086816720258e-05, + "loss": 0.4108, + "step": 3629 + }, + { + "epoch": 0.97, + "learning_rate": 4.3929193461950696e-05, + "loss": 0.2723, + "step": 3630 + }, + { + "epoch": 0.97, + "learning_rate": 4.392751875669882e-05, + "loss": 0.1742, + "step": 3631 + }, + { + "epoch": 0.97, + "learning_rate": 4.3925844051446944e-05, + "loss": 0.2055, + "step": 3632 + }, + { + "epoch": 0.97, + "learning_rate": 4.392416934619507e-05, + "loss": 0.2021, + "step": 3633 + }, + { + "epoch": 0.97, + "learning_rate": 4.39224946409432e-05, + "loss": 0.402, + "step": 3634 + }, + { + "epoch": 0.97, + "learning_rate": 4.3920819935691323e-05, + "loss": 0.574, + "step": 3635 + }, + { + "epoch": 0.97, + "learning_rate": 4.391914523043945e-05, + "loss": 0.2512, + "step": 3636 + }, + { + "epoch": 0.97, + "learning_rate": 4.391747052518757e-05, + "loss": 0.4231, + "step": 3637 + }, + { + "epoch": 0.97, + "learning_rate": 4.3915795819935696e-05, + "loss": 0.2995, + "step": 3638 + }, + { + "epoch": 0.98, + "learning_rate": 4.391412111468382e-05, + "loss": 0.1746, + "step": 3639 + }, + { + "epoch": 0.98, + "learning_rate": 4.391244640943194e-05, + "loss": 0.2207, + "step": 3640 + }, + { + "epoch": 0.98, + "learning_rate": 4.391077170418006e-05, + "loss": 0.2398, + "step": 3641 + }, + { + "epoch": 0.98, + "learning_rate": 4.390909699892819e-05, + "loss": 0.6748, + "step": 3642 + }, + { + "epoch": 0.98, + "learning_rate": 4.390742229367632e-05, + "loss": 0.3763, + "step": 3643 + }, + { + "epoch": 0.98, + "learning_rate": 4.390574758842444e-05, + "loss": 0.4109, + "step": 3644 + }, + { + "epoch": 0.98, + "learning_rate": 4.3904072883172566e-05, + "loss": 0.4387, + "step": 3645 + }, + { + "epoch": 0.98, + "learning_rate": 4.390239817792069e-05, + "loss": 0.7566, + "step": 3646 + }, + { + "epoch": 0.98, + "learning_rate": 4.3900723472668814e-05, + "loss": 0.3526, + "step": 3647 + }, + { + "epoch": 0.98, + "learning_rate": 4.389904876741694e-05, + "loss": 0.3508, + "step": 3648 + }, + { + "epoch": 0.98, + "learning_rate": 4.3897374062165056e-05, + "loss": 0.3146, + "step": 3649 + }, + { + "epoch": 0.98, + "learning_rate": 4.3895699356913187e-05, + "loss": 0.304, + "step": 3650 + }, + { + "epoch": 0.98, + "learning_rate": 4.389402465166131e-05, + "loss": 0.3686, + "step": 3651 + }, + { + "epoch": 0.98, + "learning_rate": 4.3892349946409435e-05, + "loss": 0.4539, + "step": 3652 + }, + { + "epoch": 0.98, + "learning_rate": 4.389067524115756e-05, + "loss": 0.2, + "step": 3653 + }, + { + "epoch": 0.98, + "learning_rate": 4.3889000535905683e-05, + "loss": 0.4137, + "step": 3654 + }, + { + "epoch": 0.98, + "learning_rate": 4.388732583065381e-05, + "loss": 0.3818, + "step": 3655 + }, + { + "epoch": 0.98, + "learning_rate": 4.388565112540193e-05, + "loss": 0.3819, + "step": 3656 + }, + { + "epoch": 0.98, + "learning_rate": 4.3883976420150056e-05, + "loss": 0.5001, + "step": 3657 + }, + { + "epoch": 0.98, + "learning_rate": 4.388230171489818e-05, + "loss": 0.3562, + "step": 3658 + }, + { + "epoch": 0.98, + "learning_rate": 4.3880627009646304e-05, + "loss": 0.403, + "step": 3659 + }, + { + "epoch": 0.98, + "learning_rate": 4.387895230439443e-05, + "loss": 0.2738, + "step": 3660 + }, + { + "epoch": 0.98, + "learning_rate": 4.387727759914255e-05, + "loss": 0.3801, + "step": 3661 + }, + { + "epoch": 0.98, + "learning_rate": 4.387560289389068e-05, + "loss": 0.1686, + "step": 3662 + }, + { + "epoch": 0.98, + "learning_rate": 4.38739281886388e-05, + "loss": 0.2161, + "step": 3663 + }, + { + "epoch": 0.98, + "learning_rate": 4.3872253483386925e-05, + "loss": 0.2866, + "step": 3664 + }, + { + "epoch": 0.98, + "learning_rate": 4.387057877813505e-05, + "loss": 0.2663, + "step": 3665 + }, + { + "epoch": 0.98, + "learning_rate": 4.3868904072883174e-05, + "loss": 0.2048, + "step": 3666 + }, + { + "epoch": 0.98, + "learning_rate": 4.3867229367631305e-05, + "loss": 0.2845, + "step": 3667 + }, + { + "epoch": 0.98, + "learning_rate": 4.386555466237942e-05, + "loss": 0.1907, + "step": 3668 + }, + { + "epoch": 0.98, + "learning_rate": 4.3863879957127547e-05, + "loss": 0.1829, + "step": 3669 + }, + { + "epoch": 0.98, + "learning_rate": 4.386220525187567e-05, + "loss": 0.2459, + "step": 3670 + }, + { + "epoch": 0.98, + "learning_rate": 4.3860530546623795e-05, + "loss": 0.1813, + "step": 3671 + }, + { + "epoch": 0.98, + "learning_rate": 4.385885584137192e-05, + "loss": 0.2203, + "step": 3672 + }, + { + "epoch": 0.98, + "learning_rate": 4.385718113612004e-05, + "loss": 0.1785, + "step": 3673 + }, + { + "epoch": 0.98, + "learning_rate": 4.385550643086817e-05, + "loss": 0.2082, + "step": 3674 + }, + { + "epoch": 0.98, + "learning_rate": 4.38538317256163e-05, + "loss": 0.2485, + "step": 3675 + }, + { + "epoch": 0.98, + "learning_rate": 4.385215702036442e-05, + "loss": 0.3459, + "step": 3676 + }, + { + "epoch": 0.99, + "learning_rate": 4.385048231511254e-05, + "loss": 0.1862, + "step": 3677 + }, + { + "epoch": 0.99, + "learning_rate": 4.3848807609860664e-05, + "loss": 0.2553, + "step": 3678 + }, + { + "epoch": 0.99, + "learning_rate": 4.384713290460879e-05, + "loss": 0.3205, + "step": 3679 + }, + { + "epoch": 0.99, + "learning_rate": 4.384545819935691e-05, + "loss": 0.2777, + "step": 3680 + }, + { + "epoch": 0.99, + "learning_rate": 4.384378349410504e-05, + "loss": 0.2054, + "step": 3681 + }, + { + "epoch": 0.99, + "learning_rate": 4.384210878885316e-05, + "loss": 0.2138, + "step": 3682 + }, + { + "epoch": 0.99, + "learning_rate": 4.384043408360129e-05, + "loss": 0.3084, + "step": 3683 + }, + { + "epoch": 0.99, + "learning_rate": 4.3838759378349416e-05, + "loss": 0.176, + "step": 3684 + }, + { + "epoch": 0.99, + "learning_rate": 4.383708467309754e-05, + "loss": 0.2647, + "step": 3685 + }, + { + "epoch": 0.99, + "learning_rate": 4.383540996784566e-05, + "loss": 0.2819, + "step": 3686 + }, + { + "epoch": 0.99, + "learning_rate": 4.383373526259378e-05, + "loss": 0.3779, + "step": 3687 + }, + { + "epoch": 0.99, + "learning_rate": 4.3832060557341907e-05, + "loss": 0.3125, + "step": 3688 + }, + { + "epoch": 0.99, + "learning_rate": 4.383038585209003e-05, + "loss": 0.1763, + "step": 3689 + }, + { + "epoch": 0.99, + "learning_rate": 4.382871114683816e-05, + "loss": 0.2433, + "step": 3690 + }, + { + "epoch": 0.99, + "learning_rate": 4.3827036441586286e-05, + "loss": 0.1754, + "step": 3691 + }, + { + "epoch": 0.99, + "learning_rate": 4.382536173633441e-05, + "loss": 0.2001, + "step": 3692 + }, + { + "epoch": 0.99, + "learning_rate": 4.3823687031082534e-05, + "loss": 0.1735, + "step": 3693 + }, + { + "epoch": 0.99, + "learning_rate": 4.382201232583066e-05, + "loss": 0.3825, + "step": 3694 + }, + { + "epoch": 0.99, + "learning_rate": 4.382033762057878e-05, + "loss": 0.1915, + "step": 3695 + }, + { + "epoch": 0.99, + "learning_rate": 4.38186629153269e-05, + "loss": 0.1575, + "step": 3696 + }, + { + "epoch": 0.99, + "learning_rate": 4.3816988210075024e-05, + "loss": 0.2146, + "step": 3697 + }, + { + "epoch": 0.99, + "learning_rate": 4.3815313504823155e-05, + "loss": 0.2925, + "step": 3698 + }, + { + "epoch": 0.99, + "learning_rate": 4.381363879957128e-05, + "loss": 0.4377, + "step": 3699 + }, + { + "epoch": 0.99, + "learning_rate": 4.3811964094319404e-05, + "loss": 0.1905, + "step": 3700 + }, + { + "epoch": 0.99, + "learning_rate": 4.381028938906753e-05, + "loss": 0.1985, + "step": 3701 + }, + { + "epoch": 0.99, + "learning_rate": 4.380861468381565e-05, + "loss": 0.2554, + "step": 3702 + }, + { + "epoch": 0.99, + "learning_rate": 4.3806939978563776e-05, + "loss": 0.1843, + "step": 3703 + }, + { + "epoch": 0.99, + "learning_rate": 4.38052652733119e-05, + "loss": 0.2203, + "step": 3704 + }, + { + "epoch": 0.99, + "learning_rate": 4.380359056806002e-05, + "loss": 0.2568, + "step": 3705 + }, + { + "epoch": 0.99, + "learning_rate": 4.380191586280815e-05, + "loss": 0.4575, + "step": 3706 + }, + { + "epoch": 0.99, + "learning_rate": 4.380024115755627e-05, + "loss": 0.3615, + "step": 3707 + }, + { + "epoch": 0.99, + "learning_rate": 4.37985664523044e-05, + "loss": 0.3001, + "step": 3708 + }, + { + "epoch": 0.99, + "learning_rate": 4.379689174705252e-05, + "loss": 0.39, + "step": 3709 + }, + { + "epoch": 0.99, + "learning_rate": 4.3795217041800646e-05, + "loss": 0.2746, + "step": 3710 + }, + { + "epoch": 0.99, + "learning_rate": 4.379354233654877e-05, + "loss": 0.237, + "step": 3711 + }, + { + "epoch": 0.99, + "learning_rate": 4.3791867631296894e-05, + "loss": 0.536, + "step": 3712 + }, + { + "epoch": 0.99, + "learning_rate": 4.379019292604502e-05, + "loss": 0.212, + "step": 3713 + }, + { + "epoch": 1.0, + "learning_rate": 4.378851822079314e-05, + "loss": 0.203, + "step": 3714 + }, + { + "epoch": 1.0, + "learning_rate": 4.378684351554127e-05, + "loss": 0.1559, + "step": 3715 + }, + { + "epoch": 1.0, + "learning_rate": 4.378516881028939e-05, + "loss": 0.2983, + "step": 3716 + }, + { + "epoch": 1.0, + "learning_rate": 4.3783494105037515e-05, + "loss": 0.2432, + "step": 3717 + }, + { + "epoch": 1.0, + "learning_rate": 4.378181939978564e-05, + "loss": 0.1957, + "step": 3718 + }, + { + "epoch": 1.0, + "learning_rate": 4.3780144694533764e-05, + "loss": 0.1816, + "step": 3719 + }, + { + "epoch": 1.0, + "learning_rate": 4.377846998928189e-05, + "loss": 0.1859, + "step": 3720 + }, + { + "epoch": 1.0, + "learning_rate": 4.377679528403001e-05, + "loss": 0.4293, + "step": 3721 + }, + { + "epoch": 1.0, + "learning_rate": 4.3775120578778136e-05, + "loss": 0.341, + "step": 3722 + }, + { + "epoch": 1.0, + "learning_rate": 4.377344587352627e-05, + "loss": 0.2387, + "step": 3723 + }, + { + "epoch": 1.0, + "learning_rate": 4.3771771168274385e-05, + "loss": 0.1821, + "step": 3724 + }, + { + "epoch": 1.0, + "learning_rate": 4.377009646302251e-05, + "loss": 0.2461, + "step": 3725 + }, + { + "epoch": 1.0, + "learning_rate": 4.376842175777063e-05, + "loss": 0.1844, + "step": 3726 + }, + { + "epoch": 1.0, + "learning_rate": 4.376674705251876e-05, + "loss": 0.4207, + "step": 3727 + }, + { + "epoch": 1.0, + "learning_rate": 4.376507234726688e-05, + "loss": 0.4242, + "step": 3728 + }, + { + "epoch": 1.0, + "learning_rate": 4.3763397642015006e-05, + "loss": 0.2721, + "step": 3729 + }, + { + "epoch": 1.0, + "learning_rate": 4.376172293676313e-05, + "loss": 0.1956, + "step": 3730 + }, + { + "epoch": 1.0, + "learning_rate": 4.376004823151126e-05, + "loss": 0.2461, + "step": 3731 + }, + { + "epoch": 1.0, + "learning_rate": 4.3758373526259385e-05, + "loss": 0.264, + "step": 3732 + }, + { + "epoch": 1.0, + "learning_rate": 4.37566988210075e-05, + "loss": 0.1237, + "step": 3733 + }, + { + "epoch": 1.0, + "learning_rate": 4.375502411575563e-05, + "loss": 0.1997, + "step": 3734 + }, + { + "epoch": 1.0, + "learning_rate": 4.375334941050375e-05, + "loss": 0.1363, + "step": 3735 + }, + { + "epoch": 1.0, + "learning_rate": 4.3751674705251875e-05, + "loss": 0.2505, + "step": 3736 + }, + { + "epoch": 1.0, + "learning_rate": 4.375e-05, + "loss": 0.2298, + "step": 3737 + }, + { + "epoch": 1.0, + "learning_rate": 4.3748325294748124e-05, + "loss": 0.1511, + "step": 3738 + }, + { + "epoch": 1.0, + "learning_rate": 4.3746650589496255e-05, + "loss": 0.2483, + "step": 3739 + }, + { + "epoch": 1.0, + "learning_rate": 4.374497588424438e-05, + "loss": 0.2133, + "step": 3740 + }, + { + "epoch": 1.0, + "learning_rate": 4.37433011789925e-05, + "loss": 0.1533, + "step": 3741 + }, + { + "epoch": 1.0, + "learning_rate": 4.374162647374062e-05, + "loss": 0.1617, + "step": 3742 + }, + { + "epoch": 1.0, + "learning_rate": 4.3739951768488745e-05, + "loss": 0.2338, + "step": 3743 + }, + { + "epoch": 1.0, + "learning_rate": 4.373827706323687e-05, + "loss": 0.1761, + "step": 3744 + }, + { + "epoch": 1.0, + "learning_rate": 4.373660235798499e-05, + "loss": 0.1504, + "step": 3745 + }, + { + "epoch": 1.0, + "learning_rate": 4.3734927652733124e-05, + "loss": 0.1053, + "step": 3746 + }, + { + "epoch": 1.0, + "learning_rate": 4.373325294748125e-05, + "loss": 0.1861, + "step": 3747 + }, + { + "epoch": 1.0, + "learning_rate": 4.373157824222937e-05, + "loss": 0.1613, + "step": 3748 + }, + { + "epoch": 1.0, + "learning_rate": 4.37299035369775e-05, + "loss": 0.131, + "step": 3749 + }, + { + "epoch": 1.0, + "learning_rate": 4.372822883172562e-05, + "loss": 0.2697, + "step": 3750 + }, + { + "epoch": 1.01, + "learning_rate": 4.372655412647374e-05, + "loss": 0.2935, + "step": 3751 + }, + { + "epoch": 1.01, + "learning_rate": 4.372487942122186e-05, + "loss": 0.1504, + "step": 3752 + }, + { + "epoch": 1.01, + "learning_rate": 4.372320471596999e-05, + "loss": 0.1529, + "step": 3753 + }, + { + "epoch": 1.01, + "learning_rate": 4.372153001071812e-05, + "loss": 0.1264, + "step": 3754 + }, + { + "epoch": 1.01, + "learning_rate": 4.371985530546624e-05, + "loss": 0.1749, + "step": 3755 + }, + { + "epoch": 1.01, + "learning_rate": 4.3718180600214366e-05, + "loss": 0.1333, + "step": 3756 + }, + { + "epoch": 1.01, + "learning_rate": 4.371650589496249e-05, + "loss": 0.1911, + "step": 3757 + }, + { + "epoch": 1.01, + "learning_rate": 4.3714831189710615e-05, + "loss": 0.1237, + "step": 3758 + }, + { + "epoch": 1.01, + "learning_rate": 4.371315648445874e-05, + "loss": 0.1334, + "step": 3759 + }, + { + "epoch": 1.01, + "learning_rate": 4.371148177920686e-05, + "loss": 0.1191, + "step": 3760 + }, + { + "epoch": 1.01, + "learning_rate": 4.370980707395498e-05, + "loss": 0.1706, + "step": 3761 + }, + { + "epoch": 1.01, + "learning_rate": 4.370813236870311e-05, + "loss": 0.1822, + "step": 3762 + }, + { + "epoch": 1.01, + "learning_rate": 4.3706457663451236e-05, + "loss": 0.148, + "step": 3763 + }, + { + "epoch": 1.01, + "learning_rate": 4.370478295819936e-05, + "loss": 0.1269, + "step": 3764 + }, + { + "epoch": 1.01, + "learning_rate": 4.3703108252947484e-05, + "loss": 0.2583, + "step": 3765 + }, + { + "epoch": 1.01, + "learning_rate": 4.370143354769561e-05, + "loss": 0.1495, + "step": 3766 + }, + { + "epoch": 1.01, + "learning_rate": 4.369975884244373e-05, + "loss": 0.13, + "step": 3767 + }, + { + "epoch": 1.01, + "learning_rate": 4.369808413719186e-05, + "loss": 0.1412, + "step": 3768 + }, + { + "epoch": 1.01, + "learning_rate": 4.369640943193998e-05, + "loss": 0.198, + "step": 3769 + }, + { + "epoch": 1.01, + "learning_rate": 4.3694734726688105e-05, + "loss": 0.2469, + "step": 3770 + }, + { + "epoch": 1.01, + "learning_rate": 4.369306002143623e-05, + "loss": 0.1487, + "step": 3771 + }, + { + "epoch": 1.01, + "learning_rate": 4.3691385316184354e-05, + "loss": 0.1998, + "step": 3772 + }, + { + "epoch": 1.01, + "learning_rate": 4.368971061093248e-05, + "loss": 0.1354, + "step": 3773 + }, + { + "epoch": 1.01, + "learning_rate": 4.36880359056806e-05, + "loss": 0.1281, + "step": 3774 + }, + { + "epoch": 1.01, + "learning_rate": 4.3686361200428726e-05, + "loss": 0.212, + "step": 3775 + }, + { + "epoch": 1.01, + "learning_rate": 4.368468649517685e-05, + "loss": 0.2096, + "step": 3776 + }, + { + "epoch": 1.01, + "learning_rate": 4.3683011789924975e-05, + "loss": 0.195, + "step": 3777 + }, + { + "epoch": 1.01, + "learning_rate": 4.36813370846731e-05, + "loss": 0.2352, + "step": 3778 + }, + { + "epoch": 1.01, + "learning_rate": 4.367966237942122e-05, + "loss": 0.2611, + "step": 3779 + }, + { + "epoch": 1.01, + "learning_rate": 4.367798767416935e-05, + "loss": 0.1779, + "step": 3780 + }, + { + "epoch": 1.01, + "learning_rate": 4.367631296891747e-05, + "loss": 0.2565, + "step": 3781 + }, + { + "epoch": 1.01, + "learning_rate": 4.3674638263665596e-05, + "loss": 0.1269, + "step": 3782 + }, + { + "epoch": 1.01, + "learning_rate": 4.367296355841372e-05, + "loss": 0.1357, + "step": 3783 + }, + { + "epoch": 1.01, + "learning_rate": 4.3671288853161844e-05, + "loss": 0.3219, + "step": 3784 + }, + { + "epoch": 1.01, + "learning_rate": 4.366961414790997e-05, + "loss": 0.2363, + "step": 3785 + }, + { + "epoch": 1.01, + "learning_rate": 4.366793944265809e-05, + "loss": 0.1479, + "step": 3786 + }, + { + "epoch": 1.01, + "learning_rate": 4.3666264737406224e-05, + "loss": 0.1748, + "step": 3787 + }, + { + "epoch": 1.02, + "learning_rate": 4.366459003215435e-05, + "loss": 0.1458, + "step": 3788 + }, + { + "epoch": 1.02, + "learning_rate": 4.3662915326902465e-05, + "loss": 0.2739, + "step": 3789 + }, + { + "epoch": 1.02, + "learning_rate": 4.366124062165059e-05, + "loss": 0.1832, + "step": 3790 + }, + { + "epoch": 1.02, + "learning_rate": 4.3659565916398714e-05, + "loss": 0.1178, + "step": 3791 + }, + { + "epoch": 1.02, + "learning_rate": 4.365789121114684e-05, + "loss": 0.1403, + "step": 3792 + }, + { + "epoch": 1.02, + "learning_rate": 4.365621650589496e-05, + "loss": 0.1397, + "step": 3793 + }, + { + "epoch": 1.02, + "learning_rate": 4.3654541800643086e-05, + "loss": 0.1521, + "step": 3794 + }, + { + "epoch": 1.02, + "learning_rate": 4.365286709539122e-05, + "loss": 0.204, + "step": 3795 + }, + { + "epoch": 1.02, + "learning_rate": 4.365119239013934e-05, + "loss": 0.1722, + "step": 3796 + }, + { + "epoch": 1.02, + "learning_rate": 4.3649517684887466e-05, + "loss": 0.1535, + "step": 3797 + }, + { + "epoch": 1.02, + "learning_rate": 4.364784297963558e-05, + "loss": 0.1317, + "step": 3798 + }, + { + "epoch": 1.02, + "learning_rate": 4.364616827438371e-05, + "loss": 0.1671, + "step": 3799 + }, + { + "epoch": 1.02, + "learning_rate": 4.364449356913183e-05, + "loss": 0.1334, + "step": 3800 + }, + { + "epoch": 1.02, + "learning_rate": 4.3642818863879956e-05, + "loss": 0.2588, + "step": 3801 + }, + { + "epoch": 1.02, + "learning_rate": 4.364114415862809e-05, + "loss": 0.1449, + "step": 3802 + }, + { + "epoch": 1.02, + "learning_rate": 4.363946945337621e-05, + "loss": 0.2613, + "step": 3803 + }, + { + "epoch": 1.02, + "learning_rate": 4.3637794748124335e-05, + "loss": 0.1399, + "step": 3804 + }, + { + "epoch": 1.02, + "learning_rate": 4.363612004287246e-05, + "loss": 0.1451, + "step": 3805 + }, + { + "epoch": 1.02, + "learning_rate": 4.3634445337620584e-05, + "loss": 0.1706, + "step": 3806 + }, + { + "epoch": 1.02, + "learning_rate": 4.36327706323687e-05, + "loss": 0.1501, + "step": 3807 + }, + { + "epoch": 1.02, + "learning_rate": 4.3631095927116825e-05, + "loss": 0.1242, + "step": 3808 + }, + { + "epoch": 1.02, + "learning_rate": 4.362942122186495e-05, + "loss": 0.1145, + "step": 3809 + }, + { + "epoch": 1.02, + "learning_rate": 4.362774651661308e-05, + "loss": 0.1539, + "step": 3810 + }, + { + "epoch": 1.02, + "learning_rate": 4.3626071811361205e-05, + "loss": 0.161, + "step": 3811 + }, + { + "epoch": 1.02, + "learning_rate": 4.362439710610933e-05, + "loss": 0.1285, + "step": 3812 + }, + { + "epoch": 1.02, + "learning_rate": 4.362272240085745e-05, + "loss": 0.1457, + "step": 3813 + }, + { + "epoch": 1.02, + "learning_rate": 4.362104769560558e-05, + "loss": 0.1934, + "step": 3814 + }, + { + "epoch": 1.02, + "learning_rate": 4.36193729903537e-05, + "loss": 0.1538, + "step": 3815 + }, + { + "epoch": 1.02, + "learning_rate": 4.3617698285101826e-05, + "loss": 0.2103, + "step": 3816 + }, + { + "epoch": 1.02, + "learning_rate": 4.361602357984994e-05, + "loss": 0.2269, + "step": 3817 + }, + { + "epoch": 1.02, + "learning_rate": 4.3614348874598074e-05, + "loss": 0.2692, + "step": 3818 + }, + { + "epoch": 1.02, + "learning_rate": 4.36126741693462e-05, + "loss": 0.1904, + "step": 3819 + }, + { + "epoch": 1.02, + "learning_rate": 4.361099946409432e-05, + "loss": 0.1842, + "step": 3820 + }, + { + "epoch": 1.02, + "learning_rate": 4.360932475884245e-05, + "loss": 0.1728, + "step": 3821 + }, + { + "epoch": 1.02, + "learning_rate": 4.360765005359057e-05, + "loss": 0.1672, + "step": 3822 + }, + { + "epoch": 1.02, + "learning_rate": 4.3605975348338695e-05, + "loss": 0.123, + "step": 3823 + }, + { + "epoch": 1.02, + "learning_rate": 4.360430064308682e-05, + "loss": 0.2175, + "step": 3824 + }, + { + "epoch": 1.02, + "learning_rate": 4.3602625937834944e-05, + "loss": 0.2272, + "step": 3825 + }, + { + "epoch": 1.03, + "learning_rate": 4.360095123258307e-05, + "loss": 0.3173, + "step": 3826 + }, + { + "epoch": 1.03, + "learning_rate": 4.359927652733119e-05, + "loss": 0.156, + "step": 3827 + }, + { + "epoch": 1.03, + "learning_rate": 4.3597601822079316e-05, + "loss": 0.1399, + "step": 3828 + }, + { + "epoch": 1.03, + "learning_rate": 4.359592711682744e-05, + "loss": 0.1665, + "step": 3829 + }, + { + "epoch": 1.03, + "learning_rate": 4.3594252411575565e-05, + "loss": 0.1747, + "step": 3830 + }, + { + "epoch": 1.03, + "learning_rate": 4.359257770632369e-05, + "loss": 0.1871, + "step": 3831 + }, + { + "epoch": 1.03, + "learning_rate": 4.359090300107181e-05, + "loss": 0.1388, + "step": 3832 + }, + { + "epoch": 1.03, + "learning_rate": 4.358922829581994e-05, + "loss": 0.1358, + "step": 3833 + }, + { + "epoch": 1.03, + "learning_rate": 4.358755359056806e-05, + "loss": 0.2112, + "step": 3834 + }, + { + "epoch": 1.03, + "learning_rate": 4.3585878885316186e-05, + "loss": 0.1475, + "step": 3835 + }, + { + "epoch": 1.03, + "learning_rate": 4.358420418006431e-05, + "loss": 0.1464, + "step": 3836 + }, + { + "epoch": 1.03, + "learning_rate": 4.3582529474812434e-05, + "loss": 0.1523, + "step": 3837 + }, + { + "epoch": 1.03, + "learning_rate": 4.358085476956056e-05, + "loss": 0.1596, + "step": 3838 + }, + { + "epoch": 1.03, + "learning_rate": 4.357918006430868e-05, + "loss": 0.1084, + "step": 3839 + }, + { + "epoch": 1.03, + "learning_rate": 4.357750535905681e-05, + "loss": 0.1467, + "step": 3840 + }, + { + "epoch": 1.03, + "learning_rate": 4.357583065380493e-05, + "loss": 0.2693, + "step": 3841 + }, + { + "epoch": 1.03, + "learning_rate": 4.3574155948553055e-05, + "loss": 0.1234, + "step": 3842 + }, + { + "epoch": 1.03, + "learning_rate": 4.3572481243301186e-05, + "loss": 0.2693, + "step": 3843 + }, + { + "epoch": 1.03, + "learning_rate": 4.357080653804931e-05, + "loss": 0.1598, + "step": 3844 + }, + { + "epoch": 1.03, + "learning_rate": 4.356913183279743e-05, + "loss": 0.1728, + "step": 3845 + }, + { + "epoch": 1.03, + "learning_rate": 4.356745712754555e-05, + "loss": 0.2016, + "step": 3846 + }, + { + "epoch": 1.03, + "learning_rate": 4.3565782422293676e-05, + "loss": 0.1545, + "step": 3847 + }, + { + "epoch": 1.03, + "learning_rate": 4.35641077170418e-05, + "loss": 0.2804, + "step": 3848 + }, + { + "epoch": 1.03, + "learning_rate": 4.3562433011789925e-05, + "loss": 0.2984, + "step": 3849 + }, + { + "epoch": 1.03, + "learning_rate": 4.356075830653805e-05, + "loss": 0.1217, + "step": 3850 + }, + { + "epoch": 1.03, + "learning_rate": 4.355908360128618e-05, + "loss": 0.3145, + "step": 3851 + }, + { + "epoch": 1.03, + "learning_rate": 4.3557408896034304e-05, + "loss": 0.2223, + "step": 3852 + }, + { + "epoch": 1.03, + "learning_rate": 4.355573419078243e-05, + "loss": 0.2187, + "step": 3853 + }, + { + "epoch": 1.03, + "learning_rate": 4.3554059485530546e-05, + "loss": 0.1402, + "step": 3854 + }, + { + "epoch": 1.03, + "learning_rate": 4.355238478027867e-05, + "loss": 0.2369, + "step": 3855 + }, + { + "epoch": 1.03, + "learning_rate": 4.3550710075026794e-05, + "loss": 0.2853, + "step": 3856 + }, + { + "epoch": 1.03, + "learning_rate": 4.354903536977492e-05, + "loss": 0.1321, + "step": 3857 + }, + { + "epoch": 1.03, + "learning_rate": 4.354736066452305e-05, + "loss": 0.1601, + "step": 3858 + }, + { + "epoch": 1.03, + "learning_rate": 4.3545685959271173e-05, + "loss": 0.2973, + "step": 3859 + }, + { + "epoch": 1.03, + "learning_rate": 4.35440112540193e-05, + "loss": 0.1327, + "step": 3860 + }, + { + "epoch": 1.03, + "learning_rate": 4.354233654876742e-05, + "loss": 0.2847, + "step": 3861 + }, + { + "epoch": 1.03, + "learning_rate": 4.3540661843515546e-05, + "loss": 0.1476, + "step": 3862 + }, + { + "epoch": 1.04, + "learning_rate": 4.3538987138263664e-05, + "loss": 0.1306, + "step": 3863 + }, + { + "epoch": 1.04, + "learning_rate": 4.353731243301179e-05, + "loss": 0.2016, + "step": 3864 + }, + { + "epoch": 1.04, + "learning_rate": 4.353563772775991e-05, + "loss": 0.2685, + "step": 3865 + }, + { + "epoch": 1.04, + "learning_rate": 4.353396302250804e-05, + "loss": 0.139, + "step": 3866 + }, + { + "epoch": 1.04, + "learning_rate": 4.353228831725617e-05, + "loss": 0.1615, + "step": 3867 + }, + { + "epoch": 1.04, + "learning_rate": 4.353061361200429e-05, + "loss": 0.1588, + "step": 3868 + }, + { + "epoch": 1.04, + "learning_rate": 4.3528938906752416e-05, + "loss": 0.1697, + "step": 3869 + }, + { + "epoch": 1.04, + "learning_rate": 4.352726420150054e-05, + "loss": 0.1695, + "step": 3870 + }, + { + "epoch": 1.04, + "learning_rate": 4.3525589496248664e-05, + "loss": 0.1617, + "step": 3871 + }, + { + "epoch": 1.04, + "learning_rate": 4.352391479099679e-05, + "loss": 0.1912, + "step": 3872 + }, + { + "epoch": 1.04, + "learning_rate": 4.3522240085744906e-05, + "loss": 0.1707, + "step": 3873 + }, + { + "epoch": 1.04, + "learning_rate": 4.3520565380493037e-05, + "loss": 0.2377, + "step": 3874 + }, + { + "epoch": 1.04, + "learning_rate": 4.351889067524116e-05, + "loss": 0.1801, + "step": 3875 + }, + { + "epoch": 1.04, + "learning_rate": 4.3517215969989285e-05, + "loss": 0.2345, + "step": 3876 + }, + { + "epoch": 1.04, + "learning_rate": 4.351554126473741e-05, + "loss": 0.1341, + "step": 3877 + }, + { + "epoch": 1.04, + "learning_rate": 4.3513866559485533e-05, + "loss": 0.2203, + "step": 3878 + }, + { + "epoch": 1.04, + "learning_rate": 4.351219185423366e-05, + "loss": 0.1383, + "step": 3879 + }, + { + "epoch": 1.04, + "learning_rate": 4.351051714898178e-05, + "loss": 0.15, + "step": 3880 + }, + { + "epoch": 1.04, + "learning_rate": 4.3508842443729906e-05, + "loss": 0.155, + "step": 3881 + }, + { + "epoch": 1.04, + "learning_rate": 4.350716773847803e-05, + "loss": 0.2877, + "step": 3882 + }, + { + "epoch": 1.04, + "learning_rate": 4.3505493033226154e-05, + "loss": 0.1498, + "step": 3883 + }, + { + "epoch": 1.04, + "learning_rate": 4.350381832797428e-05, + "loss": 0.1744, + "step": 3884 + }, + { + "epoch": 1.04, + "learning_rate": 4.35021436227224e-05, + "loss": 0.1637, + "step": 3885 + }, + { + "epoch": 1.04, + "learning_rate": 4.350046891747053e-05, + "loss": 0.1283, + "step": 3886 + }, + { + "epoch": 1.04, + "learning_rate": 4.349879421221865e-05, + "loss": 0.1404, + "step": 3887 + }, + { + "epoch": 1.04, + "learning_rate": 4.3497119506966776e-05, + "loss": 0.1667, + "step": 3888 + }, + { + "epoch": 1.04, + "learning_rate": 4.34954448017149e-05, + "loss": 0.1639, + "step": 3889 + }, + { + "epoch": 1.04, + "learning_rate": 4.3493770096463024e-05, + "loss": 0.2428, + "step": 3890 + }, + { + "epoch": 1.04, + "learning_rate": 4.349209539121115e-05, + "loss": 0.1481, + "step": 3891 + }, + { + "epoch": 1.04, + "learning_rate": 4.349042068595927e-05, + "loss": 0.2346, + "step": 3892 + }, + { + "epoch": 1.04, + "learning_rate": 4.3488745980707397e-05, + "loss": 0.1434, + "step": 3893 + }, + { + "epoch": 1.04, + "learning_rate": 4.348707127545552e-05, + "loss": 0.1312, + "step": 3894 + }, + { + "epoch": 1.04, + "learning_rate": 4.3485396570203645e-05, + "loss": 0.1264, + "step": 3895 + }, + { + "epoch": 1.04, + "learning_rate": 4.348372186495177e-05, + "loss": 0.159, + "step": 3896 + }, + { + "epoch": 1.04, + "learning_rate": 4.3482047159699893e-05, + "loss": 0.1319, + "step": 3897 + }, + { + "epoch": 1.04, + "learning_rate": 4.348037245444802e-05, + "loss": 0.1411, + "step": 3898 + }, + { + "epoch": 1.04, + "learning_rate": 4.347869774919615e-05, + "loss": 0.2113, + "step": 3899 + }, + { + "epoch": 1.05, + "learning_rate": 4.347702304394427e-05, + "loss": 0.1784, + "step": 3900 + }, + { + "epoch": 1.05, + "learning_rate": 4.347534833869239e-05, + "loss": 0.1939, + "step": 3901 + }, + { + "epoch": 1.05, + "learning_rate": 4.3473673633440514e-05, + "loss": 0.162, + "step": 3902 + }, + { + "epoch": 1.05, + "learning_rate": 4.347199892818864e-05, + "loss": 0.1648, + "step": 3903 + }, + { + "epoch": 1.05, + "learning_rate": 4.347032422293676e-05, + "loss": 0.1572, + "step": 3904 + }, + { + "epoch": 1.05, + "learning_rate": 4.346864951768489e-05, + "loss": 0.1313, + "step": 3905 + }, + { + "epoch": 1.05, + "learning_rate": 4.346697481243301e-05, + "loss": 0.1356, + "step": 3906 + }, + { + "epoch": 1.05, + "learning_rate": 4.346530010718114e-05, + "loss": 0.2686, + "step": 3907 + }, + { + "epoch": 1.05, + "learning_rate": 4.3463625401929266e-05, + "loss": 0.1618, + "step": 3908 + }, + { + "epoch": 1.05, + "learning_rate": 4.346195069667739e-05, + "loss": 0.1423, + "step": 3909 + }, + { + "epoch": 1.05, + "learning_rate": 4.346027599142551e-05, + "loss": 0.2333, + "step": 3910 + }, + { + "epoch": 1.05, + "learning_rate": 4.345860128617363e-05, + "loss": 0.1649, + "step": 3911 + }, + { + "epoch": 1.05, + "learning_rate": 4.3456926580921757e-05, + "loss": 0.1253, + "step": 3912 + }, + { + "epoch": 1.05, + "learning_rate": 4.345525187566988e-05, + "loss": 0.1352, + "step": 3913 + }, + { + "epoch": 1.05, + "learning_rate": 4.3453577170418005e-05, + "loss": 0.1423, + "step": 3914 + }, + { + "epoch": 1.05, + "learning_rate": 4.3451902465166136e-05, + "loss": 0.2164, + "step": 3915 + }, + { + "epoch": 1.05, + "learning_rate": 4.345022775991426e-05, + "loss": 0.3535, + "step": 3916 + }, + { + "epoch": 1.05, + "learning_rate": 4.3448553054662384e-05, + "loss": 0.1573, + "step": 3917 + }, + { + "epoch": 1.05, + "learning_rate": 4.344687834941051e-05, + "loss": 0.1663, + "step": 3918 + }, + { + "epoch": 1.05, + "learning_rate": 4.3445203644158626e-05, + "loss": 0.1483, + "step": 3919 + }, + { + "epoch": 1.05, + "learning_rate": 4.344352893890675e-05, + "loss": 0.1818, + "step": 3920 + }, + { + "epoch": 1.05, + "learning_rate": 4.3441854233654874e-05, + "loss": 0.1577, + "step": 3921 + }, + { + "epoch": 1.05, + "learning_rate": 4.3440179528403005e-05, + "loss": 0.1713, + "step": 3922 + }, + { + "epoch": 1.05, + "learning_rate": 4.343850482315113e-05, + "loss": 0.1421, + "step": 3923 + }, + { + "epoch": 1.05, + "learning_rate": 4.3436830117899254e-05, + "loss": 0.1729, + "step": 3924 + }, + { + "epoch": 1.05, + "learning_rate": 4.343515541264738e-05, + "loss": 0.1611, + "step": 3925 + }, + { + "epoch": 1.05, + "learning_rate": 4.34334807073955e-05, + "loss": 0.1391, + "step": 3926 + }, + { + "epoch": 1.05, + "learning_rate": 4.3431806002143626e-05, + "loss": 0.2761, + "step": 3927 + }, + { + "epoch": 1.05, + "learning_rate": 4.343013129689175e-05, + "loss": 0.2124, + "step": 3928 + }, + { + "epoch": 1.05, + "learning_rate": 4.342845659163987e-05, + "loss": 0.1604, + "step": 3929 + }, + { + "epoch": 1.05, + "learning_rate": 4.3426781886388e-05, + "loss": 0.1844, + "step": 3930 + }, + { + "epoch": 1.05, + "learning_rate": 4.342510718113612e-05, + "loss": 0.1593, + "step": 3931 + }, + { + "epoch": 1.05, + "learning_rate": 4.342343247588425e-05, + "loss": 0.1303, + "step": 3932 + }, + { + "epoch": 1.05, + "learning_rate": 4.342175777063237e-05, + "loss": 0.1659, + "step": 3933 + }, + { + "epoch": 1.05, + "learning_rate": 4.3420083065380496e-05, + "loss": 0.1706, + "step": 3934 + }, + { + "epoch": 1.05, + "learning_rate": 4.341840836012862e-05, + "loss": 0.167, + "step": 3935 + }, + { + "epoch": 1.05, + "learning_rate": 4.3416733654876744e-05, + "loss": 0.1842, + "step": 3936 + }, + { + "epoch": 1.05, + "learning_rate": 4.341505894962487e-05, + "loss": 0.1706, + "step": 3937 + }, + { + "epoch": 1.06, + "learning_rate": 4.341338424437299e-05, + "loss": 0.1253, + "step": 3938 + }, + { + "epoch": 1.06, + "learning_rate": 4.341170953912112e-05, + "loss": 0.1786, + "step": 3939 + }, + { + "epoch": 1.06, + "learning_rate": 4.341003483386924e-05, + "loss": 0.2799, + "step": 3940 + }, + { + "epoch": 1.06, + "learning_rate": 4.3408360128617365e-05, + "loss": 0.2683, + "step": 3941 + }, + { + "epoch": 1.06, + "learning_rate": 4.340668542336549e-05, + "loss": 0.1717, + "step": 3942 + }, + { + "epoch": 1.06, + "learning_rate": 4.3405010718113614e-05, + "loss": 0.1515, + "step": 3943 + }, + { + "epoch": 1.06, + "learning_rate": 4.340333601286174e-05, + "loss": 0.1767, + "step": 3944 + }, + { + "epoch": 1.06, + "learning_rate": 4.340166130760986e-05, + "loss": 0.1334, + "step": 3945 + }, + { + "epoch": 1.06, + "learning_rate": 4.3399986602357986e-05, + "loss": 0.365, + "step": 3946 + }, + { + "epoch": 1.06, + "learning_rate": 4.339831189710611e-05, + "loss": 0.2152, + "step": 3947 + }, + { + "epoch": 1.06, + "learning_rate": 4.3396637191854235e-05, + "loss": 0.1496, + "step": 3948 + }, + { + "epoch": 1.06, + "learning_rate": 4.339496248660236e-05, + "loss": 0.1922, + "step": 3949 + }, + { + "epoch": 1.06, + "learning_rate": 4.339328778135048e-05, + "loss": 0.1602, + "step": 3950 + }, + { + "epoch": 1.06, + "learning_rate": 4.339161307609861e-05, + "loss": 0.1678, + "step": 3951 + }, + { + "epoch": 1.06, + "learning_rate": 4.338993837084673e-05, + "loss": 0.1683, + "step": 3952 + }, + { + "epoch": 1.06, + "learning_rate": 4.3388263665594856e-05, + "loss": 0.149, + "step": 3953 + }, + { + "epoch": 1.06, + "learning_rate": 4.338658896034298e-05, + "loss": 0.1526, + "step": 3954 + }, + { + "epoch": 1.06, + "learning_rate": 4.338491425509111e-05, + "loss": 0.168, + "step": 3955 + }, + { + "epoch": 1.06, + "learning_rate": 4.3383239549839235e-05, + "loss": 0.2494, + "step": 3956 + }, + { + "epoch": 1.06, + "learning_rate": 4.338156484458735e-05, + "loss": 0.1968, + "step": 3957 + }, + { + "epoch": 1.06, + "learning_rate": 4.337989013933548e-05, + "loss": 0.1748, + "step": 3958 + }, + { + "epoch": 1.06, + "learning_rate": 4.33782154340836e-05, + "loss": 0.1681, + "step": 3959 + }, + { + "epoch": 1.06, + "learning_rate": 4.3376540728831725e-05, + "loss": 0.15, + "step": 3960 + }, + { + "epoch": 1.06, + "learning_rate": 4.337486602357985e-05, + "loss": 0.2298, + "step": 3961 + }, + { + "epoch": 1.06, + "learning_rate": 4.3373191318327974e-05, + "loss": 0.3487, + "step": 3962 + }, + { + "epoch": 1.06, + "learning_rate": 4.3371516613076105e-05, + "loss": 0.2023, + "step": 3963 + }, + { + "epoch": 1.06, + "learning_rate": 4.336984190782423e-05, + "loss": 0.1644, + "step": 3964 + }, + { + "epoch": 1.06, + "learning_rate": 4.336816720257235e-05, + "loss": 0.3022, + "step": 3965 + }, + { + "epoch": 1.06, + "learning_rate": 4.336649249732047e-05, + "loss": 0.1384, + "step": 3966 + }, + { + "epoch": 1.06, + "learning_rate": 4.3364817792068595e-05, + "loss": 0.1663, + "step": 3967 + }, + { + "epoch": 1.06, + "learning_rate": 4.336314308681672e-05, + "loss": 0.3297, + "step": 3968 + }, + { + "epoch": 1.06, + "learning_rate": 4.336146838156484e-05, + "loss": 0.1492, + "step": 3969 + }, + { + "epoch": 1.06, + "learning_rate": 4.335979367631297e-05, + "loss": 0.1458, + "step": 3970 + }, + { + "epoch": 1.06, + "learning_rate": 4.33581189710611e-05, + "loss": 0.1524, + "step": 3971 + }, + { + "epoch": 1.06, + "learning_rate": 4.335644426580922e-05, + "loss": 0.1382, + "step": 3972 + }, + { + "epoch": 1.06, + "learning_rate": 4.335476956055735e-05, + "loss": 0.1415, + "step": 3973 + }, + { + "epoch": 1.06, + "learning_rate": 4.335309485530547e-05, + "loss": 0.1366, + "step": 3974 + }, + { + "epoch": 1.07, + "learning_rate": 4.335142015005359e-05, + "loss": 0.2417, + "step": 3975 + }, + { + "epoch": 1.07, + "learning_rate": 4.334974544480171e-05, + "loss": 0.172, + "step": 3976 + }, + { + "epoch": 1.07, + "learning_rate": 4.334807073954984e-05, + "loss": 0.1366, + "step": 3977 + }, + { + "epoch": 1.07, + "learning_rate": 4.334639603429797e-05, + "loss": 0.1446, + "step": 3978 + }, + { + "epoch": 1.07, + "learning_rate": 4.334472132904609e-05, + "loss": 0.1183, + "step": 3979 + }, + { + "epoch": 1.07, + "learning_rate": 4.3343046623794216e-05, + "loss": 0.2428, + "step": 3980 + }, + { + "epoch": 1.07, + "learning_rate": 4.334137191854234e-05, + "loss": 0.1549, + "step": 3981 + }, + { + "epoch": 1.07, + "learning_rate": 4.3339697213290465e-05, + "loss": 0.1856, + "step": 3982 + }, + { + "epoch": 1.07, + "learning_rate": 4.333802250803859e-05, + "loss": 0.1501, + "step": 3983 + }, + { + "epoch": 1.07, + "learning_rate": 4.333634780278671e-05, + "loss": 0.1511, + "step": 3984 + }, + { + "epoch": 1.07, + "learning_rate": 4.333467309753483e-05, + "loss": 0.1805, + "step": 3985 + }, + { + "epoch": 1.07, + "learning_rate": 4.333299839228296e-05, + "loss": 0.14, + "step": 3986 + }, + { + "epoch": 1.07, + "learning_rate": 4.3331323687031086e-05, + "loss": 0.1778, + "step": 3987 + }, + { + "epoch": 1.07, + "learning_rate": 4.332964898177921e-05, + "loss": 0.2066, + "step": 3988 + }, + { + "epoch": 1.07, + "learning_rate": 4.3327974276527334e-05, + "loss": 0.152, + "step": 3989 + }, + { + "epoch": 1.07, + "learning_rate": 4.332629957127546e-05, + "loss": 0.1799, + "step": 3990 + }, + { + "epoch": 1.07, + "learning_rate": 4.332462486602358e-05, + "loss": 0.315, + "step": 3991 + }, + { + "epoch": 1.07, + "learning_rate": 4.332295016077171e-05, + "loss": 0.1398, + "step": 3992 + }, + { + "epoch": 1.07, + "learning_rate": 4.332127545551983e-05, + "loss": 0.1185, + "step": 3993 + }, + { + "epoch": 1.07, + "learning_rate": 4.3319600750267955e-05, + "loss": 0.1465, + "step": 3994 + }, + { + "epoch": 1.07, + "learning_rate": 4.331792604501608e-05, + "loss": 0.144, + "step": 3995 + }, + { + "epoch": 1.07, + "learning_rate": 4.3316251339764204e-05, + "loss": 0.1772, + "step": 3996 + }, + { + "epoch": 1.07, + "learning_rate": 4.331457663451233e-05, + "loss": 0.1766, + "step": 3997 + }, + { + "epoch": 1.07, + "learning_rate": 4.331290192926045e-05, + "loss": 0.1989, + "step": 3998 + }, + { + "epoch": 1.07, + "learning_rate": 4.3311227224008576e-05, + "loss": 0.3366, + "step": 3999 + }, + { + "epoch": 1.07, + "learning_rate": 4.33095525187567e-05, + "loss": 0.2514, + "step": 4000 + }, + { + "epoch": 1.07, + "learning_rate": 4.3307877813504825e-05, + "loss": 0.1426, + "step": 4001 + }, + { + "epoch": 1.07, + "learning_rate": 4.330620310825295e-05, + "loss": 0.1615, + "step": 4002 + }, + { + "epoch": 1.07, + "learning_rate": 4.330452840300107e-05, + "loss": 0.1188, + "step": 4003 + }, + { + "epoch": 1.07, + "learning_rate": 4.33028536977492e-05, + "loss": 0.1452, + "step": 4004 + }, + { + "epoch": 1.07, + "learning_rate": 4.330117899249732e-05, + "loss": 0.1648, + "step": 4005 + }, + { + "epoch": 1.07, + "learning_rate": 4.3299504287245446e-05, + "loss": 0.1593, + "step": 4006 + }, + { + "epoch": 1.07, + "learning_rate": 4.329782958199357e-05, + "loss": 0.1367, + "step": 4007 + }, + { + "epoch": 1.07, + "learning_rate": 4.3296154876741694e-05, + "loss": 0.1467, + "step": 4008 + }, + { + "epoch": 1.07, + "learning_rate": 4.329448017148982e-05, + "loss": 0.193, + "step": 4009 + }, + { + "epoch": 1.07, + "learning_rate": 4.329280546623794e-05, + "loss": 0.2081, + "step": 4010 + }, + { + "epoch": 1.07, + "learning_rate": 4.3291130760986074e-05, + "loss": 0.2019, + "step": 4011 + }, + { + "epoch": 1.08, + "learning_rate": 4.32894560557342e-05, + "loss": 0.1511, + "step": 4012 + }, + { + "epoch": 1.08, + "learning_rate": 4.3287781350482315e-05, + "loss": 0.2838, + "step": 4013 + }, + { + "epoch": 1.08, + "learning_rate": 4.328610664523044e-05, + "loss": 0.1598, + "step": 4014 + }, + { + "epoch": 1.08, + "learning_rate": 4.3284431939978564e-05, + "loss": 0.1413, + "step": 4015 + }, + { + "epoch": 1.08, + "learning_rate": 4.328275723472669e-05, + "loss": 0.312, + "step": 4016 + }, + { + "epoch": 1.08, + "learning_rate": 4.328108252947481e-05, + "loss": 0.1426, + "step": 4017 + }, + { + "epoch": 1.08, + "learning_rate": 4.3279407824222936e-05, + "loss": 0.1526, + "step": 4018 + }, + { + "epoch": 1.08, + "learning_rate": 4.327773311897107e-05, + "loss": 0.1563, + "step": 4019 + }, + { + "epoch": 1.08, + "learning_rate": 4.327605841371919e-05, + "loss": 0.1587, + "step": 4020 + }, + { + "epoch": 1.08, + "learning_rate": 4.3274383708467316e-05, + "loss": 0.1373, + "step": 4021 + }, + { + "epoch": 1.08, + "learning_rate": 4.327270900321543e-05, + "loss": 0.1902, + "step": 4022 + }, + { + "epoch": 1.08, + "learning_rate": 4.327103429796356e-05, + "loss": 0.1612, + "step": 4023 + }, + { + "epoch": 1.08, + "learning_rate": 4.326935959271168e-05, + "loss": 0.1359, + "step": 4024 + }, + { + "epoch": 1.08, + "learning_rate": 4.3267684887459806e-05, + "loss": 0.153, + "step": 4025 + }, + { + "epoch": 1.08, + "learning_rate": 4.326601018220793e-05, + "loss": 0.1286, + "step": 4026 + }, + { + "epoch": 1.08, + "learning_rate": 4.326433547695606e-05, + "loss": 0.2029, + "step": 4027 + }, + { + "epoch": 1.08, + "learning_rate": 4.3262660771704185e-05, + "loss": 0.1595, + "step": 4028 + }, + { + "epoch": 1.08, + "learning_rate": 4.326098606645231e-05, + "loss": 0.1556, + "step": 4029 + }, + { + "epoch": 1.08, + "learning_rate": 4.3259311361200434e-05, + "loss": 0.1287, + "step": 4030 + }, + { + "epoch": 1.08, + "learning_rate": 4.325763665594855e-05, + "loss": 0.1311, + "step": 4031 + }, + { + "epoch": 1.08, + "learning_rate": 4.3255961950696675e-05, + "loss": 0.133, + "step": 4032 + }, + { + "epoch": 1.08, + "learning_rate": 4.32542872454448e-05, + "loss": 0.2028, + "step": 4033 + }, + { + "epoch": 1.08, + "learning_rate": 4.325261254019293e-05, + "loss": 0.1286, + "step": 4034 + }, + { + "epoch": 1.08, + "learning_rate": 4.3250937834941055e-05, + "loss": 0.1588, + "step": 4035 + }, + { + "epoch": 1.08, + "learning_rate": 4.324926312968918e-05, + "loss": 0.1339, + "step": 4036 + }, + { + "epoch": 1.08, + "learning_rate": 4.32475884244373e-05, + "loss": 0.1581, + "step": 4037 + }, + { + "epoch": 1.08, + "learning_rate": 4.324591371918543e-05, + "loss": 0.178, + "step": 4038 + }, + { + "epoch": 1.08, + "learning_rate": 4.324423901393355e-05, + "loss": 0.1515, + "step": 4039 + }, + { + "epoch": 1.08, + "learning_rate": 4.3242564308681676e-05, + "loss": 0.1488, + "step": 4040 + }, + { + "epoch": 1.08, + "learning_rate": 4.324088960342979e-05, + "loss": 0.2161, + "step": 4041 + }, + { + "epoch": 1.08, + "learning_rate": 4.3239214898177924e-05, + "loss": 0.1379, + "step": 4042 + }, + { + "epoch": 1.08, + "learning_rate": 4.323754019292605e-05, + "loss": 0.136, + "step": 4043 + }, + { + "epoch": 1.08, + "learning_rate": 4.323586548767417e-05, + "loss": 0.1797, + "step": 4044 + }, + { + "epoch": 1.08, + "learning_rate": 4.32341907824223e-05, + "loss": 0.2418, + "step": 4045 + }, + { + "epoch": 1.08, + "learning_rate": 4.323251607717042e-05, + "loss": 0.1453, + "step": 4046 + }, + { + "epoch": 1.08, + "learning_rate": 4.3230841371918545e-05, + "loss": 0.2704, + "step": 4047 + }, + { + "epoch": 1.08, + "learning_rate": 4.322916666666667e-05, + "loss": 0.3082, + "step": 4048 + }, + { + "epoch": 1.08, + "learning_rate": 4.3227491961414794e-05, + "loss": 0.1321, + "step": 4049 + }, + { + "epoch": 1.09, + "learning_rate": 4.322581725616292e-05, + "loss": 0.1395, + "step": 4050 + }, + { + "epoch": 1.09, + "learning_rate": 4.322414255091104e-05, + "loss": 0.1419, + "step": 4051 + }, + { + "epoch": 1.09, + "learning_rate": 4.3222467845659166e-05, + "loss": 0.1467, + "step": 4052 + }, + { + "epoch": 1.09, + "learning_rate": 4.322079314040729e-05, + "loss": 0.1854, + "step": 4053 + }, + { + "epoch": 1.09, + "learning_rate": 4.3219118435155415e-05, + "loss": 0.1581, + "step": 4054 + }, + { + "epoch": 1.09, + "learning_rate": 4.321744372990354e-05, + "loss": 0.1867, + "step": 4055 + }, + { + "epoch": 1.09, + "learning_rate": 4.321576902465166e-05, + "loss": 0.1969, + "step": 4056 + }, + { + "epoch": 1.09, + "learning_rate": 4.321409431939979e-05, + "loss": 0.2628, + "step": 4057 + }, + { + "epoch": 1.09, + "learning_rate": 4.321241961414791e-05, + "loss": 0.1458, + "step": 4058 + }, + { + "epoch": 1.09, + "learning_rate": 4.3210744908896036e-05, + "loss": 0.2605, + "step": 4059 + }, + { + "epoch": 1.09, + "learning_rate": 4.320907020364416e-05, + "loss": 0.1731, + "step": 4060 + }, + { + "epoch": 1.09, + "learning_rate": 4.3207395498392284e-05, + "loss": 0.1625, + "step": 4061 + }, + { + "epoch": 1.09, + "learning_rate": 4.320572079314041e-05, + "loss": 0.2225, + "step": 4062 + }, + { + "epoch": 1.09, + "learning_rate": 4.320404608788853e-05, + "loss": 0.1333, + "step": 4063 + }, + { + "epoch": 1.09, + "learning_rate": 4.320237138263666e-05, + "loss": 0.1995, + "step": 4064 + }, + { + "epoch": 1.09, + "learning_rate": 4.320069667738478e-05, + "loss": 0.2038, + "step": 4065 + }, + { + "epoch": 1.09, + "learning_rate": 4.3199021972132905e-05, + "loss": 0.2215, + "step": 4066 + }, + { + "epoch": 1.09, + "learning_rate": 4.3197347266881036e-05, + "loss": 0.1243, + "step": 4067 + }, + { + "epoch": 1.09, + "learning_rate": 4.319567256162916e-05, + "loss": 0.2631, + "step": 4068 + }, + { + "epoch": 1.09, + "learning_rate": 4.319399785637728e-05, + "loss": 0.2534, + "step": 4069 + }, + { + "epoch": 1.09, + "learning_rate": 4.31923231511254e-05, + "loss": 0.1484, + "step": 4070 + }, + { + "epoch": 1.09, + "learning_rate": 4.3190648445873526e-05, + "loss": 0.1868, + "step": 4071 + }, + { + "epoch": 1.09, + "learning_rate": 4.318897374062165e-05, + "loss": 0.2774, + "step": 4072 + }, + { + "epoch": 1.09, + "learning_rate": 4.3187299035369775e-05, + "loss": 0.1389, + "step": 4073 + }, + { + "epoch": 1.09, + "learning_rate": 4.31856243301179e-05, + "loss": 0.1931, + "step": 4074 + }, + { + "epoch": 1.09, + "learning_rate": 4.318394962486603e-05, + "loss": 0.123, + "step": 4075 + }, + { + "epoch": 1.09, + "learning_rate": 4.3182274919614154e-05, + "loss": 0.1543, + "step": 4076 + }, + { + "epoch": 1.09, + "learning_rate": 4.318060021436228e-05, + "loss": 0.1812, + "step": 4077 + }, + { + "epoch": 1.09, + "learning_rate": 4.3178925509110396e-05, + "loss": 0.2933, + "step": 4078 + }, + { + "epoch": 1.09, + "learning_rate": 4.317725080385852e-05, + "loss": 0.1905, + "step": 4079 + }, + { + "epoch": 1.09, + "learning_rate": 4.3175576098606644e-05, + "loss": 0.215, + "step": 4080 + }, + { + "epoch": 1.09, + "learning_rate": 4.317390139335477e-05, + "loss": 0.1712, + "step": 4081 + }, + { + "epoch": 1.09, + "learning_rate": 4.317222668810289e-05, + "loss": 0.3621, + "step": 4082 + }, + { + "epoch": 1.09, + "learning_rate": 4.3170551982851023e-05, + "loss": 0.1538, + "step": 4083 + }, + { + "epoch": 1.09, + "learning_rate": 4.316887727759915e-05, + "loss": 0.1721, + "step": 4084 + }, + { + "epoch": 1.09, + "learning_rate": 4.316720257234727e-05, + "loss": 0.1337, + "step": 4085 + }, + { + "epoch": 1.09, + "learning_rate": 4.3165527867095396e-05, + "loss": 0.2463, + "step": 4086 + }, + { + "epoch": 1.1, + "learning_rate": 4.3163853161843514e-05, + "loss": 0.1309, + "step": 4087 + }, + { + "epoch": 1.1, + "learning_rate": 4.316217845659164e-05, + "loss": 0.282, + "step": 4088 + }, + { + "epoch": 1.1, + "learning_rate": 4.316050375133976e-05, + "loss": 0.2987, + "step": 4089 + }, + { + "epoch": 1.1, + "learning_rate": 4.315882904608789e-05, + "loss": 0.132, + "step": 4090 + }, + { + "epoch": 1.1, + "learning_rate": 4.315715434083602e-05, + "loss": 0.1378, + "step": 4091 + }, + { + "epoch": 1.1, + "learning_rate": 4.315547963558414e-05, + "loss": 0.3652, + "step": 4092 + }, + { + "epoch": 1.1, + "learning_rate": 4.3153804930332266e-05, + "loss": 0.1639, + "step": 4093 + }, + { + "epoch": 1.1, + "learning_rate": 4.315213022508039e-05, + "loss": 0.1739, + "step": 4094 + }, + { + "epoch": 1.1, + "learning_rate": 4.3150455519828514e-05, + "loss": 0.3252, + "step": 4095 + }, + { + "epoch": 1.1, + "learning_rate": 4.314878081457664e-05, + "loss": 0.1431, + "step": 4096 + }, + { + "epoch": 1.1, + "learning_rate": 4.3147106109324756e-05, + "loss": 0.1452, + "step": 4097 + }, + { + "epoch": 1.1, + "learning_rate": 4.314543140407289e-05, + "loss": 0.1379, + "step": 4098 + }, + { + "epoch": 1.1, + "learning_rate": 4.314375669882101e-05, + "loss": 0.2371, + "step": 4099 + }, + { + "epoch": 1.1, + "learning_rate": 4.3142081993569135e-05, + "loss": 0.1294, + "step": 4100 + }, + { + "epoch": 1.1, + "learning_rate": 4.314040728831726e-05, + "loss": 0.1432, + "step": 4101 + }, + { + "epoch": 1.1, + "learning_rate": 4.3138732583065383e-05, + "loss": 0.1327, + "step": 4102 + }, + { + "epoch": 1.1, + "learning_rate": 4.313705787781351e-05, + "loss": 0.2232, + "step": 4103 + }, + { + "epoch": 1.1, + "learning_rate": 4.313538317256163e-05, + "loss": 0.1763, + "step": 4104 + }, + { + "epoch": 1.1, + "learning_rate": 4.3133708467309756e-05, + "loss": 0.1499, + "step": 4105 + }, + { + "epoch": 1.1, + "learning_rate": 4.313203376205788e-05, + "loss": 0.1735, + "step": 4106 + }, + { + "epoch": 1.1, + "learning_rate": 4.3130359056806005e-05, + "loss": 0.1913, + "step": 4107 + }, + { + "epoch": 1.1, + "learning_rate": 4.312868435155413e-05, + "loss": 0.1574, + "step": 4108 + }, + { + "epoch": 1.1, + "learning_rate": 4.312700964630225e-05, + "loss": 0.1453, + "step": 4109 + }, + { + "epoch": 1.1, + "learning_rate": 4.312533494105038e-05, + "loss": 0.2646, + "step": 4110 + }, + { + "epoch": 1.1, + "learning_rate": 4.31236602357985e-05, + "loss": 0.2753, + "step": 4111 + }, + { + "epoch": 1.1, + "learning_rate": 4.3121985530546626e-05, + "loss": 0.198, + "step": 4112 + }, + { + "epoch": 1.1, + "learning_rate": 4.312031082529475e-05, + "loss": 0.1591, + "step": 4113 + }, + { + "epoch": 1.1, + "learning_rate": 4.3118636120042874e-05, + "loss": 0.1384, + "step": 4114 + }, + { + "epoch": 1.1, + "learning_rate": 4.3116961414791e-05, + "loss": 0.1681, + "step": 4115 + }, + { + "epoch": 1.1, + "learning_rate": 4.311528670953912e-05, + "loss": 0.158, + "step": 4116 + }, + { + "epoch": 1.1, + "learning_rate": 4.3113612004287247e-05, + "loss": 0.1482, + "step": 4117 + }, + { + "epoch": 1.1, + "learning_rate": 4.311193729903537e-05, + "loss": 0.1961, + "step": 4118 + }, + { + "epoch": 1.1, + "learning_rate": 4.3110262593783495e-05, + "loss": 0.1316, + "step": 4119 + }, + { + "epoch": 1.1, + "learning_rate": 4.310858788853162e-05, + "loss": 0.2057, + "step": 4120 + }, + { + "epoch": 1.1, + "learning_rate": 4.3106913183279743e-05, + "loss": 0.1771, + "step": 4121 + }, + { + "epoch": 1.1, + "learning_rate": 4.310523847802787e-05, + "loss": 0.1738, + "step": 4122 + }, + { + "epoch": 1.1, + "learning_rate": 4.3103563772776e-05, + "loss": 0.1637, + "step": 4123 + }, + { + "epoch": 1.11, + "learning_rate": 4.310188906752412e-05, + "loss": 0.1445, + "step": 4124 + }, + { + "epoch": 1.11, + "learning_rate": 4.310021436227224e-05, + "loss": 0.1963, + "step": 4125 + }, + { + "epoch": 1.11, + "learning_rate": 4.3098539657020364e-05, + "loss": 0.1546, + "step": 4126 + }, + { + "epoch": 1.11, + "learning_rate": 4.309686495176849e-05, + "loss": 0.151, + "step": 4127 + }, + { + "epoch": 1.11, + "learning_rate": 4.309519024651661e-05, + "loss": 0.2314, + "step": 4128 + }, + { + "epoch": 1.11, + "learning_rate": 4.309351554126474e-05, + "loss": 0.2849, + "step": 4129 + }, + { + "epoch": 1.11, + "learning_rate": 4.309184083601286e-05, + "loss": 0.1491, + "step": 4130 + }, + { + "epoch": 1.11, + "learning_rate": 4.309016613076099e-05, + "loss": 0.1744, + "step": 4131 + }, + { + "epoch": 1.11, + "learning_rate": 4.3088491425509117e-05, + "loss": 0.1366, + "step": 4132 + }, + { + "epoch": 1.11, + "learning_rate": 4.308681672025724e-05, + "loss": 0.255, + "step": 4133 + }, + { + "epoch": 1.11, + "learning_rate": 4.308514201500536e-05, + "loss": 0.1414, + "step": 4134 + }, + { + "epoch": 1.11, + "learning_rate": 4.308346730975348e-05, + "loss": 0.1744, + "step": 4135 + }, + { + "epoch": 1.11, + "learning_rate": 4.3081792604501607e-05, + "loss": 0.1528, + "step": 4136 + }, + { + "epoch": 1.11, + "learning_rate": 4.308011789924973e-05, + "loss": 0.1534, + "step": 4137 + }, + { + "epoch": 1.11, + "learning_rate": 4.3078443193997855e-05, + "loss": 0.1794, + "step": 4138 + }, + { + "epoch": 1.11, + "learning_rate": 4.3076768488745986e-05, + "loss": 0.2136, + "step": 4139 + }, + { + "epoch": 1.11, + "learning_rate": 4.307509378349411e-05, + "loss": 0.2916, + "step": 4140 + }, + { + "epoch": 1.11, + "learning_rate": 4.3073419078242234e-05, + "loss": 0.2348, + "step": 4141 + }, + { + "epoch": 1.11, + "learning_rate": 4.307174437299036e-05, + "loss": 0.1341, + "step": 4142 + }, + { + "epoch": 1.11, + "learning_rate": 4.3070069667738476e-05, + "loss": 0.156, + "step": 4143 + }, + { + "epoch": 1.11, + "learning_rate": 4.30683949624866e-05, + "loss": 0.2897, + "step": 4144 + }, + { + "epoch": 1.11, + "learning_rate": 4.3066720257234724e-05, + "loss": 0.1295, + "step": 4145 + }, + { + "epoch": 1.11, + "learning_rate": 4.3065045551982855e-05, + "loss": 0.1294, + "step": 4146 + }, + { + "epoch": 1.11, + "learning_rate": 4.306337084673098e-05, + "loss": 0.1257, + "step": 4147 + }, + { + "epoch": 1.11, + "learning_rate": 4.3061696141479104e-05, + "loss": 0.1764, + "step": 4148 + }, + { + "epoch": 1.11, + "learning_rate": 4.306002143622723e-05, + "loss": 0.1548, + "step": 4149 + }, + { + "epoch": 1.11, + "learning_rate": 4.305834673097535e-05, + "loss": 0.2069, + "step": 4150 + }, + { + "epoch": 1.11, + "learning_rate": 4.3056672025723477e-05, + "loss": 0.171, + "step": 4151 + }, + { + "epoch": 1.11, + "learning_rate": 4.30549973204716e-05, + "loss": 0.1417, + "step": 4152 + }, + { + "epoch": 1.11, + "learning_rate": 4.305332261521972e-05, + "loss": 0.2167, + "step": 4153 + }, + { + "epoch": 1.11, + "learning_rate": 4.305164790996785e-05, + "loss": 0.276, + "step": 4154 + }, + { + "epoch": 1.11, + "learning_rate": 4.304997320471597e-05, + "loss": 0.1383, + "step": 4155 + }, + { + "epoch": 1.11, + "learning_rate": 4.30482984994641e-05, + "loss": 0.1671, + "step": 4156 + }, + { + "epoch": 1.11, + "learning_rate": 4.304662379421222e-05, + "loss": 0.1943, + "step": 4157 + }, + { + "epoch": 1.11, + "learning_rate": 4.3044949088960346e-05, + "loss": 0.134, + "step": 4158 + }, + { + "epoch": 1.11, + "learning_rate": 4.304327438370847e-05, + "loss": 0.1942, + "step": 4159 + }, + { + "epoch": 1.11, + "learning_rate": 4.3041599678456594e-05, + "loss": 0.2838, + "step": 4160 + }, + { + "epoch": 1.11, + "learning_rate": 4.303992497320472e-05, + "loss": 0.1458, + "step": 4161 + }, + { + "epoch": 1.12, + "learning_rate": 4.303825026795284e-05, + "loss": 0.1968, + "step": 4162 + }, + { + "epoch": 1.12, + "learning_rate": 4.303657556270097e-05, + "loss": 0.1573, + "step": 4163 + }, + { + "epoch": 1.12, + "learning_rate": 4.303490085744909e-05, + "loss": 0.1569, + "step": 4164 + }, + { + "epoch": 1.12, + "learning_rate": 4.3033226152197215e-05, + "loss": 0.1309, + "step": 4165 + }, + { + "epoch": 1.12, + "learning_rate": 4.303155144694534e-05, + "loss": 0.1272, + "step": 4166 + }, + { + "epoch": 1.12, + "learning_rate": 4.3029876741693464e-05, + "loss": 0.1422, + "step": 4167 + }, + { + "epoch": 1.12, + "learning_rate": 4.302820203644159e-05, + "loss": 0.1421, + "step": 4168 + }, + { + "epoch": 1.12, + "learning_rate": 4.302652733118971e-05, + "loss": 0.1845, + "step": 4169 + }, + { + "epoch": 1.12, + "learning_rate": 4.3024852625937836e-05, + "loss": 0.1566, + "step": 4170 + }, + { + "epoch": 1.12, + "learning_rate": 4.302317792068596e-05, + "loss": 0.1949, + "step": 4171 + }, + { + "epoch": 1.12, + "learning_rate": 4.3021503215434085e-05, + "loss": 0.2122, + "step": 4172 + }, + { + "epoch": 1.12, + "learning_rate": 4.301982851018221e-05, + "loss": 0.165, + "step": 4173 + }, + { + "epoch": 1.12, + "learning_rate": 4.301815380493033e-05, + "loss": 0.2438, + "step": 4174 + }, + { + "epoch": 1.12, + "learning_rate": 4.301647909967846e-05, + "loss": 0.1593, + "step": 4175 + }, + { + "epoch": 1.12, + "learning_rate": 4.301480439442658e-05, + "loss": 0.1516, + "step": 4176 + }, + { + "epoch": 1.12, + "learning_rate": 4.3013129689174706e-05, + "loss": 0.2367, + "step": 4177 + }, + { + "epoch": 1.12, + "learning_rate": 4.301145498392283e-05, + "loss": 0.1607, + "step": 4178 + }, + { + "epoch": 1.12, + "learning_rate": 4.300978027867096e-05, + "loss": 0.2265, + "step": 4179 + }, + { + "epoch": 1.12, + "learning_rate": 4.3008105573419085e-05, + "loss": 0.1482, + "step": 4180 + }, + { + "epoch": 1.12, + "learning_rate": 4.30064308681672e-05, + "loss": 0.1621, + "step": 4181 + }, + { + "epoch": 1.12, + "learning_rate": 4.300475616291533e-05, + "loss": 0.2343, + "step": 4182 + }, + { + "epoch": 1.12, + "learning_rate": 4.300308145766345e-05, + "loss": 0.1476, + "step": 4183 + }, + { + "epoch": 1.12, + "learning_rate": 4.3001406752411575e-05, + "loss": 0.1429, + "step": 4184 + }, + { + "epoch": 1.12, + "learning_rate": 4.29997320471597e-05, + "loss": 0.2714, + "step": 4185 + }, + { + "epoch": 1.12, + "learning_rate": 4.2998057341907824e-05, + "loss": 0.2248, + "step": 4186 + }, + { + "epoch": 1.12, + "learning_rate": 4.2996382636655955e-05, + "loss": 0.1646, + "step": 4187 + }, + { + "epoch": 1.12, + "learning_rate": 4.299470793140408e-05, + "loss": 0.1519, + "step": 4188 + }, + { + "epoch": 1.12, + "learning_rate": 4.29930332261522e-05, + "loss": 0.1453, + "step": 4189 + }, + { + "epoch": 1.12, + "learning_rate": 4.299135852090032e-05, + "loss": 0.2521, + "step": 4190 + }, + { + "epoch": 1.12, + "learning_rate": 4.2989683815648445e-05, + "loss": 0.1495, + "step": 4191 + }, + { + "epoch": 1.12, + "learning_rate": 4.298800911039657e-05, + "loss": 0.1504, + "step": 4192 + }, + { + "epoch": 1.12, + "learning_rate": 4.298633440514469e-05, + "loss": 0.2816, + "step": 4193 + }, + { + "epoch": 1.12, + "learning_rate": 4.298465969989282e-05, + "loss": 0.1877, + "step": 4194 + }, + { + "epoch": 1.12, + "learning_rate": 4.298298499464095e-05, + "loss": 0.2684, + "step": 4195 + }, + { + "epoch": 1.12, + "learning_rate": 4.298131028938907e-05, + "loss": 0.1259, + "step": 4196 + }, + { + "epoch": 1.12, + "learning_rate": 4.29796355841372e-05, + "loss": 0.208, + "step": 4197 + }, + { + "epoch": 1.12, + "learning_rate": 4.297796087888532e-05, + "loss": 0.1958, + "step": 4198 + }, + { + "epoch": 1.13, + "learning_rate": 4.297628617363344e-05, + "loss": 0.3603, + "step": 4199 + }, + { + "epoch": 1.13, + "learning_rate": 4.297461146838156e-05, + "loss": 0.1475, + "step": 4200 + }, + { + "epoch": 1.13, + "learning_rate": 4.297293676312969e-05, + "loss": 0.1662, + "step": 4201 + }, + { + "epoch": 1.13, + "learning_rate": 4.297126205787782e-05, + "loss": 0.2456, + "step": 4202 + }, + { + "epoch": 1.13, + "learning_rate": 4.296958735262594e-05, + "loss": 0.1654, + "step": 4203 + }, + { + "epoch": 1.13, + "learning_rate": 4.2967912647374066e-05, + "loss": 0.1363, + "step": 4204 + }, + { + "epoch": 1.13, + "learning_rate": 4.296623794212219e-05, + "loss": 0.1714, + "step": 4205 + }, + { + "epoch": 1.13, + "learning_rate": 4.2964563236870315e-05, + "loss": 0.1372, + "step": 4206 + }, + { + "epoch": 1.13, + "learning_rate": 4.296288853161844e-05, + "loss": 0.1311, + "step": 4207 + }, + { + "epoch": 1.13, + "learning_rate": 4.296121382636656e-05, + "loss": 0.2238, + "step": 4208 + }, + { + "epoch": 1.13, + "learning_rate": 4.295953912111468e-05, + "loss": 0.1447, + "step": 4209 + }, + { + "epoch": 1.13, + "learning_rate": 4.295786441586281e-05, + "loss": 0.1612, + "step": 4210 + }, + { + "epoch": 1.13, + "learning_rate": 4.2956189710610936e-05, + "loss": 0.1572, + "step": 4211 + }, + { + "epoch": 1.13, + "learning_rate": 4.295451500535906e-05, + "loss": 0.1718, + "step": 4212 + }, + { + "epoch": 1.13, + "learning_rate": 4.2952840300107184e-05, + "loss": 0.1716, + "step": 4213 + }, + { + "epoch": 1.13, + "learning_rate": 4.295116559485531e-05, + "loss": 0.1248, + "step": 4214 + }, + { + "epoch": 1.13, + "learning_rate": 4.294949088960343e-05, + "loss": 0.1657, + "step": 4215 + }, + { + "epoch": 1.13, + "learning_rate": 4.294781618435156e-05, + "loss": 0.1936, + "step": 4216 + }, + { + "epoch": 1.13, + "learning_rate": 4.294614147909968e-05, + "loss": 0.1914, + "step": 4217 + }, + { + "epoch": 1.13, + "learning_rate": 4.2944466773847805e-05, + "loss": 0.1541, + "step": 4218 + }, + { + "epoch": 1.13, + "learning_rate": 4.294279206859593e-05, + "loss": 0.1439, + "step": 4219 + }, + { + "epoch": 1.13, + "learning_rate": 4.2941117363344054e-05, + "loss": 0.2005, + "step": 4220 + }, + { + "epoch": 1.13, + "learning_rate": 4.293944265809218e-05, + "loss": 0.2133, + "step": 4221 + }, + { + "epoch": 1.13, + "learning_rate": 4.29377679528403e-05, + "loss": 0.4761, + "step": 4222 + }, + { + "epoch": 1.13, + "learning_rate": 4.2936093247588426e-05, + "loss": 0.2082, + "step": 4223 + }, + { + "epoch": 1.13, + "learning_rate": 4.293441854233655e-05, + "loss": 0.197, + "step": 4224 + }, + { + "epoch": 1.13, + "learning_rate": 4.2932743837084675e-05, + "loss": 0.1654, + "step": 4225 + }, + { + "epoch": 1.13, + "learning_rate": 4.29310691318328e-05, + "loss": 0.1562, + "step": 4226 + }, + { + "epoch": 1.13, + "learning_rate": 4.292939442658092e-05, + "loss": 0.3227, + "step": 4227 + }, + { + "epoch": 1.13, + "learning_rate": 4.292771972132905e-05, + "loss": 0.1527, + "step": 4228 + }, + { + "epoch": 1.13, + "learning_rate": 4.292604501607717e-05, + "loss": 0.1674, + "step": 4229 + }, + { + "epoch": 1.13, + "learning_rate": 4.2924370310825296e-05, + "loss": 0.1123, + "step": 4230 + }, + { + "epoch": 1.13, + "learning_rate": 4.292269560557342e-05, + "loss": 0.1378, + "step": 4231 + }, + { + "epoch": 1.13, + "learning_rate": 4.2921020900321544e-05, + "loss": 0.1812, + "step": 4232 + }, + { + "epoch": 1.13, + "learning_rate": 4.291934619506967e-05, + "loss": 0.1464, + "step": 4233 + }, + { + "epoch": 1.13, + "learning_rate": 4.291767148981779e-05, + "loss": 0.1556, + "step": 4234 + }, + { + "epoch": 1.13, + "learning_rate": 4.2915996784565924e-05, + "loss": 0.1148, + "step": 4235 + }, + { + "epoch": 1.14, + "learning_rate": 4.291432207931405e-05, + "loss": 0.2162, + "step": 4236 + }, + { + "epoch": 1.14, + "learning_rate": 4.2912647374062165e-05, + "loss": 0.19, + "step": 4237 + }, + { + "epoch": 1.14, + "learning_rate": 4.291097266881029e-05, + "loss": 0.2808, + "step": 4238 + }, + { + "epoch": 1.14, + "learning_rate": 4.2909297963558414e-05, + "loss": 0.1543, + "step": 4239 + }, + { + "epoch": 1.14, + "learning_rate": 4.290762325830654e-05, + "loss": 0.1334, + "step": 4240 + }, + { + "epoch": 1.14, + "learning_rate": 4.290594855305466e-05, + "loss": 0.143, + "step": 4241 + }, + { + "epoch": 1.14, + "learning_rate": 4.2904273847802786e-05, + "loss": 0.1942, + "step": 4242 + }, + { + "epoch": 1.14, + "learning_rate": 4.290259914255092e-05, + "loss": 0.153, + "step": 4243 + }, + { + "epoch": 1.14, + "learning_rate": 4.290092443729904e-05, + "loss": 0.2284, + "step": 4244 + }, + { + "epoch": 1.14, + "learning_rate": 4.2899249732047166e-05, + "loss": 0.1511, + "step": 4245 + }, + { + "epoch": 1.14, + "learning_rate": 4.289757502679528e-05, + "loss": 0.1569, + "step": 4246 + }, + { + "epoch": 1.14, + "learning_rate": 4.289590032154341e-05, + "loss": 0.1325, + "step": 4247 + }, + { + "epoch": 1.14, + "learning_rate": 4.289422561629153e-05, + "loss": 0.2298, + "step": 4248 + }, + { + "epoch": 1.14, + "learning_rate": 4.2892550911039656e-05, + "loss": 0.2315, + "step": 4249 + }, + { + "epoch": 1.14, + "learning_rate": 4.289087620578778e-05, + "loss": 0.1398, + "step": 4250 + }, + { + "epoch": 1.14, + "learning_rate": 4.288920150053591e-05, + "loss": 0.219, + "step": 4251 + }, + { + "epoch": 1.14, + "learning_rate": 4.2887526795284035e-05, + "loss": 0.1576, + "step": 4252 + }, + { + "epoch": 1.14, + "learning_rate": 4.288585209003216e-05, + "loss": 0.1735, + "step": 4253 + }, + { + "epoch": 1.14, + "learning_rate": 4.2884177384780284e-05, + "loss": 0.1948, + "step": 4254 + }, + { + "epoch": 1.14, + "learning_rate": 4.28825026795284e-05, + "loss": 0.3555, + "step": 4255 + }, + { + "epoch": 1.14, + "learning_rate": 4.2880827974276525e-05, + "loss": 0.2049, + "step": 4256 + }, + { + "epoch": 1.14, + "learning_rate": 4.287915326902465e-05, + "loss": 0.3127, + "step": 4257 + }, + { + "epoch": 1.14, + "learning_rate": 4.287747856377278e-05, + "loss": 0.2666, + "step": 4258 + }, + { + "epoch": 1.14, + "learning_rate": 4.2875803858520905e-05, + "loss": 0.1482, + "step": 4259 + }, + { + "epoch": 1.14, + "learning_rate": 4.287412915326903e-05, + "loss": 0.1846, + "step": 4260 + }, + { + "epoch": 1.14, + "learning_rate": 4.287245444801715e-05, + "loss": 0.2433, + "step": 4261 + }, + { + "epoch": 1.14, + "learning_rate": 4.287077974276528e-05, + "loss": 0.118, + "step": 4262 + }, + { + "epoch": 1.14, + "learning_rate": 4.28691050375134e-05, + "loss": 0.1546, + "step": 4263 + }, + { + "epoch": 1.14, + "learning_rate": 4.2867430332261526e-05, + "loss": 0.1775, + "step": 4264 + }, + { + "epoch": 1.14, + "learning_rate": 4.286575562700964e-05, + "loss": 0.1634, + "step": 4265 + }, + { + "epoch": 1.14, + "learning_rate": 4.2864080921757774e-05, + "loss": 0.16, + "step": 4266 + }, + { + "epoch": 1.14, + "learning_rate": 4.28624062165059e-05, + "loss": 0.1526, + "step": 4267 + }, + { + "epoch": 1.14, + "learning_rate": 4.286073151125402e-05, + "loss": 0.1641, + "step": 4268 + }, + { + "epoch": 1.14, + "learning_rate": 4.285905680600215e-05, + "loss": 0.278, + "step": 4269 + }, + { + "epoch": 1.14, + "learning_rate": 4.285738210075027e-05, + "loss": 0.1281, + "step": 4270 + }, + { + "epoch": 1.14, + "learning_rate": 4.2855707395498395e-05, + "loss": 0.1511, + "step": 4271 + }, + { + "epoch": 1.14, + "learning_rate": 4.285403269024652e-05, + "loss": 0.1239, + "step": 4272 + }, + { + "epoch": 1.14, + "learning_rate": 4.2852357984994644e-05, + "loss": 0.1444, + "step": 4273 + }, + { + "epoch": 1.15, + "learning_rate": 4.285068327974277e-05, + "loss": 0.1507, + "step": 4274 + }, + { + "epoch": 1.15, + "learning_rate": 4.284900857449089e-05, + "loss": 0.1392, + "step": 4275 + }, + { + "epoch": 1.15, + "learning_rate": 4.2847333869239016e-05, + "loss": 0.1894, + "step": 4276 + }, + { + "epoch": 1.15, + "learning_rate": 4.284565916398714e-05, + "loss": 0.2005, + "step": 4277 + }, + { + "epoch": 1.15, + "learning_rate": 4.2843984458735265e-05, + "loss": 0.1473, + "step": 4278 + }, + { + "epoch": 1.15, + "learning_rate": 4.284230975348339e-05, + "loss": 0.1497, + "step": 4279 + }, + { + "epoch": 1.15, + "learning_rate": 4.284063504823151e-05, + "loss": 0.1269, + "step": 4280 + }, + { + "epoch": 1.15, + "learning_rate": 4.283896034297964e-05, + "loss": 0.267, + "step": 4281 + }, + { + "epoch": 1.15, + "learning_rate": 4.283728563772776e-05, + "loss": 0.1537, + "step": 4282 + }, + { + "epoch": 1.15, + "learning_rate": 4.2835610932475886e-05, + "loss": 0.3072, + "step": 4283 + }, + { + "epoch": 1.15, + "learning_rate": 4.283393622722401e-05, + "loss": 0.1788, + "step": 4284 + }, + { + "epoch": 1.15, + "learning_rate": 4.2832261521972134e-05, + "loss": 0.1849, + "step": 4285 + }, + { + "epoch": 1.15, + "learning_rate": 4.283058681672026e-05, + "loss": 0.1569, + "step": 4286 + }, + { + "epoch": 1.15, + "learning_rate": 4.282891211146838e-05, + "loss": 0.1428, + "step": 4287 + }, + { + "epoch": 1.15, + "learning_rate": 4.282723740621651e-05, + "loss": 0.1693, + "step": 4288 + }, + { + "epoch": 1.15, + "learning_rate": 4.282556270096463e-05, + "loss": 0.2742, + "step": 4289 + }, + { + "epoch": 1.15, + "learning_rate": 4.2823887995712755e-05, + "loss": 0.1373, + "step": 4290 + }, + { + "epoch": 1.15, + "learning_rate": 4.2822213290460886e-05, + "loss": 0.3561, + "step": 4291 + }, + { + "epoch": 1.15, + "learning_rate": 4.282053858520901e-05, + "loss": 0.1718, + "step": 4292 + }, + { + "epoch": 1.15, + "learning_rate": 4.281886387995713e-05, + "loss": 0.1531, + "step": 4293 + }, + { + "epoch": 1.15, + "learning_rate": 4.281718917470525e-05, + "loss": 0.1354, + "step": 4294 + }, + { + "epoch": 1.15, + "learning_rate": 4.2815514469453376e-05, + "loss": 0.1424, + "step": 4295 + }, + { + "epoch": 1.15, + "learning_rate": 4.28138397642015e-05, + "loss": 0.1493, + "step": 4296 + }, + { + "epoch": 1.15, + "learning_rate": 4.2812165058949625e-05, + "loss": 0.216, + "step": 4297 + }, + { + "epoch": 1.15, + "learning_rate": 4.281049035369775e-05, + "loss": 0.2788, + "step": 4298 + }, + { + "epoch": 1.15, + "learning_rate": 4.280881564844588e-05, + "loss": 0.1685, + "step": 4299 + }, + { + "epoch": 1.15, + "learning_rate": 4.2807140943194004e-05, + "loss": 0.2216, + "step": 4300 + }, + { + "epoch": 1.15, + "learning_rate": 4.280546623794213e-05, + "loss": 0.1974, + "step": 4301 + }, + { + "epoch": 1.15, + "learning_rate": 4.2803791532690246e-05, + "loss": 0.2468, + "step": 4302 + }, + { + "epoch": 1.15, + "learning_rate": 4.280211682743837e-05, + "loss": 0.1335, + "step": 4303 + }, + { + "epoch": 1.15, + "learning_rate": 4.2800442122186494e-05, + "loss": 0.1668, + "step": 4304 + }, + { + "epoch": 1.15, + "learning_rate": 4.279876741693462e-05, + "loss": 0.1659, + "step": 4305 + }, + { + "epoch": 1.15, + "learning_rate": 4.279709271168274e-05, + "loss": 0.1291, + "step": 4306 + }, + { + "epoch": 1.15, + "learning_rate": 4.2795418006430874e-05, + "loss": 0.1543, + "step": 4307 + }, + { + "epoch": 1.15, + "learning_rate": 4.2793743301179e-05, + "loss": 0.1622, + "step": 4308 + }, + { + "epoch": 1.15, + "learning_rate": 4.279206859592712e-05, + "loss": 0.1966, + "step": 4309 + }, + { + "epoch": 1.15, + "learning_rate": 4.2790393890675246e-05, + "loss": 0.1488, + "step": 4310 + }, + { + "epoch": 1.16, + "learning_rate": 4.2788719185423364e-05, + "loss": 0.1455, + "step": 4311 + }, + { + "epoch": 1.16, + "learning_rate": 4.278704448017149e-05, + "loss": 0.3879, + "step": 4312 + }, + { + "epoch": 1.16, + "learning_rate": 4.278536977491961e-05, + "loss": 0.2017, + "step": 4313 + }, + { + "epoch": 1.16, + "learning_rate": 4.278369506966774e-05, + "loss": 0.1521, + "step": 4314 + }, + { + "epoch": 1.16, + "learning_rate": 4.278202036441587e-05, + "loss": 0.3188, + "step": 4315 + }, + { + "epoch": 1.16, + "learning_rate": 4.278034565916399e-05, + "loss": 0.1522, + "step": 4316 + }, + { + "epoch": 1.16, + "learning_rate": 4.2778670953912116e-05, + "loss": 0.1708, + "step": 4317 + }, + { + "epoch": 1.16, + "learning_rate": 4.277699624866024e-05, + "loss": 0.1331, + "step": 4318 + }, + { + "epoch": 1.16, + "learning_rate": 4.2775321543408364e-05, + "loss": 0.2797, + "step": 4319 + }, + { + "epoch": 1.16, + "learning_rate": 4.277364683815649e-05, + "loss": 0.2429, + "step": 4320 + }, + { + "epoch": 1.16, + "learning_rate": 4.2771972132904606e-05, + "loss": 0.1535, + "step": 4321 + }, + { + "epoch": 1.16, + "learning_rate": 4.277029742765274e-05, + "loss": 0.1378, + "step": 4322 + }, + { + "epoch": 1.16, + "learning_rate": 4.276862272240086e-05, + "loss": 0.1522, + "step": 4323 + }, + { + "epoch": 1.16, + "learning_rate": 4.2766948017148985e-05, + "loss": 0.156, + "step": 4324 + }, + { + "epoch": 1.16, + "learning_rate": 4.276527331189711e-05, + "loss": 0.15, + "step": 4325 + }, + { + "epoch": 1.16, + "learning_rate": 4.2763598606645234e-05, + "loss": 0.1391, + "step": 4326 + }, + { + "epoch": 1.16, + "learning_rate": 4.276192390139336e-05, + "loss": 0.1843, + "step": 4327 + }, + { + "epoch": 1.16, + "learning_rate": 4.276024919614148e-05, + "loss": 0.1638, + "step": 4328 + }, + { + "epoch": 1.16, + "learning_rate": 4.2758574490889606e-05, + "loss": 0.2117, + "step": 4329 + }, + { + "epoch": 1.16, + "learning_rate": 4.275689978563773e-05, + "loss": 0.1509, + "step": 4330 + }, + { + "epoch": 1.16, + "learning_rate": 4.2755225080385855e-05, + "loss": 0.1566, + "step": 4331 + }, + { + "epoch": 1.16, + "learning_rate": 4.275355037513398e-05, + "loss": 0.1868, + "step": 4332 + }, + { + "epoch": 1.16, + "learning_rate": 4.27518756698821e-05, + "loss": 0.1596, + "step": 4333 + }, + { + "epoch": 1.16, + "learning_rate": 4.275020096463023e-05, + "loss": 0.2951, + "step": 4334 + }, + { + "epoch": 1.16, + "learning_rate": 4.274852625937835e-05, + "loss": 0.1966, + "step": 4335 + }, + { + "epoch": 1.16, + "learning_rate": 4.2746851554126476e-05, + "loss": 0.301, + "step": 4336 + }, + { + "epoch": 1.16, + "learning_rate": 4.27451768488746e-05, + "loss": 0.2283, + "step": 4337 + }, + { + "epoch": 1.16, + "learning_rate": 4.2743502143622724e-05, + "loss": 0.1169, + "step": 4338 + }, + { + "epoch": 1.16, + "learning_rate": 4.274182743837085e-05, + "loss": 0.4018, + "step": 4339 + }, + { + "epoch": 1.16, + "learning_rate": 4.274015273311897e-05, + "loss": 0.1406, + "step": 4340 + }, + { + "epoch": 1.16, + "learning_rate": 4.27384780278671e-05, + "loss": 0.3044, + "step": 4341 + }, + { + "epoch": 1.16, + "learning_rate": 4.273680332261522e-05, + "loss": 0.1503, + "step": 4342 + }, + { + "epoch": 1.16, + "learning_rate": 4.2735128617363345e-05, + "loss": 0.1253, + "step": 4343 + }, + { + "epoch": 1.16, + "learning_rate": 4.273345391211147e-05, + "loss": 0.1403, + "step": 4344 + }, + { + "epoch": 1.16, + "learning_rate": 4.2731779206859593e-05, + "loss": 0.1519, + "step": 4345 + }, + { + "epoch": 1.16, + "learning_rate": 4.273010450160772e-05, + "loss": 0.1336, + "step": 4346 + }, + { + "epoch": 1.16, + "learning_rate": 4.272842979635585e-05, + "loss": 0.2295, + "step": 4347 + }, + { + "epoch": 1.17, + "learning_rate": 4.272675509110397e-05, + "loss": 0.1546, + "step": 4348 + }, + { + "epoch": 1.17, + "learning_rate": 4.272508038585209e-05, + "loss": 0.1804, + "step": 4349 + }, + { + "epoch": 1.17, + "learning_rate": 4.2723405680600215e-05, + "loss": 0.165, + "step": 4350 + }, + { + "epoch": 1.17, + "learning_rate": 4.272173097534834e-05, + "loss": 0.1385, + "step": 4351 + }, + { + "epoch": 1.17, + "learning_rate": 4.272005627009646e-05, + "loss": 0.1428, + "step": 4352 + }, + { + "epoch": 1.17, + "learning_rate": 4.271838156484459e-05, + "loss": 0.3574, + "step": 4353 + }, + { + "epoch": 1.17, + "learning_rate": 4.271670685959271e-05, + "loss": 0.1532, + "step": 4354 + }, + { + "epoch": 1.17, + "learning_rate": 4.271503215434084e-05, + "loss": 0.2883, + "step": 4355 + }, + { + "epoch": 1.17, + "learning_rate": 4.2713357449088967e-05, + "loss": 0.1574, + "step": 4356 + }, + { + "epoch": 1.17, + "learning_rate": 4.271168274383709e-05, + "loss": 0.2856, + "step": 4357 + }, + { + "epoch": 1.17, + "learning_rate": 4.271000803858521e-05, + "loss": 0.2514, + "step": 4358 + }, + { + "epoch": 1.17, + "learning_rate": 4.270833333333333e-05, + "loss": 0.3821, + "step": 4359 + }, + { + "epoch": 1.17, + "learning_rate": 4.270665862808146e-05, + "loss": 0.1314, + "step": 4360 + }, + { + "epoch": 1.17, + "learning_rate": 4.270498392282958e-05, + "loss": 0.2899, + "step": 4361 + }, + { + "epoch": 1.17, + "learning_rate": 4.2703309217577705e-05, + "loss": 0.1703, + "step": 4362 + }, + { + "epoch": 1.17, + "learning_rate": 4.2701634512325836e-05, + "loss": 0.1386, + "step": 4363 + }, + { + "epoch": 1.17, + "learning_rate": 4.269995980707396e-05, + "loss": 0.1523, + "step": 4364 + }, + { + "epoch": 1.17, + "learning_rate": 4.2698285101822084e-05, + "loss": 0.1506, + "step": 4365 + }, + { + "epoch": 1.17, + "learning_rate": 4.269661039657021e-05, + "loss": 0.1453, + "step": 4366 + }, + { + "epoch": 1.17, + "learning_rate": 4.2694935691318326e-05, + "loss": 0.1745, + "step": 4367 + }, + { + "epoch": 1.17, + "learning_rate": 4.269326098606645e-05, + "loss": 0.1338, + "step": 4368 + }, + { + "epoch": 1.17, + "learning_rate": 4.2691586280814575e-05, + "loss": 0.237, + "step": 4369 + }, + { + "epoch": 1.17, + "learning_rate": 4.2689911575562705e-05, + "loss": 0.166, + "step": 4370 + }, + { + "epoch": 1.17, + "learning_rate": 4.268823687031083e-05, + "loss": 0.2249, + "step": 4371 + }, + { + "epoch": 1.17, + "learning_rate": 4.2686562165058954e-05, + "loss": 0.1768, + "step": 4372 + }, + { + "epoch": 1.17, + "learning_rate": 4.268488745980708e-05, + "loss": 0.3959, + "step": 4373 + }, + { + "epoch": 1.17, + "learning_rate": 4.26832127545552e-05, + "loss": 0.3334, + "step": 4374 + }, + { + "epoch": 1.17, + "learning_rate": 4.2681538049303327e-05, + "loss": 0.3103, + "step": 4375 + }, + { + "epoch": 1.17, + "learning_rate": 4.267986334405145e-05, + "loss": 0.1569, + "step": 4376 + }, + { + "epoch": 1.17, + "learning_rate": 4.267818863879957e-05, + "loss": 0.1783, + "step": 4377 + }, + { + "epoch": 1.17, + "learning_rate": 4.26765139335477e-05, + "loss": 0.1557, + "step": 4378 + }, + { + "epoch": 1.17, + "learning_rate": 4.267483922829582e-05, + "loss": 0.1586, + "step": 4379 + }, + { + "epoch": 1.17, + "learning_rate": 4.267316452304395e-05, + "loss": 0.1768, + "step": 4380 + }, + { + "epoch": 1.17, + "learning_rate": 4.267148981779207e-05, + "loss": 0.1202, + "step": 4381 + }, + { + "epoch": 1.17, + "learning_rate": 4.2669815112540196e-05, + "loss": 0.2035, + "step": 4382 + }, + { + "epoch": 1.17, + "learning_rate": 4.266814040728832e-05, + "loss": 0.1299, + "step": 4383 + }, + { + "epoch": 1.17, + "learning_rate": 4.2666465702036444e-05, + "loss": 0.1384, + "step": 4384 + }, + { + "epoch": 1.17, + "learning_rate": 4.266479099678457e-05, + "loss": 0.1697, + "step": 4385 + }, + { + "epoch": 1.18, + "learning_rate": 4.266311629153269e-05, + "loss": 0.1493, + "step": 4386 + }, + { + "epoch": 1.18, + "learning_rate": 4.266144158628082e-05, + "loss": 0.191, + "step": 4387 + }, + { + "epoch": 1.18, + "learning_rate": 4.265976688102894e-05, + "loss": 0.1378, + "step": 4388 + }, + { + "epoch": 1.18, + "learning_rate": 4.2658092175777065e-05, + "loss": 0.166, + "step": 4389 + }, + { + "epoch": 1.18, + "learning_rate": 4.265641747052519e-05, + "loss": 0.1759, + "step": 4390 + }, + { + "epoch": 1.18, + "learning_rate": 4.2654742765273314e-05, + "loss": 0.1539, + "step": 4391 + }, + { + "epoch": 1.18, + "learning_rate": 4.265306806002144e-05, + "loss": 0.1301, + "step": 4392 + }, + { + "epoch": 1.18, + "learning_rate": 4.265139335476956e-05, + "loss": 0.1552, + "step": 4393 + }, + { + "epoch": 1.18, + "learning_rate": 4.2649718649517687e-05, + "loss": 0.3148, + "step": 4394 + }, + { + "epoch": 1.18, + "learning_rate": 4.264804394426581e-05, + "loss": 0.1522, + "step": 4395 + }, + { + "epoch": 1.18, + "learning_rate": 4.2646369239013935e-05, + "loss": 0.1249, + "step": 4396 + }, + { + "epoch": 1.18, + "learning_rate": 4.264469453376206e-05, + "loss": 0.2056, + "step": 4397 + }, + { + "epoch": 1.18, + "learning_rate": 4.264301982851018e-05, + "loss": 0.3603, + "step": 4398 + }, + { + "epoch": 1.18, + "learning_rate": 4.264134512325831e-05, + "loss": 0.4086, + "step": 4399 + }, + { + "epoch": 1.18, + "learning_rate": 4.263967041800643e-05, + "loss": 0.1486, + "step": 4400 + }, + { + "epoch": 1.18, + "learning_rate": 4.2637995712754556e-05, + "loss": 0.2904, + "step": 4401 + }, + { + "epoch": 1.18, + "learning_rate": 4.263632100750268e-05, + "loss": 0.1772, + "step": 4402 + }, + { + "epoch": 1.18, + "learning_rate": 4.263464630225081e-05, + "loss": 0.2355, + "step": 4403 + }, + { + "epoch": 1.18, + "learning_rate": 4.2632971596998935e-05, + "loss": 0.1518, + "step": 4404 + }, + { + "epoch": 1.18, + "learning_rate": 4.263129689174705e-05, + "loss": 0.1374, + "step": 4405 + }, + { + "epoch": 1.18, + "learning_rate": 4.262962218649518e-05, + "loss": 0.2451, + "step": 4406 + }, + { + "epoch": 1.18, + "learning_rate": 4.26279474812433e-05, + "loss": 0.168, + "step": 4407 + }, + { + "epoch": 1.18, + "learning_rate": 4.2626272775991425e-05, + "loss": 0.1516, + "step": 4408 + }, + { + "epoch": 1.18, + "learning_rate": 4.262459807073955e-05, + "loss": 0.3217, + "step": 4409 + }, + { + "epoch": 1.18, + "learning_rate": 4.2622923365487674e-05, + "loss": 0.1557, + "step": 4410 + }, + { + "epoch": 1.18, + "learning_rate": 4.2621248660235805e-05, + "loss": 0.2444, + "step": 4411 + }, + { + "epoch": 1.18, + "learning_rate": 4.261957395498393e-05, + "loss": 0.1809, + "step": 4412 + }, + { + "epoch": 1.18, + "learning_rate": 4.261789924973205e-05, + "loss": 0.151, + "step": 4413 + }, + { + "epoch": 1.18, + "learning_rate": 4.261622454448017e-05, + "loss": 0.1622, + "step": 4414 + }, + { + "epoch": 1.18, + "learning_rate": 4.2614549839228295e-05, + "loss": 0.1278, + "step": 4415 + }, + { + "epoch": 1.18, + "learning_rate": 4.261287513397642e-05, + "loss": 0.1803, + "step": 4416 + }, + { + "epoch": 1.18, + "learning_rate": 4.261120042872454e-05, + "loss": 0.2628, + "step": 4417 + }, + { + "epoch": 1.18, + "learning_rate": 4.260952572347267e-05, + "loss": 0.1211, + "step": 4418 + }, + { + "epoch": 1.18, + "learning_rate": 4.26078510182208e-05, + "loss": 0.1545, + "step": 4419 + }, + { + "epoch": 1.18, + "learning_rate": 4.260617631296892e-05, + "loss": 0.1658, + "step": 4420 + }, + { + "epoch": 1.18, + "learning_rate": 4.260450160771705e-05, + "loss": 0.1578, + "step": 4421 + }, + { + "epoch": 1.18, + "learning_rate": 4.260282690246517e-05, + "loss": 0.4048, + "step": 4422 + }, + { + "epoch": 1.19, + "learning_rate": 4.260115219721329e-05, + "loss": 0.1697, + "step": 4423 + }, + { + "epoch": 1.19, + "learning_rate": 4.259947749196141e-05, + "loss": 0.1287, + "step": 4424 + }, + { + "epoch": 1.19, + "learning_rate": 4.259780278670954e-05, + "loss": 0.1451, + "step": 4425 + }, + { + "epoch": 1.19, + "learning_rate": 4.259612808145767e-05, + "loss": 0.273, + "step": 4426 + }, + { + "epoch": 1.19, + "learning_rate": 4.259445337620579e-05, + "loss": 0.1573, + "step": 4427 + }, + { + "epoch": 1.19, + "learning_rate": 4.2592778670953916e-05, + "loss": 0.1551, + "step": 4428 + }, + { + "epoch": 1.19, + "learning_rate": 4.259110396570204e-05, + "loss": 0.2507, + "step": 4429 + }, + { + "epoch": 1.19, + "learning_rate": 4.2589429260450165e-05, + "loss": 0.1404, + "step": 4430 + }, + { + "epoch": 1.19, + "learning_rate": 4.258775455519829e-05, + "loss": 0.1679, + "step": 4431 + }, + { + "epoch": 1.19, + "learning_rate": 4.258607984994641e-05, + "loss": 0.1704, + "step": 4432 + }, + { + "epoch": 1.19, + "learning_rate": 4.258440514469453e-05, + "loss": 0.1581, + "step": 4433 + }, + { + "epoch": 1.19, + "learning_rate": 4.258273043944266e-05, + "loss": 0.1852, + "step": 4434 + }, + { + "epoch": 1.19, + "learning_rate": 4.2581055734190786e-05, + "loss": 0.1684, + "step": 4435 + }, + { + "epoch": 1.19, + "learning_rate": 4.257938102893891e-05, + "loss": 0.2206, + "step": 4436 + }, + { + "epoch": 1.19, + "learning_rate": 4.2577706323687034e-05, + "loss": 0.1715, + "step": 4437 + }, + { + "epoch": 1.19, + "learning_rate": 4.257603161843516e-05, + "loss": 0.1499, + "step": 4438 + }, + { + "epoch": 1.19, + "learning_rate": 4.257435691318328e-05, + "loss": 0.1929, + "step": 4439 + }, + { + "epoch": 1.19, + "learning_rate": 4.257268220793141e-05, + "loss": 0.1611, + "step": 4440 + }, + { + "epoch": 1.19, + "learning_rate": 4.257100750267953e-05, + "loss": 0.1383, + "step": 4441 + }, + { + "epoch": 1.19, + "learning_rate": 4.2569332797427655e-05, + "loss": 0.1429, + "step": 4442 + }, + { + "epoch": 1.19, + "learning_rate": 4.256765809217578e-05, + "loss": 0.1522, + "step": 4443 + }, + { + "epoch": 1.19, + "learning_rate": 4.2565983386923904e-05, + "loss": 0.1454, + "step": 4444 + }, + { + "epoch": 1.19, + "learning_rate": 4.256430868167203e-05, + "loss": 0.138, + "step": 4445 + }, + { + "epoch": 1.19, + "learning_rate": 4.256263397642015e-05, + "loss": 0.1841, + "step": 4446 + }, + { + "epoch": 1.19, + "learning_rate": 4.2560959271168276e-05, + "loss": 0.1427, + "step": 4447 + }, + { + "epoch": 1.19, + "learning_rate": 4.25592845659164e-05, + "loss": 0.3108, + "step": 4448 + }, + { + "epoch": 1.19, + "learning_rate": 4.2557609860664525e-05, + "loss": 0.1226, + "step": 4449 + }, + { + "epoch": 1.19, + "learning_rate": 4.255593515541265e-05, + "loss": 0.2285, + "step": 4450 + }, + { + "epoch": 1.19, + "learning_rate": 4.255426045016077e-05, + "loss": 0.1356, + "step": 4451 + }, + { + "epoch": 1.19, + "learning_rate": 4.25525857449089e-05, + "loss": 0.2282, + "step": 4452 + }, + { + "epoch": 1.19, + "learning_rate": 4.255091103965702e-05, + "loss": 0.1559, + "step": 4453 + }, + { + "epoch": 1.19, + "learning_rate": 4.2549236334405146e-05, + "loss": 0.1594, + "step": 4454 + }, + { + "epoch": 1.19, + "learning_rate": 4.254756162915327e-05, + "loss": 0.215, + "step": 4455 + }, + { + "epoch": 1.19, + "learning_rate": 4.2545886923901394e-05, + "loss": 0.1574, + "step": 4456 + }, + { + "epoch": 1.19, + "learning_rate": 4.254421221864952e-05, + "loss": 0.1766, + "step": 4457 + }, + { + "epoch": 1.19, + "learning_rate": 4.254253751339764e-05, + "loss": 0.2274, + "step": 4458 + }, + { + "epoch": 1.19, + "learning_rate": 4.2540862808145774e-05, + "loss": 0.2687, + "step": 4459 + }, + { + "epoch": 1.2, + "learning_rate": 4.25391881028939e-05, + "loss": 0.1925, + "step": 4460 + }, + { + "epoch": 1.2, + "learning_rate": 4.2537513397642015e-05, + "loss": 0.1865, + "step": 4461 + }, + { + "epoch": 1.2, + "learning_rate": 4.253583869239014e-05, + "loss": 0.1617, + "step": 4462 + }, + { + "epoch": 1.2, + "learning_rate": 4.2534163987138264e-05, + "loss": 0.2766, + "step": 4463 + }, + { + "epoch": 1.2, + "learning_rate": 4.253248928188639e-05, + "loss": 0.2117, + "step": 4464 + }, + { + "epoch": 1.2, + "learning_rate": 4.253081457663451e-05, + "loss": 0.1442, + "step": 4465 + }, + { + "epoch": 1.2, + "learning_rate": 4.2529139871382636e-05, + "loss": 0.2176, + "step": 4466 + }, + { + "epoch": 1.2, + "learning_rate": 4.252746516613077e-05, + "loss": 0.3444, + "step": 4467 + }, + { + "epoch": 1.2, + "learning_rate": 4.252579046087889e-05, + "loss": 0.1392, + "step": 4468 + }, + { + "epoch": 1.2, + "learning_rate": 4.2524115755627016e-05, + "loss": 0.1617, + "step": 4469 + }, + { + "epoch": 1.2, + "learning_rate": 4.252244105037513e-05, + "loss": 0.1399, + "step": 4470 + }, + { + "epoch": 1.2, + "learning_rate": 4.252076634512326e-05, + "loss": 0.4114, + "step": 4471 + }, + { + "epoch": 1.2, + "learning_rate": 4.251909163987138e-05, + "loss": 0.129, + "step": 4472 + }, + { + "epoch": 1.2, + "learning_rate": 4.2517416934619506e-05, + "loss": 0.2207, + "step": 4473 + }, + { + "epoch": 1.2, + "learning_rate": 4.251574222936763e-05, + "loss": 0.3033, + "step": 4474 + }, + { + "epoch": 1.2, + "learning_rate": 4.251406752411576e-05, + "loss": 0.1467, + "step": 4475 + }, + { + "epoch": 1.2, + "learning_rate": 4.2512392818863885e-05, + "loss": 0.1323, + "step": 4476 + }, + { + "epoch": 1.2, + "learning_rate": 4.251071811361201e-05, + "loss": 0.3389, + "step": 4477 + }, + { + "epoch": 1.2, + "learning_rate": 4.2509043408360134e-05, + "loss": 0.189, + "step": 4478 + }, + { + "epoch": 1.2, + "learning_rate": 4.250736870310825e-05, + "loss": 0.3421, + "step": 4479 + }, + { + "epoch": 1.2, + "learning_rate": 4.2505693997856375e-05, + "loss": 0.2765, + "step": 4480 + }, + { + "epoch": 1.2, + "learning_rate": 4.25040192926045e-05, + "loss": 0.1452, + "step": 4481 + }, + { + "epoch": 1.2, + "learning_rate": 4.250234458735263e-05, + "loss": 0.1569, + "step": 4482 + }, + { + "epoch": 1.2, + "learning_rate": 4.2500669882100755e-05, + "loss": 0.345, + "step": 4483 + }, + { + "epoch": 1.2, + "learning_rate": 4.249899517684888e-05, + "loss": 0.1327, + "step": 4484 + }, + { + "epoch": 1.2, + "learning_rate": 4.2497320471597e-05, + "loss": 0.3351, + "step": 4485 + }, + { + "epoch": 1.2, + "learning_rate": 4.249564576634513e-05, + "loss": 0.1271, + "step": 4486 + }, + { + "epoch": 1.2, + "learning_rate": 4.249397106109325e-05, + "loss": 0.175, + "step": 4487 + }, + { + "epoch": 1.2, + "learning_rate": 4.2492296355841376e-05, + "loss": 0.2159, + "step": 4488 + }, + { + "epoch": 1.2, + "learning_rate": 4.249062165058949e-05, + "loss": 0.1903, + "step": 4489 + }, + { + "epoch": 1.2, + "learning_rate": 4.2488946945337624e-05, + "loss": 0.14, + "step": 4490 + }, + { + "epoch": 1.2, + "learning_rate": 4.248727224008575e-05, + "loss": 0.4749, + "step": 4491 + }, + { + "epoch": 1.2, + "learning_rate": 4.248559753483387e-05, + "loss": 0.322, + "step": 4492 + }, + { + "epoch": 1.2, + "learning_rate": 4.2483922829582e-05, + "loss": 0.1399, + "step": 4493 + }, + { + "epoch": 1.2, + "learning_rate": 4.248224812433012e-05, + "loss": 0.1354, + "step": 4494 + }, + { + "epoch": 1.2, + "learning_rate": 4.2480573419078245e-05, + "loss": 0.1545, + "step": 4495 + }, + { + "epoch": 1.2, + "learning_rate": 4.247889871382637e-05, + "loss": 0.2767, + "step": 4496 + }, + { + "epoch": 1.2, + "learning_rate": 4.2477224008574494e-05, + "loss": 0.2235, + "step": 4497 + }, + { + "epoch": 1.21, + "learning_rate": 4.247554930332262e-05, + "loss": 0.1916, + "step": 4498 + }, + { + "epoch": 1.21, + "learning_rate": 4.247387459807074e-05, + "loss": 0.1576, + "step": 4499 + }, + { + "epoch": 1.21, + "learning_rate": 4.2472199892818866e-05, + "loss": 0.156, + "step": 4500 + }, + { + "epoch": 1.21, + "learning_rate": 4.247052518756699e-05, + "loss": 0.1391, + "step": 4501 + }, + { + "epoch": 1.21, + "learning_rate": 4.2468850482315115e-05, + "loss": 0.1582, + "step": 4502 + }, + { + "epoch": 1.21, + "learning_rate": 4.246717577706324e-05, + "loss": 0.3118, + "step": 4503 + }, + { + "epoch": 1.21, + "learning_rate": 4.246550107181136e-05, + "loss": 0.287, + "step": 4504 + }, + { + "epoch": 1.21, + "learning_rate": 4.246382636655949e-05, + "loss": 0.1426, + "step": 4505 + }, + { + "epoch": 1.21, + "learning_rate": 4.246215166130761e-05, + "loss": 0.244, + "step": 4506 + }, + { + "epoch": 1.21, + "learning_rate": 4.2460476956055736e-05, + "loss": 0.1508, + "step": 4507 + }, + { + "epoch": 1.21, + "learning_rate": 4.245880225080386e-05, + "loss": 0.1514, + "step": 4508 + }, + { + "epoch": 1.21, + "learning_rate": 4.2457127545551984e-05, + "loss": 0.1411, + "step": 4509 + }, + { + "epoch": 1.21, + "learning_rate": 4.245545284030011e-05, + "loss": 0.1216, + "step": 4510 + }, + { + "epoch": 1.21, + "learning_rate": 4.245377813504823e-05, + "loss": 0.1742, + "step": 4511 + }, + { + "epoch": 1.21, + "learning_rate": 4.245210342979636e-05, + "loss": 0.1271, + "step": 4512 + }, + { + "epoch": 1.21, + "learning_rate": 4.245042872454448e-05, + "loss": 0.1743, + "step": 4513 + }, + { + "epoch": 1.21, + "learning_rate": 4.2448754019292605e-05, + "loss": 0.4435, + "step": 4514 + }, + { + "epoch": 1.21, + "learning_rate": 4.2447079314040736e-05, + "loss": 0.1416, + "step": 4515 + }, + { + "epoch": 1.21, + "learning_rate": 4.244540460878886e-05, + "loss": 0.1602, + "step": 4516 + }, + { + "epoch": 1.21, + "learning_rate": 4.244372990353698e-05, + "loss": 0.1345, + "step": 4517 + }, + { + "epoch": 1.21, + "learning_rate": 4.24420551982851e-05, + "loss": 0.3332, + "step": 4518 + }, + { + "epoch": 1.21, + "learning_rate": 4.2440380493033226e-05, + "loss": 0.1593, + "step": 4519 + }, + { + "epoch": 1.21, + "learning_rate": 4.243870578778135e-05, + "loss": 0.1723, + "step": 4520 + }, + { + "epoch": 1.21, + "learning_rate": 4.2437031082529475e-05, + "loss": 0.2546, + "step": 4521 + }, + { + "epoch": 1.21, + "learning_rate": 4.24353563772776e-05, + "loss": 0.1628, + "step": 4522 + }, + { + "epoch": 1.21, + "learning_rate": 4.243368167202573e-05, + "loss": 0.2667, + "step": 4523 + }, + { + "epoch": 1.21, + "learning_rate": 4.2432006966773854e-05, + "loss": 0.1385, + "step": 4524 + }, + { + "epoch": 1.21, + "learning_rate": 4.243033226152198e-05, + "loss": 0.18, + "step": 4525 + }, + { + "epoch": 1.21, + "learning_rate": 4.2428657556270096e-05, + "loss": 0.2841, + "step": 4526 + }, + { + "epoch": 1.21, + "learning_rate": 4.242698285101822e-05, + "loss": 0.2795, + "step": 4527 + }, + { + "epoch": 1.21, + "learning_rate": 4.2425308145766344e-05, + "loss": 0.1901, + "step": 4528 + }, + { + "epoch": 1.21, + "learning_rate": 4.242363344051447e-05, + "loss": 0.1366, + "step": 4529 + }, + { + "epoch": 1.21, + "learning_rate": 4.242195873526259e-05, + "loss": 0.144, + "step": 4530 + }, + { + "epoch": 1.21, + "learning_rate": 4.2420284030010724e-05, + "loss": 0.1429, + "step": 4531 + }, + { + "epoch": 1.21, + "learning_rate": 4.241860932475885e-05, + "loss": 0.1541, + "step": 4532 + }, + { + "epoch": 1.21, + "learning_rate": 4.241693461950697e-05, + "loss": 0.1349, + "step": 4533 + }, + { + "epoch": 1.21, + "learning_rate": 4.2415259914255096e-05, + "loss": 0.3112, + "step": 4534 + }, + { + "epoch": 1.22, + "learning_rate": 4.2413585209003214e-05, + "loss": 0.2086, + "step": 4535 + }, + { + "epoch": 1.22, + "learning_rate": 4.241191050375134e-05, + "loss": 0.2508, + "step": 4536 + }, + { + "epoch": 1.22, + "learning_rate": 4.241023579849946e-05, + "loss": 0.1458, + "step": 4537 + }, + { + "epoch": 1.22, + "learning_rate": 4.2408561093247586e-05, + "loss": 0.1495, + "step": 4538 + }, + { + "epoch": 1.22, + "learning_rate": 4.240688638799572e-05, + "loss": 0.1733, + "step": 4539 + }, + { + "epoch": 1.22, + "learning_rate": 4.240521168274384e-05, + "loss": 0.1279, + "step": 4540 + }, + { + "epoch": 1.22, + "learning_rate": 4.2403536977491966e-05, + "loss": 0.1358, + "step": 4541 + }, + { + "epoch": 1.22, + "learning_rate": 4.240186227224009e-05, + "loss": 0.1362, + "step": 4542 + }, + { + "epoch": 1.22, + "learning_rate": 4.2400187566988214e-05, + "loss": 0.1578, + "step": 4543 + }, + { + "epoch": 1.22, + "learning_rate": 4.239851286173634e-05, + "loss": 0.1951, + "step": 4544 + }, + { + "epoch": 1.22, + "learning_rate": 4.2396838156484456e-05, + "loss": 0.2355, + "step": 4545 + }, + { + "epoch": 1.22, + "learning_rate": 4.239516345123259e-05, + "loss": 0.1681, + "step": 4546 + }, + { + "epoch": 1.22, + "learning_rate": 4.239348874598071e-05, + "loss": 0.1355, + "step": 4547 + }, + { + "epoch": 1.22, + "learning_rate": 4.2391814040728835e-05, + "loss": 0.1818, + "step": 4548 + }, + { + "epoch": 1.22, + "learning_rate": 4.239013933547696e-05, + "loss": 0.2481, + "step": 4549 + }, + { + "epoch": 1.22, + "learning_rate": 4.2388464630225084e-05, + "loss": 0.2268, + "step": 4550 + }, + { + "epoch": 1.22, + "learning_rate": 4.238678992497321e-05, + "loss": 0.1624, + "step": 4551 + }, + { + "epoch": 1.22, + "learning_rate": 4.238511521972133e-05, + "loss": 0.1688, + "step": 4552 + }, + { + "epoch": 1.22, + "learning_rate": 4.2383440514469456e-05, + "loss": 0.2422, + "step": 4553 + }, + { + "epoch": 1.22, + "learning_rate": 4.238176580921758e-05, + "loss": 0.1298, + "step": 4554 + }, + { + "epoch": 1.22, + "learning_rate": 4.2380091103965705e-05, + "loss": 0.1372, + "step": 4555 + }, + { + "epoch": 1.22, + "learning_rate": 4.237841639871383e-05, + "loss": 0.164, + "step": 4556 + }, + { + "epoch": 1.22, + "learning_rate": 4.237674169346195e-05, + "loss": 0.1841, + "step": 4557 + }, + { + "epoch": 1.22, + "learning_rate": 4.237506698821008e-05, + "loss": 0.2003, + "step": 4558 + }, + { + "epoch": 1.22, + "learning_rate": 4.23733922829582e-05, + "loss": 0.2138, + "step": 4559 + }, + { + "epoch": 1.22, + "learning_rate": 4.2371717577706326e-05, + "loss": 0.1989, + "step": 4560 + }, + { + "epoch": 1.22, + "learning_rate": 4.237004287245445e-05, + "loss": 0.1878, + "step": 4561 + }, + { + "epoch": 1.22, + "learning_rate": 4.2368368167202574e-05, + "loss": 0.1388, + "step": 4562 + }, + { + "epoch": 1.22, + "learning_rate": 4.23666934619507e-05, + "loss": 0.1383, + "step": 4563 + }, + { + "epoch": 1.22, + "learning_rate": 4.236501875669882e-05, + "loss": 0.1454, + "step": 4564 + }, + { + "epoch": 1.22, + "learning_rate": 4.236334405144695e-05, + "loss": 0.1423, + "step": 4565 + }, + { + "epoch": 1.22, + "learning_rate": 4.236166934619507e-05, + "loss": 0.2756, + "step": 4566 + }, + { + "epoch": 1.22, + "learning_rate": 4.2359994640943195e-05, + "loss": 0.2528, + "step": 4567 + }, + { + "epoch": 1.22, + "learning_rate": 4.235831993569132e-05, + "loss": 0.1502, + "step": 4568 + }, + { + "epoch": 1.22, + "learning_rate": 4.2356645230439444e-05, + "loss": 0.2663, + "step": 4569 + }, + { + "epoch": 1.22, + "learning_rate": 4.235497052518757e-05, + "loss": 0.1898, + "step": 4570 + }, + { + "epoch": 1.22, + "learning_rate": 4.23532958199357e-05, + "loss": 0.1781, + "step": 4571 + }, + { + "epoch": 1.23, + "learning_rate": 4.235162111468382e-05, + "loss": 0.2027, + "step": 4572 + }, + { + "epoch": 1.23, + "learning_rate": 4.234994640943194e-05, + "loss": 0.1241, + "step": 4573 + }, + { + "epoch": 1.23, + "learning_rate": 4.2348271704180065e-05, + "loss": 0.1725, + "step": 4574 + }, + { + "epoch": 1.23, + "learning_rate": 4.234659699892819e-05, + "loss": 0.1515, + "step": 4575 + }, + { + "epoch": 1.23, + "learning_rate": 4.234492229367631e-05, + "loss": 0.1731, + "step": 4576 + }, + { + "epoch": 1.23, + "learning_rate": 4.234324758842444e-05, + "loss": 0.1494, + "step": 4577 + }, + { + "epoch": 1.23, + "learning_rate": 4.234157288317256e-05, + "loss": 0.3091, + "step": 4578 + }, + { + "epoch": 1.23, + "learning_rate": 4.233989817792069e-05, + "loss": 0.132, + "step": 4579 + }, + { + "epoch": 1.23, + "learning_rate": 4.2338223472668817e-05, + "loss": 0.2773, + "step": 4580 + }, + { + "epoch": 1.23, + "learning_rate": 4.233654876741694e-05, + "loss": 0.1971, + "step": 4581 + }, + { + "epoch": 1.23, + "learning_rate": 4.233487406216506e-05, + "loss": 0.2301, + "step": 4582 + }, + { + "epoch": 1.23, + "learning_rate": 4.233319935691318e-05, + "loss": 0.2297, + "step": 4583 + }, + { + "epoch": 1.23, + "learning_rate": 4.233152465166131e-05, + "loss": 0.18, + "step": 4584 + }, + { + "epoch": 1.23, + "learning_rate": 4.232984994640943e-05, + "loss": 0.1453, + "step": 4585 + }, + { + "epoch": 1.23, + "learning_rate": 4.2328175241157555e-05, + "loss": 0.1369, + "step": 4586 + }, + { + "epoch": 1.23, + "learning_rate": 4.2326500535905686e-05, + "loss": 0.1677, + "step": 4587 + }, + { + "epoch": 1.23, + "learning_rate": 4.232482583065381e-05, + "loss": 0.2807, + "step": 4588 + }, + { + "epoch": 1.23, + "learning_rate": 4.2323151125401934e-05, + "loss": 0.1854, + "step": 4589 + }, + { + "epoch": 1.23, + "learning_rate": 4.232147642015006e-05, + "loss": 0.176, + "step": 4590 + }, + { + "epoch": 1.23, + "learning_rate": 4.2319801714898176e-05, + "loss": 0.1883, + "step": 4591 + }, + { + "epoch": 1.23, + "learning_rate": 4.23181270096463e-05, + "loss": 0.2285, + "step": 4592 + }, + { + "epoch": 1.23, + "learning_rate": 4.2316452304394425e-05, + "loss": 0.1742, + "step": 4593 + }, + { + "epoch": 1.23, + "learning_rate": 4.231477759914255e-05, + "loss": 0.1535, + "step": 4594 + }, + { + "epoch": 1.23, + "learning_rate": 4.231310289389068e-05, + "loss": 0.1406, + "step": 4595 + }, + { + "epoch": 1.23, + "learning_rate": 4.2311428188638804e-05, + "loss": 0.2611, + "step": 4596 + }, + { + "epoch": 1.23, + "learning_rate": 4.230975348338693e-05, + "loss": 0.1549, + "step": 4597 + }, + { + "epoch": 1.23, + "learning_rate": 4.230807877813505e-05, + "loss": 0.1459, + "step": 4598 + }, + { + "epoch": 1.23, + "learning_rate": 4.2306404072883177e-05, + "loss": 0.211, + "step": 4599 + }, + { + "epoch": 1.23, + "learning_rate": 4.23047293676313e-05, + "loss": 0.1944, + "step": 4600 + }, + { + "epoch": 1.23, + "learning_rate": 4.230305466237942e-05, + "loss": 0.276, + "step": 4601 + }, + { + "epoch": 1.23, + "learning_rate": 4.230137995712755e-05, + "loss": 0.1574, + "step": 4602 + }, + { + "epoch": 1.23, + "learning_rate": 4.2299705251875673e-05, + "loss": 0.1891, + "step": 4603 + }, + { + "epoch": 1.23, + "learning_rate": 4.22980305466238e-05, + "loss": 0.1737, + "step": 4604 + }, + { + "epoch": 1.23, + "learning_rate": 4.229635584137192e-05, + "loss": 0.2193, + "step": 4605 + }, + { + "epoch": 1.23, + "learning_rate": 4.2294681136120046e-05, + "loss": 0.1613, + "step": 4606 + }, + { + "epoch": 1.23, + "learning_rate": 4.229300643086817e-05, + "loss": 0.1514, + "step": 4607 + }, + { + "epoch": 1.23, + "learning_rate": 4.2291331725616294e-05, + "loss": 0.234, + "step": 4608 + }, + { + "epoch": 1.23, + "learning_rate": 4.228965702036442e-05, + "loss": 0.1546, + "step": 4609 + }, + { + "epoch": 1.24, + "learning_rate": 4.228798231511254e-05, + "loss": 0.1394, + "step": 4610 + }, + { + "epoch": 1.24, + "learning_rate": 4.228630760986067e-05, + "loss": 0.1326, + "step": 4611 + }, + { + "epoch": 1.24, + "learning_rate": 4.228463290460879e-05, + "loss": 0.3416, + "step": 4612 + }, + { + "epoch": 1.24, + "learning_rate": 4.2282958199356916e-05, + "loss": 0.1424, + "step": 4613 + }, + { + "epoch": 1.24, + "learning_rate": 4.228128349410504e-05, + "loss": 0.1602, + "step": 4614 + }, + { + "epoch": 1.24, + "learning_rate": 4.2279608788853164e-05, + "loss": 0.1784, + "step": 4615 + }, + { + "epoch": 1.24, + "learning_rate": 4.227793408360129e-05, + "loss": 0.102, + "step": 4616 + }, + { + "epoch": 1.24, + "learning_rate": 4.227625937834941e-05, + "loss": 0.119, + "step": 4617 + }, + { + "epoch": 1.24, + "learning_rate": 4.2274584673097537e-05, + "loss": 0.1937, + "step": 4618 + }, + { + "epoch": 1.24, + "learning_rate": 4.227290996784566e-05, + "loss": 0.2668, + "step": 4619 + }, + { + "epoch": 1.24, + "learning_rate": 4.2271235262593785e-05, + "loss": 0.1789, + "step": 4620 + }, + { + "epoch": 1.24, + "learning_rate": 4.226956055734191e-05, + "loss": 0.187, + "step": 4621 + }, + { + "epoch": 1.24, + "learning_rate": 4.2267885852090033e-05, + "loss": 0.1267, + "step": 4622 + }, + { + "epoch": 1.24, + "learning_rate": 4.226621114683816e-05, + "loss": 0.1677, + "step": 4623 + }, + { + "epoch": 1.24, + "learning_rate": 4.226453644158628e-05, + "loss": 0.1332, + "step": 4624 + }, + { + "epoch": 1.24, + "learning_rate": 4.2262861736334406e-05, + "loss": 0.253, + "step": 4625 + }, + { + "epoch": 1.24, + "learning_rate": 4.226118703108253e-05, + "loss": 0.1664, + "step": 4626 + }, + { + "epoch": 1.24, + "learning_rate": 4.225951232583066e-05, + "loss": 0.1524, + "step": 4627 + }, + { + "epoch": 1.24, + "learning_rate": 4.2257837620578785e-05, + "loss": 0.1975, + "step": 4628 + }, + { + "epoch": 1.24, + "learning_rate": 4.22561629153269e-05, + "loss": 0.3138, + "step": 4629 + }, + { + "epoch": 1.24, + "learning_rate": 4.225448821007503e-05, + "loss": 0.1687, + "step": 4630 + }, + { + "epoch": 1.24, + "learning_rate": 4.225281350482315e-05, + "loss": 0.1473, + "step": 4631 + }, + { + "epoch": 1.24, + "learning_rate": 4.2251138799571275e-05, + "loss": 0.1457, + "step": 4632 + }, + { + "epoch": 1.24, + "learning_rate": 4.22494640943194e-05, + "loss": 0.1354, + "step": 4633 + }, + { + "epoch": 1.24, + "learning_rate": 4.2247789389067524e-05, + "loss": 0.1282, + "step": 4634 + }, + { + "epoch": 1.24, + "learning_rate": 4.2246114683815655e-05, + "loss": 0.1644, + "step": 4635 + }, + { + "epoch": 1.24, + "learning_rate": 4.224443997856378e-05, + "loss": 0.1779, + "step": 4636 + }, + { + "epoch": 1.24, + "learning_rate": 4.22427652733119e-05, + "loss": 0.291, + "step": 4637 + }, + { + "epoch": 1.24, + "learning_rate": 4.224109056806002e-05, + "loss": 0.1775, + "step": 4638 + }, + { + "epoch": 1.24, + "learning_rate": 4.2239415862808145e-05, + "loss": 0.2215, + "step": 4639 + }, + { + "epoch": 1.24, + "learning_rate": 4.223774115755627e-05, + "loss": 0.3331, + "step": 4640 + }, + { + "epoch": 1.24, + "learning_rate": 4.223606645230439e-05, + "loss": 0.1674, + "step": 4641 + }, + { + "epoch": 1.24, + "learning_rate": 4.223439174705252e-05, + "loss": 0.1543, + "step": 4642 + }, + { + "epoch": 1.24, + "learning_rate": 4.223271704180065e-05, + "loss": 0.1703, + "step": 4643 + }, + { + "epoch": 1.24, + "learning_rate": 4.223104233654877e-05, + "loss": 0.2361, + "step": 4644 + }, + { + "epoch": 1.24, + "learning_rate": 4.22293676312969e-05, + "loss": 0.2, + "step": 4645 + }, + { + "epoch": 1.24, + "learning_rate": 4.222769292604502e-05, + "loss": 0.1661, + "step": 4646 + }, + { + "epoch": 1.25, + "learning_rate": 4.222601822079314e-05, + "loss": 0.2338, + "step": 4647 + }, + { + "epoch": 1.25, + "learning_rate": 4.222434351554126e-05, + "loss": 0.165, + "step": 4648 + }, + { + "epoch": 1.25, + "learning_rate": 4.222266881028939e-05, + "loss": 0.1836, + "step": 4649 + }, + { + "epoch": 1.25, + "learning_rate": 4.222099410503751e-05, + "loss": 0.1792, + "step": 4650 + }, + { + "epoch": 1.25, + "learning_rate": 4.221931939978564e-05, + "loss": 0.1988, + "step": 4651 + }, + { + "epoch": 1.25, + "learning_rate": 4.2217644694533766e-05, + "loss": 0.2266, + "step": 4652 + }, + { + "epoch": 1.25, + "learning_rate": 4.221596998928189e-05, + "loss": 0.2495, + "step": 4653 + }, + { + "epoch": 1.25, + "learning_rate": 4.2214295284030015e-05, + "loss": 0.1984, + "step": 4654 + }, + { + "epoch": 1.25, + "learning_rate": 4.221262057877814e-05, + "loss": 0.2207, + "step": 4655 + }, + { + "epoch": 1.25, + "learning_rate": 4.221094587352626e-05, + "loss": 0.1758, + "step": 4656 + }, + { + "epoch": 1.25, + "learning_rate": 4.220927116827438e-05, + "loss": 0.1479, + "step": 4657 + }, + { + "epoch": 1.25, + "learning_rate": 4.220759646302251e-05, + "loss": 0.1468, + "step": 4658 + }, + { + "epoch": 1.25, + "learning_rate": 4.2205921757770636e-05, + "loss": 0.1447, + "step": 4659 + }, + { + "epoch": 1.25, + "learning_rate": 4.220424705251876e-05, + "loss": 0.1239, + "step": 4660 + }, + { + "epoch": 1.25, + "learning_rate": 4.2202572347266884e-05, + "loss": 0.1176, + "step": 4661 + }, + { + "epoch": 1.25, + "learning_rate": 4.220089764201501e-05, + "loss": 0.1475, + "step": 4662 + }, + { + "epoch": 1.25, + "learning_rate": 4.219922293676313e-05, + "loss": 0.1844, + "step": 4663 + }, + { + "epoch": 1.25, + "learning_rate": 4.219754823151126e-05, + "loss": 0.1449, + "step": 4664 + }, + { + "epoch": 1.25, + "learning_rate": 4.219587352625938e-05, + "loss": 0.1433, + "step": 4665 + }, + { + "epoch": 1.25, + "learning_rate": 4.2194198821007505e-05, + "loss": 0.2683, + "step": 4666 + }, + { + "epoch": 1.25, + "learning_rate": 4.219252411575563e-05, + "loss": 0.1437, + "step": 4667 + }, + { + "epoch": 1.25, + "learning_rate": 4.2190849410503754e-05, + "loss": 0.1432, + "step": 4668 + }, + { + "epoch": 1.25, + "learning_rate": 4.218917470525188e-05, + "loss": 0.1508, + "step": 4669 + }, + { + "epoch": 1.25, + "learning_rate": 4.21875e-05, + "loss": 0.1485, + "step": 4670 + }, + { + "epoch": 1.25, + "learning_rate": 4.2185825294748126e-05, + "loss": 0.1843, + "step": 4671 + }, + { + "epoch": 1.25, + "learning_rate": 4.218415058949625e-05, + "loss": 0.3305, + "step": 4672 + }, + { + "epoch": 1.25, + "learning_rate": 4.2182475884244375e-05, + "loss": 0.1499, + "step": 4673 + }, + { + "epoch": 1.25, + "learning_rate": 4.21808011789925e-05, + "loss": 0.1444, + "step": 4674 + }, + { + "epoch": 1.25, + "learning_rate": 4.217912647374062e-05, + "loss": 0.1513, + "step": 4675 + }, + { + "epoch": 1.25, + "learning_rate": 4.217745176848875e-05, + "loss": 0.1365, + "step": 4676 + }, + { + "epoch": 1.25, + "learning_rate": 4.217577706323687e-05, + "loss": 0.2152, + "step": 4677 + }, + { + "epoch": 1.25, + "learning_rate": 4.2174102357984996e-05, + "loss": 0.3276, + "step": 4678 + }, + { + "epoch": 1.25, + "learning_rate": 4.217242765273312e-05, + "loss": 0.1428, + "step": 4679 + }, + { + "epoch": 1.25, + "learning_rate": 4.2170752947481244e-05, + "loss": 0.1655, + "step": 4680 + }, + { + "epoch": 1.25, + "learning_rate": 4.216907824222937e-05, + "loss": 0.2221, + "step": 4681 + }, + { + "epoch": 1.25, + "learning_rate": 4.216740353697749e-05, + "loss": 0.3186, + "step": 4682 + }, + { + "epoch": 1.25, + "learning_rate": 4.2165728831725624e-05, + "loss": 0.2286, + "step": 4683 + }, + { + "epoch": 1.26, + "learning_rate": 4.216405412647374e-05, + "loss": 0.1283, + "step": 4684 + }, + { + "epoch": 1.26, + "learning_rate": 4.2162379421221865e-05, + "loss": 0.1433, + "step": 4685 + }, + { + "epoch": 1.26, + "learning_rate": 4.216070471596999e-05, + "loss": 0.1297, + "step": 4686 + }, + { + "epoch": 1.26, + "learning_rate": 4.2159030010718114e-05, + "loss": 0.1171, + "step": 4687 + }, + { + "epoch": 1.26, + "learning_rate": 4.215735530546624e-05, + "loss": 0.2714, + "step": 4688 + }, + { + "epoch": 1.26, + "learning_rate": 4.215568060021436e-05, + "loss": 0.1392, + "step": 4689 + }, + { + "epoch": 1.26, + "learning_rate": 4.2154005894962486e-05, + "loss": 0.1704, + "step": 4690 + }, + { + "epoch": 1.26, + "learning_rate": 4.215233118971062e-05, + "loss": 0.1774, + "step": 4691 + }, + { + "epoch": 1.26, + "learning_rate": 4.215065648445874e-05, + "loss": 0.1944, + "step": 4692 + }, + { + "epoch": 1.26, + "learning_rate": 4.2148981779206866e-05, + "loss": 0.1663, + "step": 4693 + }, + { + "epoch": 1.26, + "learning_rate": 4.214730707395498e-05, + "loss": 0.3177, + "step": 4694 + }, + { + "epoch": 1.26, + "learning_rate": 4.214563236870311e-05, + "loss": 0.12, + "step": 4695 + }, + { + "epoch": 1.26, + "learning_rate": 4.214395766345123e-05, + "loss": 0.1465, + "step": 4696 + }, + { + "epoch": 1.26, + "learning_rate": 4.2142282958199356e-05, + "loss": 0.2161, + "step": 4697 + }, + { + "epoch": 1.26, + "learning_rate": 4.214060825294748e-05, + "loss": 0.1417, + "step": 4698 + }, + { + "epoch": 1.26, + "learning_rate": 4.213893354769561e-05, + "loss": 0.2628, + "step": 4699 + }, + { + "epoch": 1.26, + "learning_rate": 4.2137258842443735e-05, + "loss": 0.1871, + "step": 4700 + }, + { + "epoch": 1.26, + "learning_rate": 4.213558413719186e-05, + "loss": 0.1895, + "step": 4701 + }, + { + "epoch": 1.26, + "learning_rate": 4.2133909431939984e-05, + "loss": 0.1944, + "step": 4702 + }, + { + "epoch": 1.26, + "learning_rate": 4.21322347266881e-05, + "loss": 0.2092, + "step": 4703 + }, + { + "epoch": 1.26, + "learning_rate": 4.2130560021436225e-05, + "loss": 0.1338, + "step": 4704 + }, + { + "epoch": 1.26, + "learning_rate": 4.212888531618435e-05, + "loss": 0.1388, + "step": 4705 + }, + { + "epoch": 1.26, + "learning_rate": 4.2127210610932474e-05, + "loss": 0.2153, + "step": 4706 + }, + { + "epoch": 1.26, + "learning_rate": 4.2125535905680605e-05, + "loss": 0.1392, + "step": 4707 + }, + { + "epoch": 1.26, + "learning_rate": 4.212386120042873e-05, + "loss": 0.1485, + "step": 4708 + }, + { + "epoch": 1.26, + "learning_rate": 4.212218649517685e-05, + "loss": 0.1594, + "step": 4709 + }, + { + "epoch": 1.26, + "learning_rate": 4.212051178992498e-05, + "loss": 0.1393, + "step": 4710 + }, + { + "epoch": 1.26, + "learning_rate": 4.21188370846731e-05, + "loss": 0.1533, + "step": 4711 + }, + { + "epoch": 1.26, + "learning_rate": 4.211716237942122e-05, + "loss": 0.1623, + "step": 4712 + }, + { + "epoch": 1.26, + "learning_rate": 4.211548767416934e-05, + "loss": 0.1308, + "step": 4713 + }, + { + "epoch": 1.26, + "learning_rate": 4.2113812968917474e-05, + "loss": 0.1654, + "step": 4714 + }, + { + "epoch": 1.26, + "learning_rate": 4.21121382636656e-05, + "loss": 0.1299, + "step": 4715 + }, + { + "epoch": 1.26, + "learning_rate": 4.211046355841372e-05, + "loss": 0.2918, + "step": 4716 + }, + { + "epoch": 1.26, + "learning_rate": 4.210878885316185e-05, + "loss": 0.1423, + "step": 4717 + }, + { + "epoch": 1.26, + "learning_rate": 4.210711414790997e-05, + "loss": 0.4577, + "step": 4718 + }, + { + "epoch": 1.26, + "learning_rate": 4.2105439442658095e-05, + "loss": 0.2824, + "step": 4719 + }, + { + "epoch": 1.26, + "learning_rate": 4.210376473740622e-05, + "loss": 0.1524, + "step": 4720 + }, + { + "epoch": 1.26, + "learning_rate": 4.2102090032154344e-05, + "loss": 0.1667, + "step": 4721 + }, + { + "epoch": 1.27, + "learning_rate": 4.210041532690247e-05, + "loss": 0.3358, + "step": 4722 + }, + { + "epoch": 1.27, + "learning_rate": 4.209874062165059e-05, + "loss": 0.1574, + "step": 4723 + }, + { + "epoch": 1.27, + "learning_rate": 4.2097065916398716e-05, + "loss": 0.2325, + "step": 4724 + }, + { + "epoch": 1.27, + "learning_rate": 4.209539121114684e-05, + "loss": 0.1498, + "step": 4725 + }, + { + "epoch": 1.27, + "learning_rate": 4.2093716505894965e-05, + "loss": 0.1693, + "step": 4726 + }, + { + "epoch": 1.27, + "learning_rate": 4.209204180064309e-05, + "loss": 0.1477, + "step": 4727 + }, + { + "epoch": 1.27, + "learning_rate": 4.209036709539121e-05, + "loss": 0.1541, + "step": 4728 + }, + { + "epoch": 1.27, + "learning_rate": 4.208869239013934e-05, + "loss": 0.1743, + "step": 4729 + }, + { + "epoch": 1.27, + "learning_rate": 4.208701768488746e-05, + "loss": 0.1404, + "step": 4730 + }, + { + "epoch": 1.27, + "learning_rate": 4.2085342979635586e-05, + "loss": 0.3045, + "step": 4731 + }, + { + "epoch": 1.27, + "learning_rate": 4.208366827438371e-05, + "loss": 0.2397, + "step": 4732 + }, + { + "epoch": 1.27, + "learning_rate": 4.2081993569131834e-05, + "loss": 0.1436, + "step": 4733 + }, + { + "epoch": 1.27, + "learning_rate": 4.208031886387996e-05, + "loss": 0.1865, + "step": 4734 + }, + { + "epoch": 1.27, + "learning_rate": 4.207864415862808e-05, + "loss": 0.2535, + "step": 4735 + }, + { + "epoch": 1.27, + "learning_rate": 4.207696945337621e-05, + "loss": 0.2486, + "step": 4736 + }, + { + "epoch": 1.27, + "learning_rate": 4.207529474812433e-05, + "loss": 0.1436, + "step": 4737 + }, + { + "epoch": 1.27, + "learning_rate": 4.2073620042872455e-05, + "loss": 0.167, + "step": 4738 + }, + { + "epoch": 1.27, + "learning_rate": 4.2071945337620586e-05, + "loss": 0.3135, + "step": 4739 + }, + { + "epoch": 1.27, + "learning_rate": 4.2070270632368704e-05, + "loss": 0.1368, + "step": 4740 + }, + { + "epoch": 1.27, + "learning_rate": 4.206859592711683e-05, + "loss": 0.2329, + "step": 4741 + }, + { + "epoch": 1.27, + "learning_rate": 4.206692122186495e-05, + "loss": 0.1406, + "step": 4742 + }, + { + "epoch": 1.27, + "learning_rate": 4.2065246516613076e-05, + "loss": 0.134, + "step": 4743 + }, + { + "epoch": 1.27, + "learning_rate": 4.20635718113612e-05, + "loss": 0.1184, + "step": 4744 + }, + { + "epoch": 1.27, + "learning_rate": 4.2061897106109325e-05, + "loss": 0.2131, + "step": 4745 + }, + { + "epoch": 1.27, + "learning_rate": 4.206022240085745e-05, + "loss": 0.1262, + "step": 4746 + }, + { + "epoch": 1.27, + "learning_rate": 4.205854769560558e-05, + "loss": 0.1466, + "step": 4747 + }, + { + "epoch": 1.27, + "learning_rate": 4.2056872990353704e-05, + "loss": 0.1535, + "step": 4748 + }, + { + "epoch": 1.27, + "learning_rate": 4.205519828510183e-05, + "loss": 0.1491, + "step": 4749 + }, + { + "epoch": 1.27, + "learning_rate": 4.2053523579849946e-05, + "loss": 0.2118, + "step": 4750 + }, + { + "epoch": 1.27, + "learning_rate": 4.205184887459807e-05, + "loss": 0.2018, + "step": 4751 + }, + { + "epoch": 1.27, + "learning_rate": 4.2050174169346194e-05, + "loss": 0.2032, + "step": 4752 + }, + { + "epoch": 1.27, + "learning_rate": 4.204849946409432e-05, + "loss": 0.1251, + "step": 4753 + }, + { + "epoch": 1.27, + "learning_rate": 4.204682475884244e-05, + "loss": 0.3008, + "step": 4754 + }, + { + "epoch": 1.27, + "learning_rate": 4.2045150053590574e-05, + "loss": 0.215, + "step": 4755 + }, + { + "epoch": 1.27, + "learning_rate": 4.20434753483387e-05, + "loss": 0.1471, + "step": 4756 + }, + { + "epoch": 1.27, + "learning_rate": 4.204180064308682e-05, + "loss": 0.17, + "step": 4757 + }, + { + "epoch": 1.27, + "learning_rate": 4.2040125937834946e-05, + "loss": 0.1434, + "step": 4758 + }, + { + "epoch": 1.28, + "learning_rate": 4.2038451232583064e-05, + "loss": 0.1458, + "step": 4759 + }, + { + "epoch": 1.28, + "learning_rate": 4.203677652733119e-05, + "loss": 0.133, + "step": 4760 + }, + { + "epoch": 1.28, + "learning_rate": 4.203510182207931e-05, + "loss": 0.1653, + "step": 4761 + }, + { + "epoch": 1.28, + "learning_rate": 4.2033427116827436e-05, + "loss": 0.2401, + "step": 4762 + }, + { + "epoch": 1.28, + "learning_rate": 4.203175241157557e-05, + "loss": 0.1805, + "step": 4763 + }, + { + "epoch": 1.28, + "learning_rate": 4.203007770632369e-05, + "loss": 0.1542, + "step": 4764 + }, + { + "epoch": 1.28, + "learning_rate": 4.2028403001071816e-05, + "loss": 0.1098, + "step": 4765 + }, + { + "epoch": 1.28, + "learning_rate": 4.202672829581994e-05, + "loss": 0.1367, + "step": 4766 + }, + { + "epoch": 1.28, + "learning_rate": 4.2025053590568064e-05, + "loss": 0.3521, + "step": 4767 + }, + { + "epoch": 1.28, + "learning_rate": 4.202337888531618e-05, + "loss": 0.2146, + "step": 4768 + }, + { + "epoch": 1.28, + "learning_rate": 4.2021704180064306e-05, + "loss": 0.1578, + "step": 4769 + }, + { + "epoch": 1.28, + "learning_rate": 4.202002947481244e-05, + "loss": 0.1465, + "step": 4770 + }, + { + "epoch": 1.28, + "learning_rate": 4.201835476956056e-05, + "loss": 0.1908, + "step": 4771 + }, + { + "epoch": 1.28, + "learning_rate": 4.2016680064308685e-05, + "loss": 0.1287, + "step": 4772 + }, + { + "epoch": 1.28, + "learning_rate": 4.201500535905681e-05, + "loss": 0.1482, + "step": 4773 + }, + { + "epoch": 1.28, + "learning_rate": 4.2013330653804934e-05, + "loss": 0.1311, + "step": 4774 + }, + { + "epoch": 1.28, + "learning_rate": 4.201165594855306e-05, + "loss": 0.1431, + "step": 4775 + }, + { + "epoch": 1.28, + "learning_rate": 4.200998124330118e-05, + "loss": 0.2027, + "step": 4776 + }, + { + "epoch": 1.28, + "learning_rate": 4.2008306538049306e-05, + "loss": 0.1482, + "step": 4777 + }, + { + "epoch": 1.28, + "learning_rate": 4.200663183279743e-05, + "loss": 0.1455, + "step": 4778 + }, + { + "epoch": 1.28, + "learning_rate": 4.2004957127545555e-05, + "loss": 0.1479, + "step": 4779 + }, + { + "epoch": 1.28, + "learning_rate": 4.200328242229368e-05, + "loss": 0.1282, + "step": 4780 + }, + { + "epoch": 1.28, + "learning_rate": 4.20016077170418e-05, + "loss": 0.1596, + "step": 4781 + }, + { + "epoch": 1.28, + "learning_rate": 4.199993301178993e-05, + "loss": 0.1411, + "step": 4782 + }, + { + "epoch": 1.28, + "learning_rate": 4.199825830653805e-05, + "loss": 0.1257, + "step": 4783 + }, + { + "epoch": 1.28, + "learning_rate": 4.1996583601286176e-05, + "loss": 0.1693, + "step": 4784 + }, + { + "epoch": 1.28, + "learning_rate": 4.19949088960343e-05, + "loss": 0.2667, + "step": 4785 + }, + { + "epoch": 1.28, + "learning_rate": 4.1993234190782424e-05, + "loss": 0.1459, + "step": 4786 + }, + { + "epoch": 1.28, + "learning_rate": 4.199155948553055e-05, + "loss": 0.1223, + "step": 4787 + }, + { + "epoch": 1.28, + "learning_rate": 4.198988478027867e-05, + "loss": 0.1329, + "step": 4788 + }, + { + "epoch": 1.28, + "learning_rate": 4.19882100750268e-05, + "loss": 0.1617, + "step": 4789 + }, + { + "epoch": 1.28, + "learning_rate": 4.198653536977492e-05, + "loss": 0.26, + "step": 4790 + }, + { + "epoch": 1.28, + "learning_rate": 4.1984860664523045e-05, + "loss": 0.2086, + "step": 4791 + }, + { + "epoch": 1.28, + "learning_rate": 4.198318595927117e-05, + "loss": 0.1923, + "step": 4792 + }, + { + "epoch": 1.28, + "learning_rate": 4.1981511254019294e-05, + "loss": 0.1733, + "step": 4793 + }, + { + "epoch": 1.28, + "learning_rate": 4.197983654876742e-05, + "loss": 0.1973, + "step": 4794 + }, + { + "epoch": 1.28, + "learning_rate": 4.197816184351555e-05, + "loss": 0.3509, + "step": 4795 + }, + { + "epoch": 1.29, + "learning_rate": 4.1976487138263666e-05, + "loss": 0.131, + "step": 4796 + }, + { + "epoch": 1.29, + "learning_rate": 4.197481243301179e-05, + "loss": 0.147, + "step": 4797 + }, + { + "epoch": 1.29, + "learning_rate": 4.1973137727759915e-05, + "loss": 0.2268, + "step": 4798 + }, + { + "epoch": 1.29, + "learning_rate": 4.197146302250804e-05, + "loss": 0.1263, + "step": 4799 + }, + { + "epoch": 1.29, + "learning_rate": 4.196978831725616e-05, + "loss": 0.2936, + "step": 4800 + }, + { + "epoch": 1.29, + "learning_rate": 4.196811361200429e-05, + "loss": 0.1642, + "step": 4801 + }, + { + "epoch": 1.29, + "learning_rate": 4.196643890675241e-05, + "loss": 0.1549, + "step": 4802 + }, + { + "epoch": 1.29, + "learning_rate": 4.196476420150054e-05, + "loss": 0.1681, + "step": 4803 + }, + { + "epoch": 1.29, + "learning_rate": 4.196308949624867e-05, + "loss": 0.1511, + "step": 4804 + }, + { + "epoch": 1.29, + "learning_rate": 4.196141479099679e-05, + "loss": 0.1775, + "step": 4805 + }, + { + "epoch": 1.29, + "learning_rate": 4.195974008574491e-05, + "loss": 0.2356, + "step": 4806 + }, + { + "epoch": 1.29, + "learning_rate": 4.195806538049303e-05, + "loss": 0.1349, + "step": 4807 + }, + { + "epoch": 1.29, + "learning_rate": 4.195639067524116e-05, + "loss": 0.1428, + "step": 4808 + }, + { + "epoch": 1.29, + "learning_rate": 4.195471596998928e-05, + "loss": 0.2285, + "step": 4809 + }, + { + "epoch": 1.29, + "learning_rate": 4.1953041264737405e-05, + "loss": 0.187, + "step": 4810 + }, + { + "epoch": 1.29, + "learning_rate": 4.1951366559485536e-05, + "loss": 0.2653, + "step": 4811 + }, + { + "epoch": 1.29, + "learning_rate": 4.194969185423366e-05, + "loss": 0.1354, + "step": 4812 + }, + { + "epoch": 1.29, + "learning_rate": 4.1948017148981785e-05, + "loss": 0.2162, + "step": 4813 + }, + { + "epoch": 1.29, + "learning_rate": 4.194634244372991e-05, + "loss": 0.1176, + "step": 4814 + }, + { + "epoch": 1.29, + "learning_rate": 4.1944667738478026e-05, + "loss": 0.1903, + "step": 4815 + }, + { + "epoch": 1.29, + "learning_rate": 4.194299303322615e-05, + "loss": 0.1527, + "step": 4816 + }, + { + "epoch": 1.29, + "learning_rate": 4.1941318327974275e-05, + "loss": 0.2477, + "step": 4817 + }, + { + "epoch": 1.29, + "learning_rate": 4.19396436227224e-05, + "loss": 0.1468, + "step": 4818 + }, + { + "epoch": 1.29, + "learning_rate": 4.193796891747053e-05, + "loss": 0.1426, + "step": 4819 + }, + { + "epoch": 1.29, + "learning_rate": 4.1936294212218654e-05, + "loss": 0.1773, + "step": 4820 + }, + { + "epoch": 1.29, + "learning_rate": 4.193461950696678e-05, + "loss": 0.156, + "step": 4821 + }, + { + "epoch": 1.29, + "learning_rate": 4.19329448017149e-05, + "loss": 0.1546, + "step": 4822 + }, + { + "epoch": 1.29, + "learning_rate": 4.193127009646303e-05, + "loss": 0.1482, + "step": 4823 + }, + { + "epoch": 1.29, + "learning_rate": 4.1929595391211144e-05, + "loss": 0.25, + "step": 4824 + }, + { + "epoch": 1.29, + "learning_rate": 4.192792068595927e-05, + "loss": 0.1649, + "step": 4825 + }, + { + "epoch": 1.29, + "learning_rate": 4.19262459807074e-05, + "loss": 0.2129, + "step": 4826 + }, + { + "epoch": 1.29, + "learning_rate": 4.1924571275455523e-05, + "loss": 0.1343, + "step": 4827 + }, + { + "epoch": 1.29, + "learning_rate": 4.192289657020365e-05, + "loss": 0.1388, + "step": 4828 + }, + { + "epoch": 1.29, + "learning_rate": 4.192122186495177e-05, + "loss": 0.2874, + "step": 4829 + }, + { + "epoch": 1.29, + "learning_rate": 4.1919547159699896e-05, + "loss": 0.1756, + "step": 4830 + }, + { + "epoch": 1.29, + "learning_rate": 4.191787245444802e-05, + "loss": 0.1282, + "step": 4831 + }, + { + "epoch": 1.29, + "learning_rate": 4.1916197749196145e-05, + "loss": 0.1649, + "step": 4832 + }, + { + "epoch": 1.3, + "learning_rate": 4.191452304394427e-05, + "loss": 0.1477, + "step": 4833 + }, + { + "epoch": 1.3, + "learning_rate": 4.191284833869239e-05, + "loss": 0.1364, + "step": 4834 + }, + { + "epoch": 1.3, + "learning_rate": 4.191117363344052e-05, + "loss": 0.1725, + "step": 4835 + }, + { + "epoch": 1.3, + "learning_rate": 4.190949892818864e-05, + "loss": 0.1288, + "step": 4836 + }, + { + "epoch": 1.3, + "learning_rate": 4.1907824222936766e-05, + "loss": 0.1411, + "step": 4837 + }, + { + "epoch": 1.3, + "learning_rate": 4.190614951768489e-05, + "loss": 0.1602, + "step": 4838 + }, + { + "epoch": 1.3, + "learning_rate": 4.1904474812433014e-05, + "loss": 0.1213, + "step": 4839 + }, + { + "epoch": 1.3, + "learning_rate": 4.190280010718114e-05, + "loss": 0.1437, + "step": 4840 + }, + { + "epoch": 1.3, + "learning_rate": 4.190112540192926e-05, + "loss": 0.1388, + "step": 4841 + }, + { + "epoch": 1.3, + "learning_rate": 4.1899450696677387e-05, + "loss": 0.2637, + "step": 4842 + }, + { + "epoch": 1.3, + "learning_rate": 4.189777599142551e-05, + "loss": 0.1526, + "step": 4843 + }, + { + "epoch": 1.3, + "learning_rate": 4.1896101286173635e-05, + "loss": 0.1338, + "step": 4844 + }, + { + "epoch": 1.3, + "learning_rate": 4.189442658092176e-05, + "loss": 0.1179, + "step": 4845 + }, + { + "epoch": 1.3, + "learning_rate": 4.1892751875669883e-05, + "loss": 0.3674, + "step": 4846 + }, + { + "epoch": 1.3, + "learning_rate": 4.189107717041801e-05, + "loss": 0.1819, + "step": 4847 + }, + { + "epoch": 1.3, + "learning_rate": 4.188940246516613e-05, + "loss": 0.2859, + "step": 4848 + }, + { + "epoch": 1.3, + "learning_rate": 4.1887727759914256e-05, + "loss": 0.2224, + "step": 4849 + }, + { + "epoch": 1.3, + "learning_rate": 4.188605305466238e-05, + "loss": 0.2137, + "step": 4850 + }, + { + "epoch": 1.3, + "learning_rate": 4.188437834941051e-05, + "loss": 0.1297, + "step": 4851 + }, + { + "epoch": 1.3, + "learning_rate": 4.188270364415863e-05, + "loss": 0.1812, + "step": 4852 + }, + { + "epoch": 1.3, + "learning_rate": 4.188102893890675e-05, + "loss": 0.1752, + "step": 4853 + }, + { + "epoch": 1.3, + "learning_rate": 4.187935423365488e-05, + "loss": 0.1838, + "step": 4854 + }, + { + "epoch": 1.3, + "learning_rate": 4.1877679528403e-05, + "loss": 0.2236, + "step": 4855 + }, + { + "epoch": 1.3, + "learning_rate": 4.1876004823151126e-05, + "loss": 0.2261, + "step": 4856 + }, + { + "epoch": 1.3, + "learning_rate": 4.187433011789925e-05, + "loss": 0.1628, + "step": 4857 + }, + { + "epoch": 1.3, + "learning_rate": 4.1872655412647374e-05, + "loss": 0.2784, + "step": 4858 + }, + { + "epoch": 1.3, + "learning_rate": 4.1870980707395505e-05, + "loss": 0.2279, + "step": 4859 + }, + { + "epoch": 1.3, + "learning_rate": 4.186930600214363e-05, + "loss": 0.1424, + "step": 4860 + }, + { + "epoch": 1.3, + "learning_rate": 4.186763129689175e-05, + "loss": 0.1454, + "step": 4861 + }, + { + "epoch": 1.3, + "learning_rate": 4.186595659163987e-05, + "loss": 0.2865, + "step": 4862 + }, + { + "epoch": 1.3, + "learning_rate": 4.1864281886387995e-05, + "loss": 0.1379, + "step": 4863 + }, + { + "epoch": 1.3, + "learning_rate": 4.186260718113612e-05, + "loss": 0.1383, + "step": 4864 + }, + { + "epoch": 1.3, + "learning_rate": 4.1860932475884243e-05, + "loss": 0.1165, + "step": 4865 + }, + { + "epoch": 1.3, + "learning_rate": 4.185925777063237e-05, + "loss": 0.1582, + "step": 4866 + }, + { + "epoch": 1.3, + "learning_rate": 4.18575830653805e-05, + "loss": 0.2529, + "step": 4867 + }, + { + "epoch": 1.3, + "learning_rate": 4.185590836012862e-05, + "loss": 0.2149, + "step": 4868 + }, + { + "epoch": 1.3, + "learning_rate": 4.185423365487675e-05, + "loss": 0.3418, + "step": 4869 + }, + { + "epoch": 1.3, + "learning_rate": 4.185255894962487e-05, + "loss": 0.1528, + "step": 4870 + }, + { + "epoch": 1.31, + "learning_rate": 4.185088424437299e-05, + "loss": 0.1421, + "step": 4871 + }, + { + "epoch": 1.31, + "learning_rate": 4.184920953912111e-05, + "loss": 0.1334, + "step": 4872 + }, + { + "epoch": 1.31, + "learning_rate": 4.184753483386924e-05, + "loss": 0.1534, + "step": 4873 + }, + { + "epoch": 1.31, + "learning_rate": 4.184586012861736e-05, + "loss": 0.2273, + "step": 4874 + }, + { + "epoch": 1.31, + "learning_rate": 4.184418542336549e-05, + "loss": 0.1404, + "step": 4875 + }, + { + "epoch": 1.31, + "learning_rate": 4.1842510718113616e-05, + "loss": 0.134, + "step": 4876 + }, + { + "epoch": 1.31, + "learning_rate": 4.184083601286174e-05, + "loss": 0.129, + "step": 4877 + }, + { + "epoch": 1.31, + "learning_rate": 4.1839161307609865e-05, + "loss": 0.1488, + "step": 4878 + }, + { + "epoch": 1.31, + "learning_rate": 4.183748660235799e-05, + "loss": 0.1372, + "step": 4879 + }, + { + "epoch": 1.31, + "learning_rate": 4.1835811897106107e-05, + "loss": 0.1591, + "step": 4880 + }, + { + "epoch": 1.31, + "learning_rate": 4.183413719185423e-05, + "loss": 0.1472, + "step": 4881 + }, + { + "epoch": 1.31, + "learning_rate": 4.183246248660236e-05, + "loss": 0.1245, + "step": 4882 + }, + { + "epoch": 1.31, + "learning_rate": 4.1830787781350486e-05, + "loss": 0.1339, + "step": 4883 + }, + { + "epoch": 1.31, + "learning_rate": 4.182911307609861e-05, + "loss": 0.2065, + "step": 4884 + }, + { + "epoch": 1.31, + "learning_rate": 4.1827438370846734e-05, + "loss": 0.209, + "step": 4885 + }, + { + "epoch": 1.31, + "learning_rate": 4.182576366559486e-05, + "loss": 0.2304, + "step": 4886 + }, + { + "epoch": 1.31, + "learning_rate": 4.182408896034298e-05, + "loss": 0.1409, + "step": 4887 + }, + { + "epoch": 1.31, + "learning_rate": 4.182241425509111e-05, + "loss": 0.1263, + "step": 4888 + }, + { + "epoch": 1.31, + "learning_rate": 4.182073954983923e-05, + "loss": 0.1357, + "step": 4889 + }, + { + "epoch": 1.31, + "learning_rate": 4.1819064844587355e-05, + "loss": 0.2353, + "step": 4890 + }, + { + "epoch": 1.31, + "learning_rate": 4.181739013933548e-05, + "loss": 0.3793, + "step": 4891 + }, + { + "epoch": 1.31, + "learning_rate": 4.1815715434083604e-05, + "loss": 0.1867, + "step": 4892 + }, + { + "epoch": 1.31, + "learning_rate": 4.181404072883173e-05, + "loss": 0.1592, + "step": 4893 + }, + { + "epoch": 1.31, + "learning_rate": 4.181236602357985e-05, + "loss": 0.151, + "step": 4894 + }, + { + "epoch": 1.31, + "learning_rate": 4.1810691318327976e-05, + "loss": 0.2848, + "step": 4895 + }, + { + "epoch": 1.31, + "learning_rate": 4.18090166130761e-05, + "loss": 0.127, + "step": 4896 + }, + { + "epoch": 1.31, + "learning_rate": 4.1807341907824225e-05, + "loss": 0.3852, + "step": 4897 + }, + { + "epoch": 1.31, + "learning_rate": 4.180566720257235e-05, + "loss": 0.1332, + "step": 4898 + }, + { + "epoch": 1.31, + "learning_rate": 4.180399249732047e-05, + "loss": 0.132, + "step": 4899 + }, + { + "epoch": 1.31, + "learning_rate": 4.18023177920686e-05, + "loss": 0.331, + "step": 4900 + }, + { + "epoch": 1.31, + "learning_rate": 4.180064308681672e-05, + "loss": 0.2745, + "step": 4901 + }, + { + "epoch": 1.31, + "learning_rate": 4.1798968381564846e-05, + "loss": 0.3156, + "step": 4902 + }, + { + "epoch": 1.31, + "learning_rate": 4.179729367631297e-05, + "loss": 0.2656, + "step": 4903 + }, + { + "epoch": 1.31, + "learning_rate": 4.1795618971061094e-05, + "loss": 0.1448, + "step": 4904 + }, + { + "epoch": 1.31, + "learning_rate": 4.179394426580922e-05, + "loss": 0.1781, + "step": 4905 + }, + { + "epoch": 1.31, + "learning_rate": 4.179226956055734e-05, + "loss": 0.1424, + "step": 4906 + }, + { + "epoch": 1.31, + "learning_rate": 4.1790594855305474e-05, + "loss": 0.1846, + "step": 4907 + }, + { + "epoch": 1.32, + "learning_rate": 4.178892015005359e-05, + "loss": 0.159, + "step": 4908 + }, + { + "epoch": 1.32, + "learning_rate": 4.1787245444801715e-05, + "loss": 0.1774, + "step": 4909 + }, + { + "epoch": 1.32, + "learning_rate": 4.178557073954984e-05, + "loss": 0.2345, + "step": 4910 + }, + { + "epoch": 1.32, + "learning_rate": 4.1783896034297964e-05, + "loss": 0.1608, + "step": 4911 + }, + { + "epoch": 1.32, + "learning_rate": 4.178222132904609e-05, + "loss": 0.1421, + "step": 4912 + }, + { + "epoch": 1.32, + "learning_rate": 4.178054662379421e-05, + "loss": 0.1308, + "step": 4913 + }, + { + "epoch": 1.32, + "learning_rate": 4.1778871918542336e-05, + "loss": 0.1312, + "step": 4914 + }, + { + "epoch": 1.32, + "learning_rate": 4.177719721329047e-05, + "loss": 0.1756, + "step": 4915 + }, + { + "epoch": 1.32, + "learning_rate": 4.177552250803859e-05, + "loss": 0.1788, + "step": 4916 + }, + { + "epoch": 1.32, + "learning_rate": 4.1773847802786716e-05, + "loss": 0.1275, + "step": 4917 + }, + { + "epoch": 1.32, + "learning_rate": 4.177217309753483e-05, + "loss": 0.1641, + "step": 4918 + }, + { + "epoch": 1.32, + "learning_rate": 4.177049839228296e-05, + "loss": 0.161, + "step": 4919 + }, + { + "epoch": 1.32, + "learning_rate": 4.176882368703108e-05, + "loss": 0.4139, + "step": 4920 + }, + { + "epoch": 1.32, + "learning_rate": 4.1767148981779206e-05, + "loss": 0.2486, + "step": 4921 + }, + { + "epoch": 1.32, + "learning_rate": 4.176547427652733e-05, + "loss": 0.1455, + "step": 4922 + }, + { + "epoch": 1.32, + "learning_rate": 4.176379957127546e-05, + "loss": 0.1546, + "step": 4923 + }, + { + "epoch": 1.32, + "learning_rate": 4.1762124866023585e-05, + "loss": 0.1408, + "step": 4924 + }, + { + "epoch": 1.32, + "learning_rate": 4.176045016077171e-05, + "loss": 0.1726, + "step": 4925 + }, + { + "epoch": 1.32, + "learning_rate": 4.1758775455519834e-05, + "loss": 0.1475, + "step": 4926 + }, + { + "epoch": 1.32, + "learning_rate": 4.175710075026795e-05, + "loss": 0.2327, + "step": 4927 + }, + { + "epoch": 1.32, + "learning_rate": 4.1755426045016075e-05, + "loss": 0.1543, + "step": 4928 + }, + { + "epoch": 1.32, + "learning_rate": 4.17537513397642e-05, + "loss": 0.1607, + "step": 4929 + }, + { + "epoch": 1.32, + "learning_rate": 4.1752076634512324e-05, + "loss": 0.1786, + "step": 4930 + }, + { + "epoch": 1.32, + "learning_rate": 4.1750401929260455e-05, + "loss": 0.2192, + "step": 4931 + }, + { + "epoch": 1.32, + "learning_rate": 4.174872722400858e-05, + "loss": 0.1299, + "step": 4932 + }, + { + "epoch": 1.32, + "learning_rate": 4.17470525187567e-05, + "loss": 0.108, + "step": 4933 + }, + { + "epoch": 1.32, + "learning_rate": 4.174537781350483e-05, + "loss": 0.2277, + "step": 4934 + }, + { + "epoch": 1.32, + "learning_rate": 4.174370310825295e-05, + "loss": 0.2565, + "step": 4935 + }, + { + "epoch": 1.32, + "learning_rate": 4.174202840300107e-05, + "loss": 0.1255, + "step": 4936 + }, + { + "epoch": 1.32, + "learning_rate": 4.174035369774919e-05, + "loss": 0.1428, + "step": 4937 + }, + { + "epoch": 1.32, + "learning_rate": 4.1738678992497324e-05, + "loss": 0.1596, + "step": 4938 + }, + { + "epoch": 1.32, + "learning_rate": 4.173700428724545e-05, + "loss": 0.134, + "step": 4939 + }, + { + "epoch": 1.32, + "learning_rate": 4.173532958199357e-05, + "loss": 0.1608, + "step": 4940 + }, + { + "epoch": 1.32, + "learning_rate": 4.17336548767417e-05, + "loss": 0.1167, + "step": 4941 + }, + { + "epoch": 1.32, + "learning_rate": 4.173198017148982e-05, + "loss": 0.1368, + "step": 4942 + }, + { + "epoch": 1.32, + "learning_rate": 4.1730305466237945e-05, + "loss": 0.2098, + "step": 4943 + }, + { + "epoch": 1.32, + "learning_rate": 4.172863076098607e-05, + "loss": 0.2009, + "step": 4944 + }, + { + "epoch": 1.33, + "learning_rate": 4.1726956055734194e-05, + "loss": 0.2505, + "step": 4945 + }, + { + "epoch": 1.33, + "learning_rate": 4.172528135048232e-05, + "loss": 0.2078, + "step": 4946 + }, + { + "epoch": 1.33, + "learning_rate": 4.172360664523044e-05, + "loss": 0.1589, + "step": 4947 + }, + { + "epoch": 1.33, + "learning_rate": 4.1721931939978566e-05, + "loss": 0.156, + "step": 4948 + }, + { + "epoch": 1.33, + "learning_rate": 4.172025723472669e-05, + "loss": 0.1502, + "step": 4949 + }, + { + "epoch": 1.33, + "learning_rate": 4.1718582529474815e-05, + "loss": 0.1842, + "step": 4950 + }, + { + "epoch": 1.33, + "learning_rate": 4.171690782422294e-05, + "loss": 0.1254, + "step": 4951 + }, + { + "epoch": 1.33, + "learning_rate": 4.171523311897106e-05, + "loss": 0.1577, + "step": 4952 + }, + { + "epoch": 1.33, + "learning_rate": 4.171355841371919e-05, + "loss": 0.2078, + "step": 4953 + }, + { + "epoch": 1.33, + "learning_rate": 4.171188370846731e-05, + "loss": 0.1339, + "step": 4954 + }, + { + "epoch": 1.33, + "learning_rate": 4.1710209003215436e-05, + "loss": 0.1897, + "step": 4955 + }, + { + "epoch": 1.33, + "learning_rate": 4.170853429796356e-05, + "loss": 0.177, + "step": 4956 + }, + { + "epoch": 1.33, + "learning_rate": 4.1706859592711684e-05, + "loss": 0.289, + "step": 4957 + }, + { + "epoch": 1.33, + "learning_rate": 4.170518488745981e-05, + "loss": 0.128, + "step": 4958 + }, + { + "epoch": 1.33, + "learning_rate": 4.170351018220793e-05, + "loss": 0.1489, + "step": 4959 + }, + { + "epoch": 1.33, + "learning_rate": 4.170183547695606e-05, + "loss": 0.1121, + "step": 4960 + }, + { + "epoch": 1.33, + "learning_rate": 4.170016077170418e-05, + "loss": 0.1242, + "step": 4961 + }, + { + "epoch": 1.33, + "learning_rate": 4.1698486066452305e-05, + "loss": 0.3791, + "step": 4962 + }, + { + "epoch": 1.33, + "learning_rate": 4.1696811361200436e-05, + "loss": 0.2906, + "step": 4963 + }, + { + "epoch": 1.33, + "learning_rate": 4.1695136655948554e-05, + "loss": 0.147, + "step": 4964 + }, + { + "epoch": 1.33, + "learning_rate": 4.169346195069668e-05, + "loss": 0.1438, + "step": 4965 + }, + { + "epoch": 1.33, + "learning_rate": 4.16917872454448e-05, + "loss": 0.1508, + "step": 4966 + }, + { + "epoch": 1.33, + "learning_rate": 4.1690112540192926e-05, + "loss": 0.1953, + "step": 4967 + }, + { + "epoch": 1.33, + "learning_rate": 4.168843783494105e-05, + "loss": 0.134, + "step": 4968 + }, + { + "epoch": 1.33, + "learning_rate": 4.1686763129689175e-05, + "loss": 0.1954, + "step": 4969 + }, + { + "epoch": 1.33, + "learning_rate": 4.16850884244373e-05, + "loss": 0.2228, + "step": 4970 + }, + { + "epoch": 1.33, + "learning_rate": 4.168341371918543e-05, + "loss": 0.205, + "step": 4971 + }, + { + "epoch": 1.33, + "learning_rate": 4.1681739013933554e-05, + "loss": 0.2481, + "step": 4972 + }, + { + "epoch": 1.33, + "learning_rate": 4.168006430868168e-05, + "loss": 0.1502, + "step": 4973 + }, + { + "epoch": 1.33, + "learning_rate": 4.1678389603429796e-05, + "loss": 0.4753, + "step": 4974 + }, + { + "epoch": 1.33, + "learning_rate": 4.167671489817792e-05, + "loss": 0.3114, + "step": 4975 + }, + { + "epoch": 1.33, + "learning_rate": 4.1675040192926044e-05, + "loss": 0.3891, + "step": 4976 + }, + { + "epoch": 1.33, + "learning_rate": 4.167336548767417e-05, + "loss": 0.1446, + "step": 4977 + }, + { + "epoch": 1.33, + "learning_rate": 4.167169078242229e-05, + "loss": 0.1548, + "step": 4978 + }, + { + "epoch": 1.33, + "learning_rate": 4.1670016077170424e-05, + "loss": 0.2204, + "step": 4979 + }, + { + "epoch": 1.33, + "learning_rate": 4.166834137191855e-05, + "loss": 0.1071, + "step": 4980 + }, + { + "epoch": 1.33, + "learning_rate": 4.166666666666667e-05, + "loss": 0.2219, + "step": 4981 + }, + { + "epoch": 1.33, + "learning_rate": 4.1664991961414796e-05, + "loss": 0.1674, + "step": 4982 + }, + { + "epoch": 1.34, + "learning_rate": 4.1663317256162914e-05, + "loss": 0.122, + "step": 4983 + }, + { + "epoch": 1.34, + "learning_rate": 4.166164255091104e-05, + "loss": 0.2481, + "step": 4984 + }, + { + "epoch": 1.34, + "learning_rate": 4.165996784565916e-05, + "loss": 0.2581, + "step": 4985 + }, + { + "epoch": 1.34, + "learning_rate": 4.1658293140407286e-05, + "loss": 0.2317, + "step": 4986 + }, + { + "epoch": 1.34, + "learning_rate": 4.165661843515542e-05, + "loss": 0.1248, + "step": 4987 + }, + { + "epoch": 1.34, + "learning_rate": 4.165494372990354e-05, + "loss": 0.1767, + "step": 4988 + }, + { + "epoch": 1.34, + "learning_rate": 4.1653269024651666e-05, + "loss": 0.1197, + "step": 4989 + }, + { + "epoch": 1.34, + "learning_rate": 4.165159431939979e-05, + "loss": 0.2605, + "step": 4990 + }, + { + "epoch": 1.34, + "learning_rate": 4.1649919614147914e-05, + "loss": 0.2344, + "step": 4991 + }, + { + "epoch": 1.34, + "learning_rate": 4.164824490889603e-05, + "loss": 0.1329, + "step": 4992 + }, + { + "epoch": 1.34, + "learning_rate": 4.1646570203644156e-05, + "loss": 0.1353, + "step": 4993 + }, + { + "epoch": 1.34, + "learning_rate": 4.164489549839229e-05, + "loss": 0.1982, + "step": 4994 + }, + { + "epoch": 1.34, + "learning_rate": 4.164322079314041e-05, + "loss": 0.3507, + "step": 4995 + }, + { + "epoch": 1.34, + "learning_rate": 4.1641546087888535e-05, + "loss": 0.1827, + "step": 4996 + }, + { + "epoch": 1.34, + "learning_rate": 4.163987138263666e-05, + "loss": 0.1228, + "step": 4997 + }, + { + "epoch": 1.34, + "learning_rate": 4.1638196677384784e-05, + "loss": 0.1478, + "step": 4998 + }, + { + "epoch": 1.34, + "learning_rate": 4.163652197213291e-05, + "loss": 0.126, + "step": 4999 + }, + { + "epoch": 1.34, + "learning_rate": 4.163484726688103e-05, + "loss": 0.1388, + "step": 5000 + }, + { + "epoch": 1.34, + "learning_rate": 4.1633172561629156e-05, + "loss": 0.1295, + "step": 5001 + }, + { + "epoch": 1.34, + "learning_rate": 4.163149785637728e-05, + "loss": 0.1333, + "step": 5002 + }, + { + "epoch": 1.34, + "learning_rate": 4.1629823151125405e-05, + "loss": 0.1575, + "step": 5003 + }, + { + "epoch": 1.34, + "learning_rate": 4.162814844587353e-05, + "loss": 0.2624, + "step": 5004 + }, + { + "epoch": 1.34, + "learning_rate": 4.162647374062165e-05, + "loss": 0.1748, + "step": 5005 + }, + { + "epoch": 1.34, + "learning_rate": 4.162479903536978e-05, + "loss": 0.1901, + "step": 5006 + }, + { + "epoch": 1.34, + "learning_rate": 4.16231243301179e-05, + "loss": 0.1337, + "step": 5007 + }, + { + "epoch": 1.34, + "learning_rate": 4.1621449624866026e-05, + "loss": 0.1568, + "step": 5008 + }, + { + "epoch": 1.34, + "learning_rate": 4.161977491961415e-05, + "loss": 0.2309, + "step": 5009 + }, + { + "epoch": 1.34, + "learning_rate": 4.1618100214362274e-05, + "loss": 0.1438, + "step": 5010 + }, + { + "epoch": 1.34, + "learning_rate": 4.16164255091104e-05, + "loss": 0.1804, + "step": 5011 + }, + { + "epoch": 1.34, + "learning_rate": 4.161475080385852e-05, + "loss": 0.1605, + "step": 5012 + }, + { + "epoch": 1.34, + "learning_rate": 4.161307609860665e-05, + "loss": 0.1475, + "step": 5013 + }, + { + "epoch": 1.34, + "learning_rate": 4.161140139335477e-05, + "loss": 0.2875, + "step": 5014 + }, + { + "epoch": 1.34, + "learning_rate": 4.1609726688102895e-05, + "loss": 0.1447, + "step": 5015 + }, + { + "epoch": 1.34, + "learning_rate": 4.160805198285102e-05, + "loss": 0.1229, + "step": 5016 + }, + { + "epoch": 1.34, + "learning_rate": 4.1606377277599144e-05, + "loss": 0.1382, + "step": 5017 + }, + { + "epoch": 1.34, + "learning_rate": 4.160470257234727e-05, + "loss": 0.1402, + "step": 5018 + }, + { + "epoch": 1.34, + "learning_rate": 4.16030278670954e-05, + "loss": 0.1711, + "step": 5019 + }, + { + "epoch": 1.35, + "learning_rate": 4.1601353161843516e-05, + "loss": 0.3279, + "step": 5020 + }, + { + "epoch": 1.35, + "learning_rate": 4.159967845659164e-05, + "loss": 0.1202, + "step": 5021 + }, + { + "epoch": 1.35, + "learning_rate": 4.1598003751339765e-05, + "loss": 0.1506, + "step": 5022 + }, + { + "epoch": 1.35, + "learning_rate": 4.159632904608789e-05, + "loss": 0.1281, + "step": 5023 + }, + { + "epoch": 1.35, + "learning_rate": 4.159465434083601e-05, + "loss": 0.1505, + "step": 5024 + }, + { + "epoch": 1.35, + "learning_rate": 4.159297963558414e-05, + "loss": 0.183, + "step": 5025 + }, + { + "epoch": 1.35, + "learning_rate": 4.159130493033226e-05, + "loss": 0.1669, + "step": 5026 + }, + { + "epoch": 1.35, + "learning_rate": 4.158963022508039e-05, + "loss": 0.1658, + "step": 5027 + }, + { + "epoch": 1.35, + "learning_rate": 4.158795551982852e-05, + "loss": 0.1713, + "step": 5028 + }, + { + "epoch": 1.35, + "learning_rate": 4.158628081457664e-05, + "loss": 0.1257, + "step": 5029 + }, + { + "epoch": 1.35, + "learning_rate": 4.158460610932476e-05, + "loss": 0.1383, + "step": 5030 + }, + { + "epoch": 1.35, + "learning_rate": 4.158293140407288e-05, + "loss": 0.1588, + "step": 5031 + }, + { + "epoch": 1.35, + "learning_rate": 4.158125669882101e-05, + "loss": 0.1503, + "step": 5032 + }, + { + "epoch": 1.35, + "learning_rate": 4.157958199356913e-05, + "loss": 0.1472, + "step": 5033 + }, + { + "epoch": 1.35, + "learning_rate": 4.1577907288317255e-05, + "loss": 0.126, + "step": 5034 + }, + { + "epoch": 1.35, + "learning_rate": 4.1576232583065386e-05, + "loss": 0.1252, + "step": 5035 + }, + { + "epoch": 1.35, + "learning_rate": 4.157455787781351e-05, + "loss": 0.1546, + "step": 5036 + }, + { + "epoch": 1.35, + "learning_rate": 4.1572883172561635e-05, + "loss": 0.2441, + "step": 5037 + }, + { + "epoch": 1.35, + "learning_rate": 4.157120846730976e-05, + "loss": 0.1287, + "step": 5038 + }, + { + "epoch": 1.35, + "learning_rate": 4.1569533762057876e-05, + "loss": 0.1494, + "step": 5039 + }, + { + "epoch": 1.35, + "learning_rate": 4.1567859056806e-05, + "loss": 0.138, + "step": 5040 + }, + { + "epoch": 1.35, + "learning_rate": 4.1566184351554125e-05, + "loss": 0.2816, + "step": 5041 + }, + { + "epoch": 1.35, + "learning_rate": 4.156450964630225e-05, + "loss": 0.278, + "step": 5042 + }, + { + "epoch": 1.35, + "learning_rate": 4.156283494105038e-05, + "loss": 0.3453, + "step": 5043 + }, + { + "epoch": 1.35, + "learning_rate": 4.1561160235798504e-05, + "loss": 0.1573, + "step": 5044 + }, + { + "epoch": 1.35, + "learning_rate": 4.155948553054663e-05, + "loss": 0.2062, + "step": 5045 + }, + { + "epoch": 1.35, + "learning_rate": 4.155781082529475e-05, + "loss": 0.1738, + "step": 5046 + }, + { + "epoch": 1.35, + "learning_rate": 4.155613612004288e-05, + "loss": 0.2054, + "step": 5047 + }, + { + "epoch": 1.35, + "learning_rate": 4.1554461414790994e-05, + "loss": 0.1331, + "step": 5048 + }, + { + "epoch": 1.35, + "learning_rate": 4.155278670953912e-05, + "loss": 0.1836, + "step": 5049 + }, + { + "epoch": 1.35, + "learning_rate": 4.155111200428725e-05, + "loss": 0.1488, + "step": 5050 + }, + { + "epoch": 1.35, + "learning_rate": 4.1549437299035373e-05, + "loss": 0.1644, + "step": 5051 + }, + { + "epoch": 1.35, + "learning_rate": 4.15477625937835e-05, + "loss": 0.3792, + "step": 5052 + }, + { + "epoch": 1.35, + "learning_rate": 4.154608788853162e-05, + "loss": 0.1531, + "step": 5053 + }, + { + "epoch": 1.35, + "learning_rate": 4.1544413183279746e-05, + "loss": 0.2331, + "step": 5054 + }, + { + "epoch": 1.35, + "learning_rate": 4.154273847802787e-05, + "loss": 0.1689, + "step": 5055 + }, + { + "epoch": 1.35, + "learning_rate": 4.1541063772775995e-05, + "loss": 0.1587, + "step": 5056 + }, + { + "epoch": 1.36, + "learning_rate": 4.153938906752412e-05, + "loss": 0.2231, + "step": 5057 + }, + { + "epoch": 1.36, + "learning_rate": 4.153771436227224e-05, + "loss": 0.2674, + "step": 5058 + }, + { + "epoch": 1.36, + "learning_rate": 4.153603965702037e-05, + "loss": 0.1246, + "step": 5059 + }, + { + "epoch": 1.36, + "learning_rate": 4.153436495176849e-05, + "loss": 0.1862, + "step": 5060 + }, + { + "epoch": 1.36, + "learning_rate": 4.1532690246516616e-05, + "loss": 0.178, + "step": 5061 + }, + { + "epoch": 1.36, + "learning_rate": 4.153101554126474e-05, + "loss": 0.1436, + "step": 5062 + }, + { + "epoch": 1.36, + "learning_rate": 4.1529340836012864e-05, + "loss": 0.1829, + "step": 5063 + }, + { + "epoch": 1.36, + "learning_rate": 4.152766613076099e-05, + "loss": 0.2479, + "step": 5064 + }, + { + "epoch": 1.36, + "learning_rate": 4.152599142550911e-05, + "loss": 0.149, + "step": 5065 + }, + { + "epoch": 1.36, + "learning_rate": 4.152431672025724e-05, + "loss": 0.2488, + "step": 5066 + }, + { + "epoch": 1.36, + "learning_rate": 4.152264201500536e-05, + "loss": 0.2186, + "step": 5067 + }, + { + "epoch": 1.36, + "learning_rate": 4.1520967309753485e-05, + "loss": 0.1667, + "step": 5068 + }, + { + "epoch": 1.36, + "learning_rate": 4.151929260450161e-05, + "loss": 0.3209, + "step": 5069 + }, + { + "epoch": 1.36, + "learning_rate": 4.1517617899249733e-05, + "loss": 0.1658, + "step": 5070 + }, + { + "epoch": 1.36, + "learning_rate": 4.151594319399786e-05, + "loss": 0.1717, + "step": 5071 + }, + { + "epoch": 1.36, + "learning_rate": 4.151426848874598e-05, + "loss": 0.1911, + "step": 5072 + }, + { + "epoch": 1.36, + "learning_rate": 4.1512593783494106e-05, + "loss": 0.169, + "step": 5073 + }, + { + "epoch": 1.36, + "learning_rate": 4.151091907824223e-05, + "loss": 0.1401, + "step": 5074 + }, + { + "epoch": 1.36, + "learning_rate": 4.150924437299036e-05, + "loss": 0.2101, + "step": 5075 + }, + { + "epoch": 1.36, + "learning_rate": 4.150756966773848e-05, + "loss": 0.1598, + "step": 5076 + }, + { + "epoch": 1.36, + "learning_rate": 4.15058949624866e-05, + "loss": 0.2354, + "step": 5077 + }, + { + "epoch": 1.36, + "learning_rate": 4.150422025723473e-05, + "loss": 0.3134, + "step": 5078 + }, + { + "epoch": 1.36, + "learning_rate": 4.150254555198285e-05, + "loss": 0.191, + "step": 5079 + }, + { + "epoch": 1.36, + "learning_rate": 4.1500870846730976e-05, + "loss": 0.1568, + "step": 5080 + }, + { + "epoch": 1.36, + "learning_rate": 4.14991961414791e-05, + "loss": 0.1297, + "step": 5081 + }, + { + "epoch": 1.36, + "learning_rate": 4.1497521436227224e-05, + "loss": 0.181, + "step": 5082 + }, + { + "epoch": 1.36, + "learning_rate": 4.1495846730975355e-05, + "loss": 0.1407, + "step": 5083 + }, + { + "epoch": 1.36, + "learning_rate": 4.149417202572348e-05, + "loss": 0.1365, + "step": 5084 + }, + { + "epoch": 1.36, + "learning_rate": 4.1492497320471603e-05, + "loss": 0.1887, + "step": 5085 + }, + { + "epoch": 1.36, + "learning_rate": 4.149082261521972e-05, + "loss": 0.276, + "step": 5086 + }, + { + "epoch": 1.36, + "learning_rate": 4.1489147909967845e-05, + "loss": 0.1364, + "step": 5087 + }, + { + "epoch": 1.36, + "learning_rate": 4.148747320471597e-05, + "loss": 0.2199, + "step": 5088 + }, + { + "epoch": 1.36, + "learning_rate": 4.1485798499464093e-05, + "loss": 0.2372, + "step": 5089 + }, + { + "epoch": 1.36, + "learning_rate": 4.148412379421222e-05, + "loss": 0.1265, + "step": 5090 + }, + { + "epoch": 1.36, + "learning_rate": 4.148244908896035e-05, + "loss": 0.1432, + "step": 5091 + }, + { + "epoch": 1.36, + "learning_rate": 4.148077438370847e-05, + "loss": 0.1465, + "step": 5092 + }, + { + "epoch": 1.36, + "learning_rate": 4.14790996784566e-05, + "loss": 0.1366, + "step": 5093 + }, + { + "epoch": 1.36, + "learning_rate": 4.147742497320472e-05, + "loss": 0.1805, + "step": 5094 + }, + { + "epoch": 1.37, + "learning_rate": 4.147575026795284e-05, + "loss": 0.3198, + "step": 5095 + }, + { + "epoch": 1.37, + "learning_rate": 4.147407556270096e-05, + "loss": 0.1362, + "step": 5096 + }, + { + "epoch": 1.37, + "learning_rate": 4.147240085744909e-05, + "loss": 0.1428, + "step": 5097 + }, + { + "epoch": 1.37, + "learning_rate": 4.147072615219721e-05, + "loss": 0.1527, + "step": 5098 + }, + { + "epoch": 1.37, + "learning_rate": 4.146905144694534e-05, + "loss": 0.1317, + "step": 5099 + }, + { + "epoch": 1.37, + "learning_rate": 4.1467376741693467e-05, + "loss": 0.1547, + "step": 5100 + }, + { + "epoch": 1.37, + "learning_rate": 4.146570203644159e-05, + "loss": 0.2713, + "step": 5101 + }, + { + "epoch": 1.37, + "learning_rate": 4.1464027331189715e-05, + "loss": 0.2166, + "step": 5102 + }, + { + "epoch": 1.37, + "learning_rate": 4.146235262593784e-05, + "loss": 0.1707, + "step": 5103 + }, + { + "epoch": 1.37, + "learning_rate": 4.1460677920685957e-05, + "loss": 0.1719, + "step": 5104 + }, + { + "epoch": 1.37, + "learning_rate": 4.145900321543408e-05, + "loss": 0.1191, + "step": 5105 + }, + { + "epoch": 1.37, + "learning_rate": 4.145732851018221e-05, + "loss": 0.1771, + "step": 5106 + }, + { + "epoch": 1.37, + "learning_rate": 4.1455653804930336e-05, + "loss": 0.1771, + "step": 5107 + }, + { + "epoch": 1.37, + "learning_rate": 4.145397909967846e-05, + "loss": 0.1381, + "step": 5108 + }, + { + "epoch": 1.37, + "learning_rate": 4.1452304394426584e-05, + "loss": 0.1686, + "step": 5109 + }, + { + "epoch": 1.37, + "learning_rate": 4.145062968917471e-05, + "loss": 0.1553, + "step": 5110 + }, + { + "epoch": 1.37, + "learning_rate": 4.144895498392283e-05, + "loss": 0.133, + "step": 5111 + }, + { + "epoch": 1.37, + "learning_rate": 4.144728027867096e-05, + "loss": 0.1732, + "step": 5112 + }, + { + "epoch": 1.37, + "learning_rate": 4.144560557341908e-05, + "loss": 0.1229, + "step": 5113 + }, + { + "epoch": 1.37, + "learning_rate": 4.1443930868167205e-05, + "loss": 0.1353, + "step": 5114 + }, + { + "epoch": 1.37, + "learning_rate": 4.144225616291533e-05, + "loss": 0.1519, + "step": 5115 + }, + { + "epoch": 1.37, + "learning_rate": 4.1440581457663454e-05, + "loss": 0.1377, + "step": 5116 + }, + { + "epoch": 1.37, + "learning_rate": 4.143890675241158e-05, + "loss": 0.1235, + "step": 5117 + }, + { + "epoch": 1.37, + "learning_rate": 4.14372320471597e-05, + "loss": 0.1421, + "step": 5118 + }, + { + "epoch": 1.37, + "learning_rate": 4.1435557341907827e-05, + "loss": 0.1433, + "step": 5119 + }, + { + "epoch": 1.37, + "learning_rate": 4.143388263665595e-05, + "loss": 0.3371, + "step": 5120 + }, + { + "epoch": 1.37, + "learning_rate": 4.1432207931404075e-05, + "loss": 0.1512, + "step": 5121 + }, + { + "epoch": 1.37, + "learning_rate": 4.14305332261522e-05, + "loss": 0.1852, + "step": 5122 + }, + { + "epoch": 1.37, + "learning_rate": 4.142885852090032e-05, + "loss": 0.1474, + "step": 5123 + }, + { + "epoch": 1.37, + "learning_rate": 4.142718381564845e-05, + "loss": 0.1742, + "step": 5124 + }, + { + "epoch": 1.37, + "learning_rate": 4.142550911039657e-05, + "loss": 0.1355, + "step": 5125 + }, + { + "epoch": 1.37, + "learning_rate": 4.1423834405144696e-05, + "loss": 0.1286, + "step": 5126 + }, + { + "epoch": 1.37, + "learning_rate": 4.142215969989282e-05, + "loss": 0.1189, + "step": 5127 + }, + { + "epoch": 1.37, + "learning_rate": 4.1420484994640944e-05, + "loss": 0.154, + "step": 5128 + }, + { + "epoch": 1.37, + "learning_rate": 4.141881028938907e-05, + "loss": 0.1483, + "step": 5129 + }, + { + "epoch": 1.37, + "learning_rate": 4.141713558413719e-05, + "loss": 0.1242, + "step": 5130 + }, + { + "epoch": 1.37, + "learning_rate": 4.1415460878885324e-05, + "loss": 0.2196, + "step": 5131 + }, + { + "epoch": 1.38, + "learning_rate": 4.141378617363344e-05, + "loss": 0.1506, + "step": 5132 + }, + { + "epoch": 1.38, + "learning_rate": 4.1412111468381565e-05, + "loss": 0.1837, + "step": 5133 + }, + { + "epoch": 1.38, + "learning_rate": 4.141043676312969e-05, + "loss": 0.167, + "step": 5134 + }, + { + "epoch": 1.38, + "learning_rate": 4.1408762057877814e-05, + "loss": 0.2086, + "step": 5135 + }, + { + "epoch": 1.38, + "learning_rate": 4.140708735262594e-05, + "loss": 0.1437, + "step": 5136 + }, + { + "epoch": 1.38, + "learning_rate": 4.140541264737406e-05, + "loss": 0.1573, + "step": 5137 + }, + { + "epoch": 1.38, + "learning_rate": 4.1403737942122186e-05, + "loss": 0.2112, + "step": 5138 + }, + { + "epoch": 1.38, + "learning_rate": 4.140206323687032e-05, + "loss": 0.151, + "step": 5139 + }, + { + "epoch": 1.38, + "learning_rate": 4.140038853161844e-05, + "loss": 0.2563, + "step": 5140 + }, + { + "epoch": 1.38, + "learning_rate": 4.1398713826366566e-05, + "loss": 0.1291, + "step": 5141 + }, + { + "epoch": 1.38, + "learning_rate": 4.139703912111468e-05, + "loss": 0.2654, + "step": 5142 + }, + { + "epoch": 1.38, + "learning_rate": 4.139536441586281e-05, + "loss": 0.1205, + "step": 5143 + }, + { + "epoch": 1.38, + "learning_rate": 4.139368971061093e-05, + "loss": 0.2837, + "step": 5144 + }, + { + "epoch": 1.38, + "learning_rate": 4.1392015005359056e-05, + "loss": 0.1444, + "step": 5145 + }, + { + "epoch": 1.38, + "learning_rate": 4.139034030010718e-05, + "loss": 0.1369, + "step": 5146 + }, + { + "epoch": 1.38, + "learning_rate": 4.138866559485531e-05, + "loss": 0.2168, + "step": 5147 + }, + { + "epoch": 1.38, + "learning_rate": 4.1386990889603435e-05, + "loss": 0.1894, + "step": 5148 + }, + { + "epoch": 1.38, + "learning_rate": 4.138531618435156e-05, + "loss": 0.1902, + "step": 5149 + }, + { + "epoch": 1.38, + "learning_rate": 4.1383641479099684e-05, + "loss": 0.1733, + "step": 5150 + }, + { + "epoch": 1.38, + "learning_rate": 4.13819667738478e-05, + "loss": 0.1325, + "step": 5151 + }, + { + "epoch": 1.38, + "learning_rate": 4.1380292068595925e-05, + "loss": 0.1407, + "step": 5152 + }, + { + "epoch": 1.38, + "learning_rate": 4.137861736334405e-05, + "loss": 0.1426, + "step": 5153 + }, + { + "epoch": 1.38, + "learning_rate": 4.1376942658092174e-05, + "loss": 0.131, + "step": 5154 + }, + { + "epoch": 1.38, + "learning_rate": 4.1375267952840305e-05, + "loss": 0.2957, + "step": 5155 + }, + { + "epoch": 1.38, + "learning_rate": 4.137359324758843e-05, + "loss": 0.2256, + "step": 5156 + }, + { + "epoch": 1.38, + "learning_rate": 4.137191854233655e-05, + "loss": 0.1481, + "step": 5157 + }, + { + "epoch": 1.38, + "learning_rate": 4.137024383708468e-05, + "loss": 0.1657, + "step": 5158 + }, + { + "epoch": 1.38, + "learning_rate": 4.13685691318328e-05, + "loss": 0.198, + "step": 5159 + }, + { + "epoch": 1.38, + "learning_rate": 4.136689442658092e-05, + "loss": 0.1367, + "step": 5160 + }, + { + "epoch": 1.38, + "learning_rate": 4.136521972132904e-05, + "loss": 0.1415, + "step": 5161 + }, + { + "epoch": 1.38, + "learning_rate": 4.1363545016077174e-05, + "loss": 0.3731, + "step": 5162 + }, + { + "epoch": 1.38, + "learning_rate": 4.13618703108253e-05, + "loss": 0.1963, + "step": 5163 + }, + { + "epoch": 1.38, + "learning_rate": 4.136019560557342e-05, + "loss": 0.1565, + "step": 5164 + }, + { + "epoch": 1.38, + "learning_rate": 4.135852090032155e-05, + "loss": 0.3358, + "step": 5165 + }, + { + "epoch": 1.38, + "learning_rate": 4.135684619506967e-05, + "loss": 0.1416, + "step": 5166 + }, + { + "epoch": 1.38, + "learning_rate": 4.1355171489817795e-05, + "loss": 0.1519, + "step": 5167 + }, + { + "epoch": 1.38, + "learning_rate": 4.135349678456592e-05, + "loss": 0.1859, + "step": 5168 + }, + { + "epoch": 1.39, + "learning_rate": 4.1351822079314044e-05, + "loss": 0.148, + "step": 5169 + }, + { + "epoch": 1.39, + "learning_rate": 4.135014737406217e-05, + "loss": 0.371, + "step": 5170 + }, + { + "epoch": 1.39, + "learning_rate": 4.134847266881029e-05, + "loss": 0.1614, + "step": 5171 + }, + { + "epoch": 1.39, + "learning_rate": 4.1346797963558416e-05, + "loss": 0.1206, + "step": 5172 + }, + { + "epoch": 1.39, + "learning_rate": 4.134512325830654e-05, + "loss": 0.201, + "step": 5173 + }, + { + "epoch": 1.39, + "learning_rate": 4.1343448553054665e-05, + "loss": 0.1554, + "step": 5174 + }, + { + "epoch": 1.39, + "learning_rate": 4.134177384780279e-05, + "loss": 0.1217, + "step": 5175 + }, + { + "epoch": 1.39, + "learning_rate": 4.134009914255091e-05, + "loss": 0.2305, + "step": 5176 + }, + { + "epoch": 1.39, + "learning_rate": 4.133842443729904e-05, + "loss": 0.2598, + "step": 5177 + }, + { + "epoch": 1.39, + "learning_rate": 4.133674973204716e-05, + "loss": 0.1161, + "step": 5178 + }, + { + "epoch": 1.39, + "learning_rate": 4.1335075026795286e-05, + "loss": 0.1325, + "step": 5179 + }, + { + "epoch": 1.39, + "learning_rate": 4.133340032154341e-05, + "loss": 0.1893, + "step": 5180 + }, + { + "epoch": 1.39, + "learning_rate": 4.1331725616291534e-05, + "loss": 0.142, + "step": 5181 + }, + { + "epoch": 1.39, + "learning_rate": 4.133005091103966e-05, + "loss": 0.1463, + "step": 5182 + }, + { + "epoch": 1.39, + "learning_rate": 4.132837620578778e-05, + "loss": 0.1428, + "step": 5183 + }, + { + "epoch": 1.39, + "learning_rate": 4.132670150053591e-05, + "loss": 0.1511, + "step": 5184 + }, + { + "epoch": 1.39, + "learning_rate": 4.132502679528403e-05, + "loss": 0.1766, + "step": 5185 + }, + { + "epoch": 1.39, + "learning_rate": 4.1323352090032155e-05, + "loss": 0.1189, + "step": 5186 + }, + { + "epoch": 1.39, + "learning_rate": 4.1321677384780286e-05, + "loss": 0.167, + "step": 5187 + }, + { + "epoch": 1.39, + "learning_rate": 4.1320002679528404e-05, + "loss": 0.1462, + "step": 5188 + }, + { + "epoch": 1.39, + "learning_rate": 4.131832797427653e-05, + "loss": 0.1254, + "step": 5189 + }, + { + "epoch": 1.39, + "learning_rate": 4.131665326902465e-05, + "loss": 0.1573, + "step": 5190 + }, + { + "epoch": 1.39, + "learning_rate": 4.1314978563772776e-05, + "loss": 0.2502, + "step": 5191 + }, + { + "epoch": 1.39, + "learning_rate": 4.13133038585209e-05, + "loss": 0.1342, + "step": 5192 + }, + { + "epoch": 1.39, + "learning_rate": 4.1311629153269025e-05, + "loss": 0.1592, + "step": 5193 + }, + { + "epoch": 1.39, + "learning_rate": 4.130995444801715e-05, + "loss": 0.3006, + "step": 5194 + }, + { + "epoch": 1.39, + "learning_rate": 4.130827974276528e-05, + "loss": 0.1516, + "step": 5195 + }, + { + "epoch": 1.39, + "learning_rate": 4.1306605037513404e-05, + "loss": 0.1277, + "step": 5196 + }, + { + "epoch": 1.39, + "learning_rate": 4.130493033226153e-05, + "loss": 0.2899, + "step": 5197 + }, + { + "epoch": 1.39, + "learning_rate": 4.1303255627009646e-05, + "loss": 0.118, + "step": 5198 + }, + { + "epoch": 1.39, + "learning_rate": 4.130158092175777e-05, + "loss": 0.2092, + "step": 5199 + }, + { + "epoch": 1.39, + "learning_rate": 4.1299906216505894e-05, + "loss": 0.1413, + "step": 5200 + }, + { + "epoch": 1.39, + "learning_rate": 4.129823151125402e-05, + "loss": 0.1817, + "step": 5201 + }, + { + "epoch": 1.39, + "learning_rate": 4.129655680600214e-05, + "loss": 0.2742, + "step": 5202 + }, + { + "epoch": 1.39, + "learning_rate": 4.1294882100750274e-05, + "loss": 0.1214, + "step": 5203 + }, + { + "epoch": 1.39, + "learning_rate": 4.12932073954984e-05, + "loss": 0.1284, + "step": 5204 + }, + { + "epoch": 1.39, + "learning_rate": 4.129153269024652e-05, + "loss": 0.1444, + "step": 5205 + }, + { + "epoch": 1.39, + "learning_rate": 4.1289857984994646e-05, + "loss": 0.1498, + "step": 5206 + }, + { + "epoch": 1.4, + "learning_rate": 4.1288183279742764e-05, + "loss": 0.2473, + "step": 5207 + }, + { + "epoch": 1.4, + "learning_rate": 4.128650857449089e-05, + "loss": 0.1917, + "step": 5208 + }, + { + "epoch": 1.4, + "learning_rate": 4.128483386923901e-05, + "loss": 0.1993, + "step": 5209 + }, + { + "epoch": 1.4, + "learning_rate": 4.1283159163987136e-05, + "loss": 0.1735, + "step": 5210 + }, + { + "epoch": 1.4, + "learning_rate": 4.128148445873527e-05, + "loss": 0.1639, + "step": 5211 + }, + { + "epoch": 1.4, + "learning_rate": 4.127980975348339e-05, + "loss": 0.1754, + "step": 5212 + }, + { + "epoch": 1.4, + "learning_rate": 4.1278135048231516e-05, + "loss": 0.1712, + "step": 5213 + }, + { + "epoch": 1.4, + "learning_rate": 4.127646034297964e-05, + "loss": 0.137, + "step": 5214 + }, + { + "epoch": 1.4, + "learning_rate": 4.1274785637727764e-05, + "loss": 0.1466, + "step": 5215 + }, + { + "epoch": 1.4, + "learning_rate": 4.127311093247588e-05, + "loss": 0.2088, + "step": 5216 + }, + { + "epoch": 1.4, + "learning_rate": 4.1271436227224006e-05, + "loss": 0.1511, + "step": 5217 + }, + { + "epoch": 1.4, + "learning_rate": 4.126976152197213e-05, + "loss": 0.2524, + "step": 5218 + }, + { + "epoch": 1.4, + "learning_rate": 4.126808681672026e-05, + "loss": 0.247, + "step": 5219 + }, + { + "epoch": 1.4, + "learning_rate": 4.1266412111468385e-05, + "loss": 0.1403, + "step": 5220 + }, + { + "epoch": 1.4, + "learning_rate": 4.126473740621651e-05, + "loss": 0.2108, + "step": 5221 + }, + { + "epoch": 1.4, + "learning_rate": 4.1263062700964634e-05, + "loss": 0.2088, + "step": 5222 + }, + { + "epoch": 1.4, + "learning_rate": 4.126138799571276e-05, + "loss": 0.149, + "step": 5223 + }, + { + "epoch": 1.4, + "learning_rate": 4.125971329046088e-05, + "loss": 0.1258, + "step": 5224 + }, + { + "epoch": 1.4, + "learning_rate": 4.1258038585209006e-05, + "loss": 0.1749, + "step": 5225 + }, + { + "epoch": 1.4, + "learning_rate": 4.125636387995713e-05, + "loss": 0.1474, + "step": 5226 + }, + { + "epoch": 1.4, + "learning_rate": 4.1254689174705255e-05, + "loss": 0.2448, + "step": 5227 + }, + { + "epoch": 1.4, + "learning_rate": 4.125301446945338e-05, + "loss": 0.2781, + "step": 5228 + }, + { + "epoch": 1.4, + "learning_rate": 4.12513397642015e-05, + "loss": 0.202, + "step": 5229 + }, + { + "epoch": 1.4, + "learning_rate": 4.124966505894963e-05, + "loss": 0.1401, + "step": 5230 + }, + { + "epoch": 1.4, + "learning_rate": 4.124799035369775e-05, + "loss": 0.343, + "step": 5231 + }, + { + "epoch": 1.4, + "learning_rate": 4.1246315648445876e-05, + "loss": 0.1175, + "step": 5232 + }, + { + "epoch": 1.4, + "learning_rate": 4.1244640943194e-05, + "loss": 0.1703, + "step": 5233 + }, + { + "epoch": 1.4, + "learning_rate": 4.1242966237942124e-05, + "loss": 0.1589, + "step": 5234 + }, + { + "epoch": 1.4, + "learning_rate": 4.124129153269025e-05, + "loss": 0.1681, + "step": 5235 + }, + { + "epoch": 1.4, + "learning_rate": 4.123961682743837e-05, + "loss": 0.1522, + "step": 5236 + }, + { + "epoch": 1.4, + "learning_rate": 4.12379421221865e-05, + "loss": 0.1332, + "step": 5237 + }, + { + "epoch": 1.4, + "learning_rate": 4.123626741693462e-05, + "loss": 0.2075, + "step": 5238 + }, + { + "epoch": 1.4, + "learning_rate": 4.1234592711682745e-05, + "loss": 0.147, + "step": 5239 + }, + { + "epoch": 1.4, + "learning_rate": 4.123291800643087e-05, + "loss": 0.1345, + "step": 5240 + }, + { + "epoch": 1.4, + "learning_rate": 4.1231243301178994e-05, + "loss": 0.1568, + "step": 5241 + }, + { + "epoch": 1.4, + "learning_rate": 4.122956859592712e-05, + "loss": 0.154, + "step": 5242 + }, + { + "epoch": 1.4, + "learning_rate": 4.122789389067524e-05, + "loss": 0.137, + "step": 5243 + }, + { + "epoch": 1.41, + "learning_rate": 4.1226219185423366e-05, + "loss": 0.1792, + "step": 5244 + }, + { + "epoch": 1.41, + "learning_rate": 4.122454448017149e-05, + "loss": 0.3374, + "step": 5245 + }, + { + "epoch": 1.41, + "learning_rate": 4.1222869774919615e-05, + "loss": 0.1335, + "step": 5246 + }, + { + "epoch": 1.41, + "learning_rate": 4.122119506966774e-05, + "loss": 0.1466, + "step": 5247 + }, + { + "epoch": 1.41, + "learning_rate": 4.121952036441586e-05, + "loss": 0.3875, + "step": 5248 + }, + { + "epoch": 1.41, + "learning_rate": 4.121784565916399e-05, + "loss": 0.1282, + "step": 5249 + }, + { + "epoch": 1.41, + "learning_rate": 4.121617095391211e-05, + "loss": 0.2546, + "step": 5250 + }, + { + "epoch": 1.41, + "learning_rate": 4.121449624866024e-05, + "loss": 0.1739, + "step": 5251 + }, + { + "epoch": 1.41, + "learning_rate": 4.121282154340837e-05, + "loss": 0.1362, + "step": 5252 + }, + { + "epoch": 1.41, + "learning_rate": 4.121114683815649e-05, + "loss": 0.1458, + "step": 5253 + }, + { + "epoch": 1.41, + "learning_rate": 4.120947213290461e-05, + "loss": 0.1489, + "step": 5254 + }, + { + "epoch": 1.41, + "learning_rate": 4.120779742765273e-05, + "loss": 0.122, + "step": 5255 + }, + { + "epoch": 1.41, + "learning_rate": 4.120612272240086e-05, + "loss": 0.1348, + "step": 5256 + }, + { + "epoch": 1.41, + "learning_rate": 4.120444801714898e-05, + "loss": 0.137, + "step": 5257 + }, + { + "epoch": 1.41, + "learning_rate": 4.1202773311897105e-05, + "loss": 0.1708, + "step": 5258 + }, + { + "epoch": 1.41, + "learning_rate": 4.1201098606645236e-05, + "loss": 0.1336, + "step": 5259 + }, + { + "epoch": 1.41, + "learning_rate": 4.119942390139336e-05, + "loss": 0.1581, + "step": 5260 + }, + { + "epoch": 1.41, + "learning_rate": 4.1197749196141485e-05, + "loss": 0.1425, + "step": 5261 + }, + { + "epoch": 1.41, + "learning_rate": 4.119607449088961e-05, + "loss": 0.196, + "step": 5262 + }, + { + "epoch": 1.41, + "learning_rate": 4.1194399785637726e-05, + "loss": 0.2761, + "step": 5263 + }, + { + "epoch": 1.41, + "learning_rate": 4.119272508038585e-05, + "loss": 0.2126, + "step": 5264 + }, + { + "epoch": 1.41, + "learning_rate": 4.1191050375133975e-05, + "loss": 0.1334, + "step": 5265 + }, + { + "epoch": 1.41, + "learning_rate": 4.11893756698821e-05, + "loss": 0.2514, + "step": 5266 + }, + { + "epoch": 1.41, + "learning_rate": 4.118770096463023e-05, + "loss": 0.1278, + "step": 5267 + }, + { + "epoch": 1.41, + "learning_rate": 4.1186026259378354e-05, + "loss": 0.1598, + "step": 5268 + }, + { + "epoch": 1.41, + "learning_rate": 4.118435155412648e-05, + "loss": 0.1414, + "step": 5269 + }, + { + "epoch": 1.41, + "learning_rate": 4.11826768488746e-05, + "loss": 0.262, + "step": 5270 + }, + { + "epoch": 1.41, + "learning_rate": 4.118100214362273e-05, + "loss": 0.139, + "step": 5271 + }, + { + "epoch": 1.41, + "learning_rate": 4.1179327438370844e-05, + "loss": 0.1162, + "step": 5272 + }, + { + "epoch": 1.41, + "learning_rate": 4.117765273311897e-05, + "loss": 0.1274, + "step": 5273 + }, + { + "epoch": 1.41, + "learning_rate": 4.117597802786709e-05, + "loss": 0.1458, + "step": 5274 + }, + { + "epoch": 1.41, + "learning_rate": 4.1174303322615224e-05, + "loss": 0.2671, + "step": 5275 + }, + { + "epoch": 1.41, + "learning_rate": 4.117262861736335e-05, + "loss": 0.1438, + "step": 5276 + }, + { + "epoch": 1.41, + "learning_rate": 4.117095391211147e-05, + "loss": 0.1469, + "step": 5277 + }, + { + "epoch": 1.41, + "learning_rate": 4.1169279206859596e-05, + "loss": 0.1468, + "step": 5278 + }, + { + "epoch": 1.41, + "learning_rate": 4.116760450160772e-05, + "loss": 0.1355, + "step": 5279 + }, + { + "epoch": 1.41, + "learning_rate": 4.1165929796355845e-05, + "loss": 0.1322, + "step": 5280 + }, + { + "epoch": 1.42, + "learning_rate": 4.116425509110397e-05, + "loss": 0.1451, + "step": 5281 + }, + { + "epoch": 1.42, + "learning_rate": 4.116258038585209e-05, + "loss": 0.2277, + "step": 5282 + }, + { + "epoch": 1.42, + "learning_rate": 4.116090568060022e-05, + "loss": 0.1701, + "step": 5283 + }, + { + "epoch": 1.42, + "learning_rate": 4.115923097534834e-05, + "loss": 0.1079, + "step": 5284 + }, + { + "epoch": 1.42, + "learning_rate": 4.1157556270096466e-05, + "loss": 0.1497, + "step": 5285 + }, + { + "epoch": 1.42, + "learning_rate": 4.115588156484459e-05, + "loss": 0.1409, + "step": 5286 + }, + { + "epoch": 1.42, + "learning_rate": 4.1154206859592714e-05, + "loss": 0.1944, + "step": 5287 + }, + { + "epoch": 1.42, + "learning_rate": 4.115253215434084e-05, + "loss": 0.1296, + "step": 5288 + }, + { + "epoch": 1.42, + "learning_rate": 4.115085744908896e-05, + "loss": 0.1291, + "step": 5289 + }, + { + "epoch": 1.42, + "learning_rate": 4.114918274383709e-05, + "loss": 0.3173, + "step": 5290 + }, + { + "epoch": 1.42, + "learning_rate": 4.114750803858521e-05, + "loss": 0.1539, + "step": 5291 + }, + { + "epoch": 1.42, + "learning_rate": 4.1145833333333335e-05, + "loss": 0.1537, + "step": 5292 + }, + { + "epoch": 1.42, + "learning_rate": 4.114415862808146e-05, + "loss": 0.1296, + "step": 5293 + }, + { + "epoch": 1.42, + "learning_rate": 4.1142483922829584e-05, + "loss": 0.152, + "step": 5294 + }, + { + "epoch": 1.42, + "learning_rate": 4.114080921757771e-05, + "loss": 0.1336, + "step": 5295 + }, + { + "epoch": 1.42, + "learning_rate": 4.113913451232583e-05, + "loss": 0.2073, + "step": 5296 + }, + { + "epoch": 1.42, + "learning_rate": 4.1137459807073956e-05, + "loss": 0.1307, + "step": 5297 + }, + { + "epoch": 1.42, + "learning_rate": 4.113578510182208e-05, + "loss": 0.1546, + "step": 5298 + }, + { + "epoch": 1.42, + "learning_rate": 4.1134110396570205e-05, + "loss": 0.144, + "step": 5299 + }, + { + "epoch": 1.42, + "learning_rate": 4.113243569131833e-05, + "loss": 0.145, + "step": 5300 + }, + { + "epoch": 1.42, + "learning_rate": 4.113076098606645e-05, + "loss": 0.1365, + "step": 5301 + }, + { + "epoch": 1.42, + "learning_rate": 4.112908628081458e-05, + "loss": 0.1322, + "step": 5302 + }, + { + "epoch": 1.42, + "learning_rate": 4.11274115755627e-05, + "loss": 0.2496, + "step": 5303 + }, + { + "epoch": 1.42, + "learning_rate": 4.1125736870310826e-05, + "loss": 0.1429, + "step": 5304 + }, + { + "epoch": 1.42, + "learning_rate": 4.112406216505895e-05, + "loss": 0.1293, + "step": 5305 + }, + { + "epoch": 1.42, + "learning_rate": 4.1122387459807074e-05, + "loss": 0.1933, + "step": 5306 + }, + { + "epoch": 1.42, + "learning_rate": 4.1120712754555205e-05, + "loss": 0.2145, + "step": 5307 + }, + { + "epoch": 1.42, + "learning_rate": 4.111903804930333e-05, + "loss": 0.276, + "step": 5308 + }, + { + "epoch": 1.42, + "learning_rate": 4.1117363344051453e-05, + "loss": 0.19, + "step": 5309 + }, + { + "epoch": 1.42, + "learning_rate": 4.111568863879957e-05, + "loss": 0.1642, + "step": 5310 + }, + { + "epoch": 1.42, + "learning_rate": 4.1114013933547695e-05, + "loss": 0.1499, + "step": 5311 + }, + { + "epoch": 1.42, + "learning_rate": 4.111233922829582e-05, + "loss": 0.1345, + "step": 5312 + }, + { + "epoch": 1.42, + "learning_rate": 4.1110664523043943e-05, + "loss": 0.1658, + "step": 5313 + }, + { + "epoch": 1.42, + "learning_rate": 4.110898981779207e-05, + "loss": 0.1442, + "step": 5314 + }, + { + "epoch": 1.42, + "learning_rate": 4.11073151125402e-05, + "loss": 0.1297, + "step": 5315 + }, + { + "epoch": 1.42, + "learning_rate": 4.110564040728832e-05, + "loss": 0.1244, + "step": 5316 + }, + { + "epoch": 1.42, + "learning_rate": 4.110396570203645e-05, + "loss": 0.1758, + "step": 5317 + }, + { + "epoch": 1.42, + "learning_rate": 4.110229099678457e-05, + "loss": 0.2973, + "step": 5318 + }, + { + "epoch": 1.43, + "learning_rate": 4.110061629153269e-05, + "loss": 0.3292, + "step": 5319 + }, + { + "epoch": 1.43, + "learning_rate": 4.109894158628081e-05, + "loss": 0.1771, + "step": 5320 + }, + { + "epoch": 1.43, + "learning_rate": 4.109726688102894e-05, + "loss": 0.1452, + "step": 5321 + }, + { + "epoch": 1.43, + "learning_rate": 4.109559217577706e-05, + "loss": 0.1471, + "step": 5322 + }, + { + "epoch": 1.43, + "learning_rate": 4.109391747052519e-05, + "loss": 0.1284, + "step": 5323 + }, + { + "epoch": 1.43, + "learning_rate": 4.1092242765273317e-05, + "loss": 0.1267, + "step": 5324 + }, + { + "epoch": 1.43, + "learning_rate": 4.109056806002144e-05, + "loss": 0.1799, + "step": 5325 + }, + { + "epoch": 1.43, + "learning_rate": 4.1088893354769565e-05, + "loss": 0.1143, + "step": 5326 + }, + { + "epoch": 1.43, + "learning_rate": 4.108721864951769e-05, + "loss": 0.1287, + "step": 5327 + }, + { + "epoch": 1.43, + "learning_rate": 4.108554394426581e-05, + "loss": 0.2016, + "step": 5328 + }, + { + "epoch": 1.43, + "learning_rate": 4.108386923901393e-05, + "loss": 0.1327, + "step": 5329 + }, + { + "epoch": 1.43, + "learning_rate": 4.1082194533762055e-05, + "loss": 0.1412, + "step": 5330 + }, + { + "epoch": 1.43, + "learning_rate": 4.1080519828510186e-05, + "loss": 0.1453, + "step": 5331 + }, + { + "epoch": 1.43, + "learning_rate": 4.107884512325831e-05, + "loss": 0.1396, + "step": 5332 + }, + { + "epoch": 1.43, + "learning_rate": 4.1077170418006434e-05, + "loss": 0.1487, + "step": 5333 + }, + { + "epoch": 1.43, + "learning_rate": 4.107549571275456e-05, + "loss": 0.2177, + "step": 5334 + }, + { + "epoch": 1.43, + "learning_rate": 4.107382100750268e-05, + "loss": 0.2576, + "step": 5335 + }, + { + "epoch": 1.43, + "learning_rate": 4.107214630225081e-05, + "loss": 0.1335, + "step": 5336 + }, + { + "epoch": 1.43, + "learning_rate": 4.107047159699893e-05, + "loss": 0.1572, + "step": 5337 + }, + { + "epoch": 1.43, + "learning_rate": 4.1068796891747056e-05, + "loss": 0.1244, + "step": 5338 + }, + { + "epoch": 1.43, + "learning_rate": 4.106712218649518e-05, + "loss": 0.1339, + "step": 5339 + }, + { + "epoch": 1.43, + "learning_rate": 4.1065447481243304e-05, + "loss": 0.3235, + "step": 5340 + }, + { + "epoch": 1.43, + "learning_rate": 4.106377277599143e-05, + "loss": 0.1243, + "step": 5341 + }, + { + "epoch": 1.43, + "learning_rate": 4.106209807073955e-05, + "loss": 0.1387, + "step": 5342 + }, + { + "epoch": 1.43, + "learning_rate": 4.1060423365487677e-05, + "loss": 0.1224, + "step": 5343 + }, + { + "epoch": 1.43, + "learning_rate": 4.10587486602358e-05, + "loss": 0.1513, + "step": 5344 + }, + { + "epoch": 1.43, + "learning_rate": 4.1057073954983925e-05, + "loss": 0.1441, + "step": 5345 + }, + { + "epoch": 1.43, + "learning_rate": 4.105539924973205e-05, + "loss": 0.1307, + "step": 5346 + }, + { + "epoch": 1.43, + "learning_rate": 4.1053724544480173e-05, + "loss": 0.1377, + "step": 5347 + }, + { + "epoch": 1.43, + "learning_rate": 4.10520498392283e-05, + "loss": 0.1495, + "step": 5348 + }, + { + "epoch": 1.43, + "learning_rate": 4.105037513397642e-05, + "loss": 0.1367, + "step": 5349 + }, + { + "epoch": 1.43, + "learning_rate": 4.1048700428724546e-05, + "loss": 0.1423, + "step": 5350 + }, + { + "epoch": 1.43, + "learning_rate": 4.104702572347267e-05, + "loss": 0.2386, + "step": 5351 + }, + { + "epoch": 1.43, + "learning_rate": 4.1045351018220794e-05, + "loss": 0.1412, + "step": 5352 + }, + { + "epoch": 1.43, + "learning_rate": 4.104367631296892e-05, + "loss": 0.2059, + "step": 5353 + }, + { + "epoch": 1.43, + "learning_rate": 4.104200160771704e-05, + "loss": 0.1535, + "step": 5354 + }, + { + "epoch": 1.43, + "learning_rate": 4.104032690246517e-05, + "loss": 0.163, + "step": 5355 + }, + { + "epoch": 1.44, + "learning_rate": 4.103865219721329e-05, + "loss": 0.1179, + "step": 5356 + }, + { + "epoch": 1.44, + "learning_rate": 4.1036977491961415e-05, + "loss": 0.1798, + "step": 5357 + }, + { + "epoch": 1.44, + "learning_rate": 4.103530278670954e-05, + "loss": 0.1768, + "step": 5358 + }, + { + "epoch": 1.44, + "learning_rate": 4.1033628081457664e-05, + "loss": 0.1286, + "step": 5359 + }, + { + "epoch": 1.44, + "learning_rate": 4.103195337620579e-05, + "loss": 0.1352, + "step": 5360 + }, + { + "epoch": 1.44, + "learning_rate": 4.103027867095391e-05, + "loss": 0.2411, + "step": 5361 + }, + { + "epoch": 1.44, + "learning_rate": 4.1028603965702037e-05, + "loss": 0.1736, + "step": 5362 + }, + { + "epoch": 1.44, + "learning_rate": 4.102692926045017e-05, + "loss": 0.1368, + "step": 5363 + }, + { + "epoch": 1.44, + "learning_rate": 4.102525455519829e-05, + "loss": 0.1832, + "step": 5364 + }, + { + "epoch": 1.44, + "learning_rate": 4.1023579849946416e-05, + "loss": 0.2097, + "step": 5365 + }, + { + "epoch": 1.44, + "learning_rate": 4.102190514469453e-05, + "loss": 0.1423, + "step": 5366 + }, + { + "epoch": 1.44, + "learning_rate": 4.102023043944266e-05, + "loss": 0.122, + "step": 5367 + }, + { + "epoch": 1.44, + "learning_rate": 4.101855573419078e-05, + "loss": 0.1559, + "step": 5368 + }, + { + "epoch": 1.44, + "learning_rate": 4.1016881028938906e-05, + "loss": 0.2479, + "step": 5369 + }, + { + "epoch": 1.44, + "learning_rate": 4.101520632368703e-05, + "loss": 0.1735, + "step": 5370 + }, + { + "epoch": 1.44, + "learning_rate": 4.101353161843516e-05, + "loss": 0.1295, + "step": 5371 + }, + { + "epoch": 1.44, + "learning_rate": 4.1011856913183285e-05, + "loss": 0.1424, + "step": 5372 + }, + { + "epoch": 1.44, + "learning_rate": 4.101018220793141e-05, + "loss": 0.1276, + "step": 5373 + }, + { + "epoch": 1.44, + "learning_rate": 4.1008507502679534e-05, + "loss": 0.141, + "step": 5374 + }, + { + "epoch": 1.44, + "learning_rate": 4.100683279742765e-05, + "loss": 0.2841, + "step": 5375 + }, + { + "epoch": 1.44, + "learning_rate": 4.1005158092175775e-05, + "loss": 0.1516, + "step": 5376 + }, + { + "epoch": 1.44, + "learning_rate": 4.10034833869239e-05, + "loss": 0.1944, + "step": 5377 + }, + { + "epoch": 1.44, + "learning_rate": 4.1001808681672024e-05, + "loss": 0.255, + "step": 5378 + }, + { + "epoch": 1.44, + "learning_rate": 4.1000133976420155e-05, + "loss": 0.244, + "step": 5379 + }, + { + "epoch": 1.44, + "learning_rate": 4.099845927116828e-05, + "loss": 0.1372, + "step": 5380 + }, + { + "epoch": 1.44, + "learning_rate": 4.09967845659164e-05, + "loss": 0.1267, + "step": 5381 + }, + { + "epoch": 1.44, + "learning_rate": 4.099510986066453e-05, + "loss": 0.1445, + "step": 5382 + }, + { + "epoch": 1.44, + "learning_rate": 4.099343515541265e-05, + "loss": 0.276, + "step": 5383 + }, + { + "epoch": 1.44, + "learning_rate": 4.099176045016077e-05, + "loss": 0.1263, + "step": 5384 + }, + { + "epoch": 1.44, + "learning_rate": 4.099008574490889e-05, + "loss": 0.1344, + "step": 5385 + }, + { + "epoch": 1.44, + "learning_rate": 4.098841103965702e-05, + "loss": 0.1487, + "step": 5386 + }, + { + "epoch": 1.44, + "learning_rate": 4.098673633440515e-05, + "loss": 0.1214, + "step": 5387 + }, + { + "epoch": 1.44, + "learning_rate": 4.098506162915327e-05, + "loss": 0.1936, + "step": 5388 + }, + { + "epoch": 1.44, + "learning_rate": 4.09833869239014e-05, + "loss": 0.1257, + "step": 5389 + }, + { + "epoch": 1.44, + "learning_rate": 4.098171221864952e-05, + "loss": 0.1495, + "step": 5390 + }, + { + "epoch": 1.44, + "learning_rate": 4.0980037513397645e-05, + "loss": 0.2822, + "step": 5391 + }, + { + "epoch": 1.44, + "learning_rate": 4.097836280814577e-05, + "loss": 0.2205, + "step": 5392 + }, + { + "epoch": 1.45, + "learning_rate": 4.0976688102893894e-05, + "loss": 0.1341, + "step": 5393 + }, + { + "epoch": 1.45, + "learning_rate": 4.097501339764202e-05, + "loss": 0.1711, + "step": 5394 + }, + { + "epoch": 1.45, + "learning_rate": 4.097333869239014e-05, + "loss": 0.1594, + "step": 5395 + }, + { + "epoch": 1.45, + "learning_rate": 4.0971663987138266e-05, + "loss": 0.1365, + "step": 5396 + }, + { + "epoch": 1.45, + "learning_rate": 4.096998928188639e-05, + "loss": 0.2995, + "step": 5397 + }, + { + "epoch": 1.45, + "learning_rate": 4.0968314576634515e-05, + "loss": 0.2228, + "step": 5398 + }, + { + "epoch": 1.45, + "learning_rate": 4.096663987138264e-05, + "loss": 0.2378, + "step": 5399 + }, + { + "epoch": 1.45, + "learning_rate": 4.096496516613076e-05, + "loss": 0.1337, + "step": 5400 + }, + { + "epoch": 1.45, + "learning_rate": 4.096329046087889e-05, + "loss": 0.1442, + "step": 5401 + }, + { + "epoch": 1.45, + "learning_rate": 4.096161575562701e-05, + "loss": 0.3026, + "step": 5402 + }, + { + "epoch": 1.45, + "learning_rate": 4.0959941050375136e-05, + "loss": 0.1489, + "step": 5403 + }, + { + "epoch": 1.45, + "learning_rate": 4.095826634512326e-05, + "loss": 0.1484, + "step": 5404 + }, + { + "epoch": 1.45, + "learning_rate": 4.0956591639871384e-05, + "loss": 0.197, + "step": 5405 + }, + { + "epoch": 1.45, + "learning_rate": 4.095491693461951e-05, + "loss": 0.1487, + "step": 5406 + }, + { + "epoch": 1.45, + "learning_rate": 4.095324222936763e-05, + "loss": 0.139, + "step": 5407 + }, + { + "epoch": 1.45, + "learning_rate": 4.095156752411576e-05, + "loss": 0.2017, + "step": 5408 + }, + { + "epoch": 1.45, + "learning_rate": 4.094989281886388e-05, + "loss": 0.1377, + "step": 5409 + }, + { + "epoch": 1.45, + "learning_rate": 4.0948218113612005e-05, + "loss": 0.1202, + "step": 5410 + }, + { + "epoch": 1.45, + "learning_rate": 4.094654340836013e-05, + "loss": 0.1608, + "step": 5411 + }, + { + "epoch": 1.45, + "learning_rate": 4.0944868703108254e-05, + "loss": 0.2318, + "step": 5412 + }, + { + "epoch": 1.45, + "learning_rate": 4.094319399785638e-05, + "loss": 0.11, + "step": 5413 + }, + { + "epoch": 1.45, + "learning_rate": 4.09415192926045e-05, + "loss": 0.1425, + "step": 5414 + }, + { + "epoch": 1.45, + "learning_rate": 4.0939844587352626e-05, + "loss": 0.2147, + "step": 5415 + }, + { + "epoch": 1.45, + "learning_rate": 4.093816988210075e-05, + "loss": 0.2206, + "step": 5416 + }, + { + "epoch": 1.45, + "learning_rate": 4.0936495176848875e-05, + "loss": 0.1862, + "step": 5417 + }, + { + "epoch": 1.45, + "learning_rate": 4.0934820471597e-05, + "loss": 0.1238, + "step": 5418 + }, + { + "epoch": 1.45, + "learning_rate": 4.093314576634513e-05, + "loss": 0.1674, + "step": 5419 + }, + { + "epoch": 1.45, + "learning_rate": 4.0931471061093254e-05, + "loss": 0.1406, + "step": 5420 + }, + { + "epoch": 1.45, + "learning_rate": 4.092979635584138e-05, + "loss": 0.1792, + "step": 5421 + }, + { + "epoch": 1.45, + "learning_rate": 4.0928121650589496e-05, + "loss": 0.1597, + "step": 5422 + }, + { + "epoch": 1.45, + "learning_rate": 4.092644694533762e-05, + "loss": 0.2436, + "step": 5423 + }, + { + "epoch": 1.45, + "learning_rate": 4.0924772240085744e-05, + "loss": 0.1324, + "step": 5424 + }, + { + "epoch": 1.45, + "learning_rate": 4.092309753483387e-05, + "loss": 0.1777, + "step": 5425 + }, + { + "epoch": 1.45, + "learning_rate": 4.092142282958199e-05, + "loss": 0.1508, + "step": 5426 + }, + { + "epoch": 1.45, + "learning_rate": 4.0919748124330124e-05, + "loss": 0.1261, + "step": 5427 + }, + { + "epoch": 1.45, + "learning_rate": 4.091807341907825e-05, + "loss": 0.1133, + "step": 5428 + }, + { + "epoch": 1.45, + "learning_rate": 4.091639871382637e-05, + "loss": 0.1363, + "step": 5429 + }, + { + "epoch": 1.45, + "learning_rate": 4.0914724008574496e-05, + "loss": 0.1264, + "step": 5430 + }, + { + "epoch": 1.46, + "learning_rate": 4.0913049303322614e-05, + "loss": 0.1472, + "step": 5431 + }, + { + "epoch": 1.46, + "learning_rate": 4.091137459807074e-05, + "loss": 0.1154, + "step": 5432 + }, + { + "epoch": 1.46, + "learning_rate": 4.090969989281886e-05, + "loss": 0.1328, + "step": 5433 + }, + { + "epoch": 1.46, + "learning_rate": 4.0908025187566986e-05, + "loss": 0.1672, + "step": 5434 + }, + { + "epoch": 1.46, + "learning_rate": 4.090635048231512e-05, + "loss": 0.2507, + "step": 5435 + }, + { + "epoch": 1.46, + "learning_rate": 4.090467577706324e-05, + "loss": 0.2881, + "step": 5436 + }, + { + "epoch": 1.46, + "learning_rate": 4.0903001071811366e-05, + "loss": 0.1855, + "step": 5437 + }, + { + "epoch": 1.46, + "learning_rate": 4.090132636655949e-05, + "loss": 0.1648, + "step": 5438 + }, + { + "epoch": 1.46, + "learning_rate": 4.0899651661307614e-05, + "loss": 0.2884, + "step": 5439 + }, + { + "epoch": 1.46, + "learning_rate": 4.089797695605573e-05, + "loss": 0.1306, + "step": 5440 + }, + { + "epoch": 1.46, + "learning_rate": 4.0896302250803856e-05, + "loss": 0.2324, + "step": 5441 + }, + { + "epoch": 1.46, + "learning_rate": 4.089462754555198e-05, + "loss": 0.1664, + "step": 5442 + }, + { + "epoch": 1.46, + "learning_rate": 4.089295284030011e-05, + "loss": 0.2914, + "step": 5443 + }, + { + "epoch": 1.46, + "learning_rate": 4.0891278135048235e-05, + "loss": 0.1445, + "step": 5444 + }, + { + "epoch": 1.46, + "learning_rate": 4.088960342979636e-05, + "loss": 0.1483, + "step": 5445 + }, + { + "epoch": 1.46, + "learning_rate": 4.0887928724544484e-05, + "loss": 0.1422, + "step": 5446 + }, + { + "epoch": 1.46, + "learning_rate": 4.088625401929261e-05, + "loss": 0.1248, + "step": 5447 + }, + { + "epoch": 1.46, + "learning_rate": 4.088457931404073e-05, + "loss": 0.3736, + "step": 5448 + }, + { + "epoch": 1.46, + "learning_rate": 4.0882904608788856e-05, + "loss": 0.2242, + "step": 5449 + }, + { + "epoch": 1.46, + "learning_rate": 4.088122990353698e-05, + "loss": 0.1798, + "step": 5450 + }, + { + "epoch": 1.46, + "learning_rate": 4.0879555198285105e-05, + "loss": 0.281, + "step": 5451 + }, + { + "epoch": 1.46, + "learning_rate": 4.087788049303323e-05, + "loss": 0.1313, + "step": 5452 + }, + { + "epoch": 1.46, + "learning_rate": 4.087620578778135e-05, + "loss": 0.233, + "step": 5453 + }, + { + "epoch": 1.46, + "learning_rate": 4.087453108252948e-05, + "loss": 0.1278, + "step": 5454 + }, + { + "epoch": 1.46, + "learning_rate": 4.08728563772776e-05, + "loss": 0.1997, + "step": 5455 + }, + { + "epoch": 1.46, + "learning_rate": 4.0871181672025726e-05, + "loss": 0.2641, + "step": 5456 + }, + { + "epoch": 1.46, + "learning_rate": 4.086950696677385e-05, + "loss": 0.1233, + "step": 5457 + }, + { + "epoch": 1.46, + "learning_rate": 4.0867832261521974e-05, + "loss": 0.1422, + "step": 5458 + }, + { + "epoch": 1.46, + "learning_rate": 4.08661575562701e-05, + "loss": 0.123, + "step": 5459 + }, + { + "epoch": 1.46, + "learning_rate": 4.086448285101822e-05, + "loss": 0.4441, + "step": 5460 + }, + { + "epoch": 1.46, + "learning_rate": 4.086280814576635e-05, + "loss": 0.1422, + "step": 5461 + }, + { + "epoch": 1.46, + "learning_rate": 4.086113344051447e-05, + "loss": 0.1975, + "step": 5462 + }, + { + "epoch": 1.46, + "learning_rate": 4.0859458735262595e-05, + "loss": 0.123, + "step": 5463 + }, + { + "epoch": 1.46, + "learning_rate": 4.085778403001072e-05, + "loss": 0.1482, + "step": 5464 + }, + { + "epoch": 1.46, + "learning_rate": 4.0856109324758844e-05, + "loss": 0.1507, + "step": 5465 + }, + { + "epoch": 1.46, + "learning_rate": 4.085443461950697e-05, + "loss": 0.1462, + "step": 5466 + }, + { + "epoch": 1.46, + "learning_rate": 4.085275991425509e-05, + "loss": 0.131, + "step": 5467 + }, + { + "epoch": 1.47, + "learning_rate": 4.0851085209003216e-05, + "loss": 0.1445, + "step": 5468 + }, + { + "epoch": 1.47, + "learning_rate": 4.084941050375134e-05, + "loss": 0.1726, + "step": 5469 + }, + { + "epoch": 1.47, + "learning_rate": 4.0847735798499465e-05, + "loss": 0.3591, + "step": 5470 + }, + { + "epoch": 1.47, + "learning_rate": 4.084606109324759e-05, + "loss": 0.2674, + "step": 5471 + }, + { + "epoch": 1.47, + "learning_rate": 4.084438638799571e-05, + "loss": 0.1647, + "step": 5472 + }, + { + "epoch": 1.47, + "learning_rate": 4.084271168274384e-05, + "loss": 0.1554, + "step": 5473 + }, + { + "epoch": 1.47, + "learning_rate": 4.084103697749196e-05, + "loss": 0.1354, + "step": 5474 + }, + { + "epoch": 1.47, + "learning_rate": 4.083936227224009e-05, + "loss": 0.1672, + "step": 5475 + }, + { + "epoch": 1.47, + "learning_rate": 4.083768756698822e-05, + "loss": 0.275, + "step": 5476 + }, + { + "epoch": 1.47, + "learning_rate": 4.083601286173634e-05, + "loss": 0.1875, + "step": 5477 + }, + { + "epoch": 1.47, + "learning_rate": 4.083433815648446e-05, + "loss": 0.2268, + "step": 5478 + }, + { + "epoch": 1.47, + "learning_rate": 4.083266345123258e-05, + "loss": 0.1672, + "step": 5479 + }, + { + "epoch": 1.47, + "learning_rate": 4.083098874598071e-05, + "loss": 0.2359, + "step": 5480 + }, + { + "epoch": 1.47, + "learning_rate": 4.082931404072883e-05, + "loss": 0.14, + "step": 5481 + }, + { + "epoch": 1.47, + "learning_rate": 4.0827639335476955e-05, + "loss": 0.1396, + "step": 5482 + }, + { + "epoch": 1.47, + "learning_rate": 4.0825964630225086e-05, + "loss": 0.4657, + "step": 5483 + }, + { + "epoch": 1.47, + "learning_rate": 4.082428992497321e-05, + "loss": 0.1311, + "step": 5484 + }, + { + "epoch": 1.47, + "learning_rate": 4.0822615219721335e-05, + "loss": 0.1667, + "step": 5485 + }, + { + "epoch": 1.47, + "learning_rate": 4.082094051446946e-05, + "loss": 0.1334, + "step": 5486 + }, + { + "epoch": 1.47, + "learning_rate": 4.0819265809217576e-05, + "loss": 0.1225, + "step": 5487 + }, + { + "epoch": 1.47, + "learning_rate": 4.08175911039657e-05, + "loss": 0.1862, + "step": 5488 + }, + { + "epoch": 1.47, + "learning_rate": 4.0815916398713825e-05, + "loss": 0.1912, + "step": 5489 + }, + { + "epoch": 1.47, + "learning_rate": 4.081424169346195e-05, + "loss": 0.1396, + "step": 5490 + }, + { + "epoch": 1.47, + "learning_rate": 4.081256698821008e-05, + "loss": 0.1189, + "step": 5491 + }, + { + "epoch": 1.47, + "learning_rate": 4.0810892282958204e-05, + "loss": 0.128, + "step": 5492 + }, + { + "epoch": 1.47, + "learning_rate": 4.080921757770633e-05, + "loss": 0.1124, + "step": 5493 + }, + { + "epoch": 1.47, + "learning_rate": 4.080754287245445e-05, + "loss": 0.2623, + "step": 5494 + }, + { + "epoch": 1.47, + "learning_rate": 4.080586816720258e-05, + "loss": 0.2132, + "step": 5495 + }, + { + "epoch": 1.47, + "learning_rate": 4.0804193461950694e-05, + "loss": 0.1389, + "step": 5496 + }, + { + "epoch": 1.47, + "learning_rate": 4.080251875669882e-05, + "loss": 0.2691, + "step": 5497 + }, + { + "epoch": 1.47, + "learning_rate": 4.080084405144694e-05, + "loss": 0.1792, + "step": 5498 + }, + { + "epoch": 1.47, + "learning_rate": 4.0799169346195074e-05, + "loss": 0.1604, + "step": 5499 + }, + { + "epoch": 1.47, + "learning_rate": 4.07974946409432e-05, + "loss": 0.1677, + "step": 5500 + }, + { + "epoch": 1.47, + "learning_rate": 4.079581993569132e-05, + "loss": 0.1225, + "step": 5501 + }, + { + "epoch": 1.47, + "learning_rate": 4.0794145230439446e-05, + "loss": 0.2325, + "step": 5502 + }, + { + "epoch": 1.47, + "learning_rate": 4.079247052518757e-05, + "loss": 0.1235, + "step": 5503 + }, + { + "epoch": 1.47, + "learning_rate": 4.0790795819935695e-05, + "loss": 0.1833, + "step": 5504 + }, + { + "epoch": 1.48, + "learning_rate": 4.078912111468382e-05, + "loss": 0.1344, + "step": 5505 + }, + { + "epoch": 1.48, + "learning_rate": 4.078744640943194e-05, + "loss": 0.3261, + "step": 5506 + }, + { + "epoch": 1.48, + "learning_rate": 4.078577170418007e-05, + "loss": 0.1502, + "step": 5507 + }, + { + "epoch": 1.48, + "learning_rate": 4.078409699892819e-05, + "loss": 0.1926, + "step": 5508 + }, + { + "epoch": 1.48, + "learning_rate": 4.0782422293676316e-05, + "loss": 0.136, + "step": 5509 + }, + { + "epoch": 1.48, + "learning_rate": 4.078074758842444e-05, + "loss": 0.1441, + "step": 5510 + }, + { + "epoch": 1.48, + "learning_rate": 4.0779072883172564e-05, + "loss": 0.1415, + "step": 5511 + }, + { + "epoch": 1.48, + "learning_rate": 4.077739817792069e-05, + "loss": 0.1626, + "step": 5512 + }, + { + "epoch": 1.48, + "learning_rate": 4.077572347266881e-05, + "loss": 0.2225, + "step": 5513 + }, + { + "epoch": 1.48, + "learning_rate": 4.077404876741694e-05, + "loss": 0.1183, + "step": 5514 + }, + { + "epoch": 1.48, + "learning_rate": 4.077237406216506e-05, + "loss": 0.1564, + "step": 5515 + }, + { + "epoch": 1.48, + "learning_rate": 4.0770699356913185e-05, + "loss": 0.1649, + "step": 5516 + }, + { + "epoch": 1.48, + "learning_rate": 4.076902465166131e-05, + "loss": 0.1427, + "step": 5517 + }, + { + "epoch": 1.48, + "learning_rate": 4.0767349946409434e-05, + "loss": 0.1299, + "step": 5518 + }, + { + "epoch": 1.48, + "learning_rate": 4.076567524115756e-05, + "loss": 0.1303, + "step": 5519 + }, + { + "epoch": 1.48, + "learning_rate": 4.076400053590568e-05, + "loss": 0.2068, + "step": 5520 + }, + { + "epoch": 1.48, + "learning_rate": 4.0762325830653806e-05, + "loss": 0.1213, + "step": 5521 + }, + { + "epoch": 1.48, + "learning_rate": 4.076065112540193e-05, + "loss": 0.2188, + "step": 5522 + }, + { + "epoch": 1.48, + "learning_rate": 4.0758976420150055e-05, + "loss": 0.1353, + "step": 5523 + }, + { + "epoch": 1.48, + "learning_rate": 4.075730171489818e-05, + "loss": 0.1453, + "step": 5524 + }, + { + "epoch": 1.48, + "learning_rate": 4.07556270096463e-05, + "loss": 0.1386, + "step": 5525 + }, + { + "epoch": 1.48, + "learning_rate": 4.075395230439443e-05, + "loss": 0.2527, + "step": 5526 + }, + { + "epoch": 1.48, + "learning_rate": 4.075227759914255e-05, + "loss": 0.1519, + "step": 5527 + }, + { + "epoch": 1.48, + "learning_rate": 4.0750602893890676e-05, + "loss": 0.172, + "step": 5528 + }, + { + "epoch": 1.48, + "learning_rate": 4.07489281886388e-05, + "loss": 0.1125, + "step": 5529 + }, + { + "epoch": 1.48, + "learning_rate": 4.0747253483386924e-05, + "loss": 0.1539, + "step": 5530 + }, + { + "epoch": 1.48, + "learning_rate": 4.0745578778135055e-05, + "loss": 0.1213, + "step": 5531 + }, + { + "epoch": 1.48, + "learning_rate": 4.074390407288318e-05, + "loss": 0.1466, + "step": 5532 + }, + { + "epoch": 1.48, + "learning_rate": 4.0742229367631303e-05, + "loss": 0.1305, + "step": 5533 + }, + { + "epoch": 1.48, + "learning_rate": 4.074055466237942e-05, + "loss": 0.1716, + "step": 5534 + }, + { + "epoch": 1.48, + "learning_rate": 4.0738879957127545e-05, + "loss": 0.3316, + "step": 5535 + }, + { + "epoch": 1.48, + "learning_rate": 4.073720525187567e-05, + "loss": 0.1297, + "step": 5536 + }, + { + "epoch": 1.48, + "learning_rate": 4.0735530546623794e-05, + "loss": 0.3067, + "step": 5537 + }, + { + "epoch": 1.48, + "learning_rate": 4.073385584137192e-05, + "loss": 0.1576, + "step": 5538 + }, + { + "epoch": 1.48, + "learning_rate": 4.073218113612005e-05, + "loss": 0.1288, + "step": 5539 + }, + { + "epoch": 1.48, + "learning_rate": 4.073050643086817e-05, + "loss": 0.1951, + "step": 5540 + }, + { + "epoch": 1.48, + "learning_rate": 4.07288317256163e-05, + "loss": 0.171, + "step": 5541 + }, + { + "epoch": 1.48, + "learning_rate": 4.072715702036442e-05, + "loss": 0.1193, + "step": 5542 + }, + { + "epoch": 1.49, + "learning_rate": 4.072548231511254e-05, + "loss": 0.1582, + "step": 5543 + }, + { + "epoch": 1.49, + "learning_rate": 4.072380760986066e-05, + "loss": 0.1259, + "step": 5544 + }, + { + "epoch": 1.49, + "learning_rate": 4.072213290460879e-05, + "loss": 0.1205, + "step": 5545 + }, + { + "epoch": 1.49, + "learning_rate": 4.072045819935691e-05, + "loss": 0.1435, + "step": 5546 + }, + { + "epoch": 1.49, + "learning_rate": 4.071878349410504e-05, + "loss": 0.2331, + "step": 5547 + }, + { + "epoch": 1.49, + "learning_rate": 4.0717108788853167e-05, + "loss": 0.183, + "step": 5548 + }, + { + "epoch": 1.49, + "learning_rate": 4.071543408360129e-05, + "loss": 0.1133, + "step": 5549 + }, + { + "epoch": 1.49, + "learning_rate": 4.0713759378349415e-05, + "loss": 0.1208, + "step": 5550 + }, + { + "epoch": 1.49, + "learning_rate": 4.071208467309754e-05, + "loss": 0.269, + "step": 5551 + }, + { + "epoch": 1.49, + "learning_rate": 4.071040996784566e-05, + "loss": 0.2504, + "step": 5552 + }, + { + "epoch": 1.49, + "learning_rate": 4.070873526259378e-05, + "loss": 0.1326, + "step": 5553 + }, + { + "epoch": 1.49, + "learning_rate": 4.0707060557341905e-05, + "loss": 0.1529, + "step": 5554 + }, + { + "epoch": 1.49, + "learning_rate": 4.0705385852090036e-05, + "loss": 0.1786, + "step": 5555 + }, + { + "epoch": 1.49, + "learning_rate": 4.070371114683816e-05, + "loss": 0.1454, + "step": 5556 + }, + { + "epoch": 1.49, + "learning_rate": 4.0702036441586284e-05, + "loss": 0.1356, + "step": 5557 + }, + { + "epoch": 1.49, + "learning_rate": 4.070036173633441e-05, + "loss": 0.135, + "step": 5558 + }, + { + "epoch": 1.49, + "learning_rate": 4.069868703108253e-05, + "loss": 0.129, + "step": 5559 + }, + { + "epoch": 1.49, + "learning_rate": 4.069701232583066e-05, + "loss": 0.1399, + "step": 5560 + }, + { + "epoch": 1.49, + "learning_rate": 4.069533762057878e-05, + "loss": 0.2585, + "step": 5561 + }, + { + "epoch": 1.49, + "learning_rate": 4.0693662915326906e-05, + "loss": 0.15, + "step": 5562 + }, + { + "epoch": 1.49, + "learning_rate": 4.069198821007503e-05, + "loss": 0.1186, + "step": 5563 + }, + { + "epoch": 1.49, + "learning_rate": 4.0690313504823154e-05, + "loss": 0.1504, + "step": 5564 + }, + { + "epoch": 1.49, + "learning_rate": 4.068863879957128e-05, + "loss": 0.1476, + "step": 5565 + }, + { + "epoch": 1.49, + "learning_rate": 4.06869640943194e-05, + "loss": 0.1377, + "step": 5566 + }, + { + "epoch": 1.49, + "learning_rate": 4.0685289389067527e-05, + "loss": 0.1354, + "step": 5567 + }, + { + "epoch": 1.49, + "learning_rate": 4.068361468381565e-05, + "loss": 0.1581, + "step": 5568 + }, + { + "epoch": 1.49, + "learning_rate": 4.0681939978563775e-05, + "loss": 0.1365, + "step": 5569 + }, + { + "epoch": 1.49, + "learning_rate": 4.06802652733119e-05, + "loss": 0.1328, + "step": 5570 + }, + { + "epoch": 1.49, + "learning_rate": 4.0678590568060023e-05, + "loss": 0.1301, + "step": 5571 + }, + { + "epoch": 1.49, + "learning_rate": 4.067691586280815e-05, + "loss": 0.1227, + "step": 5572 + }, + { + "epoch": 1.49, + "learning_rate": 4.067524115755627e-05, + "loss": 0.3987, + "step": 5573 + }, + { + "epoch": 1.49, + "learning_rate": 4.0673566452304396e-05, + "loss": 0.1388, + "step": 5574 + }, + { + "epoch": 1.49, + "learning_rate": 4.067189174705252e-05, + "loss": 0.1595, + "step": 5575 + }, + { + "epoch": 1.49, + "learning_rate": 4.0670217041800644e-05, + "loss": 0.1309, + "step": 5576 + }, + { + "epoch": 1.49, + "learning_rate": 4.066854233654877e-05, + "loss": 0.1638, + "step": 5577 + }, + { + "epoch": 1.49, + "learning_rate": 4.066686763129689e-05, + "loss": 0.151, + "step": 5578 + }, + { + "epoch": 1.49, + "learning_rate": 4.066519292604502e-05, + "loss": 0.1257, + "step": 5579 + }, + { + "epoch": 1.5, + "learning_rate": 4.066351822079314e-05, + "loss": 0.1433, + "step": 5580 + }, + { + "epoch": 1.5, + "learning_rate": 4.0661843515541266e-05, + "loss": 0.1621, + "step": 5581 + }, + { + "epoch": 1.5, + "learning_rate": 4.066016881028939e-05, + "loss": 0.259, + "step": 5582 + }, + { + "epoch": 1.5, + "learning_rate": 4.0658494105037514e-05, + "loss": 0.2273, + "step": 5583 + }, + { + "epoch": 1.5, + "learning_rate": 4.065681939978564e-05, + "loss": 0.3146, + "step": 5584 + }, + { + "epoch": 1.5, + "learning_rate": 4.065514469453376e-05, + "loss": 0.1437, + "step": 5585 + }, + { + "epoch": 1.5, + "learning_rate": 4.0653469989281887e-05, + "loss": 0.1231, + "step": 5586 + }, + { + "epoch": 1.5, + "learning_rate": 4.065179528403002e-05, + "loss": 0.1442, + "step": 5587 + }, + { + "epoch": 1.5, + "learning_rate": 4.065012057877814e-05, + "loss": 0.267, + "step": 5588 + }, + { + "epoch": 1.5, + "learning_rate": 4.0648445873526266e-05, + "loss": 0.1423, + "step": 5589 + }, + { + "epoch": 1.5, + "learning_rate": 4.0646771168274383e-05, + "loss": 0.1554, + "step": 5590 + }, + { + "epoch": 1.5, + "learning_rate": 4.064509646302251e-05, + "loss": 0.2142, + "step": 5591 + }, + { + "epoch": 1.5, + "learning_rate": 4.064342175777063e-05, + "loss": 0.1231, + "step": 5592 + }, + { + "epoch": 1.5, + "learning_rate": 4.0641747052518756e-05, + "loss": 0.1579, + "step": 5593 + }, + { + "epoch": 1.5, + "learning_rate": 4.064007234726688e-05, + "loss": 0.1407, + "step": 5594 + }, + { + "epoch": 1.5, + "learning_rate": 4.063839764201501e-05, + "loss": 0.1484, + "step": 5595 + }, + { + "epoch": 1.5, + "learning_rate": 4.0636722936763135e-05, + "loss": 0.124, + "step": 5596 + }, + { + "epoch": 1.5, + "learning_rate": 4.063504823151126e-05, + "loss": 0.2069, + "step": 5597 + }, + { + "epoch": 1.5, + "learning_rate": 4.0633373526259384e-05, + "loss": 0.1298, + "step": 5598 + }, + { + "epoch": 1.5, + "learning_rate": 4.06316988210075e-05, + "loss": 0.1585, + "step": 5599 + }, + { + "epoch": 1.5, + "learning_rate": 4.0630024115755626e-05, + "loss": 0.1517, + "step": 5600 + }, + { + "epoch": 1.5, + "learning_rate": 4.062834941050375e-05, + "loss": 0.1461, + "step": 5601 + }, + { + "epoch": 1.5, + "learning_rate": 4.0626674705251874e-05, + "loss": 0.1536, + "step": 5602 + }, + { + "epoch": 1.5, + "learning_rate": 4.0625000000000005e-05, + "loss": 0.1178, + "step": 5603 + }, + { + "epoch": 1.5, + "learning_rate": 4.062332529474813e-05, + "loss": 0.1538, + "step": 5604 + }, + { + "epoch": 1.5, + "learning_rate": 4.062165058949625e-05, + "loss": 0.1749, + "step": 5605 + }, + { + "epoch": 1.5, + "learning_rate": 4.061997588424438e-05, + "loss": 0.1619, + "step": 5606 + }, + { + "epoch": 1.5, + "learning_rate": 4.06183011789925e-05, + "loss": 0.1575, + "step": 5607 + }, + { + "epoch": 1.5, + "learning_rate": 4.061662647374062e-05, + "loss": 0.1304, + "step": 5608 + }, + { + "epoch": 1.5, + "learning_rate": 4.0614951768488743e-05, + "loss": 0.3673, + "step": 5609 + }, + { + "epoch": 1.5, + "learning_rate": 4.061327706323687e-05, + "loss": 0.2827, + "step": 5610 + }, + { + "epoch": 1.5, + "learning_rate": 4.0611602357985e-05, + "loss": 0.1507, + "step": 5611 + }, + { + "epoch": 1.5, + "learning_rate": 4.060992765273312e-05, + "loss": 0.1458, + "step": 5612 + }, + { + "epoch": 1.5, + "learning_rate": 4.060825294748125e-05, + "loss": 0.215, + "step": 5613 + }, + { + "epoch": 1.5, + "learning_rate": 4.060657824222937e-05, + "loss": 0.1368, + "step": 5614 + }, + { + "epoch": 1.5, + "learning_rate": 4.0604903536977495e-05, + "loss": 0.1334, + "step": 5615 + }, + { + "epoch": 1.5, + "learning_rate": 4.060322883172562e-05, + "loss": 0.148, + "step": 5616 + }, + { + "epoch": 1.51, + "learning_rate": 4.060155412647374e-05, + "loss": 0.1365, + "step": 5617 + }, + { + "epoch": 1.51, + "learning_rate": 4.059987942122187e-05, + "loss": 0.2559, + "step": 5618 + }, + { + "epoch": 1.51, + "learning_rate": 4.059820471596999e-05, + "loss": 0.1346, + "step": 5619 + }, + { + "epoch": 1.51, + "learning_rate": 4.0596530010718116e-05, + "loss": 0.237, + "step": 5620 + }, + { + "epoch": 1.51, + "learning_rate": 4.059485530546624e-05, + "loss": 0.1921, + "step": 5621 + }, + { + "epoch": 1.51, + "learning_rate": 4.0593180600214365e-05, + "loss": 0.1217, + "step": 5622 + }, + { + "epoch": 1.51, + "learning_rate": 4.059150589496249e-05, + "loss": 0.1561, + "step": 5623 + }, + { + "epoch": 1.51, + "learning_rate": 4.058983118971061e-05, + "loss": 0.1793, + "step": 5624 + }, + { + "epoch": 1.51, + "learning_rate": 4.058815648445874e-05, + "loss": 0.1865, + "step": 5625 + }, + { + "epoch": 1.51, + "learning_rate": 4.058648177920686e-05, + "loss": 0.1677, + "step": 5626 + }, + { + "epoch": 1.51, + "learning_rate": 4.0584807073954986e-05, + "loss": 0.1241, + "step": 5627 + }, + { + "epoch": 1.51, + "learning_rate": 4.058313236870311e-05, + "loss": 0.1573, + "step": 5628 + }, + { + "epoch": 1.51, + "learning_rate": 4.0581457663451234e-05, + "loss": 0.1285, + "step": 5629 + }, + { + "epoch": 1.51, + "learning_rate": 4.057978295819936e-05, + "loss": 0.1375, + "step": 5630 + }, + { + "epoch": 1.51, + "learning_rate": 4.057810825294748e-05, + "loss": 0.1376, + "step": 5631 + }, + { + "epoch": 1.51, + "learning_rate": 4.057643354769561e-05, + "loss": 0.1177, + "step": 5632 + }, + { + "epoch": 1.51, + "learning_rate": 4.057475884244373e-05, + "loss": 0.1334, + "step": 5633 + }, + { + "epoch": 1.51, + "learning_rate": 4.0573084137191855e-05, + "loss": 0.1197, + "step": 5634 + }, + { + "epoch": 1.51, + "learning_rate": 4.057140943193998e-05, + "loss": 0.1174, + "step": 5635 + }, + { + "epoch": 1.51, + "learning_rate": 4.0569734726688104e-05, + "loss": 0.2664, + "step": 5636 + }, + { + "epoch": 1.51, + "learning_rate": 4.056806002143623e-05, + "loss": 0.3379, + "step": 5637 + }, + { + "epoch": 1.51, + "learning_rate": 4.056638531618435e-05, + "loss": 0.1364, + "step": 5638 + }, + { + "epoch": 1.51, + "learning_rate": 4.0564710610932476e-05, + "loss": 0.2592, + "step": 5639 + }, + { + "epoch": 1.51, + "learning_rate": 4.05630359056806e-05, + "loss": 0.12, + "step": 5640 + }, + { + "epoch": 1.51, + "learning_rate": 4.0561361200428725e-05, + "loss": 0.1747, + "step": 5641 + }, + { + "epoch": 1.51, + "learning_rate": 4.055968649517685e-05, + "loss": 0.1293, + "step": 5642 + }, + { + "epoch": 1.51, + "learning_rate": 4.055801178992498e-05, + "loss": 0.1673, + "step": 5643 + }, + { + "epoch": 1.51, + "learning_rate": 4.0556337084673104e-05, + "loss": 0.1613, + "step": 5644 + }, + { + "epoch": 1.51, + "learning_rate": 4.055466237942122e-05, + "loss": 0.1819, + "step": 5645 + }, + { + "epoch": 1.51, + "learning_rate": 4.0552987674169346e-05, + "loss": 0.1307, + "step": 5646 + }, + { + "epoch": 1.51, + "learning_rate": 4.055131296891747e-05, + "loss": 0.1472, + "step": 5647 + }, + { + "epoch": 1.51, + "learning_rate": 4.0549638263665594e-05, + "loss": 0.2811, + "step": 5648 + }, + { + "epoch": 1.51, + "learning_rate": 4.054796355841372e-05, + "loss": 0.1034, + "step": 5649 + }, + { + "epoch": 1.51, + "learning_rate": 4.054628885316184e-05, + "loss": 0.455, + "step": 5650 + }, + { + "epoch": 1.51, + "learning_rate": 4.0544614147909974e-05, + "loss": 0.16, + "step": 5651 + }, + { + "epoch": 1.51, + "learning_rate": 4.05429394426581e-05, + "loss": 0.2057, + "step": 5652 + }, + { + "epoch": 1.51, + "learning_rate": 4.054126473740622e-05, + "loss": 0.1615, + "step": 5653 + }, + { + "epoch": 1.51, + "learning_rate": 4.0539590032154346e-05, + "loss": 0.1385, + "step": 5654 + }, + { + "epoch": 1.52, + "learning_rate": 4.0537915326902464e-05, + "loss": 0.1524, + "step": 5655 + }, + { + "epoch": 1.52, + "learning_rate": 4.053624062165059e-05, + "loss": 0.1389, + "step": 5656 + }, + { + "epoch": 1.52, + "learning_rate": 4.053456591639871e-05, + "loss": 0.1222, + "step": 5657 + }, + { + "epoch": 1.52, + "learning_rate": 4.0532891211146836e-05, + "loss": 0.1689, + "step": 5658 + }, + { + "epoch": 1.52, + "learning_rate": 4.053121650589497e-05, + "loss": 0.1493, + "step": 5659 + }, + { + "epoch": 1.52, + "learning_rate": 4.052954180064309e-05, + "loss": 0.1223, + "step": 5660 + }, + { + "epoch": 1.52, + "learning_rate": 4.0527867095391216e-05, + "loss": 0.4175, + "step": 5661 + }, + { + "epoch": 1.52, + "learning_rate": 4.052619239013934e-05, + "loss": 0.1568, + "step": 5662 + }, + { + "epoch": 1.52, + "learning_rate": 4.0524517684887464e-05, + "loss": 0.2068, + "step": 5663 + }, + { + "epoch": 1.52, + "learning_rate": 4.052284297963558e-05, + "loss": 0.171, + "step": 5664 + }, + { + "epoch": 1.52, + "learning_rate": 4.0521168274383706e-05, + "loss": 0.1811, + "step": 5665 + }, + { + "epoch": 1.52, + "learning_rate": 4.051949356913183e-05, + "loss": 0.1364, + "step": 5666 + }, + { + "epoch": 1.52, + "learning_rate": 4.051781886387996e-05, + "loss": 0.1272, + "step": 5667 + }, + { + "epoch": 1.52, + "learning_rate": 4.0516144158628085e-05, + "loss": 0.3652, + "step": 5668 + }, + { + "epoch": 1.52, + "learning_rate": 4.051446945337621e-05, + "loss": 0.2025, + "step": 5669 + }, + { + "epoch": 1.52, + "learning_rate": 4.0512794748124334e-05, + "loss": 0.1448, + "step": 5670 + }, + { + "epoch": 1.52, + "learning_rate": 4.051112004287246e-05, + "loss": 0.1173, + "step": 5671 + }, + { + "epoch": 1.52, + "learning_rate": 4.050944533762058e-05, + "loss": 0.1514, + "step": 5672 + }, + { + "epoch": 1.52, + "learning_rate": 4.05077706323687e-05, + "loss": 0.1045, + "step": 5673 + }, + { + "epoch": 1.52, + "learning_rate": 4.050609592711683e-05, + "loss": 0.2695, + "step": 5674 + }, + { + "epoch": 1.52, + "learning_rate": 4.0504421221864955e-05, + "loss": 0.2989, + "step": 5675 + }, + { + "epoch": 1.52, + "learning_rate": 4.050274651661308e-05, + "loss": 0.1479, + "step": 5676 + }, + { + "epoch": 1.52, + "learning_rate": 4.05010718113612e-05, + "loss": 0.174, + "step": 5677 + }, + { + "epoch": 1.52, + "learning_rate": 4.049939710610933e-05, + "loss": 0.1247, + "step": 5678 + }, + { + "epoch": 1.52, + "learning_rate": 4.049772240085745e-05, + "loss": 0.2776, + "step": 5679 + }, + { + "epoch": 1.52, + "learning_rate": 4.0496047695605576e-05, + "loss": 0.1235, + "step": 5680 + }, + { + "epoch": 1.52, + "learning_rate": 4.04943729903537e-05, + "loss": 0.1316, + "step": 5681 + }, + { + "epoch": 1.52, + "learning_rate": 4.0492698285101824e-05, + "loss": 0.1654, + "step": 5682 + }, + { + "epoch": 1.52, + "learning_rate": 4.049102357984995e-05, + "loss": 0.1863, + "step": 5683 + }, + { + "epoch": 1.52, + "learning_rate": 4.048934887459807e-05, + "loss": 0.1549, + "step": 5684 + }, + { + "epoch": 1.52, + "learning_rate": 4.04876741693462e-05, + "loss": 0.1123, + "step": 5685 + }, + { + "epoch": 1.52, + "learning_rate": 4.048599946409432e-05, + "loss": 0.2031, + "step": 5686 + }, + { + "epoch": 1.52, + "learning_rate": 4.0484324758842445e-05, + "loss": 0.1478, + "step": 5687 + }, + { + "epoch": 1.52, + "learning_rate": 4.048265005359057e-05, + "loss": 0.2426, + "step": 5688 + }, + { + "epoch": 1.52, + "learning_rate": 4.0480975348338694e-05, + "loss": 0.1294, + "step": 5689 + }, + { + "epoch": 1.52, + "learning_rate": 4.047930064308682e-05, + "loss": 0.1478, + "step": 5690 + }, + { + "epoch": 1.52, + "learning_rate": 4.047762593783494e-05, + "loss": 0.1498, + "step": 5691 + }, + { + "epoch": 1.53, + "learning_rate": 4.0475951232583066e-05, + "loss": 0.2484, + "step": 5692 + }, + { + "epoch": 1.53, + "learning_rate": 4.047427652733119e-05, + "loss": 0.1097, + "step": 5693 + }, + { + "epoch": 1.53, + "learning_rate": 4.0472601822079315e-05, + "loss": 0.1326, + "step": 5694 + }, + { + "epoch": 1.53, + "learning_rate": 4.047092711682744e-05, + "loss": 0.1787, + "step": 5695 + }, + { + "epoch": 1.53, + "learning_rate": 4.046925241157556e-05, + "loss": 0.1404, + "step": 5696 + }, + { + "epoch": 1.53, + "learning_rate": 4.046757770632369e-05, + "loss": 0.1326, + "step": 5697 + }, + { + "epoch": 1.53, + "learning_rate": 4.046590300107181e-05, + "loss": 0.1835, + "step": 5698 + }, + { + "epoch": 1.53, + "learning_rate": 4.046422829581994e-05, + "loss": 0.2586, + "step": 5699 + }, + { + "epoch": 1.53, + "learning_rate": 4.046255359056807e-05, + "loss": 0.1332, + "step": 5700 + }, + { + "epoch": 1.53, + "learning_rate": 4.0460878885316184e-05, + "loss": 0.4036, + "step": 5701 + }, + { + "epoch": 1.53, + "learning_rate": 4.045920418006431e-05, + "loss": 0.162, + "step": 5702 + }, + { + "epoch": 1.53, + "learning_rate": 4.045752947481243e-05, + "loss": 0.1235, + "step": 5703 + }, + { + "epoch": 1.53, + "learning_rate": 4.045585476956056e-05, + "loss": 0.3277, + "step": 5704 + }, + { + "epoch": 1.53, + "learning_rate": 4.045418006430868e-05, + "loss": 0.1539, + "step": 5705 + }, + { + "epoch": 1.53, + "learning_rate": 4.0452505359056805e-05, + "loss": 0.2264, + "step": 5706 + }, + { + "epoch": 1.53, + "learning_rate": 4.0450830653804936e-05, + "loss": 0.2053, + "step": 5707 + }, + { + "epoch": 1.53, + "learning_rate": 4.044915594855306e-05, + "loss": 0.1645, + "step": 5708 + }, + { + "epoch": 1.53, + "learning_rate": 4.0447481243301185e-05, + "loss": 0.1648, + "step": 5709 + }, + { + "epoch": 1.53, + "learning_rate": 4.044580653804931e-05, + "loss": 0.1317, + "step": 5710 + }, + { + "epoch": 1.53, + "learning_rate": 4.0444131832797426e-05, + "loss": 0.1976, + "step": 5711 + }, + { + "epoch": 1.53, + "learning_rate": 4.044245712754555e-05, + "loss": 0.2389, + "step": 5712 + }, + { + "epoch": 1.53, + "learning_rate": 4.0440782422293675e-05, + "loss": 0.1454, + "step": 5713 + }, + { + "epoch": 1.53, + "learning_rate": 4.04391077170418e-05, + "loss": 0.1376, + "step": 5714 + }, + { + "epoch": 1.53, + "learning_rate": 4.043743301178993e-05, + "loss": 0.1109, + "step": 5715 + }, + { + "epoch": 1.53, + "learning_rate": 4.0435758306538054e-05, + "loss": 0.1552, + "step": 5716 + }, + { + "epoch": 1.53, + "learning_rate": 4.043408360128618e-05, + "loss": 0.1632, + "step": 5717 + }, + { + "epoch": 1.53, + "learning_rate": 4.04324088960343e-05, + "loss": 0.1555, + "step": 5718 + }, + { + "epoch": 1.53, + "learning_rate": 4.043073419078243e-05, + "loss": 0.1118, + "step": 5719 + }, + { + "epoch": 1.53, + "learning_rate": 4.0429059485530544e-05, + "loss": 0.2268, + "step": 5720 + }, + { + "epoch": 1.53, + "learning_rate": 4.042738478027867e-05, + "loss": 0.1411, + "step": 5721 + }, + { + "epoch": 1.53, + "learning_rate": 4.042571007502679e-05, + "loss": 0.1408, + "step": 5722 + }, + { + "epoch": 1.53, + "learning_rate": 4.0424035369774924e-05, + "loss": 0.239, + "step": 5723 + }, + { + "epoch": 1.53, + "learning_rate": 4.042236066452305e-05, + "loss": 0.1673, + "step": 5724 + }, + { + "epoch": 1.53, + "learning_rate": 4.042068595927117e-05, + "loss": 0.1491, + "step": 5725 + }, + { + "epoch": 1.53, + "learning_rate": 4.0419011254019296e-05, + "loss": 0.1469, + "step": 5726 + }, + { + "epoch": 1.53, + "learning_rate": 4.041733654876742e-05, + "loss": 0.1272, + "step": 5727 + }, + { + "epoch": 1.53, + "learning_rate": 4.0415661843515545e-05, + "loss": 0.1338, + "step": 5728 + }, + { + "epoch": 1.54, + "learning_rate": 4.041398713826366e-05, + "loss": 0.1978, + "step": 5729 + }, + { + "epoch": 1.54, + "learning_rate": 4.041231243301179e-05, + "loss": 0.1342, + "step": 5730 + }, + { + "epoch": 1.54, + "learning_rate": 4.041063772775992e-05, + "loss": 0.1623, + "step": 5731 + }, + { + "epoch": 1.54, + "learning_rate": 4.040896302250804e-05, + "loss": 0.1726, + "step": 5732 + }, + { + "epoch": 1.54, + "learning_rate": 4.0407288317256166e-05, + "loss": 0.1305, + "step": 5733 + }, + { + "epoch": 1.54, + "learning_rate": 4.040561361200429e-05, + "loss": 0.2468, + "step": 5734 + }, + { + "epoch": 1.54, + "learning_rate": 4.0403938906752414e-05, + "loss": 0.186, + "step": 5735 + }, + { + "epoch": 1.54, + "learning_rate": 4.040226420150054e-05, + "loss": 0.1347, + "step": 5736 + }, + { + "epoch": 1.54, + "learning_rate": 4.040058949624866e-05, + "loss": 0.3299, + "step": 5737 + }, + { + "epoch": 1.54, + "learning_rate": 4.039891479099679e-05, + "loss": 0.12, + "step": 5738 + }, + { + "epoch": 1.54, + "learning_rate": 4.039724008574491e-05, + "loss": 0.125, + "step": 5739 + }, + { + "epoch": 1.54, + "learning_rate": 4.0395565380493035e-05, + "loss": 0.1372, + "step": 5740 + }, + { + "epoch": 1.54, + "learning_rate": 4.039389067524116e-05, + "loss": 0.1979, + "step": 5741 + }, + { + "epoch": 1.54, + "learning_rate": 4.0392215969989284e-05, + "loss": 0.128, + "step": 5742 + }, + { + "epoch": 1.54, + "learning_rate": 4.039054126473741e-05, + "loss": 0.2174, + "step": 5743 + }, + { + "epoch": 1.54, + "learning_rate": 4.038886655948553e-05, + "loss": 0.1751, + "step": 5744 + }, + { + "epoch": 1.54, + "learning_rate": 4.0387191854233656e-05, + "loss": 0.1379, + "step": 5745 + }, + { + "epoch": 1.54, + "learning_rate": 4.038551714898178e-05, + "loss": 0.1498, + "step": 5746 + }, + { + "epoch": 1.54, + "learning_rate": 4.0383842443729905e-05, + "loss": 0.1459, + "step": 5747 + }, + { + "epoch": 1.54, + "learning_rate": 4.038216773847803e-05, + "loss": 0.1761, + "step": 5748 + }, + { + "epoch": 1.54, + "learning_rate": 4.038049303322615e-05, + "loss": 0.2093, + "step": 5749 + }, + { + "epoch": 1.54, + "learning_rate": 4.037881832797428e-05, + "loss": 0.2137, + "step": 5750 + }, + { + "epoch": 1.54, + "learning_rate": 4.03771436227224e-05, + "loss": 0.144, + "step": 5751 + }, + { + "epoch": 1.54, + "learning_rate": 4.0375468917470526e-05, + "loss": 0.1248, + "step": 5752 + }, + { + "epoch": 1.54, + "learning_rate": 4.037379421221865e-05, + "loss": 0.1298, + "step": 5753 + }, + { + "epoch": 1.54, + "learning_rate": 4.0372119506966774e-05, + "loss": 0.1673, + "step": 5754 + }, + { + "epoch": 1.54, + "learning_rate": 4.0370444801714905e-05, + "loss": 0.151, + "step": 5755 + }, + { + "epoch": 1.54, + "learning_rate": 4.036877009646303e-05, + "loss": 0.1553, + "step": 5756 + }, + { + "epoch": 1.54, + "learning_rate": 4.036709539121115e-05, + "loss": 0.1369, + "step": 5757 + }, + { + "epoch": 1.54, + "learning_rate": 4.036542068595927e-05, + "loss": 0.1691, + "step": 5758 + }, + { + "epoch": 1.54, + "learning_rate": 4.0363745980707395e-05, + "loss": 0.1084, + "step": 5759 + }, + { + "epoch": 1.54, + "learning_rate": 4.036207127545552e-05, + "loss": 0.1595, + "step": 5760 + }, + { + "epoch": 1.54, + "learning_rate": 4.0360396570203644e-05, + "loss": 0.1625, + "step": 5761 + }, + { + "epoch": 1.54, + "learning_rate": 4.035872186495177e-05, + "loss": 0.2535, + "step": 5762 + }, + { + "epoch": 1.54, + "learning_rate": 4.03570471596999e-05, + "loss": 0.146, + "step": 5763 + }, + { + "epoch": 1.54, + "learning_rate": 4.035537245444802e-05, + "loss": 0.1816, + "step": 5764 + }, + { + "epoch": 1.54, + "learning_rate": 4.035369774919615e-05, + "loss": 0.1415, + "step": 5765 + }, + { + "epoch": 1.54, + "learning_rate": 4.035202304394427e-05, + "loss": 0.1549, + "step": 5766 + }, + { + "epoch": 1.55, + "learning_rate": 4.035034833869239e-05, + "loss": 0.2122, + "step": 5767 + }, + { + "epoch": 1.55, + "learning_rate": 4.034867363344051e-05, + "loss": 0.1436, + "step": 5768 + }, + { + "epoch": 1.55, + "learning_rate": 4.034699892818864e-05, + "loss": 0.1385, + "step": 5769 + }, + { + "epoch": 1.55, + "learning_rate": 4.034532422293676e-05, + "loss": 0.1597, + "step": 5770 + }, + { + "epoch": 1.55, + "learning_rate": 4.034364951768489e-05, + "loss": 0.274, + "step": 5771 + }, + { + "epoch": 1.55, + "learning_rate": 4.034197481243302e-05, + "loss": 0.1614, + "step": 5772 + }, + { + "epoch": 1.55, + "learning_rate": 4.034030010718114e-05, + "loss": 0.1169, + "step": 5773 + }, + { + "epoch": 1.55, + "learning_rate": 4.0338625401929265e-05, + "loss": 0.1481, + "step": 5774 + }, + { + "epoch": 1.55, + "learning_rate": 4.033695069667739e-05, + "loss": 0.1647, + "step": 5775 + }, + { + "epoch": 1.55, + "learning_rate": 4.033527599142551e-05, + "loss": 0.2074, + "step": 5776 + }, + { + "epoch": 1.55, + "learning_rate": 4.033360128617363e-05, + "loss": 0.1262, + "step": 5777 + }, + { + "epoch": 1.55, + "learning_rate": 4.0331926580921755e-05, + "loss": 0.1351, + "step": 5778 + }, + { + "epoch": 1.55, + "learning_rate": 4.0330251875669886e-05, + "loss": 0.322, + "step": 5779 + }, + { + "epoch": 1.55, + "learning_rate": 4.032857717041801e-05, + "loss": 0.1765, + "step": 5780 + }, + { + "epoch": 1.55, + "learning_rate": 4.0326902465166135e-05, + "loss": 0.137, + "step": 5781 + }, + { + "epoch": 1.55, + "learning_rate": 4.032522775991426e-05, + "loss": 0.2074, + "step": 5782 + }, + { + "epoch": 1.55, + "learning_rate": 4.032355305466238e-05, + "loss": 0.346, + "step": 5783 + }, + { + "epoch": 1.55, + "learning_rate": 4.032187834941051e-05, + "loss": 0.2138, + "step": 5784 + }, + { + "epoch": 1.55, + "learning_rate": 4.0320203644158625e-05, + "loss": 0.1254, + "step": 5785 + }, + { + "epoch": 1.55, + "learning_rate": 4.0318528938906756e-05, + "loss": 0.2775, + "step": 5786 + }, + { + "epoch": 1.55, + "learning_rate": 4.031685423365488e-05, + "loss": 0.1336, + "step": 5787 + }, + { + "epoch": 1.55, + "learning_rate": 4.0315179528403004e-05, + "loss": 0.1364, + "step": 5788 + }, + { + "epoch": 1.55, + "learning_rate": 4.031350482315113e-05, + "loss": 0.1789, + "step": 5789 + }, + { + "epoch": 1.55, + "learning_rate": 4.031183011789925e-05, + "loss": 0.1685, + "step": 5790 + }, + { + "epoch": 1.55, + "learning_rate": 4.031015541264738e-05, + "loss": 0.1457, + "step": 5791 + }, + { + "epoch": 1.55, + "learning_rate": 4.03084807073955e-05, + "loss": 0.1699, + "step": 5792 + }, + { + "epoch": 1.55, + "learning_rate": 4.0306806002143625e-05, + "loss": 0.2185, + "step": 5793 + }, + { + "epoch": 1.55, + "learning_rate": 4.030513129689175e-05, + "loss": 0.2776, + "step": 5794 + }, + { + "epoch": 1.55, + "learning_rate": 4.0303456591639873e-05, + "loss": 0.2077, + "step": 5795 + }, + { + "epoch": 1.55, + "learning_rate": 4.0301781886388e-05, + "loss": 0.14, + "step": 5796 + }, + { + "epoch": 1.55, + "learning_rate": 4.030010718113612e-05, + "loss": 0.1248, + "step": 5797 + }, + { + "epoch": 1.55, + "learning_rate": 4.0298432475884246e-05, + "loss": 0.2129, + "step": 5798 + }, + { + "epoch": 1.55, + "learning_rate": 4.029675777063237e-05, + "loss": 0.1782, + "step": 5799 + }, + { + "epoch": 1.55, + "learning_rate": 4.0295083065380495e-05, + "loss": 0.1373, + "step": 5800 + }, + { + "epoch": 1.55, + "learning_rate": 4.029340836012862e-05, + "loss": 0.2233, + "step": 5801 + }, + { + "epoch": 1.55, + "learning_rate": 4.029173365487674e-05, + "loss": 0.13, + "step": 5802 + }, + { + "epoch": 1.55, + "learning_rate": 4.029005894962487e-05, + "loss": 0.3757, + "step": 5803 + }, + { + "epoch": 1.56, + "learning_rate": 4.028838424437299e-05, + "loss": 0.1812, + "step": 5804 + }, + { + "epoch": 1.56, + "learning_rate": 4.0286709539121116e-05, + "loss": 0.1463, + "step": 5805 + }, + { + "epoch": 1.56, + "learning_rate": 4.028503483386924e-05, + "loss": 0.1728, + "step": 5806 + }, + { + "epoch": 1.56, + "learning_rate": 4.0283360128617364e-05, + "loss": 0.1332, + "step": 5807 + }, + { + "epoch": 1.56, + "learning_rate": 4.028168542336549e-05, + "loss": 0.1458, + "step": 5808 + }, + { + "epoch": 1.56, + "learning_rate": 4.028001071811361e-05, + "loss": 0.1893, + "step": 5809 + }, + { + "epoch": 1.56, + "learning_rate": 4.0278336012861737e-05, + "loss": 0.1504, + "step": 5810 + }, + { + "epoch": 1.56, + "learning_rate": 4.027666130760987e-05, + "loss": 0.1911, + "step": 5811 + }, + { + "epoch": 1.56, + "learning_rate": 4.027498660235799e-05, + "loss": 0.1848, + "step": 5812 + }, + { + "epoch": 1.56, + "learning_rate": 4.027331189710611e-05, + "loss": 0.1396, + "step": 5813 + }, + { + "epoch": 1.56, + "learning_rate": 4.0271637191854233e-05, + "loss": 0.1326, + "step": 5814 + }, + { + "epoch": 1.56, + "learning_rate": 4.026996248660236e-05, + "loss": 0.1769, + "step": 5815 + }, + { + "epoch": 1.56, + "learning_rate": 4.026828778135048e-05, + "loss": 0.1938, + "step": 5816 + }, + { + "epoch": 1.56, + "learning_rate": 4.0266613076098606e-05, + "loss": 0.1908, + "step": 5817 + }, + { + "epoch": 1.56, + "learning_rate": 4.026493837084673e-05, + "loss": 0.2318, + "step": 5818 + }, + { + "epoch": 1.56, + "learning_rate": 4.026326366559486e-05, + "loss": 0.1974, + "step": 5819 + }, + { + "epoch": 1.56, + "learning_rate": 4.0261588960342985e-05, + "loss": 0.1262, + "step": 5820 + }, + { + "epoch": 1.56, + "learning_rate": 4.025991425509111e-05, + "loss": 0.1564, + "step": 5821 + }, + { + "epoch": 1.56, + "learning_rate": 4.0258239549839234e-05, + "loss": 0.1783, + "step": 5822 + }, + { + "epoch": 1.56, + "learning_rate": 4.025656484458735e-05, + "loss": 0.1373, + "step": 5823 + }, + { + "epoch": 1.56, + "learning_rate": 4.0254890139335476e-05, + "loss": 0.1478, + "step": 5824 + }, + { + "epoch": 1.56, + "learning_rate": 4.02532154340836e-05, + "loss": 0.1415, + "step": 5825 + }, + { + "epoch": 1.56, + "learning_rate": 4.0251540728831724e-05, + "loss": 0.2607, + "step": 5826 + }, + { + "epoch": 1.56, + "learning_rate": 4.0249866023579855e-05, + "loss": 0.1411, + "step": 5827 + }, + { + "epoch": 1.56, + "learning_rate": 4.024819131832798e-05, + "loss": 0.1524, + "step": 5828 + }, + { + "epoch": 1.56, + "learning_rate": 4.02465166130761e-05, + "loss": 0.1293, + "step": 5829 + }, + { + "epoch": 1.56, + "learning_rate": 4.024484190782423e-05, + "loss": 0.205, + "step": 5830 + }, + { + "epoch": 1.56, + "learning_rate": 4.024316720257235e-05, + "loss": 0.2224, + "step": 5831 + }, + { + "epoch": 1.56, + "learning_rate": 4.024149249732047e-05, + "loss": 0.1413, + "step": 5832 + }, + { + "epoch": 1.56, + "learning_rate": 4.0239817792068593e-05, + "loss": 0.1609, + "step": 5833 + }, + { + "epoch": 1.56, + "learning_rate": 4.023814308681672e-05, + "loss": 0.1454, + "step": 5834 + }, + { + "epoch": 1.56, + "learning_rate": 4.023646838156485e-05, + "loss": 0.1435, + "step": 5835 + }, + { + "epoch": 1.56, + "learning_rate": 4.023479367631297e-05, + "loss": 0.2673, + "step": 5836 + }, + { + "epoch": 1.56, + "learning_rate": 4.02331189710611e-05, + "loss": 0.2444, + "step": 5837 + }, + { + "epoch": 1.56, + "learning_rate": 4.023144426580922e-05, + "loss": 0.1499, + "step": 5838 + }, + { + "epoch": 1.56, + "learning_rate": 4.0229769560557345e-05, + "loss": 0.1304, + "step": 5839 + }, + { + "epoch": 1.56, + "learning_rate": 4.022809485530547e-05, + "loss": 0.2079, + "step": 5840 + }, + { + "epoch": 1.57, + "learning_rate": 4.022642015005359e-05, + "loss": 0.1258, + "step": 5841 + }, + { + "epoch": 1.57, + "learning_rate": 4.022474544480171e-05, + "loss": 0.1627, + "step": 5842 + }, + { + "epoch": 1.57, + "learning_rate": 4.022307073954984e-05, + "loss": 0.1131, + "step": 5843 + }, + { + "epoch": 1.57, + "learning_rate": 4.0221396034297967e-05, + "loss": 0.1715, + "step": 5844 + }, + { + "epoch": 1.57, + "learning_rate": 4.021972132904609e-05, + "loss": 0.1697, + "step": 5845 + }, + { + "epoch": 1.57, + "learning_rate": 4.0218046623794215e-05, + "loss": 0.1477, + "step": 5846 + }, + { + "epoch": 1.57, + "learning_rate": 4.021637191854234e-05, + "loss": 0.1922, + "step": 5847 + }, + { + "epoch": 1.57, + "learning_rate": 4.021469721329046e-05, + "loss": 0.1453, + "step": 5848 + }, + { + "epoch": 1.57, + "learning_rate": 4.021302250803859e-05, + "loss": 0.1616, + "step": 5849 + }, + { + "epoch": 1.57, + "learning_rate": 4.021134780278671e-05, + "loss": 0.1433, + "step": 5850 + }, + { + "epoch": 1.57, + "learning_rate": 4.0209673097534836e-05, + "loss": 0.1943, + "step": 5851 + }, + { + "epoch": 1.57, + "learning_rate": 4.020799839228296e-05, + "loss": 0.1221, + "step": 5852 + }, + { + "epoch": 1.57, + "learning_rate": 4.0206323687031084e-05, + "loss": 0.1443, + "step": 5853 + }, + { + "epoch": 1.57, + "learning_rate": 4.020464898177921e-05, + "loss": 0.1265, + "step": 5854 + }, + { + "epoch": 1.57, + "learning_rate": 4.020297427652733e-05, + "loss": 0.1695, + "step": 5855 + }, + { + "epoch": 1.57, + "learning_rate": 4.020129957127546e-05, + "loss": 0.2616, + "step": 5856 + }, + { + "epoch": 1.57, + "learning_rate": 4.019962486602358e-05, + "loss": 0.1152, + "step": 5857 + }, + { + "epoch": 1.57, + "learning_rate": 4.0197950160771705e-05, + "loss": 0.343, + "step": 5858 + }, + { + "epoch": 1.57, + "learning_rate": 4.019627545551983e-05, + "loss": 0.1706, + "step": 5859 + }, + { + "epoch": 1.57, + "learning_rate": 4.0194600750267954e-05, + "loss": 0.359, + "step": 5860 + }, + { + "epoch": 1.57, + "learning_rate": 4.019292604501608e-05, + "loss": 0.1983, + "step": 5861 + }, + { + "epoch": 1.57, + "learning_rate": 4.01912513397642e-05, + "loss": 0.2104, + "step": 5862 + }, + { + "epoch": 1.57, + "learning_rate": 4.0189576634512326e-05, + "loss": 0.3225, + "step": 5863 + }, + { + "epoch": 1.57, + "learning_rate": 4.018790192926045e-05, + "loss": 0.249, + "step": 5864 + }, + { + "epoch": 1.57, + "learning_rate": 4.0186227224008575e-05, + "loss": 0.1406, + "step": 5865 + }, + { + "epoch": 1.57, + "learning_rate": 4.01845525187567e-05, + "loss": 0.1371, + "step": 5866 + }, + { + "epoch": 1.57, + "learning_rate": 4.018287781350483e-05, + "loss": 0.1815, + "step": 5867 + }, + { + "epoch": 1.57, + "learning_rate": 4.0181203108252954e-05, + "loss": 0.3732, + "step": 5868 + }, + { + "epoch": 1.57, + "learning_rate": 4.017952840300107e-05, + "loss": 0.1241, + "step": 5869 + }, + { + "epoch": 1.57, + "learning_rate": 4.0177853697749196e-05, + "loss": 0.1888, + "step": 5870 + }, + { + "epoch": 1.57, + "learning_rate": 4.017617899249732e-05, + "loss": 0.1747, + "step": 5871 + }, + { + "epoch": 1.57, + "learning_rate": 4.0174504287245444e-05, + "loss": 0.261, + "step": 5872 + }, + { + "epoch": 1.57, + "learning_rate": 4.017282958199357e-05, + "loss": 0.1284, + "step": 5873 + }, + { + "epoch": 1.57, + "learning_rate": 4.017115487674169e-05, + "loss": 0.1855, + "step": 5874 + }, + { + "epoch": 1.57, + "learning_rate": 4.0169480171489824e-05, + "loss": 0.1123, + "step": 5875 + }, + { + "epoch": 1.57, + "learning_rate": 4.016780546623795e-05, + "loss": 0.134, + "step": 5876 + }, + { + "epoch": 1.57, + "learning_rate": 4.016613076098607e-05, + "loss": 0.129, + "step": 5877 + }, + { + "epoch": 1.58, + "learning_rate": 4.0164456055734196e-05, + "loss": 0.1233, + "step": 5878 + }, + { + "epoch": 1.58, + "learning_rate": 4.0162781350482314e-05, + "loss": 0.1213, + "step": 5879 + }, + { + "epoch": 1.58, + "learning_rate": 4.016110664523044e-05, + "loss": 0.1168, + "step": 5880 + }, + { + "epoch": 1.58, + "learning_rate": 4.015943193997856e-05, + "loss": 0.1136, + "step": 5881 + }, + { + "epoch": 1.58, + "learning_rate": 4.0157757234726686e-05, + "loss": 0.2503, + "step": 5882 + }, + { + "epoch": 1.58, + "learning_rate": 4.015608252947482e-05, + "loss": 0.1236, + "step": 5883 + }, + { + "epoch": 1.58, + "learning_rate": 4.015440782422294e-05, + "loss": 0.1173, + "step": 5884 + }, + { + "epoch": 1.58, + "learning_rate": 4.0152733118971066e-05, + "loss": 0.1351, + "step": 5885 + }, + { + "epoch": 1.58, + "learning_rate": 4.015105841371919e-05, + "loss": 0.1487, + "step": 5886 + }, + { + "epoch": 1.58, + "learning_rate": 4.0149383708467314e-05, + "loss": 0.3122, + "step": 5887 + }, + { + "epoch": 1.58, + "learning_rate": 4.014770900321543e-05, + "loss": 0.1641, + "step": 5888 + }, + { + "epoch": 1.58, + "learning_rate": 4.0146034297963556e-05, + "loss": 0.3324, + "step": 5889 + }, + { + "epoch": 1.58, + "learning_rate": 4.014435959271168e-05, + "loss": 0.1207, + "step": 5890 + }, + { + "epoch": 1.58, + "learning_rate": 4.014268488745981e-05, + "loss": 0.1327, + "step": 5891 + }, + { + "epoch": 1.58, + "learning_rate": 4.0141010182207935e-05, + "loss": 0.3071, + "step": 5892 + }, + { + "epoch": 1.58, + "learning_rate": 4.013933547695606e-05, + "loss": 0.1533, + "step": 5893 + }, + { + "epoch": 1.58, + "learning_rate": 4.0137660771704184e-05, + "loss": 0.1548, + "step": 5894 + }, + { + "epoch": 1.58, + "learning_rate": 4.013598606645231e-05, + "loss": 0.1326, + "step": 5895 + }, + { + "epoch": 1.58, + "learning_rate": 4.013431136120043e-05, + "loss": 0.122, + "step": 5896 + }, + { + "epoch": 1.58, + "learning_rate": 4.013263665594855e-05, + "loss": 0.1383, + "step": 5897 + }, + { + "epoch": 1.58, + "learning_rate": 4.0130961950696674e-05, + "loss": 0.1541, + "step": 5898 + }, + { + "epoch": 1.58, + "learning_rate": 4.0129287245444805e-05, + "loss": 0.1327, + "step": 5899 + }, + { + "epoch": 1.58, + "learning_rate": 4.012761254019293e-05, + "loss": 0.1365, + "step": 5900 + }, + { + "epoch": 1.58, + "learning_rate": 4.012593783494105e-05, + "loss": 0.2205, + "step": 5901 + }, + { + "epoch": 1.58, + "learning_rate": 4.012426312968918e-05, + "loss": 0.2479, + "step": 5902 + }, + { + "epoch": 1.58, + "learning_rate": 4.01225884244373e-05, + "loss": 0.1214, + "step": 5903 + }, + { + "epoch": 1.58, + "learning_rate": 4.0120913719185426e-05, + "loss": 0.1668, + "step": 5904 + }, + { + "epoch": 1.58, + "learning_rate": 4.011923901393355e-05, + "loss": 0.2104, + "step": 5905 + }, + { + "epoch": 1.58, + "learning_rate": 4.0117564308681674e-05, + "loss": 0.217, + "step": 5906 + }, + { + "epoch": 1.58, + "learning_rate": 4.01158896034298e-05, + "loss": 0.382, + "step": 5907 + }, + { + "epoch": 1.58, + "learning_rate": 4.011421489817792e-05, + "loss": 0.3942, + "step": 5908 + }, + { + "epoch": 1.58, + "learning_rate": 4.011254019292605e-05, + "loss": 0.1139, + "step": 5909 + }, + { + "epoch": 1.58, + "learning_rate": 4.011086548767417e-05, + "loss": 0.1327, + "step": 5910 + }, + { + "epoch": 1.58, + "learning_rate": 4.0109190782422295e-05, + "loss": 0.2193, + "step": 5911 + }, + { + "epoch": 1.58, + "learning_rate": 4.010751607717042e-05, + "loss": 0.1154, + "step": 5912 + }, + { + "epoch": 1.58, + "learning_rate": 4.0105841371918544e-05, + "loss": 0.1492, + "step": 5913 + }, + { + "epoch": 1.58, + "learning_rate": 4.010416666666667e-05, + "loss": 0.2144, + "step": 5914 + }, + { + "epoch": 1.58, + "learning_rate": 4.010249196141479e-05, + "loss": 0.1219, + "step": 5915 + }, + { + "epoch": 1.59, + "learning_rate": 4.0100817256162916e-05, + "loss": 0.1314, + "step": 5916 + }, + { + "epoch": 1.59, + "learning_rate": 4.009914255091104e-05, + "loss": 0.1547, + "step": 5917 + }, + { + "epoch": 1.59, + "learning_rate": 4.0097467845659165e-05, + "loss": 0.1411, + "step": 5918 + }, + { + "epoch": 1.59, + "learning_rate": 4.009579314040729e-05, + "loss": 0.1312, + "step": 5919 + }, + { + "epoch": 1.59, + "learning_rate": 4.009411843515541e-05, + "loss": 0.1335, + "step": 5920 + }, + { + "epoch": 1.59, + "learning_rate": 4.009244372990354e-05, + "loss": 0.1243, + "step": 5921 + }, + { + "epoch": 1.59, + "learning_rate": 4.009076902465166e-05, + "loss": 0.1336, + "step": 5922 + }, + { + "epoch": 1.59, + "learning_rate": 4.0089094319399786e-05, + "loss": 0.1347, + "step": 5923 + }, + { + "epoch": 1.59, + "learning_rate": 4.008741961414792e-05, + "loss": 0.2735, + "step": 5924 + }, + { + "epoch": 1.59, + "learning_rate": 4.0085744908896034e-05, + "loss": 0.1484, + "step": 5925 + }, + { + "epoch": 1.59, + "learning_rate": 4.008407020364416e-05, + "loss": 0.2581, + "step": 5926 + }, + { + "epoch": 1.59, + "learning_rate": 4.008239549839228e-05, + "loss": 0.126, + "step": 5927 + }, + { + "epoch": 1.59, + "learning_rate": 4.008072079314041e-05, + "loss": 0.1143, + "step": 5928 + }, + { + "epoch": 1.59, + "learning_rate": 4.007904608788853e-05, + "loss": 0.2476, + "step": 5929 + }, + { + "epoch": 1.59, + "learning_rate": 4.0077371382636655e-05, + "loss": 0.1567, + "step": 5930 + }, + { + "epoch": 1.59, + "learning_rate": 4.0075696677384786e-05, + "loss": 0.1303, + "step": 5931 + }, + { + "epoch": 1.59, + "learning_rate": 4.007402197213291e-05, + "loss": 0.1383, + "step": 5932 + }, + { + "epoch": 1.59, + "learning_rate": 4.0072347266881035e-05, + "loss": 0.1534, + "step": 5933 + }, + { + "epoch": 1.59, + "learning_rate": 4.007067256162916e-05, + "loss": 0.1265, + "step": 5934 + }, + { + "epoch": 1.59, + "learning_rate": 4.0068997856377276e-05, + "loss": 0.1136, + "step": 5935 + }, + { + "epoch": 1.59, + "learning_rate": 4.00673231511254e-05, + "loss": 0.2822, + "step": 5936 + }, + { + "epoch": 1.59, + "learning_rate": 4.0065648445873525e-05, + "loss": 0.2922, + "step": 5937 + }, + { + "epoch": 1.59, + "learning_rate": 4.006397374062165e-05, + "loss": 0.124, + "step": 5938 + }, + { + "epoch": 1.59, + "learning_rate": 4.006229903536978e-05, + "loss": 0.2259, + "step": 5939 + }, + { + "epoch": 1.59, + "learning_rate": 4.0060624330117904e-05, + "loss": 0.1473, + "step": 5940 + }, + { + "epoch": 1.59, + "learning_rate": 4.005894962486603e-05, + "loss": 0.1442, + "step": 5941 + }, + { + "epoch": 1.59, + "learning_rate": 4.005727491961415e-05, + "loss": 0.2533, + "step": 5942 + }, + { + "epoch": 1.59, + "learning_rate": 4.005560021436228e-05, + "loss": 0.3023, + "step": 5943 + }, + { + "epoch": 1.59, + "learning_rate": 4.0053925509110394e-05, + "loss": 0.1802, + "step": 5944 + }, + { + "epoch": 1.59, + "learning_rate": 4.005225080385852e-05, + "loss": 0.329, + "step": 5945 + }, + { + "epoch": 1.59, + "learning_rate": 4.005057609860664e-05, + "loss": 0.1321, + "step": 5946 + }, + { + "epoch": 1.59, + "learning_rate": 4.0048901393354774e-05, + "loss": 0.109, + "step": 5947 + }, + { + "epoch": 1.59, + "learning_rate": 4.00472266881029e-05, + "loss": 0.1543, + "step": 5948 + }, + { + "epoch": 1.59, + "learning_rate": 4.004555198285102e-05, + "loss": 0.1584, + "step": 5949 + }, + { + "epoch": 1.59, + "learning_rate": 4.0043877277599146e-05, + "loss": 0.2215, + "step": 5950 + }, + { + "epoch": 1.59, + "learning_rate": 4.004220257234727e-05, + "loss": 0.1443, + "step": 5951 + }, + { + "epoch": 1.59, + "learning_rate": 4.0040527867095395e-05, + "loss": 0.1243, + "step": 5952 + }, + { + "epoch": 1.6, + "learning_rate": 4.003885316184351e-05, + "loss": 0.2106, + "step": 5953 + }, + { + "epoch": 1.6, + "learning_rate": 4.0037178456591636e-05, + "loss": 0.1675, + "step": 5954 + }, + { + "epoch": 1.6, + "learning_rate": 4.003550375133977e-05, + "loss": 0.1695, + "step": 5955 + }, + { + "epoch": 1.6, + "learning_rate": 4.003382904608789e-05, + "loss": 0.2474, + "step": 5956 + }, + { + "epoch": 1.6, + "learning_rate": 4.0032154340836016e-05, + "loss": 0.1791, + "step": 5957 + }, + { + "epoch": 1.6, + "learning_rate": 4.003047963558414e-05, + "loss": 0.2157, + "step": 5958 + }, + { + "epoch": 1.6, + "learning_rate": 4.0028804930332264e-05, + "loss": 0.1864, + "step": 5959 + }, + { + "epoch": 1.6, + "learning_rate": 4.002713022508039e-05, + "loss": 0.2309, + "step": 5960 + }, + { + "epoch": 1.6, + "learning_rate": 4.002545551982851e-05, + "loss": 0.1375, + "step": 5961 + }, + { + "epoch": 1.6, + "learning_rate": 4.002378081457664e-05, + "loss": 0.1285, + "step": 5962 + }, + { + "epoch": 1.6, + "learning_rate": 4.002210610932476e-05, + "loss": 0.4719, + "step": 5963 + }, + { + "epoch": 1.6, + "learning_rate": 4.0020431404072885e-05, + "loss": 0.1269, + "step": 5964 + }, + { + "epoch": 1.6, + "learning_rate": 4.001875669882101e-05, + "loss": 0.1614, + "step": 5965 + }, + { + "epoch": 1.6, + "learning_rate": 4.0017081993569134e-05, + "loss": 0.1418, + "step": 5966 + }, + { + "epoch": 1.6, + "learning_rate": 4.001540728831726e-05, + "loss": 0.3912, + "step": 5967 + }, + { + "epoch": 1.6, + "learning_rate": 4.001373258306538e-05, + "loss": 0.1075, + "step": 5968 + }, + { + "epoch": 1.6, + "learning_rate": 4.0012057877813506e-05, + "loss": 0.1766, + "step": 5969 + }, + { + "epoch": 1.6, + "learning_rate": 4.001038317256163e-05, + "loss": 0.153, + "step": 5970 + }, + { + "epoch": 1.6, + "learning_rate": 4.0008708467309755e-05, + "loss": 0.1319, + "step": 5971 + }, + { + "epoch": 1.6, + "learning_rate": 4.000703376205788e-05, + "loss": 0.1469, + "step": 5972 + }, + { + "epoch": 1.6, + "learning_rate": 4.0005359056806e-05, + "loss": 0.1692, + "step": 5973 + }, + { + "epoch": 1.6, + "learning_rate": 4.000368435155413e-05, + "loss": 0.1442, + "step": 5974 + }, + { + "epoch": 1.6, + "learning_rate": 4.000200964630225e-05, + "loss": 0.1975, + "step": 5975 + }, + { + "epoch": 1.6, + "learning_rate": 4.0000334941050376e-05, + "loss": 0.1237, + "step": 5976 + }, + { + "epoch": 1.6, + "learning_rate": 3.99986602357985e-05, + "loss": 0.1451, + "step": 5977 + }, + { + "epoch": 1.6, + "learning_rate": 3.9996985530546624e-05, + "loss": 0.1227, + "step": 5978 + }, + { + "epoch": 1.6, + "learning_rate": 3.999531082529475e-05, + "loss": 0.1224, + "step": 5979 + }, + { + "epoch": 1.6, + "learning_rate": 3.999363612004288e-05, + "loss": 0.1395, + "step": 5980 + }, + { + "epoch": 1.6, + "learning_rate": 3.9991961414791e-05, + "loss": 0.2446, + "step": 5981 + }, + { + "epoch": 1.6, + "learning_rate": 3.999028670953912e-05, + "loss": 0.1141, + "step": 5982 + }, + { + "epoch": 1.6, + "learning_rate": 3.9988612004287245e-05, + "loss": 0.1356, + "step": 5983 + }, + { + "epoch": 1.6, + "learning_rate": 3.998693729903537e-05, + "loss": 0.1309, + "step": 5984 + }, + { + "epoch": 1.6, + "learning_rate": 3.9985262593783494e-05, + "loss": 0.143, + "step": 5985 + }, + { + "epoch": 1.6, + "learning_rate": 3.998358788853162e-05, + "loss": 0.1282, + "step": 5986 + }, + { + "epoch": 1.6, + "learning_rate": 3.998191318327975e-05, + "loss": 0.1664, + "step": 5987 + }, + { + "epoch": 1.6, + "learning_rate": 3.998023847802787e-05, + "loss": 0.1412, + "step": 5988 + }, + { + "epoch": 1.6, + "learning_rate": 3.9978563772776e-05, + "loss": 0.2457, + "step": 5989 + }, + { + "epoch": 1.61, + "learning_rate": 3.997688906752412e-05, + "loss": 0.1747, + "step": 5990 + }, + { + "epoch": 1.61, + "learning_rate": 3.997521436227224e-05, + "loss": 0.2668, + "step": 5991 + }, + { + "epoch": 1.61, + "learning_rate": 3.997353965702036e-05, + "loss": 0.1782, + "step": 5992 + }, + { + "epoch": 1.61, + "learning_rate": 3.997186495176849e-05, + "loss": 0.1296, + "step": 5993 + }, + { + "epoch": 1.61, + "learning_rate": 3.997019024651661e-05, + "loss": 0.5272, + "step": 5994 + }, + { + "epoch": 1.61, + "learning_rate": 3.996851554126474e-05, + "loss": 0.1536, + "step": 5995 + }, + { + "epoch": 1.61, + "learning_rate": 3.996684083601287e-05, + "loss": 0.1319, + "step": 5996 + }, + { + "epoch": 1.61, + "learning_rate": 3.996516613076099e-05, + "loss": 0.1407, + "step": 5997 + }, + { + "epoch": 1.61, + "learning_rate": 3.9963491425509115e-05, + "loss": 0.2139, + "step": 5998 + }, + { + "epoch": 1.61, + "learning_rate": 3.996181672025724e-05, + "loss": 0.2229, + "step": 5999 + }, + { + "epoch": 1.61, + "learning_rate": 3.996014201500536e-05, + "loss": 0.2233, + "step": 6000 + }, + { + "epoch": 1.61, + "learning_rate": 3.995846730975348e-05, + "loss": 0.3308, + "step": 6001 + }, + { + "epoch": 1.61, + "learning_rate": 3.9956792604501605e-05, + "loss": 0.1361, + "step": 6002 + }, + { + "epoch": 1.61, + "learning_rate": 3.9955117899249736e-05, + "loss": 0.2425, + "step": 6003 + }, + { + "epoch": 1.61, + "learning_rate": 3.995344319399786e-05, + "loss": 0.1314, + "step": 6004 + }, + { + "epoch": 1.61, + "learning_rate": 3.9951768488745985e-05, + "loss": 0.1339, + "step": 6005 + }, + { + "epoch": 1.61, + "learning_rate": 3.995009378349411e-05, + "loss": 0.1243, + "step": 6006 + }, + { + "epoch": 1.61, + "learning_rate": 3.994841907824223e-05, + "loss": 0.1208, + "step": 6007 + }, + { + "epoch": 1.61, + "learning_rate": 3.994674437299036e-05, + "loss": 0.1241, + "step": 6008 + }, + { + "epoch": 1.61, + "learning_rate": 3.9945069667738475e-05, + "loss": 0.1202, + "step": 6009 + }, + { + "epoch": 1.61, + "learning_rate": 3.99433949624866e-05, + "loss": 0.1562, + "step": 6010 + }, + { + "epoch": 1.61, + "learning_rate": 3.994172025723473e-05, + "loss": 0.1332, + "step": 6011 + }, + { + "epoch": 1.61, + "learning_rate": 3.9940045551982854e-05, + "loss": 0.1548, + "step": 6012 + }, + { + "epoch": 1.61, + "learning_rate": 3.993837084673098e-05, + "loss": 0.1604, + "step": 6013 + }, + { + "epoch": 1.61, + "learning_rate": 3.99366961414791e-05, + "loss": 0.119, + "step": 6014 + }, + { + "epoch": 1.61, + "learning_rate": 3.993502143622723e-05, + "loss": 0.1522, + "step": 6015 + }, + { + "epoch": 1.61, + "learning_rate": 3.993334673097535e-05, + "loss": 0.1302, + "step": 6016 + }, + { + "epoch": 1.61, + "learning_rate": 3.9931672025723475e-05, + "loss": 0.1407, + "step": 6017 + }, + { + "epoch": 1.61, + "learning_rate": 3.99299973204716e-05, + "loss": 0.1402, + "step": 6018 + }, + { + "epoch": 1.61, + "learning_rate": 3.9928322615219724e-05, + "loss": 0.1893, + "step": 6019 + }, + { + "epoch": 1.61, + "learning_rate": 3.992664790996785e-05, + "loss": 0.1767, + "step": 6020 + }, + { + "epoch": 1.61, + "learning_rate": 3.992497320471597e-05, + "loss": 0.2193, + "step": 6021 + }, + { + "epoch": 1.61, + "learning_rate": 3.9923298499464096e-05, + "loss": 0.2393, + "step": 6022 + }, + { + "epoch": 1.61, + "learning_rate": 3.992162379421222e-05, + "loss": 0.2019, + "step": 6023 + }, + { + "epoch": 1.61, + "learning_rate": 3.9919949088960345e-05, + "loss": 0.156, + "step": 6024 + }, + { + "epoch": 1.61, + "learning_rate": 3.991827438370847e-05, + "loss": 0.1887, + "step": 6025 + }, + { + "epoch": 1.61, + "learning_rate": 3.991659967845659e-05, + "loss": 0.1277, + "step": 6026 + }, + { + "epoch": 1.61, + "learning_rate": 3.991492497320472e-05, + "loss": 0.139, + "step": 6027 + }, + { + "epoch": 1.62, + "learning_rate": 3.991325026795284e-05, + "loss": 0.1908, + "step": 6028 + }, + { + "epoch": 1.62, + "learning_rate": 3.9911575562700966e-05, + "loss": 0.2416, + "step": 6029 + }, + { + "epoch": 1.62, + "learning_rate": 3.990990085744909e-05, + "loss": 0.1455, + "step": 6030 + }, + { + "epoch": 1.62, + "learning_rate": 3.9908226152197214e-05, + "loss": 0.2483, + "step": 6031 + }, + { + "epoch": 1.62, + "learning_rate": 3.990655144694534e-05, + "loss": 0.2517, + "step": 6032 + }, + { + "epoch": 1.62, + "learning_rate": 3.990487674169346e-05, + "loss": 0.1263, + "step": 6033 + }, + { + "epoch": 1.62, + "learning_rate": 3.990320203644159e-05, + "loss": 0.1615, + "step": 6034 + }, + { + "epoch": 1.62, + "learning_rate": 3.990152733118971e-05, + "loss": 0.1442, + "step": 6035 + }, + { + "epoch": 1.62, + "learning_rate": 3.989985262593784e-05, + "loss": 0.1747, + "step": 6036 + }, + { + "epoch": 1.62, + "learning_rate": 3.989817792068596e-05, + "loss": 0.2415, + "step": 6037 + }, + { + "epoch": 1.62, + "learning_rate": 3.9896503215434083e-05, + "loss": 0.1375, + "step": 6038 + }, + { + "epoch": 1.62, + "learning_rate": 3.989482851018221e-05, + "loss": 0.1486, + "step": 6039 + }, + { + "epoch": 1.62, + "learning_rate": 3.989315380493033e-05, + "loss": 0.1804, + "step": 6040 + }, + { + "epoch": 1.62, + "learning_rate": 3.9891479099678456e-05, + "loss": 0.1183, + "step": 6041 + }, + { + "epoch": 1.62, + "learning_rate": 3.988980439442658e-05, + "loss": 0.1851, + "step": 6042 + }, + { + "epoch": 1.62, + "learning_rate": 3.988812968917471e-05, + "loss": 0.3398, + "step": 6043 + }, + { + "epoch": 1.62, + "learning_rate": 3.9886454983922836e-05, + "loss": 0.3053, + "step": 6044 + }, + { + "epoch": 1.62, + "learning_rate": 3.988478027867096e-05, + "loss": 0.1296, + "step": 6045 + }, + { + "epoch": 1.62, + "learning_rate": 3.9883105573419084e-05, + "loss": 0.1446, + "step": 6046 + }, + { + "epoch": 1.62, + "learning_rate": 3.98814308681672e-05, + "loss": 0.1501, + "step": 6047 + }, + { + "epoch": 1.62, + "learning_rate": 3.9879756162915326e-05, + "loss": 0.2694, + "step": 6048 + }, + { + "epoch": 1.62, + "learning_rate": 3.987808145766345e-05, + "loss": 0.2122, + "step": 6049 + }, + { + "epoch": 1.62, + "learning_rate": 3.9876406752411574e-05, + "loss": 0.1952, + "step": 6050 + }, + { + "epoch": 1.62, + "learning_rate": 3.9874732047159705e-05, + "loss": 0.1555, + "step": 6051 + }, + { + "epoch": 1.62, + "learning_rate": 3.987305734190783e-05, + "loss": 0.1557, + "step": 6052 + }, + { + "epoch": 1.62, + "learning_rate": 3.9871382636655953e-05, + "loss": 0.1974, + "step": 6053 + }, + { + "epoch": 1.62, + "learning_rate": 3.986970793140408e-05, + "loss": 0.1287, + "step": 6054 + }, + { + "epoch": 1.62, + "learning_rate": 3.98680332261522e-05, + "loss": 0.2398, + "step": 6055 + }, + { + "epoch": 1.62, + "learning_rate": 3.986635852090032e-05, + "loss": 0.148, + "step": 6056 + }, + { + "epoch": 1.62, + "learning_rate": 3.9864683815648443e-05, + "loss": 0.1345, + "step": 6057 + }, + { + "epoch": 1.62, + "learning_rate": 3.986300911039657e-05, + "loss": 0.1404, + "step": 6058 + }, + { + "epoch": 1.62, + "learning_rate": 3.98613344051447e-05, + "loss": 0.1446, + "step": 6059 + }, + { + "epoch": 1.62, + "learning_rate": 3.985965969989282e-05, + "loss": 0.1332, + "step": 6060 + }, + { + "epoch": 1.62, + "learning_rate": 3.985798499464095e-05, + "loss": 0.1566, + "step": 6061 + }, + { + "epoch": 1.62, + "learning_rate": 3.985631028938907e-05, + "loss": 0.1382, + "step": 6062 + }, + { + "epoch": 1.62, + "learning_rate": 3.9854635584137196e-05, + "loss": 0.306, + "step": 6063 + }, + { + "epoch": 1.62, + "learning_rate": 3.985296087888532e-05, + "loss": 0.1445, + "step": 6064 + }, + { + "epoch": 1.63, + "learning_rate": 3.985128617363344e-05, + "loss": 0.1352, + "step": 6065 + }, + { + "epoch": 1.63, + "learning_rate": 3.984961146838156e-05, + "loss": 0.1755, + "step": 6066 + }, + { + "epoch": 1.63, + "learning_rate": 3.984793676312969e-05, + "loss": 0.1381, + "step": 6067 + }, + { + "epoch": 1.63, + "learning_rate": 3.9846262057877817e-05, + "loss": 0.1438, + "step": 6068 + }, + { + "epoch": 1.63, + "learning_rate": 3.984458735262594e-05, + "loss": 0.1207, + "step": 6069 + }, + { + "epoch": 1.63, + "learning_rate": 3.9842912647374065e-05, + "loss": 0.1282, + "step": 6070 + }, + { + "epoch": 1.63, + "learning_rate": 3.984123794212219e-05, + "loss": 0.3042, + "step": 6071 + }, + { + "epoch": 1.63, + "learning_rate": 3.983956323687031e-05, + "loss": 0.1362, + "step": 6072 + }, + { + "epoch": 1.63, + "learning_rate": 3.983788853161844e-05, + "loss": 0.1906, + "step": 6073 + }, + { + "epoch": 1.63, + "learning_rate": 3.983621382636656e-05, + "loss": 0.1508, + "step": 6074 + }, + { + "epoch": 1.63, + "learning_rate": 3.9834539121114686e-05, + "loss": 0.285, + "step": 6075 + }, + { + "epoch": 1.63, + "learning_rate": 3.983286441586281e-05, + "loss": 0.2052, + "step": 6076 + }, + { + "epoch": 1.63, + "learning_rate": 3.9831189710610934e-05, + "loss": 0.188, + "step": 6077 + }, + { + "epoch": 1.63, + "learning_rate": 3.982951500535906e-05, + "loss": 0.1514, + "step": 6078 + }, + { + "epoch": 1.63, + "learning_rate": 3.982784030010718e-05, + "loss": 0.3336, + "step": 6079 + }, + { + "epoch": 1.63, + "learning_rate": 3.982616559485531e-05, + "loss": 0.2499, + "step": 6080 + }, + { + "epoch": 1.63, + "learning_rate": 3.982449088960343e-05, + "loss": 0.1419, + "step": 6081 + }, + { + "epoch": 1.63, + "learning_rate": 3.9822816184351555e-05, + "loss": 0.2791, + "step": 6082 + }, + { + "epoch": 1.63, + "learning_rate": 3.982114147909968e-05, + "loss": 0.1517, + "step": 6083 + }, + { + "epoch": 1.63, + "learning_rate": 3.9819466773847804e-05, + "loss": 0.2171, + "step": 6084 + }, + { + "epoch": 1.63, + "learning_rate": 3.981779206859593e-05, + "loss": 0.1338, + "step": 6085 + }, + { + "epoch": 1.63, + "learning_rate": 3.981611736334405e-05, + "loss": 0.114, + "step": 6086 + }, + { + "epoch": 1.63, + "learning_rate": 3.9814442658092177e-05, + "loss": 0.1249, + "step": 6087 + }, + { + "epoch": 1.63, + "learning_rate": 3.98127679528403e-05, + "loss": 0.1245, + "step": 6088 + }, + { + "epoch": 1.63, + "learning_rate": 3.9811093247588425e-05, + "loss": 0.2738, + "step": 6089 + }, + { + "epoch": 1.63, + "learning_rate": 3.980941854233655e-05, + "loss": 0.1236, + "step": 6090 + }, + { + "epoch": 1.63, + "learning_rate": 3.980774383708467e-05, + "loss": 0.1086, + "step": 6091 + }, + { + "epoch": 1.63, + "learning_rate": 3.9806069131832804e-05, + "loss": 0.2806, + "step": 6092 + }, + { + "epoch": 1.63, + "learning_rate": 3.980439442658092e-05, + "loss": 0.1279, + "step": 6093 + }, + { + "epoch": 1.63, + "learning_rate": 3.9802719721329046e-05, + "loss": 0.1295, + "step": 6094 + }, + { + "epoch": 1.63, + "learning_rate": 3.980104501607717e-05, + "loss": 0.1531, + "step": 6095 + }, + { + "epoch": 1.63, + "learning_rate": 3.9799370310825294e-05, + "loss": 0.1155, + "step": 6096 + }, + { + "epoch": 1.63, + "learning_rate": 3.979769560557342e-05, + "loss": 0.1287, + "step": 6097 + }, + { + "epoch": 1.63, + "learning_rate": 3.979602090032154e-05, + "loss": 0.2367, + "step": 6098 + }, + { + "epoch": 1.63, + "learning_rate": 3.9794346195069674e-05, + "loss": 0.1368, + "step": 6099 + }, + { + "epoch": 1.63, + "learning_rate": 3.97926714898178e-05, + "loss": 0.1245, + "step": 6100 + }, + { + "epoch": 1.63, + "learning_rate": 3.979099678456592e-05, + "loss": 0.2951, + "step": 6101 + }, + { + "epoch": 1.64, + "learning_rate": 3.9789322079314046e-05, + "loss": 0.1526, + "step": 6102 + }, + { + "epoch": 1.64, + "learning_rate": 3.9787647374062164e-05, + "loss": 0.1407, + "step": 6103 + }, + { + "epoch": 1.64, + "learning_rate": 3.978597266881029e-05, + "loss": 0.1527, + "step": 6104 + }, + { + "epoch": 1.64, + "learning_rate": 3.978429796355841e-05, + "loss": 0.1408, + "step": 6105 + }, + { + "epoch": 1.64, + "learning_rate": 3.9782623258306537e-05, + "loss": 0.1349, + "step": 6106 + }, + { + "epoch": 1.64, + "learning_rate": 3.978094855305467e-05, + "loss": 0.1254, + "step": 6107 + }, + { + "epoch": 1.64, + "learning_rate": 3.977927384780279e-05, + "loss": 0.2523, + "step": 6108 + }, + { + "epoch": 1.64, + "learning_rate": 3.9777599142550916e-05, + "loss": 0.2831, + "step": 6109 + }, + { + "epoch": 1.64, + "learning_rate": 3.977592443729904e-05, + "loss": 0.1874, + "step": 6110 + }, + { + "epoch": 1.64, + "learning_rate": 3.9774249732047164e-05, + "loss": 0.1347, + "step": 6111 + }, + { + "epoch": 1.64, + "learning_rate": 3.977257502679528e-05, + "loss": 0.2276, + "step": 6112 + }, + { + "epoch": 1.64, + "learning_rate": 3.9770900321543406e-05, + "loss": 0.1729, + "step": 6113 + }, + { + "epoch": 1.64, + "learning_rate": 3.976922561629153e-05, + "loss": 0.1255, + "step": 6114 + }, + { + "epoch": 1.64, + "learning_rate": 3.976755091103966e-05, + "loss": 0.1177, + "step": 6115 + }, + { + "epoch": 1.64, + "learning_rate": 3.9765876205787785e-05, + "loss": 0.1452, + "step": 6116 + }, + { + "epoch": 1.64, + "learning_rate": 3.976420150053591e-05, + "loss": 0.1605, + "step": 6117 + }, + { + "epoch": 1.64, + "learning_rate": 3.9762526795284034e-05, + "loss": 0.1283, + "step": 6118 + }, + { + "epoch": 1.64, + "learning_rate": 3.976085209003216e-05, + "loss": 0.1326, + "step": 6119 + }, + { + "epoch": 1.64, + "learning_rate": 3.975917738478028e-05, + "loss": 0.1382, + "step": 6120 + }, + { + "epoch": 1.64, + "learning_rate": 3.97575026795284e-05, + "loss": 0.158, + "step": 6121 + }, + { + "epoch": 1.64, + "learning_rate": 3.9755827974276524e-05, + "loss": 0.1345, + "step": 6122 + }, + { + "epoch": 1.64, + "learning_rate": 3.9754153269024655e-05, + "loss": 0.5072, + "step": 6123 + }, + { + "epoch": 1.64, + "learning_rate": 3.975247856377278e-05, + "loss": 0.1229, + "step": 6124 + }, + { + "epoch": 1.64, + "learning_rate": 3.97508038585209e-05, + "loss": 0.1247, + "step": 6125 + }, + { + "epoch": 1.64, + "learning_rate": 3.974912915326903e-05, + "loss": 0.2326, + "step": 6126 + }, + { + "epoch": 1.64, + "learning_rate": 3.974745444801715e-05, + "loss": 0.1504, + "step": 6127 + }, + { + "epoch": 1.64, + "learning_rate": 3.9745779742765276e-05, + "loss": 0.1858, + "step": 6128 + }, + { + "epoch": 1.64, + "learning_rate": 3.97441050375134e-05, + "loss": 0.1194, + "step": 6129 + }, + { + "epoch": 1.64, + "learning_rate": 3.9742430332261524e-05, + "loss": 0.2342, + "step": 6130 + }, + { + "epoch": 1.64, + "learning_rate": 3.974075562700965e-05, + "loss": 0.2521, + "step": 6131 + }, + { + "epoch": 1.64, + "learning_rate": 3.973908092175777e-05, + "loss": 0.2931, + "step": 6132 + }, + { + "epoch": 1.64, + "learning_rate": 3.97374062165059e-05, + "loss": 0.1781, + "step": 6133 + }, + { + "epoch": 1.64, + "learning_rate": 3.973573151125402e-05, + "loss": 0.1096, + "step": 6134 + }, + { + "epoch": 1.64, + "learning_rate": 3.9734056806002145e-05, + "loss": 0.2079, + "step": 6135 + }, + { + "epoch": 1.64, + "learning_rate": 3.973238210075027e-05, + "loss": 0.3238, + "step": 6136 + }, + { + "epoch": 1.64, + "learning_rate": 3.9730707395498394e-05, + "loss": 0.1667, + "step": 6137 + }, + { + "epoch": 1.64, + "learning_rate": 3.972903269024652e-05, + "loss": 0.1156, + "step": 6138 + }, + { + "epoch": 1.64, + "learning_rate": 3.972735798499464e-05, + "loss": 0.1533, + "step": 6139 + }, + { + "epoch": 1.65, + "learning_rate": 3.9725683279742766e-05, + "loss": 0.1431, + "step": 6140 + }, + { + "epoch": 1.65, + "learning_rate": 3.972400857449089e-05, + "loss": 0.1217, + "step": 6141 + }, + { + "epoch": 1.65, + "learning_rate": 3.9722333869239015e-05, + "loss": 0.2331, + "step": 6142 + }, + { + "epoch": 1.65, + "learning_rate": 3.972065916398714e-05, + "loss": 0.125, + "step": 6143 + }, + { + "epoch": 1.65, + "learning_rate": 3.971898445873526e-05, + "loss": 0.2011, + "step": 6144 + }, + { + "epoch": 1.65, + "learning_rate": 3.971730975348339e-05, + "loss": 0.1426, + "step": 6145 + }, + { + "epoch": 1.65, + "learning_rate": 3.971563504823151e-05, + "loss": 0.1652, + "step": 6146 + }, + { + "epoch": 1.65, + "learning_rate": 3.9713960342979636e-05, + "loss": 0.1437, + "step": 6147 + }, + { + "epoch": 1.65, + "learning_rate": 3.971228563772777e-05, + "loss": 0.1391, + "step": 6148 + }, + { + "epoch": 1.65, + "learning_rate": 3.9710610932475884e-05, + "loss": 0.21, + "step": 6149 + }, + { + "epoch": 1.65, + "learning_rate": 3.970893622722401e-05, + "loss": 0.286, + "step": 6150 + }, + { + "epoch": 1.65, + "learning_rate": 3.970726152197213e-05, + "loss": 0.2688, + "step": 6151 + }, + { + "epoch": 1.65, + "learning_rate": 3.970558681672026e-05, + "loss": 0.2331, + "step": 6152 + }, + { + "epoch": 1.65, + "learning_rate": 3.970391211146838e-05, + "loss": 0.3366, + "step": 6153 + }, + { + "epoch": 1.65, + "learning_rate": 3.9702237406216505e-05, + "loss": 0.1298, + "step": 6154 + }, + { + "epoch": 1.65, + "learning_rate": 3.9700562700964636e-05, + "loss": 0.1508, + "step": 6155 + }, + { + "epoch": 1.65, + "learning_rate": 3.969888799571276e-05, + "loss": 0.1069, + "step": 6156 + }, + { + "epoch": 1.65, + "learning_rate": 3.9697213290460885e-05, + "loss": 0.1799, + "step": 6157 + }, + { + "epoch": 1.65, + "learning_rate": 3.969553858520901e-05, + "loss": 0.1058, + "step": 6158 + }, + { + "epoch": 1.65, + "learning_rate": 3.9693863879957126e-05, + "loss": 0.1366, + "step": 6159 + }, + { + "epoch": 1.65, + "learning_rate": 3.969218917470525e-05, + "loss": 0.1376, + "step": 6160 + }, + { + "epoch": 1.65, + "learning_rate": 3.9690514469453375e-05, + "loss": 0.1864, + "step": 6161 + }, + { + "epoch": 1.65, + "learning_rate": 3.96888397642015e-05, + "loss": 0.3384, + "step": 6162 + }, + { + "epoch": 1.65, + "learning_rate": 3.968716505894963e-05, + "loss": 0.1189, + "step": 6163 + }, + { + "epoch": 1.65, + "learning_rate": 3.9685490353697754e-05, + "loss": 0.1372, + "step": 6164 + }, + { + "epoch": 1.65, + "learning_rate": 3.968381564844588e-05, + "loss": 0.2237, + "step": 6165 + }, + { + "epoch": 1.65, + "learning_rate": 3.9682140943194e-05, + "loss": 0.1171, + "step": 6166 + }, + { + "epoch": 1.65, + "learning_rate": 3.968046623794213e-05, + "loss": 0.1278, + "step": 6167 + }, + { + "epoch": 1.65, + "learning_rate": 3.9678791532690244e-05, + "loss": 0.2086, + "step": 6168 + }, + { + "epoch": 1.65, + "learning_rate": 3.967711682743837e-05, + "loss": 0.167, + "step": 6169 + }, + { + "epoch": 1.65, + "learning_rate": 3.967544212218649e-05, + "loss": 0.2682, + "step": 6170 + }, + { + "epoch": 1.65, + "learning_rate": 3.9673767416934624e-05, + "loss": 0.1751, + "step": 6171 + }, + { + "epoch": 1.65, + "learning_rate": 3.967209271168275e-05, + "loss": 0.1763, + "step": 6172 + }, + { + "epoch": 1.65, + "learning_rate": 3.967041800643087e-05, + "loss": 0.1539, + "step": 6173 + }, + { + "epoch": 1.65, + "learning_rate": 3.9668743301178996e-05, + "loss": 0.142, + "step": 6174 + }, + { + "epoch": 1.65, + "learning_rate": 3.966706859592712e-05, + "loss": 0.2488, + "step": 6175 + }, + { + "epoch": 1.65, + "learning_rate": 3.9665393890675245e-05, + "loss": 0.1121, + "step": 6176 + }, + { + "epoch": 1.66, + "learning_rate": 3.966371918542336e-05, + "loss": 0.13, + "step": 6177 + }, + { + "epoch": 1.66, + "learning_rate": 3.9662044480171486e-05, + "loss": 0.1593, + "step": 6178 + }, + { + "epoch": 1.66, + "learning_rate": 3.966036977491962e-05, + "loss": 0.121, + "step": 6179 + }, + { + "epoch": 1.66, + "learning_rate": 3.965869506966774e-05, + "loss": 0.1156, + "step": 6180 + }, + { + "epoch": 1.66, + "learning_rate": 3.9657020364415866e-05, + "loss": 0.1066, + "step": 6181 + }, + { + "epoch": 1.66, + "learning_rate": 3.965534565916399e-05, + "loss": 0.2084, + "step": 6182 + }, + { + "epoch": 1.66, + "learning_rate": 3.9653670953912114e-05, + "loss": 0.1833, + "step": 6183 + }, + { + "epoch": 1.66, + "learning_rate": 3.965199624866024e-05, + "loss": 0.1387, + "step": 6184 + }, + { + "epoch": 1.66, + "learning_rate": 3.965032154340836e-05, + "loss": 0.1404, + "step": 6185 + }, + { + "epoch": 1.66, + "learning_rate": 3.964864683815649e-05, + "loss": 0.1715, + "step": 6186 + }, + { + "epoch": 1.66, + "learning_rate": 3.964697213290461e-05, + "loss": 0.2121, + "step": 6187 + }, + { + "epoch": 1.66, + "learning_rate": 3.9645297427652735e-05, + "loss": 0.1075, + "step": 6188 + }, + { + "epoch": 1.66, + "learning_rate": 3.964362272240086e-05, + "loss": 0.1441, + "step": 6189 + }, + { + "epoch": 1.66, + "learning_rate": 3.9641948017148984e-05, + "loss": 0.2142, + "step": 6190 + }, + { + "epoch": 1.66, + "learning_rate": 3.964027331189711e-05, + "loss": 0.2893, + "step": 6191 + }, + { + "epoch": 1.66, + "learning_rate": 3.963859860664523e-05, + "loss": 0.1761, + "step": 6192 + }, + { + "epoch": 1.66, + "learning_rate": 3.9636923901393356e-05, + "loss": 0.1362, + "step": 6193 + }, + { + "epoch": 1.66, + "learning_rate": 3.963524919614148e-05, + "loss": 0.1987, + "step": 6194 + }, + { + "epoch": 1.66, + "learning_rate": 3.9633574490889605e-05, + "loss": 0.2333, + "step": 6195 + }, + { + "epoch": 1.66, + "learning_rate": 3.963189978563773e-05, + "loss": 0.1802, + "step": 6196 + }, + { + "epoch": 1.66, + "learning_rate": 3.963022508038585e-05, + "loss": 0.146, + "step": 6197 + }, + { + "epoch": 1.66, + "learning_rate": 3.962855037513398e-05, + "loss": 0.1385, + "step": 6198 + }, + { + "epoch": 1.66, + "learning_rate": 3.96268756698821e-05, + "loss": 0.326, + "step": 6199 + }, + { + "epoch": 1.66, + "learning_rate": 3.9625200964630226e-05, + "loss": 0.2524, + "step": 6200 + }, + { + "epoch": 1.66, + "learning_rate": 3.962352625937835e-05, + "loss": 0.1936, + "step": 6201 + }, + { + "epoch": 1.66, + "learning_rate": 3.9621851554126474e-05, + "loss": 0.1785, + "step": 6202 + }, + { + "epoch": 1.66, + "learning_rate": 3.96201768488746e-05, + "loss": 0.1812, + "step": 6203 + }, + { + "epoch": 1.66, + "learning_rate": 3.961850214362273e-05, + "loss": 0.1659, + "step": 6204 + }, + { + "epoch": 1.66, + "learning_rate": 3.961682743837085e-05, + "loss": 0.2607, + "step": 6205 + }, + { + "epoch": 1.66, + "learning_rate": 3.961515273311897e-05, + "loss": 0.2054, + "step": 6206 + }, + { + "epoch": 1.66, + "learning_rate": 3.9613478027867095e-05, + "loss": 0.217, + "step": 6207 + }, + { + "epoch": 1.66, + "learning_rate": 3.961180332261522e-05, + "loss": 0.166, + "step": 6208 + }, + { + "epoch": 1.66, + "learning_rate": 3.9610128617363344e-05, + "loss": 0.139, + "step": 6209 + }, + { + "epoch": 1.66, + "learning_rate": 3.960845391211147e-05, + "loss": 0.1533, + "step": 6210 + }, + { + "epoch": 1.66, + "learning_rate": 3.96067792068596e-05, + "loss": 0.1798, + "step": 6211 + }, + { + "epoch": 1.66, + "learning_rate": 3.960510450160772e-05, + "loss": 0.169, + "step": 6212 + }, + { + "epoch": 1.66, + "learning_rate": 3.960342979635585e-05, + "loss": 0.281, + "step": 6213 + }, + { + "epoch": 1.67, + "learning_rate": 3.960175509110397e-05, + "loss": 0.1364, + "step": 6214 + }, + { + "epoch": 1.67, + "learning_rate": 3.960008038585209e-05, + "loss": 0.2333, + "step": 6215 + }, + { + "epoch": 1.67, + "learning_rate": 3.959840568060021e-05, + "loss": 0.2682, + "step": 6216 + }, + { + "epoch": 1.67, + "learning_rate": 3.959673097534834e-05, + "loss": 0.1162, + "step": 6217 + }, + { + "epoch": 1.67, + "learning_rate": 3.959505627009646e-05, + "loss": 0.2226, + "step": 6218 + }, + { + "epoch": 1.67, + "learning_rate": 3.959338156484459e-05, + "loss": 0.1302, + "step": 6219 + }, + { + "epoch": 1.67, + "learning_rate": 3.959170685959272e-05, + "loss": 0.1186, + "step": 6220 + }, + { + "epoch": 1.67, + "learning_rate": 3.959003215434084e-05, + "loss": 0.1341, + "step": 6221 + }, + { + "epoch": 1.67, + "learning_rate": 3.9588357449088965e-05, + "loss": 0.1258, + "step": 6222 + }, + { + "epoch": 1.67, + "learning_rate": 3.958668274383709e-05, + "loss": 0.2664, + "step": 6223 + }, + { + "epoch": 1.67, + "learning_rate": 3.958500803858521e-05, + "loss": 0.1873, + "step": 6224 + }, + { + "epoch": 1.67, + "learning_rate": 3.958333333333333e-05, + "loss": 0.1692, + "step": 6225 + }, + { + "epoch": 1.67, + "learning_rate": 3.9581658628081455e-05, + "loss": 0.1464, + "step": 6226 + }, + { + "epoch": 1.67, + "learning_rate": 3.9579983922829586e-05, + "loss": 0.2464, + "step": 6227 + }, + { + "epoch": 1.67, + "learning_rate": 3.957830921757771e-05, + "loss": 0.1121, + "step": 6228 + }, + { + "epoch": 1.67, + "learning_rate": 3.9576634512325835e-05, + "loss": 0.1265, + "step": 6229 + }, + { + "epoch": 1.67, + "learning_rate": 3.957495980707396e-05, + "loss": 0.1211, + "step": 6230 + }, + { + "epoch": 1.67, + "learning_rate": 3.957328510182208e-05, + "loss": 0.1837, + "step": 6231 + }, + { + "epoch": 1.67, + "learning_rate": 3.957161039657021e-05, + "loss": 0.1345, + "step": 6232 + }, + { + "epoch": 1.67, + "learning_rate": 3.9569935691318325e-05, + "loss": 0.2099, + "step": 6233 + }, + { + "epoch": 1.67, + "learning_rate": 3.956826098606645e-05, + "loss": 0.2173, + "step": 6234 + }, + { + "epoch": 1.67, + "learning_rate": 3.956658628081458e-05, + "loss": 0.1211, + "step": 6235 + }, + { + "epoch": 1.67, + "learning_rate": 3.9564911575562704e-05, + "loss": 0.1231, + "step": 6236 + }, + { + "epoch": 1.67, + "learning_rate": 3.956323687031083e-05, + "loss": 0.168, + "step": 6237 + }, + { + "epoch": 1.67, + "learning_rate": 3.956156216505895e-05, + "loss": 0.333, + "step": 6238 + }, + { + "epoch": 1.67, + "learning_rate": 3.955988745980708e-05, + "loss": 0.2174, + "step": 6239 + }, + { + "epoch": 1.67, + "learning_rate": 3.95582127545552e-05, + "loss": 0.1191, + "step": 6240 + }, + { + "epoch": 1.67, + "learning_rate": 3.9556538049303325e-05, + "loss": 0.1334, + "step": 6241 + }, + { + "epoch": 1.67, + "learning_rate": 3.955486334405145e-05, + "loss": 0.1116, + "step": 6242 + }, + { + "epoch": 1.67, + "learning_rate": 3.9553188638799574e-05, + "loss": 0.1525, + "step": 6243 + }, + { + "epoch": 1.67, + "learning_rate": 3.95515139335477e-05, + "loss": 0.3015, + "step": 6244 + }, + { + "epoch": 1.67, + "learning_rate": 3.954983922829582e-05, + "loss": 0.1135, + "step": 6245 + }, + { + "epoch": 1.67, + "learning_rate": 3.9548164523043946e-05, + "loss": 0.1157, + "step": 6246 + }, + { + "epoch": 1.67, + "learning_rate": 3.954648981779207e-05, + "loss": 0.297, + "step": 6247 + }, + { + "epoch": 1.67, + "learning_rate": 3.9544815112540195e-05, + "loss": 0.1444, + "step": 6248 + }, + { + "epoch": 1.67, + "learning_rate": 3.954314040728832e-05, + "loss": 0.1276, + "step": 6249 + }, + { + "epoch": 1.67, + "learning_rate": 3.954146570203644e-05, + "loss": 0.3648, + "step": 6250 + }, + { + "epoch": 1.67, + "learning_rate": 3.953979099678457e-05, + "loss": 0.4916, + "step": 6251 + }, + { + "epoch": 1.68, + "learning_rate": 3.953811629153269e-05, + "loss": 0.1053, + "step": 6252 + }, + { + "epoch": 1.68, + "learning_rate": 3.9536441586280816e-05, + "loss": 0.1899, + "step": 6253 + }, + { + "epoch": 1.68, + "learning_rate": 3.953476688102894e-05, + "loss": 0.1568, + "step": 6254 + }, + { + "epoch": 1.68, + "learning_rate": 3.9533092175777064e-05, + "loss": 0.1182, + "step": 6255 + }, + { + "epoch": 1.68, + "learning_rate": 3.953141747052519e-05, + "loss": 0.114, + "step": 6256 + }, + { + "epoch": 1.68, + "learning_rate": 3.952974276527331e-05, + "loss": 0.2302, + "step": 6257 + }, + { + "epoch": 1.68, + "learning_rate": 3.952806806002144e-05, + "loss": 0.1415, + "step": 6258 + }, + { + "epoch": 1.68, + "learning_rate": 3.952639335476956e-05, + "loss": 0.334, + "step": 6259 + }, + { + "epoch": 1.68, + "learning_rate": 3.952471864951769e-05, + "loss": 0.1677, + "step": 6260 + }, + { + "epoch": 1.68, + "learning_rate": 3.952304394426581e-05, + "loss": 0.2199, + "step": 6261 + }, + { + "epoch": 1.68, + "learning_rate": 3.9521369239013934e-05, + "loss": 0.1846, + "step": 6262 + }, + { + "epoch": 1.68, + "learning_rate": 3.951969453376206e-05, + "loss": 0.1246, + "step": 6263 + }, + { + "epoch": 1.68, + "learning_rate": 3.951801982851018e-05, + "loss": 0.2462, + "step": 6264 + }, + { + "epoch": 1.68, + "learning_rate": 3.9516345123258306e-05, + "loss": 0.1474, + "step": 6265 + }, + { + "epoch": 1.68, + "learning_rate": 3.951467041800643e-05, + "loss": 0.2692, + "step": 6266 + }, + { + "epoch": 1.68, + "learning_rate": 3.951299571275456e-05, + "loss": 0.1977, + "step": 6267 + }, + { + "epoch": 1.68, + "learning_rate": 3.9511321007502686e-05, + "loss": 0.1291, + "step": 6268 + }, + { + "epoch": 1.68, + "learning_rate": 3.950964630225081e-05, + "loss": 0.2497, + "step": 6269 + }, + { + "epoch": 1.68, + "learning_rate": 3.9507971596998934e-05, + "loss": 0.3232, + "step": 6270 + }, + { + "epoch": 1.68, + "learning_rate": 3.950629689174705e-05, + "loss": 0.1309, + "step": 6271 + }, + { + "epoch": 1.68, + "learning_rate": 3.9504622186495176e-05, + "loss": 0.1333, + "step": 6272 + }, + { + "epoch": 1.68, + "learning_rate": 3.95029474812433e-05, + "loss": 0.2149, + "step": 6273 + }, + { + "epoch": 1.68, + "learning_rate": 3.9501272775991424e-05, + "loss": 0.136, + "step": 6274 + }, + { + "epoch": 1.68, + "learning_rate": 3.9499598070739555e-05, + "loss": 0.2233, + "step": 6275 + }, + { + "epoch": 1.68, + "learning_rate": 3.949792336548768e-05, + "loss": 0.1882, + "step": 6276 + }, + { + "epoch": 1.68, + "learning_rate": 3.9496248660235803e-05, + "loss": 0.1556, + "step": 6277 + }, + { + "epoch": 1.68, + "learning_rate": 3.949457395498393e-05, + "loss": 0.3028, + "step": 6278 + }, + { + "epoch": 1.68, + "learning_rate": 3.949289924973205e-05, + "loss": 0.126, + "step": 6279 + }, + { + "epoch": 1.68, + "learning_rate": 3.949122454448017e-05, + "loss": 0.3304, + "step": 6280 + }, + { + "epoch": 1.68, + "learning_rate": 3.9489549839228294e-05, + "loss": 0.2251, + "step": 6281 + }, + { + "epoch": 1.68, + "learning_rate": 3.948787513397642e-05, + "loss": 0.2263, + "step": 6282 + }, + { + "epoch": 1.68, + "learning_rate": 3.948620042872455e-05, + "loss": 0.1657, + "step": 6283 + }, + { + "epoch": 1.68, + "learning_rate": 3.948452572347267e-05, + "loss": 0.1778, + "step": 6284 + }, + { + "epoch": 1.68, + "learning_rate": 3.94828510182208e-05, + "loss": 0.1417, + "step": 6285 + }, + { + "epoch": 1.68, + "learning_rate": 3.948117631296892e-05, + "loss": 0.104, + "step": 6286 + }, + { + "epoch": 1.68, + "learning_rate": 3.9479501607717046e-05, + "loss": 0.202, + "step": 6287 + }, + { + "epoch": 1.68, + "learning_rate": 3.947782690246517e-05, + "loss": 0.1933, + "step": 6288 + }, + { + "epoch": 1.69, + "learning_rate": 3.947615219721329e-05, + "loss": 0.139, + "step": 6289 + }, + { + "epoch": 1.69, + "learning_rate": 3.947447749196141e-05, + "loss": 0.2025, + "step": 6290 + }, + { + "epoch": 1.69, + "learning_rate": 3.947280278670954e-05, + "loss": 0.3523, + "step": 6291 + }, + { + "epoch": 1.69, + "learning_rate": 3.9471128081457667e-05, + "loss": 0.1339, + "step": 6292 + }, + { + "epoch": 1.69, + "learning_rate": 3.946945337620579e-05, + "loss": 0.1671, + "step": 6293 + }, + { + "epoch": 1.69, + "learning_rate": 3.9467778670953915e-05, + "loss": 0.1357, + "step": 6294 + }, + { + "epoch": 1.69, + "learning_rate": 3.946610396570204e-05, + "loss": 0.1525, + "step": 6295 + }, + { + "epoch": 1.69, + "learning_rate": 3.9464429260450163e-05, + "loss": 0.1508, + "step": 6296 + }, + { + "epoch": 1.69, + "learning_rate": 3.946275455519829e-05, + "loss": 0.357, + "step": 6297 + }, + { + "epoch": 1.69, + "learning_rate": 3.946107984994641e-05, + "loss": 0.2458, + "step": 6298 + }, + { + "epoch": 1.69, + "learning_rate": 3.9459405144694536e-05, + "loss": 0.1135, + "step": 6299 + }, + { + "epoch": 1.69, + "learning_rate": 3.945773043944266e-05, + "loss": 0.2894, + "step": 6300 + }, + { + "epoch": 1.69, + "learning_rate": 3.9456055734190784e-05, + "loss": 0.1365, + "step": 6301 + }, + { + "epoch": 1.69, + "learning_rate": 3.945438102893891e-05, + "loss": 0.1157, + "step": 6302 + }, + { + "epoch": 1.69, + "learning_rate": 3.945270632368703e-05, + "loss": 0.1278, + "step": 6303 + }, + { + "epoch": 1.69, + "learning_rate": 3.945103161843516e-05, + "loss": 0.1626, + "step": 6304 + }, + { + "epoch": 1.69, + "learning_rate": 3.944935691318328e-05, + "loss": 0.1261, + "step": 6305 + }, + { + "epoch": 1.69, + "learning_rate": 3.9447682207931406e-05, + "loss": 0.2364, + "step": 6306 + }, + { + "epoch": 1.69, + "learning_rate": 3.944600750267953e-05, + "loss": 0.1588, + "step": 6307 + }, + { + "epoch": 1.69, + "learning_rate": 3.9444332797427654e-05, + "loss": 0.1996, + "step": 6308 + }, + { + "epoch": 1.69, + "learning_rate": 3.944265809217578e-05, + "loss": 0.1085, + "step": 6309 + }, + { + "epoch": 1.69, + "learning_rate": 3.94409833869239e-05, + "loss": 0.1999, + "step": 6310 + }, + { + "epoch": 1.69, + "learning_rate": 3.9439308681672027e-05, + "loss": 0.1297, + "step": 6311 + }, + { + "epoch": 1.69, + "learning_rate": 3.943763397642015e-05, + "loss": 0.1721, + "step": 6312 + }, + { + "epoch": 1.69, + "learning_rate": 3.9435959271168275e-05, + "loss": 0.1744, + "step": 6313 + }, + { + "epoch": 1.69, + "learning_rate": 3.94342845659164e-05, + "loss": 0.2732, + "step": 6314 + }, + { + "epoch": 1.69, + "learning_rate": 3.9432609860664523e-05, + "loss": 0.3105, + "step": 6315 + }, + { + "epoch": 1.69, + "learning_rate": 3.9430935155412654e-05, + "loss": 0.1191, + "step": 6316 + }, + { + "epoch": 1.69, + "learning_rate": 3.942926045016077e-05, + "loss": 0.1346, + "step": 6317 + }, + { + "epoch": 1.69, + "learning_rate": 3.9427585744908896e-05, + "loss": 0.1417, + "step": 6318 + }, + { + "epoch": 1.69, + "learning_rate": 3.942591103965702e-05, + "loss": 0.2638, + "step": 6319 + }, + { + "epoch": 1.69, + "learning_rate": 3.9424236334405144e-05, + "loss": 0.3023, + "step": 6320 + }, + { + "epoch": 1.69, + "learning_rate": 3.942256162915327e-05, + "loss": 0.2058, + "step": 6321 + }, + { + "epoch": 1.69, + "learning_rate": 3.942088692390139e-05, + "loss": 0.1413, + "step": 6322 + }, + { + "epoch": 1.69, + "learning_rate": 3.9419212218649524e-05, + "loss": 0.1299, + "step": 6323 + }, + { + "epoch": 1.69, + "learning_rate": 3.941753751339765e-05, + "loss": 0.1281, + "step": 6324 + }, + { + "epoch": 1.69, + "learning_rate": 3.941586280814577e-05, + "loss": 0.2308, + "step": 6325 + }, + { + "epoch": 1.7, + "learning_rate": 3.9414188102893896e-05, + "loss": 0.4367, + "step": 6326 + }, + { + "epoch": 1.7, + "learning_rate": 3.9412513397642014e-05, + "loss": 0.1657, + "step": 6327 + }, + { + "epoch": 1.7, + "learning_rate": 3.941083869239014e-05, + "loss": 0.2319, + "step": 6328 + }, + { + "epoch": 1.7, + "learning_rate": 3.940916398713826e-05, + "loss": 0.1533, + "step": 6329 + }, + { + "epoch": 1.7, + "learning_rate": 3.9407489281886387e-05, + "loss": 0.1289, + "step": 6330 + }, + { + "epoch": 1.7, + "learning_rate": 3.940581457663452e-05, + "loss": 0.1354, + "step": 6331 + }, + { + "epoch": 1.7, + "learning_rate": 3.940413987138264e-05, + "loss": 0.1328, + "step": 6332 + }, + { + "epoch": 1.7, + "learning_rate": 3.9402465166130766e-05, + "loss": 0.2186, + "step": 6333 + }, + { + "epoch": 1.7, + "learning_rate": 3.940079046087889e-05, + "loss": 0.2163, + "step": 6334 + }, + { + "epoch": 1.7, + "learning_rate": 3.9399115755627014e-05, + "loss": 0.1617, + "step": 6335 + }, + { + "epoch": 1.7, + "learning_rate": 3.939744105037513e-05, + "loss": 0.2729, + "step": 6336 + }, + { + "epoch": 1.7, + "learning_rate": 3.9395766345123256e-05, + "loss": 0.1096, + "step": 6337 + }, + { + "epoch": 1.7, + "learning_rate": 3.939409163987138e-05, + "loss": 0.1136, + "step": 6338 + }, + { + "epoch": 1.7, + "learning_rate": 3.939241693461951e-05, + "loss": 0.2812, + "step": 6339 + }, + { + "epoch": 1.7, + "learning_rate": 3.9390742229367635e-05, + "loss": 0.1555, + "step": 6340 + }, + { + "epoch": 1.7, + "learning_rate": 3.938906752411576e-05, + "loss": 0.1959, + "step": 6341 + }, + { + "epoch": 1.7, + "learning_rate": 3.9387392818863884e-05, + "loss": 0.1367, + "step": 6342 + }, + { + "epoch": 1.7, + "learning_rate": 3.938571811361201e-05, + "loss": 0.2249, + "step": 6343 + }, + { + "epoch": 1.7, + "learning_rate": 3.938404340836013e-05, + "loss": 0.137, + "step": 6344 + }, + { + "epoch": 1.7, + "learning_rate": 3.938236870310825e-05, + "loss": 0.1336, + "step": 6345 + }, + { + "epoch": 1.7, + "learning_rate": 3.9380693997856374e-05, + "loss": 0.328, + "step": 6346 + }, + { + "epoch": 1.7, + "learning_rate": 3.9379019292604505e-05, + "loss": 0.1688, + "step": 6347 + }, + { + "epoch": 1.7, + "learning_rate": 3.937734458735263e-05, + "loss": 0.2246, + "step": 6348 + }, + { + "epoch": 1.7, + "learning_rate": 3.937566988210075e-05, + "loss": 0.1319, + "step": 6349 + }, + { + "epoch": 1.7, + "learning_rate": 3.937399517684888e-05, + "loss": 0.1341, + "step": 6350 + }, + { + "epoch": 1.7, + "learning_rate": 3.9372320471597e-05, + "loss": 0.1317, + "step": 6351 + }, + { + "epoch": 1.7, + "learning_rate": 3.9370645766345126e-05, + "loss": 0.2679, + "step": 6352 + }, + { + "epoch": 1.7, + "learning_rate": 3.936897106109325e-05, + "loss": 0.2245, + "step": 6353 + }, + { + "epoch": 1.7, + "learning_rate": 3.9367296355841374e-05, + "loss": 0.1596, + "step": 6354 + }, + { + "epoch": 1.7, + "learning_rate": 3.93656216505895e-05, + "loss": 0.1471, + "step": 6355 + }, + { + "epoch": 1.7, + "learning_rate": 3.936394694533762e-05, + "loss": 0.1545, + "step": 6356 + }, + { + "epoch": 1.7, + "learning_rate": 3.936227224008575e-05, + "loss": 0.1978, + "step": 6357 + }, + { + "epoch": 1.7, + "learning_rate": 3.936059753483387e-05, + "loss": 0.1577, + "step": 6358 + }, + { + "epoch": 1.7, + "learning_rate": 3.9358922829581995e-05, + "loss": 0.1468, + "step": 6359 + }, + { + "epoch": 1.7, + "learning_rate": 3.935724812433012e-05, + "loss": 0.1194, + "step": 6360 + }, + { + "epoch": 1.7, + "learning_rate": 3.9355573419078244e-05, + "loss": 0.1216, + "step": 6361 + }, + { + "epoch": 1.7, + "learning_rate": 3.935389871382637e-05, + "loss": 0.1569, + "step": 6362 + }, + { + "epoch": 1.7, + "learning_rate": 3.935222400857449e-05, + "loss": 0.1507, + "step": 6363 + }, + { + "epoch": 1.71, + "learning_rate": 3.9350549303322616e-05, + "loss": 0.1166, + "step": 6364 + }, + { + "epoch": 1.71, + "learning_rate": 3.934887459807074e-05, + "loss": 0.1999, + "step": 6365 + }, + { + "epoch": 1.71, + "learning_rate": 3.9347199892818865e-05, + "loss": 0.134, + "step": 6366 + }, + { + "epoch": 1.71, + "learning_rate": 3.934552518756699e-05, + "loss": 0.1835, + "step": 6367 + }, + { + "epoch": 1.71, + "learning_rate": 3.934385048231511e-05, + "loss": 0.3207, + "step": 6368 + }, + { + "epoch": 1.71, + "learning_rate": 3.934217577706324e-05, + "loss": 0.1158, + "step": 6369 + }, + { + "epoch": 1.71, + "learning_rate": 3.934050107181136e-05, + "loss": 0.1282, + "step": 6370 + }, + { + "epoch": 1.71, + "learning_rate": 3.9338826366559486e-05, + "loss": 0.1253, + "step": 6371 + }, + { + "epoch": 1.71, + "learning_rate": 3.933715166130762e-05, + "loss": 0.2256, + "step": 6372 + }, + { + "epoch": 1.71, + "learning_rate": 3.9335476956055734e-05, + "loss": 0.2114, + "step": 6373 + }, + { + "epoch": 1.71, + "learning_rate": 3.933380225080386e-05, + "loss": 0.2485, + "step": 6374 + }, + { + "epoch": 1.71, + "learning_rate": 3.933212754555198e-05, + "loss": 0.1794, + "step": 6375 + }, + { + "epoch": 1.71, + "learning_rate": 3.933045284030011e-05, + "loss": 0.1224, + "step": 6376 + }, + { + "epoch": 1.71, + "learning_rate": 3.932877813504823e-05, + "loss": 0.1141, + "step": 6377 + }, + { + "epoch": 1.71, + "learning_rate": 3.9327103429796355e-05, + "loss": 0.1519, + "step": 6378 + }, + { + "epoch": 1.71, + "learning_rate": 3.9325428724544486e-05, + "loss": 0.1269, + "step": 6379 + }, + { + "epoch": 1.71, + "learning_rate": 3.932375401929261e-05, + "loss": 0.2013, + "step": 6380 + }, + { + "epoch": 1.71, + "learning_rate": 3.9322079314040735e-05, + "loss": 0.1249, + "step": 6381 + }, + { + "epoch": 1.71, + "learning_rate": 3.932040460878886e-05, + "loss": 0.1549, + "step": 6382 + }, + { + "epoch": 1.71, + "learning_rate": 3.9318729903536976e-05, + "loss": 0.137, + "step": 6383 + }, + { + "epoch": 1.71, + "learning_rate": 3.93170551982851e-05, + "loss": 0.1561, + "step": 6384 + }, + { + "epoch": 1.71, + "learning_rate": 3.9315380493033225e-05, + "loss": 0.1478, + "step": 6385 + }, + { + "epoch": 1.71, + "learning_rate": 3.931370578778135e-05, + "loss": 0.1219, + "step": 6386 + }, + { + "epoch": 1.71, + "learning_rate": 3.931203108252948e-05, + "loss": 0.2697, + "step": 6387 + }, + { + "epoch": 1.71, + "learning_rate": 3.9310356377277604e-05, + "loss": 0.2275, + "step": 6388 + }, + { + "epoch": 1.71, + "learning_rate": 3.930868167202573e-05, + "loss": 0.2816, + "step": 6389 + }, + { + "epoch": 1.71, + "learning_rate": 3.930700696677385e-05, + "loss": 0.1669, + "step": 6390 + }, + { + "epoch": 1.71, + "learning_rate": 3.930533226152198e-05, + "loss": 0.1306, + "step": 6391 + }, + { + "epoch": 1.71, + "learning_rate": 3.9303657556270094e-05, + "loss": 0.2814, + "step": 6392 + }, + { + "epoch": 1.71, + "learning_rate": 3.930198285101822e-05, + "loss": 0.1773, + "step": 6393 + }, + { + "epoch": 1.71, + "learning_rate": 3.930030814576634e-05, + "loss": 0.122, + "step": 6394 + }, + { + "epoch": 1.71, + "learning_rate": 3.9298633440514474e-05, + "loss": 0.1273, + "step": 6395 + }, + { + "epoch": 1.71, + "learning_rate": 3.92969587352626e-05, + "loss": 0.181, + "step": 6396 + }, + { + "epoch": 1.71, + "learning_rate": 3.929528403001072e-05, + "loss": 0.1576, + "step": 6397 + }, + { + "epoch": 1.71, + "learning_rate": 3.9293609324758846e-05, + "loss": 0.236, + "step": 6398 + }, + { + "epoch": 1.71, + "learning_rate": 3.929193461950697e-05, + "loss": 0.1316, + "step": 6399 + }, + { + "epoch": 1.71, + "learning_rate": 3.9290259914255095e-05, + "loss": 0.2082, + "step": 6400 + }, + { + "epoch": 1.72, + "learning_rate": 3.928858520900321e-05, + "loss": 0.144, + "step": 6401 + }, + { + "epoch": 1.72, + "learning_rate": 3.9286910503751336e-05, + "loss": 0.1275, + "step": 6402 + }, + { + "epoch": 1.72, + "learning_rate": 3.928523579849947e-05, + "loss": 0.2356, + "step": 6403 + }, + { + "epoch": 1.72, + "learning_rate": 3.928356109324759e-05, + "loss": 0.1248, + "step": 6404 + }, + { + "epoch": 1.72, + "learning_rate": 3.9281886387995716e-05, + "loss": 0.1209, + "step": 6405 + }, + { + "epoch": 1.72, + "learning_rate": 3.928021168274384e-05, + "loss": 0.1197, + "step": 6406 + }, + { + "epoch": 1.72, + "learning_rate": 3.9278536977491964e-05, + "loss": 0.1577, + "step": 6407 + }, + { + "epoch": 1.72, + "learning_rate": 3.927686227224009e-05, + "loss": 0.3328, + "step": 6408 + }, + { + "epoch": 1.72, + "learning_rate": 3.927518756698821e-05, + "loss": 0.1995, + "step": 6409 + }, + { + "epoch": 1.72, + "learning_rate": 3.927351286173634e-05, + "loss": 0.1233, + "step": 6410 + }, + { + "epoch": 1.72, + "learning_rate": 3.927183815648446e-05, + "loss": 0.1152, + "step": 6411 + }, + { + "epoch": 1.72, + "learning_rate": 3.9270163451232585e-05, + "loss": 0.2631, + "step": 6412 + }, + { + "epoch": 1.72, + "learning_rate": 3.926848874598071e-05, + "loss": 0.1498, + "step": 6413 + }, + { + "epoch": 1.72, + "learning_rate": 3.9266814040728834e-05, + "loss": 0.1224, + "step": 6414 + }, + { + "epoch": 1.72, + "learning_rate": 3.926513933547696e-05, + "loss": 0.1526, + "step": 6415 + }, + { + "epoch": 1.72, + "learning_rate": 3.926346463022508e-05, + "loss": 0.1414, + "step": 6416 + }, + { + "epoch": 1.72, + "learning_rate": 3.9261789924973206e-05, + "loss": 0.2119, + "step": 6417 + }, + { + "epoch": 1.72, + "learning_rate": 3.926011521972133e-05, + "loss": 0.1079, + "step": 6418 + }, + { + "epoch": 1.72, + "learning_rate": 3.9258440514469455e-05, + "loss": 0.1686, + "step": 6419 + }, + { + "epoch": 1.72, + "learning_rate": 3.925676580921758e-05, + "loss": 0.1662, + "step": 6420 + }, + { + "epoch": 1.72, + "learning_rate": 3.92550911039657e-05, + "loss": 0.1227, + "step": 6421 + }, + { + "epoch": 1.72, + "learning_rate": 3.925341639871383e-05, + "loss": 0.1647, + "step": 6422 + }, + { + "epoch": 1.72, + "learning_rate": 3.925174169346195e-05, + "loss": 0.2093, + "step": 6423 + }, + { + "epoch": 1.72, + "learning_rate": 3.9250066988210076e-05, + "loss": 0.2467, + "step": 6424 + }, + { + "epoch": 1.72, + "learning_rate": 3.92483922829582e-05, + "loss": 0.1993, + "step": 6425 + }, + { + "epoch": 1.72, + "learning_rate": 3.9246717577706324e-05, + "loss": 0.123, + "step": 6426 + }, + { + "epoch": 1.72, + "learning_rate": 3.924504287245445e-05, + "loss": 0.3629, + "step": 6427 + }, + { + "epoch": 1.72, + "learning_rate": 3.924336816720258e-05, + "loss": 0.1126, + "step": 6428 + }, + { + "epoch": 1.72, + "learning_rate": 3.92416934619507e-05, + "loss": 0.135, + "step": 6429 + }, + { + "epoch": 1.72, + "learning_rate": 3.924001875669882e-05, + "loss": 0.1209, + "step": 6430 + }, + { + "epoch": 1.72, + "learning_rate": 3.9238344051446945e-05, + "loss": 0.1586, + "step": 6431 + }, + { + "epoch": 1.72, + "learning_rate": 3.923666934619507e-05, + "loss": 0.1338, + "step": 6432 + }, + { + "epoch": 1.72, + "learning_rate": 3.9234994640943194e-05, + "loss": 0.2678, + "step": 6433 + }, + { + "epoch": 1.72, + "learning_rate": 3.923331993569132e-05, + "loss": 0.2693, + "step": 6434 + }, + { + "epoch": 1.72, + "learning_rate": 3.923164523043945e-05, + "loss": 0.1329, + "step": 6435 + }, + { + "epoch": 1.72, + "learning_rate": 3.922997052518757e-05, + "loss": 0.1346, + "step": 6436 + }, + { + "epoch": 1.72, + "learning_rate": 3.92282958199357e-05, + "loss": 0.1603, + "step": 6437 + }, + { + "epoch": 1.73, + "learning_rate": 3.922662111468382e-05, + "loss": 0.1192, + "step": 6438 + }, + { + "epoch": 1.73, + "learning_rate": 3.922494640943194e-05, + "loss": 0.3263, + "step": 6439 + }, + { + "epoch": 1.73, + "learning_rate": 3.922327170418006e-05, + "loss": 0.2008, + "step": 6440 + }, + { + "epoch": 1.73, + "learning_rate": 3.922159699892819e-05, + "loss": 0.1042, + "step": 6441 + }, + { + "epoch": 1.73, + "learning_rate": 3.921992229367631e-05, + "loss": 0.1262, + "step": 6442 + }, + { + "epoch": 1.73, + "learning_rate": 3.921824758842444e-05, + "loss": 0.1093, + "step": 6443 + }, + { + "epoch": 1.73, + "learning_rate": 3.921657288317257e-05, + "loss": 0.2362, + "step": 6444 + }, + { + "epoch": 1.73, + "learning_rate": 3.921489817792069e-05, + "loss": 0.1146, + "step": 6445 + }, + { + "epoch": 1.73, + "learning_rate": 3.9213223472668815e-05, + "loss": 0.2511, + "step": 6446 + }, + { + "epoch": 1.73, + "learning_rate": 3.921154876741694e-05, + "loss": 0.1613, + "step": 6447 + }, + { + "epoch": 1.73, + "learning_rate": 3.920987406216506e-05, + "loss": 0.1298, + "step": 6448 + }, + { + "epoch": 1.73, + "learning_rate": 3.920819935691318e-05, + "loss": 0.1817, + "step": 6449 + }, + { + "epoch": 1.73, + "learning_rate": 3.9206524651661305e-05, + "loss": 0.1839, + "step": 6450 + }, + { + "epoch": 1.73, + "learning_rate": 3.9204849946409436e-05, + "loss": 0.1107, + "step": 6451 + }, + { + "epoch": 1.73, + "learning_rate": 3.920317524115756e-05, + "loss": 0.1306, + "step": 6452 + }, + { + "epoch": 1.73, + "learning_rate": 3.9201500535905685e-05, + "loss": 0.2377, + "step": 6453 + }, + { + "epoch": 1.73, + "learning_rate": 3.919982583065381e-05, + "loss": 0.2483, + "step": 6454 + }, + { + "epoch": 1.73, + "learning_rate": 3.919815112540193e-05, + "loss": 0.1401, + "step": 6455 + }, + { + "epoch": 1.73, + "learning_rate": 3.919647642015006e-05, + "loss": 0.2984, + "step": 6456 + }, + { + "epoch": 1.73, + "learning_rate": 3.9194801714898175e-05, + "loss": 0.1267, + "step": 6457 + }, + { + "epoch": 1.73, + "learning_rate": 3.91931270096463e-05, + "loss": 0.1197, + "step": 6458 + }, + { + "epoch": 1.73, + "learning_rate": 3.919145230439443e-05, + "loss": 0.2016, + "step": 6459 + }, + { + "epoch": 1.73, + "learning_rate": 3.9189777599142554e-05, + "loss": 0.1953, + "step": 6460 + }, + { + "epoch": 1.73, + "learning_rate": 3.918810289389068e-05, + "loss": 0.1909, + "step": 6461 + }, + { + "epoch": 1.73, + "learning_rate": 3.91864281886388e-05, + "loss": 0.1802, + "step": 6462 + }, + { + "epoch": 1.73, + "learning_rate": 3.918475348338693e-05, + "loss": 0.152, + "step": 6463 + }, + { + "epoch": 1.73, + "learning_rate": 3.918307877813505e-05, + "loss": 0.1333, + "step": 6464 + }, + { + "epoch": 1.73, + "learning_rate": 3.9181404072883175e-05, + "loss": 0.1442, + "step": 6465 + }, + { + "epoch": 1.73, + "learning_rate": 3.91797293676313e-05, + "loss": 0.1137, + "step": 6466 + }, + { + "epoch": 1.73, + "learning_rate": 3.9178054662379424e-05, + "loss": 0.1847, + "step": 6467 + }, + { + "epoch": 1.73, + "learning_rate": 3.917637995712755e-05, + "loss": 0.1004, + "step": 6468 + }, + { + "epoch": 1.73, + "learning_rate": 3.917470525187567e-05, + "loss": 0.1145, + "step": 6469 + }, + { + "epoch": 1.73, + "learning_rate": 3.9173030546623796e-05, + "loss": 0.1151, + "step": 6470 + }, + { + "epoch": 1.73, + "learning_rate": 3.917135584137192e-05, + "loss": 0.1467, + "step": 6471 + }, + { + "epoch": 1.73, + "learning_rate": 3.9169681136120045e-05, + "loss": 0.1245, + "step": 6472 + }, + { + "epoch": 1.73, + "learning_rate": 3.916800643086817e-05, + "loss": 0.266, + "step": 6473 + }, + { + "epoch": 1.73, + "learning_rate": 3.916633172561629e-05, + "loss": 0.2559, + "step": 6474 + }, + { + "epoch": 1.73, + "learning_rate": 3.916465702036442e-05, + "loss": 0.1081, + "step": 6475 + }, + { + "epoch": 1.74, + "learning_rate": 3.916298231511254e-05, + "loss": 0.223, + "step": 6476 + }, + { + "epoch": 1.74, + "learning_rate": 3.9161307609860666e-05, + "loss": 0.1274, + "step": 6477 + }, + { + "epoch": 1.74, + "learning_rate": 3.915963290460879e-05, + "loss": 0.2396, + "step": 6478 + }, + { + "epoch": 1.74, + "learning_rate": 3.9157958199356914e-05, + "loss": 0.3162, + "step": 6479 + }, + { + "epoch": 1.74, + "learning_rate": 3.915628349410504e-05, + "loss": 0.1844, + "step": 6480 + }, + { + "epoch": 1.74, + "learning_rate": 3.915460878885316e-05, + "loss": 0.1205, + "step": 6481 + }, + { + "epoch": 1.74, + "learning_rate": 3.915293408360129e-05, + "loss": 0.1199, + "step": 6482 + }, + { + "epoch": 1.74, + "learning_rate": 3.915125937834941e-05, + "loss": 0.118, + "step": 6483 + }, + { + "epoch": 1.74, + "learning_rate": 3.914958467309754e-05, + "loss": 0.3049, + "step": 6484 + }, + { + "epoch": 1.74, + "learning_rate": 3.914790996784566e-05, + "loss": 0.1198, + "step": 6485 + }, + { + "epoch": 1.74, + "learning_rate": 3.9146235262593784e-05, + "loss": 0.0976, + "step": 6486 + }, + { + "epoch": 1.74, + "learning_rate": 3.914456055734191e-05, + "loss": 0.1319, + "step": 6487 + }, + { + "epoch": 1.74, + "learning_rate": 3.914288585209003e-05, + "loss": 0.1202, + "step": 6488 + }, + { + "epoch": 1.74, + "learning_rate": 3.9141211146838156e-05, + "loss": 0.1402, + "step": 6489 + }, + { + "epoch": 1.74, + "learning_rate": 3.913953644158628e-05, + "loss": 0.1233, + "step": 6490 + }, + { + "epoch": 1.74, + "learning_rate": 3.913786173633441e-05, + "loss": 0.1859, + "step": 6491 + }, + { + "epoch": 1.74, + "learning_rate": 3.9136187031082536e-05, + "loss": 0.1295, + "step": 6492 + }, + { + "epoch": 1.74, + "learning_rate": 3.913451232583066e-05, + "loss": 0.1346, + "step": 6493 + }, + { + "epoch": 1.74, + "learning_rate": 3.9132837620578784e-05, + "loss": 0.3026, + "step": 6494 + }, + { + "epoch": 1.74, + "learning_rate": 3.91311629153269e-05, + "loss": 0.1202, + "step": 6495 + }, + { + "epoch": 1.74, + "learning_rate": 3.9129488210075026e-05, + "loss": 0.1114, + "step": 6496 + }, + { + "epoch": 1.74, + "learning_rate": 3.912781350482315e-05, + "loss": 0.1284, + "step": 6497 + }, + { + "epoch": 1.74, + "learning_rate": 3.9126138799571274e-05, + "loss": 0.1906, + "step": 6498 + }, + { + "epoch": 1.74, + "learning_rate": 3.9124464094319405e-05, + "loss": 0.1767, + "step": 6499 + }, + { + "epoch": 1.74, + "learning_rate": 3.912278938906753e-05, + "loss": 0.1488, + "step": 6500 + }, + { + "epoch": 1.74, + "learning_rate": 3.9121114683815653e-05, + "loss": 0.126, + "step": 6501 + }, + { + "epoch": 1.74, + "learning_rate": 3.911943997856378e-05, + "loss": 0.127, + "step": 6502 + }, + { + "epoch": 1.74, + "learning_rate": 3.91177652733119e-05, + "loss": 0.1178, + "step": 6503 + }, + { + "epoch": 1.74, + "learning_rate": 3.911609056806002e-05, + "loss": 0.2176, + "step": 6504 + }, + { + "epoch": 1.74, + "learning_rate": 3.9114415862808144e-05, + "loss": 0.13, + "step": 6505 + }, + { + "epoch": 1.74, + "learning_rate": 3.911274115755627e-05, + "loss": 0.3659, + "step": 6506 + }, + { + "epoch": 1.74, + "learning_rate": 3.91110664523044e-05, + "loss": 0.2173, + "step": 6507 + }, + { + "epoch": 1.74, + "learning_rate": 3.910939174705252e-05, + "loss": 0.1599, + "step": 6508 + }, + { + "epoch": 1.74, + "learning_rate": 3.910771704180065e-05, + "loss": 0.3141, + "step": 6509 + }, + { + "epoch": 1.74, + "learning_rate": 3.910604233654877e-05, + "loss": 0.1734, + "step": 6510 + }, + { + "epoch": 1.74, + "learning_rate": 3.9104367631296896e-05, + "loss": 0.2241, + "step": 6511 + }, + { + "epoch": 1.74, + "learning_rate": 3.910269292604502e-05, + "loss": 0.1243, + "step": 6512 + }, + { + "epoch": 1.75, + "learning_rate": 3.910101822079314e-05, + "loss": 0.2908, + "step": 6513 + }, + { + "epoch": 1.75, + "learning_rate": 3.909934351554126e-05, + "loss": 0.152, + "step": 6514 + }, + { + "epoch": 1.75, + "learning_rate": 3.909766881028939e-05, + "loss": 0.194, + "step": 6515 + }, + { + "epoch": 1.75, + "learning_rate": 3.909599410503752e-05, + "loss": 0.1374, + "step": 6516 + }, + { + "epoch": 1.75, + "learning_rate": 3.909431939978564e-05, + "loss": 0.176, + "step": 6517 + }, + { + "epoch": 1.75, + "learning_rate": 3.9092644694533765e-05, + "loss": 0.2675, + "step": 6518 + }, + { + "epoch": 1.75, + "learning_rate": 3.909096998928189e-05, + "loss": 0.1236, + "step": 6519 + }, + { + "epoch": 1.75, + "learning_rate": 3.9089295284030013e-05, + "loss": 0.1239, + "step": 6520 + }, + { + "epoch": 1.75, + "learning_rate": 3.908762057877814e-05, + "loss": 0.2671, + "step": 6521 + }, + { + "epoch": 1.75, + "learning_rate": 3.908594587352626e-05, + "loss": 0.1705, + "step": 6522 + }, + { + "epoch": 1.75, + "learning_rate": 3.9084271168274386e-05, + "loss": 0.1321, + "step": 6523 + }, + { + "epoch": 1.75, + "learning_rate": 3.908259646302251e-05, + "loss": 0.1886, + "step": 6524 + }, + { + "epoch": 1.75, + "learning_rate": 3.9080921757770635e-05, + "loss": 0.1543, + "step": 6525 + }, + { + "epoch": 1.75, + "learning_rate": 3.907924705251876e-05, + "loss": 0.1428, + "step": 6526 + }, + { + "epoch": 1.75, + "learning_rate": 3.907757234726688e-05, + "loss": 0.3411, + "step": 6527 + }, + { + "epoch": 1.75, + "learning_rate": 3.907589764201501e-05, + "loss": 0.2432, + "step": 6528 + }, + { + "epoch": 1.75, + "learning_rate": 3.907422293676313e-05, + "loss": 0.1227, + "step": 6529 + }, + { + "epoch": 1.75, + "learning_rate": 3.9072548231511256e-05, + "loss": 0.1311, + "step": 6530 + }, + { + "epoch": 1.75, + "learning_rate": 3.907087352625938e-05, + "loss": 0.1448, + "step": 6531 + }, + { + "epoch": 1.75, + "learning_rate": 3.9069198821007504e-05, + "loss": 0.1136, + "step": 6532 + }, + { + "epoch": 1.75, + "learning_rate": 3.906752411575563e-05, + "loss": 0.1319, + "step": 6533 + }, + { + "epoch": 1.75, + "learning_rate": 3.906584941050375e-05, + "loss": 0.1001, + "step": 6534 + }, + { + "epoch": 1.75, + "learning_rate": 3.9064174705251877e-05, + "loss": 0.1439, + "step": 6535 + }, + { + "epoch": 1.75, + "learning_rate": 3.90625e-05, + "loss": 0.1258, + "step": 6536 + }, + { + "epoch": 1.75, + "learning_rate": 3.9060825294748125e-05, + "loss": 0.1714, + "step": 6537 + }, + { + "epoch": 1.75, + "learning_rate": 3.905915058949625e-05, + "loss": 0.1147, + "step": 6538 + }, + { + "epoch": 1.75, + "learning_rate": 3.9057475884244373e-05, + "loss": 0.1548, + "step": 6539 + }, + { + "epoch": 1.75, + "learning_rate": 3.9055801178992504e-05, + "loss": 0.1719, + "step": 6540 + }, + { + "epoch": 1.75, + "learning_rate": 3.905412647374062e-05, + "loss": 0.1518, + "step": 6541 + }, + { + "epoch": 1.75, + "learning_rate": 3.9052451768488746e-05, + "loss": 0.1367, + "step": 6542 + }, + { + "epoch": 1.75, + "learning_rate": 3.905077706323687e-05, + "loss": 0.1377, + "step": 6543 + }, + { + "epoch": 1.75, + "learning_rate": 3.9049102357984994e-05, + "loss": 0.1445, + "step": 6544 + }, + { + "epoch": 1.75, + "learning_rate": 3.904742765273312e-05, + "loss": 0.2194, + "step": 6545 + }, + { + "epoch": 1.75, + "learning_rate": 3.904575294748124e-05, + "loss": 0.3246, + "step": 6546 + }, + { + "epoch": 1.75, + "learning_rate": 3.904407824222937e-05, + "loss": 0.187, + "step": 6547 + }, + { + "epoch": 1.75, + "learning_rate": 3.90424035369775e-05, + "loss": 0.2146, + "step": 6548 + }, + { + "epoch": 1.75, + "learning_rate": 3.904072883172562e-05, + "loss": 0.2184, + "step": 6549 + }, + { + "epoch": 1.76, + "learning_rate": 3.903905412647374e-05, + "loss": 0.1363, + "step": 6550 + }, + { + "epoch": 1.76, + "learning_rate": 3.9037379421221864e-05, + "loss": 0.1248, + "step": 6551 + }, + { + "epoch": 1.76, + "learning_rate": 3.903570471596999e-05, + "loss": 0.2235, + "step": 6552 + }, + { + "epoch": 1.76, + "learning_rate": 3.903403001071811e-05, + "loss": 0.1124, + "step": 6553 + }, + { + "epoch": 1.76, + "learning_rate": 3.9032355305466237e-05, + "loss": 0.1288, + "step": 6554 + }, + { + "epoch": 1.76, + "learning_rate": 3.903068060021437e-05, + "loss": 0.1689, + "step": 6555 + }, + { + "epoch": 1.76, + "learning_rate": 3.902900589496249e-05, + "loss": 0.1325, + "step": 6556 + }, + { + "epoch": 1.76, + "learning_rate": 3.9027331189710616e-05, + "loss": 0.1316, + "step": 6557 + }, + { + "epoch": 1.76, + "learning_rate": 3.902565648445874e-05, + "loss": 0.146, + "step": 6558 + }, + { + "epoch": 1.76, + "learning_rate": 3.9023981779206864e-05, + "loss": 0.1552, + "step": 6559 + }, + { + "epoch": 1.76, + "learning_rate": 3.902230707395498e-05, + "loss": 0.1171, + "step": 6560 + }, + { + "epoch": 1.76, + "learning_rate": 3.9020632368703106e-05, + "loss": 0.1189, + "step": 6561 + }, + { + "epoch": 1.76, + "learning_rate": 3.901895766345123e-05, + "loss": 0.2068, + "step": 6562 + }, + { + "epoch": 1.76, + "learning_rate": 3.901728295819936e-05, + "loss": 0.1434, + "step": 6563 + }, + { + "epoch": 1.76, + "learning_rate": 3.9015608252947485e-05, + "loss": 0.1209, + "step": 6564 + }, + { + "epoch": 1.76, + "learning_rate": 3.901393354769561e-05, + "loss": 0.2284, + "step": 6565 + }, + { + "epoch": 1.76, + "learning_rate": 3.9012258842443734e-05, + "loss": 0.1898, + "step": 6566 + }, + { + "epoch": 1.76, + "learning_rate": 3.901058413719186e-05, + "loss": 0.1909, + "step": 6567 + }, + { + "epoch": 1.76, + "learning_rate": 3.900890943193998e-05, + "loss": 0.1176, + "step": 6568 + }, + { + "epoch": 1.76, + "learning_rate": 3.90072347266881e-05, + "loss": 0.1288, + "step": 6569 + }, + { + "epoch": 1.76, + "learning_rate": 3.9005560021436224e-05, + "loss": 0.1995, + "step": 6570 + }, + { + "epoch": 1.76, + "learning_rate": 3.9003885316184355e-05, + "loss": 0.1167, + "step": 6571 + }, + { + "epoch": 1.76, + "learning_rate": 3.900221061093248e-05, + "loss": 0.2556, + "step": 6572 + }, + { + "epoch": 1.76, + "learning_rate": 3.90005359056806e-05, + "loss": 0.3986, + "step": 6573 + }, + { + "epoch": 1.76, + "learning_rate": 3.899886120042873e-05, + "loss": 0.1277, + "step": 6574 + }, + { + "epoch": 1.76, + "learning_rate": 3.899718649517685e-05, + "loss": 0.1335, + "step": 6575 + }, + { + "epoch": 1.76, + "learning_rate": 3.8995511789924976e-05, + "loss": 0.2109, + "step": 6576 + }, + { + "epoch": 1.76, + "learning_rate": 3.89938370846731e-05, + "loss": 0.3185, + "step": 6577 + }, + { + "epoch": 1.76, + "learning_rate": 3.899216237942122e-05, + "loss": 0.1904, + "step": 6578 + }, + { + "epoch": 1.76, + "learning_rate": 3.899048767416935e-05, + "loss": 0.3333, + "step": 6579 + }, + { + "epoch": 1.76, + "learning_rate": 3.898881296891747e-05, + "loss": 0.174, + "step": 6580 + }, + { + "epoch": 1.76, + "learning_rate": 3.89871382636656e-05, + "loss": 0.2319, + "step": 6581 + }, + { + "epoch": 1.76, + "learning_rate": 3.898546355841372e-05, + "loss": 0.2027, + "step": 6582 + }, + { + "epoch": 1.76, + "learning_rate": 3.8983788853161845e-05, + "loss": 0.1401, + "step": 6583 + }, + { + "epoch": 1.76, + "learning_rate": 3.898211414790997e-05, + "loss": 0.1236, + "step": 6584 + }, + { + "epoch": 1.76, + "learning_rate": 3.8980439442658094e-05, + "loss": 0.255, + "step": 6585 + }, + { + "epoch": 1.76, + "learning_rate": 3.897876473740622e-05, + "loss": 0.2857, + "step": 6586 + }, + { + "epoch": 1.76, + "learning_rate": 3.897709003215434e-05, + "loss": 0.1342, + "step": 6587 + }, + { + "epoch": 1.77, + "learning_rate": 3.8975415326902466e-05, + "loss": 0.1316, + "step": 6588 + }, + { + "epoch": 1.77, + "learning_rate": 3.897374062165059e-05, + "loss": 0.1364, + "step": 6589 + }, + { + "epoch": 1.77, + "learning_rate": 3.8972065916398715e-05, + "loss": 0.1931, + "step": 6590 + }, + { + "epoch": 1.77, + "learning_rate": 3.897039121114684e-05, + "loss": 0.1076, + "step": 6591 + }, + { + "epoch": 1.77, + "learning_rate": 3.896871650589496e-05, + "loss": 0.1212, + "step": 6592 + }, + { + "epoch": 1.77, + "learning_rate": 3.896704180064309e-05, + "loss": 0.122, + "step": 6593 + }, + { + "epoch": 1.77, + "learning_rate": 3.896536709539121e-05, + "loss": 0.1105, + "step": 6594 + }, + { + "epoch": 1.77, + "learning_rate": 3.8963692390139336e-05, + "loss": 0.1796, + "step": 6595 + }, + { + "epoch": 1.77, + "learning_rate": 3.896201768488747e-05, + "loss": 0.3427, + "step": 6596 + }, + { + "epoch": 1.77, + "learning_rate": 3.8960342979635584e-05, + "loss": 0.1429, + "step": 6597 + }, + { + "epoch": 1.77, + "learning_rate": 3.895866827438371e-05, + "loss": 0.1289, + "step": 6598 + }, + { + "epoch": 1.77, + "learning_rate": 3.895699356913183e-05, + "loss": 0.3169, + "step": 6599 + }, + { + "epoch": 1.77, + "learning_rate": 3.895531886387996e-05, + "loss": 0.1298, + "step": 6600 + }, + { + "epoch": 1.77, + "learning_rate": 3.895364415862808e-05, + "loss": 0.1816, + "step": 6601 + }, + { + "epoch": 1.77, + "learning_rate": 3.8951969453376205e-05, + "loss": 0.1247, + "step": 6602 + }, + { + "epoch": 1.77, + "learning_rate": 3.895029474812433e-05, + "loss": 0.1995, + "step": 6603 + }, + { + "epoch": 1.77, + "learning_rate": 3.894862004287246e-05, + "loss": 0.1353, + "step": 6604 + }, + { + "epoch": 1.77, + "learning_rate": 3.8946945337620585e-05, + "loss": 0.126, + "step": 6605 + }, + { + "epoch": 1.77, + "learning_rate": 3.89452706323687e-05, + "loss": 0.1608, + "step": 6606 + }, + { + "epoch": 1.77, + "learning_rate": 3.8943595927116826e-05, + "loss": 0.128, + "step": 6607 + }, + { + "epoch": 1.77, + "learning_rate": 3.894192122186495e-05, + "loss": 0.128, + "step": 6608 + }, + { + "epoch": 1.77, + "learning_rate": 3.8940246516613075e-05, + "loss": 0.1185, + "step": 6609 + }, + { + "epoch": 1.77, + "learning_rate": 3.89385718113612e-05, + "loss": 0.1297, + "step": 6610 + }, + { + "epoch": 1.77, + "learning_rate": 3.893689710610933e-05, + "loss": 0.1647, + "step": 6611 + }, + { + "epoch": 1.77, + "learning_rate": 3.8935222400857454e-05, + "loss": 0.1349, + "step": 6612 + }, + { + "epoch": 1.77, + "learning_rate": 3.893354769560558e-05, + "loss": 0.1137, + "step": 6613 + }, + { + "epoch": 1.77, + "learning_rate": 3.89318729903537e-05, + "loss": 0.1106, + "step": 6614 + }, + { + "epoch": 1.77, + "learning_rate": 3.893019828510183e-05, + "loss": 0.1727, + "step": 6615 + }, + { + "epoch": 1.77, + "learning_rate": 3.8928523579849944e-05, + "loss": 0.1219, + "step": 6616 + }, + { + "epoch": 1.77, + "learning_rate": 3.892684887459807e-05, + "loss": 0.1402, + "step": 6617 + }, + { + "epoch": 1.77, + "learning_rate": 3.892517416934619e-05, + "loss": 0.1402, + "step": 6618 + }, + { + "epoch": 1.77, + "learning_rate": 3.8923499464094324e-05, + "loss": 0.118, + "step": 6619 + }, + { + "epoch": 1.77, + "learning_rate": 3.892182475884245e-05, + "loss": 0.1056, + "step": 6620 + }, + { + "epoch": 1.77, + "learning_rate": 3.892015005359057e-05, + "loss": 0.1695, + "step": 6621 + }, + { + "epoch": 1.77, + "learning_rate": 3.8918475348338696e-05, + "loss": 0.1855, + "step": 6622 + }, + { + "epoch": 1.77, + "learning_rate": 3.891680064308682e-05, + "loss": 0.1644, + "step": 6623 + }, + { + "epoch": 1.77, + "learning_rate": 3.8915125937834945e-05, + "loss": 0.1753, + "step": 6624 + }, + { + "epoch": 1.78, + "learning_rate": 3.891345123258306e-05, + "loss": 0.1607, + "step": 6625 + }, + { + "epoch": 1.78, + "learning_rate": 3.8911776527331186e-05, + "loss": 0.1836, + "step": 6626 + }, + { + "epoch": 1.78, + "learning_rate": 3.891010182207932e-05, + "loss": 0.1134, + "step": 6627 + }, + { + "epoch": 1.78, + "learning_rate": 3.890842711682744e-05, + "loss": 0.1627, + "step": 6628 + }, + { + "epoch": 1.78, + "learning_rate": 3.8906752411575566e-05, + "loss": 0.2096, + "step": 6629 + }, + { + "epoch": 1.78, + "learning_rate": 3.890507770632369e-05, + "loss": 0.1649, + "step": 6630 + }, + { + "epoch": 1.78, + "learning_rate": 3.8903403001071814e-05, + "loss": 0.224, + "step": 6631 + }, + { + "epoch": 1.78, + "learning_rate": 3.890172829581994e-05, + "loss": 0.1298, + "step": 6632 + }, + { + "epoch": 1.78, + "learning_rate": 3.890005359056806e-05, + "loss": 0.2496, + "step": 6633 + }, + { + "epoch": 1.78, + "learning_rate": 3.889837888531618e-05, + "loss": 0.2247, + "step": 6634 + }, + { + "epoch": 1.78, + "learning_rate": 3.889670418006431e-05, + "loss": 0.1543, + "step": 6635 + }, + { + "epoch": 1.78, + "learning_rate": 3.8895029474812435e-05, + "loss": 0.2694, + "step": 6636 + }, + { + "epoch": 1.78, + "learning_rate": 3.889335476956056e-05, + "loss": 0.1922, + "step": 6637 + }, + { + "epoch": 1.78, + "learning_rate": 3.8891680064308684e-05, + "loss": 0.1456, + "step": 6638 + }, + { + "epoch": 1.78, + "learning_rate": 3.889000535905681e-05, + "loss": 0.1237, + "step": 6639 + }, + { + "epoch": 1.78, + "learning_rate": 3.888833065380493e-05, + "loss": 0.1257, + "step": 6640 + }, + { + "epoch": 1.78, + "learning_rate": 3.8886655948553056e-05, + "loss": 0.204, + "step": 6641 + }, + { + "epoch": 1.78, + "learning_rate": 3.888498124330118e-05, + "loss": 0.1191, + "step": 6642 + }, + { + "epoch": 1.78, + "learning_rate": 3.8883306538049305e-05, + "loss": 0.1407, + "step": 6643 + }, + { + "epoch": 1.78, + "learning_rate": 3.888163183279743e-05, + "loss": 0.1466, + "step": 6644 + }, + { + "epoch": 1.78, + "learning_rate": 3.887995712754555e-05, + "loss": 0.13, + "step": 6645 + }, + { + "epoch": 1.78, + "learning_rate": 3.887828242229368e-05, + "loss": 0.347, + "step": 6646 + }, + { + "epoch": 1.78, + "learning_rate": 3.88766077170418e-05, + "loss": 0.1372, + "step": 6647 + }, + { + "epoch": 1.78, + "learning_rate": 3.8874933011789926e-05, + "loss": 0.1257, + "step": 6648 + }, + { + "epoch": 1.78, + "learning_rate": 3.887325830653805e-05, + "loss": 0.3354, + "step": 6649 + }, + { + "epoch": 1.78, + "learning_rate": 3.8871583601286174e-05, + "loss": 0.1406, + "step": 6650 + }, + { + "epoch": 1.78, + "learning_rate": 3.88699088960343e-05, + "loss": 0.1235, + "step": 6651 + }, + { + "epoch": 1.78, + "learning_rate": 3.886823419078243e-05, + "loss": 0.1246, + "step": 6652 + }, + { + "epoch": 1.78, + "learning_rate": 3.886655948553055e-05, + "loss": 0.1253, + "step": 6653 + }, + { + "epoch": 1.78, + "learning_rate": 3.886488478027867e-05, + "loss": 0.1518, + "step": 6654 + }, + { + "epoch": 1.78, + "learning_rate": 3.8863210075026795e-05, + "loss": 0.1777, + "step": 6655 + }, + { + "epoch": 1.78, + "learning_rate": 3.886153536977492e-05, + "loss": 0.1441, + "step": 6656 + }, + { + "epoch": 1.78, + "learning_rate": 3.8859860664523044e-05, + "loss": 0.2936, + "step": 6657 + }, + { + "epoch": 1.78, + "learning_rate": 3.885818595927117e-05, + "loss": 0.1355, + "step": 6658 + }, + { + "epoch": 1.78, + "learning_rate": 3.885651125401929e-05, + "loss": 0.1171, + "step": 6659 + }, + { + "epoch": 1.78, + "learning_rate": 3.885483654876742e-05, + "loss": 0.1482, + "step": 6660 + }, + { + "epoch": 1.78, + "learning_rate": 3.885316184351555e-05, + "loss": 0.1333, + "step": 6661 + }, + { + "epoch": 1.79, + "learning_rate": 3.8851487138263665e-05, + "loss": 0.1237, + "step": 6662 + }, + { + "epoch": 1.79, + "learning_rate": 3.884981243301179e-05, + "loss": 0.1343, + "step": 6663 + }, + { + "epoch": 1.79, + "learning_rate": 3.884813772775991e-05, + "loss": 0.1397, + "step": 6664 + }, + { + "epoch": 1.79, + "learning_rate": 3.884646302250804e-05, + "loss": 0.1717, + "step": 6665 + }, + { + "epoch": 1.79, + "learning_rate": 3.884478831725616e-05, + "loss": 0.1087, + "step": 6666 + }, + { + "epoch": 1.79, + "learning_rate": 3.884311361200429e-05, + "loss": 0.1284, + "step": 6667 + }, + { + "epoch": 1.79, + "learning_rate": 3.884143890675242e-05, + "loss": 0.2564, + "step": 6668 + }, + { + "epoch": 1.79, + "learning_rate": 3.883976420150054e-05, + "loss": 0.1623, + "step": 6669 + }, + { + "epoch": 1.79, + "learning_rate": 3.8838089496248665e-05, + "loss": 0.1263, + "step": 6670 + }, + { + "epoch": 1.79, + "learning_rate": 3.883641479099679e-05, + "loss": 0.1169, + "step": 6671 + }, + { + "epoch": 1.79, + "learning_rate": 3.883474008574491e-05, + "loss": 0.1041, + "step": 6672 + }, + { + "epoch": 1.79, + "learning_rate": 3.883306538049303e-05, + "loss": 0.1399, + "step": 6673 + }, + { + "epoch": 1.79, + "learning_rate": 3.8831390675241155e-05, + "loss": 0.1283, + "step": 6674 + }, + { + "epoch": 1.79, + "learning_rate": 3.8829715969989286e-05, + "loss": 0.1899, + "step": 6675 + }, + { + "epoch": 1.79, + "learning_rate": 3.882804126473741e-05, + "loss": 0.1406, + "step": 6676 + }, + { + "epoch": 1.79, + "learning_rate": 3.8826366559485535e-05, + "loss": 0.1485, + "step": 6677 + }, + { + "epoch": 1.79, + "learning_rate": 3.882469185423366e-05, + "loss": 0.1622, + "step": 6678 + }, + { + "epoch": 1.79, + "learning_rate": 3.882301714898178e-05, + "loss": 0.1339, + "step": 6679 + }, + { + "epoch": 1.79, + "learning_rate": 3.882134244372991e-05, + "loss": 0.1378, + "step": 6680 + }, + { + "epoch": 1.79, + "learning_rate": 3.8819667738478025e-05, + "loss": 0.1294, + "step": 6681 + }, + { + "epoch": 1.79, + "learning_rate": 3.881799303322615e-05, + "loss": 0.1502, + "step": 6682 + }, + { + "epoch": 1.79, + "learning_rate": 3.881631832797428e-05, + "loss": 0.1168, + "step": 6683 + }, + { + "epoch": 1.79, + "learning_rate": 3.8814643622722404e-05, + "loss": 0.1196, + "step": 6684 + }, + { + "epoch": 1.79, + "learning_rate": 3.881296891747053e-05, + "loss": 0.1123, + "step": 6685 + }, + { + "epoch": 1.79, + "learning_rate": 3.881129421221865e-05, + "loss": 0.1212, + "step": 6686 + }, + { + "epoch": 1.79, + "learning_rate": 3.880961950696678e-05, + "loss": 0.1428, + "step": 6687 + }, + { + "epoch": 1.79, + "learning_rate": 3.88079448017149e-05, + "loss": 0.1363, + "step": 6688 + }, + { + "epoch": 1.79, + "learning_rate": 3.8806270096463025e-05, + "loss": 0.1241, + "step": 6689 + }, + { + "epoch": 1.79, + "learning_rate": 3.880459539121114e-05, + "loss": 0.1393, + "step": 6690 + }, + { + "epoch": 1.79, + "learning_rate": 3.8802920685959274e-05, + "loss": 0.2013, + "step": 6691 + }, + { + "epoch": 1.79, + "learning_rate": 3.88012459807074e-05, + "loss": 0.1165, + "step": 6692 + }, + { + "epoch": 1.79, + "learning_rate": 3.879957127545552e-05, + "loss": 0.2423, + "step": 6693 + }, + { + "epoch": 1.79, + "learning_rate": 3.8797896570203646e-05, + "loss": 0.3505, + "step": 6694 + }, + { + "epoch": 1.79, + "learning_rate": 3.879622186495177e-05, + "loss": 0.1565, + "step": 6695 + }, + { + "epoch": 1.79, + "learning_rate": 3.8794547159699895e-05, + "loss": 0.1288, + "step": 6696 + }, + { + "epoch": 1.79, + "learning_rate": 3.879287245444802e-05, + "loss": 0.1132, + "step": 6697 + }, + { + "epoch": 1.79, + "learning_rate": 3.879119774919614e-05, + "loss": 0.2587, + "step": 6698 + }, + { + "epoch": 1.79, + "learning_rate": 3.878952304394427e-05, + "loss": 0.1856, + "step": 6699 + }, + { + "epoch": 1.8, + "learning_rate": 3.878784833869239e-05, + "loss": 0.1875, + "step": 6700 + }, + { + "epoch": 1.8, + "learning_rate": 3.8786173633440516e-05, + "loss": 0.1925, + "step": 6701 + }, + { + "epoch": 1.8, + "learning_rate": 3.878449892818864e-05, + "loss": 0.1176, + "step": 6702 + }, + { + "epoch": 1.8, + "learning_rate": 3.8782824222936764e-05, + "loss": 0.2936, + "step": 6703 + }, + { + "epoch": 1.8, + "learning_rate": 3.878114951768489e-05, + "loss": 0.2023, + "step": 6704 + }, + { + "epoch": 1.8, + "learning_rate": 3.877947481243301e-05, + "loss": 0.1127, + "step": 6705 + }, + { + "epoch": 1.8, + "learning_rate": 3.877780010718114e-05, + "loss": 0.1228, + "step": 6706 + }, + { + "epoch": 1.8, + "learning_rate": 3.877612540192926e-05, + "loss": 0.1335, + "step": 6707 + }, + { + "epoch": 1.8, + "learning_rate": 3.877445069667739e-05, + "loss": 0.1285, + "step": 6708 + }, + { + "epoch": 1.8, + "learning_rate": 3.877277599142551e-05, + "loss": 0.1315, + "step": 6709 + }, + { + "epoch": 1.8, + "learning_rate": 3.8771101286173634e-05, + "loss": 0.1224, + "step": 6710 + }, + { + "epoch": 1.8, + "learning_rate": 3.876942658092176e-05, + "loss": 0.2404, + "step": 6711 + }, + { + "epoch": 1.8, + "learning_rate": 3.876775187566988e-05, + "loss": 0.1314, + "step": 6712 + }, + { + "epoch": 1.8, + "learning_rate": 3.8766077170418006e-05, + "loss": 0.2323, + "step": 6713 + }, + { + "epoch": 1.8, + "learning_rate": 3.876440246516613e-05, + "loss": 0.1216, + "step": 6714 + }, + { + "epoch": 1.8, + "learning_rate": 3.8762727759914255e-05, + "loss": 0.1487, + "step": 6715 + }, + { + "epoch": 1.8, + "learning_rate": 3.8761053054662386e-05, + "loss": 0.2728, + "step": 6716 + }, + { + "epoch": 1.8, + "learning_rate": 3.875937834941051e-05, + "loss": 0.1984, + "step": 6717 + }, + { + "epoch": 1.8, + "learning_rate": 3.875770364415863e-05, + "loss": 0.2667, + "step": 6718 + }, + { + "epoch": 1.8, + "learning_rate": 3.875602893890675e-05, + "loss": 0.1314, + "step": 6719 + }, + { + "epoch": 1.8, + "learning_rate": 3.8754354233654876e-05, + "loss": 0.1411, + "step": 6720 + }, + { + "epoch": 1.8, + "learning_rate": 3.8752679528403e-05, + "loss": 0.1133, + "step": 6721 + }, + { + "epoch": 1.8, + "learning_rate": 3.8751004823151124e-05, + "loss": 0.1381, + "step": 6722 + }, + { + "epoch": 1.8, + "learning_rate": 3.8749330117899255e-05, + "loss": 0.184, + "step": 6723 + }, + { + "epoch": 1.8, + "learning_rate": 3.874765541264738e-05, + "loss": 0.1178, + "step": 6724 + }, + { + "epoch": 1.8, + "learning_rate": 3.8745980707395504e-05, + "loss": 0.1275, + "step": 6725 + }, + { + "epoch": 1.8, + "learning_rate": 3.874430600214363e-05, + "loss": 0.1952, + "step": 6726 + }, + { + "epoch": 1.8, + "learning_rate": 3.874263129689175e-05, + "loss": 0.3001, + "step": 6727 + }, + { + "epoch": 1.8, + "learning_rate": 3.874095659163987e-05, + "loss": 0.1208, + "step": 6728 + }, + { + "epoch": 1.8, + "learning_rate": 3.8739281886387994e-05, + "loss": 0.1539, + "step": 6729 + }, + { + "epoch": 1.8, + "learning_rate": 3.873760718113612e-05, + "loss": 0.1969, + "step": 6730 + }, + { + "epoch": 1.8, + "learning_rate": 3.873593247588425e-05, + "loss": 0.1994, + "step": 6731 + }, + { + "epoch": 1.8, + "learning_rate": 3.873425777063237e-05, + "loss": 0.1298, + "step": 6732 + }, + { + "epoch": 1.8, + "learning_rate": 3.87325830653805e-05, + "loss": 0.3362, + "step": 6733 + }, + { + "epoch": 1.8, + "learning_rate": 3.873090836012862e-05, + "loss": 0.1337, + "step": 6734 + }, + { + "epoch": 1.8, + "learning_rate": 3.8729233654876746e-05, + "loss": 0.1492, + "step": 6735 + }, + { + "epoch": 1.8, + "learning_rate": 3.872755894962487e-05, + "loss": 0.2068, + "step": 6736 + }, + { + "epoch": 1.81, + "learning_rate": 3.872588424437299e-05, + "loss": 0.1827, + "step": 6737 + }, + { + "epoch": 1.81, + "learning_rate": 3.872420953912111e-05, + "loss": 0.1337, + "step": 6738 + }, + { + "epoch": 1.81, + "learning_rate": 3.872253483386924e-05, + "loss": 0.1206, + "step": 6739 + }, + { + "epoch": 1.81, + "learning_rate": 3.872086012861737e-05, + "loss": 0.1452, + "step": 6740 + }, + { + "epoch": 1.81, + "learning_rate": 3.871918542336549e-05, + "loss": 0.1286, + "step": 6741 + }, + { + "epoch": 1.81, + "learning_rate": 3.8717510718113615e-05, + "loss": 0.1144, + "step": 6742 + }, + { + "epoch": 1.81, + "learning_rate": 3.871583601286174e-05, + "loss": 0.1071, + "step": 6743 + }, + { + "epoch": 1.81, + "learning_rate": 3.8714161307609863e-05, + "loss": 0.141, + "step": 6744 + }, + { + "epoch": 1.81, + "learning_rate": 3.871248660235799e-05, + "loss": 0.1481, + "step": 6745 + }, + { + "epoch": 1.81, + "learning_rate": 3.8710811897106105e-05, + "loss": 0.3094, + "step": 6746 + }, + { + "epoch": 1.81, + "learning_rate": 3.8709137191854236e-05, + "loss": 0.1073, + "step": 6747 + }, + { + "epoch": 1.81, + "learning_rate": 3.870746248660236e-05, + "loss": 0.1262, + "step": 6748 + }, + { + "epoch": 1.81, + "learning_rate": 3.8705787781350485e-05, + "loss": 0.4999, + "step": 6749 + }, + { + "epoch": 1.81, + "learning_rate": 3.870411307609861e-05, + "loss": 0.1192, + "step": 6750 + }, + { + "epoch": 1.81, + "learning_rate": 3.870243837084673e-05, + "loss": 0.1225, + "step": 6751 + }, + { + "epoch": 1.81, + "learning_rate": 3.870076366559486e-05, + "loss": 0.1584, + "step": 6752 + }, + { + "epoch": 1.81, + "learning_rate": 3.869908896034298e-05, + "loss": 0.1641, + "step": 6753 + }, + { + "epoch": 1.81, + "learning_rate": 3.8697414255091106e-05, + "loss": 0.1201, + "step": 6754 + }, + { + "epoch": 1.81, + "learning_rate": 3.869573954983923e-05, + "loss": 0.1211, + "step": 6755 + }, + { + "epoch": 1.81, + "learning_rate": 3.8694064844587354e-05, + "loss": 0.1381, + "step": 6756 + }, + { + "epoch": 1.81, + "learning_rate": 3.869239013933548e-05, + "loss": 0.1159, + "step": 6757 + }, + { + "epoch": 1.81, + "learning_rate": 3.86907154340836e-05, + "loss": 0.1524, + "step": 6758 + }, + { + "epoch": 1.81, + "learning_rate": 3.868904072883173e-05, + "loss": 0.4467, + "step": 6759 + }, + { + "epoch": 1.81, + "learning_rate": 3.868736602357985e-05, + "loss": 0.1336, + "step": 6760 + }, + { + "epoch": 1.81, + "learning_rate": 3.8685691318327975e-05, + "loss": 0.1716, + "step": 6761 + }, + { + "epoch": 1.81, + "learning_rate": 3.86840166130761e-05, + "loss": 0.1137, + "step": 6762 + }, + { + "epoch": 1.81, + "learning_rate": 3.8682341907824223e-05, + "loss": 0.1745, + "step": 6763 + }, + { + "epoch": 1.81, + "learning_rate": 3.8680667202572354e-05, + "loss": 0.1327, + "step": 6764 + }, + { + "epoch": 1.81, + "learning_rate": 3.867899249732047e-05, + "loss": 0.1289, + "step": 6765 + }, + { + "epoch": 1.81, + "learning_rate": 3.8677317792068596e-05, + "loss": 0.1103, + "step": 6766 + }, + { + "epoch": 1.81, + "learning_rate": 3.867564308681672e-05, + "loss": 0.1191, + "step": 6767 + }, + { + "epoch": 1.81, + "learning_rate": 3.8673968381564845e-05, + "loss": 0.1134, + "step": 6768 + }, + { + "epoch": 1.81, + "learning_rate": 3.867229367631297e-05, + "loss": 0.2602, + "step": 6769 + }, + { + "epoch": 1.81, + "learning_rate": 3.867061897106109e-05, + "loss": 0.3572, + "step": 6770 + }, + { + "epoch": 1.81, + "learning_rate": 3.866894426580922e-05, + "loss": 0.2172, + "step": 6771 + }, + { + "epoch": 1.81, + "learning_rate": 3.866726956055735e-05, + "loss": 0.1981, + "step": 6772 + }, + { + "epoch": 1.81, + "learning_rate": 3.866559485530547e-05, + "loss": 0.2259, + "step": 6773 + }, + { + "epoch": 1.82, + "learning_rate": 3.866392015005359e-05, + "loss": 0.1836, + "step": 6774 + }, + { + "epoch": 1.82, + "learning_rate": 3.8662245444801714e-05, + "loss": 0.2139, + "step": 6775 + }, + { + "epoch": 1.82, + "learning_rate": 3.866057073954984e-05, + "loss": 0.2246, + "step": 6776 + }, + { + "epoch": 1.82, + "learning_rate": 3.865889603429796e-05, + "loss": 0.1107, + "step": 6777 + }, + { + "epoch": 1.82, + "learning_rate": 3.865722132904609e-05, + "loss": 0.1319, + "step": 6778 + }, + { + "epoch": 1.82, + "learning_rate": 3.865554662379422e-05, + "loss": 0.158, + "step": 6779 + }, + { + "epoch": 1.82, + "learning_rate": 3.865387191854234e-05, + "loss": 0.3336, + "step": 6780 + }, + { + "epoch": 1.82, + "learning_rate": 3.8652197213290466e-05, + "loss": 0.1321, + "step": 6781 + }, + { + "epoch": 1.82, + "learning_rate": 3.865052250803859e-05, + "loss": 0.2858, + "step": 6782 + }, + { + "epoch": 1.82, + "learning_rate": 3.8648847802786714e-05, + "loss": 0.1325, + "step": 6783 + }, + { + "epoch": 1.82, + "learning_rate": 3.864717309753483e-05, + "loss": 0.1616, + "step": 6784 + }, + { + "epoch": 1.82, + "learning_rate": 3.8645498392282956e-05, + "loss": 0.2161, + "step": 6785 + }, + { + "epoch": 1.82, + "learning_rate": 3.864382368703108e-05, + "loss": 0.117, + "step": 6786 + }, + { + "epoch": 1.82, + "learning_rate": 3.864214898177921e-05, + "loss": 0.1204, + "step": 6787 + }, + { + "epoch": 1.82, + "learning_rate": 3.8640474276527335e-05, + "loss": 0.1188, + "step": 6788 + }, + { + "epoch": 1.82, + "learning_rate": 3.863879957127546e-05, + "loss": 0.1768, + "step": 6789 + }, + { + "epoch": 1.82, + "learning_rate": 3.8637124866023584e-05, + "loss": 0.1453, + "step": 6790 + }, + { + "epoch": 1.82, + "learning_rate": 3.863545016077171e-05, + "loss": 0.1489, + "step": 6791 + }, + { + "epoch": 1.82, + "learning_rate": 3.863377545551983e-05, + "loss": 0.2946, + "step": 6792 + }, + { + "epoch": 1.82, + "learning_rate": 3.863210075026795e-05, + "loss": 0.1521, + "step": 6793 + }, + { + "epoch": 1.82, + "learning_rate": 3.8630426045016074e-05, + "loss": 0.245, + "step": 6794 + }, + { + "epoch": 1.82, + "learning_rate": 3.8628751339764205e-05, + "loss": 0.1791, + "step": 6795 + }, + { + "epoch": 1.82, + "learning_rate": 3.862707663451233e-05, + "loss": 0.1325, + "step": 6796 + }, + { + "epoch": 1.82, + "learning_rate": 3.862540192926045e-05, + "loss": 0.1351, + "step": 6797 + }, + { + "epoch": 1.82, + "learning_rate": 3.862372722400858e-05, + "loss": 0.204, + "step": 6798 + }, + { + "epoch": 1.82, + "learning_rate": 3.86220525187567e-05, + "loss": 0.113, + "step": 6799 + }, + { + "epoch": 1.82, + "learning_rate": 3.8620377813504826e-05, + "loss": 0.213, + "step": 6800 + }, + { + "epoch": 1.82, + "learning_rate": 3.861870310825295e-05, + "loss": 0.1198, + "step": 6801 + }, + { + "epoch": 1.82, + "learning_rate": 3.861702840300107e-05, + "loss": 0.1227, + "step": 6802 + }, + { + "epoch": 1.82, + "learning_rate": 3.86153536977492e-05, + "loss": 0.1276, + "step": 6803 + }, + { + "epoch": 1.82, + "learning_rate": 3.861367899249732e-05, + "loss": 0.1322, + "step": 6804 + }, + { + "epoch": 1.82, + "learning_rate": 3.861200428724545e-05, + "loss": 0.1295, + "step": 6805 + }, + { + "epoch": 1.82, + "learning_rate": 3.861032958199357e-05, + "loss": 0.1567, + "step": 6806 + }, + { + "epoch": 1.82, + "learning_rate": 3.8608654876741695e-05, + "loss": 0.1201, + "step": 6807 + }, + { + "epoch": 1.82, + "learning_rate": 3.860698017148982e-05, + "loss": 0.2878, + "step": 6808 + }, + { + "epoch": 1.82, + "learning_rate": 3.8605305466237944e-05, + "loss": 0.1155, + "step": 6809 + }, + { + "epoch": 1.82, + "learning_rate": 3.860363076098607e-05, + "loss": 0.1185, + "step": 6810 + }, + { + "epoch": 1.82, + "learning_rate": 3.860195605573419e-05, + "loss": 0.1369, + "step": 6811 + }, + { + "epoch": 1.83, + "learning_rate": 3.8600281350482317e-05, + "loss": 0.2097, + "step": 6812 + }, + { + "epoch": 1.83, + "learning_rate": 3.859860664523044e-05, + "loss": 0.106, + "step": 6813 + }, + { + "epoch": 1.83, + "learning_rate": 3.8596931939978565e-05, + "loss": 0.1252, + "step": 6814 + }, + { + "epoch": 1.83, + "learning_rate": 3.859525723472669e-05, + "loss": 0.1929, + "step": 6815 + }, + { + "epoch": 1.83, + "learning_rate": 3.859358252947481e-05, + "loss": 0.3454, + "step": 6816 + }, + { + "epoch": 1.83, + "learning_rate": 3.859190782422294e-05, + "loss": 0.1429, + "step": 6817 + }, + { + "epoch": 1.83, + "learning_rate": 3.859023311897106e-05, + "loss": 0.1151, + "step": 6818 + }, + { + "epoch": 1.83, + "learning_rate": 3.8588558413719186e-05, + "loss": 0.1187, + "step": 6819 + }, + { + "epoch": 1.83, + "learning_rate": 3.858688370846732e-05, + "loss": 0.1169, + "step": 6820 + }, + { + "epoch": 1.83, + "learning_rate": 3.8585209003215434e-05, + "loss": 0.3491, + "step": 6821 + }, + { + "epoch": 1.83, + "learning_rate": 3.858353429796356e-05, + "loss": 0.1274, + "step": 6822 + }, + { + "epoch": 1.83, + "learning_rate": 3.858185959271168e-05, + "loss": 0.208, + "step": 6823 + }, + { + "epoch": 1.83, + "learning_rate": 3.858018488745981e-05, + "loss": 0.1099, + "step": 6824 + }, + { + "epoch": 1.83, + "learning_rate": 3.857851018220793e-05, + "loss": 0.1274, + "step": 6825 + }, + { + "epoch": 1.83, + "learning_rate": 3.8576835476956055e-05, + "loss": 0.235, + "step": 6826 + }, + { + "epoch": 1.83, + "learning_rate": 3.857516077170418e-05, + "loss": 0.19, + "step": 6827 + }, + { + "epoch": 1.83, + "learning_rate": 3.857348606645231e-05, + "loss": 0.1179, + "step": 6828 + }, + { + "epoch": 1.83, + "learning_rate": 3.8571811361200435e-05, + "loss": 0.1494, + "step": 6829 + }, + { + "epoch": 1.83, + "learning_rate": 3.857013665594855e-05, + "loss": 0.2086, + "step": 6830 + }, + { + "epoch": 1.83, + "learning_rate": 3.8568461950696677e-05, + "loss": 0.1099, + "step": 6831 + }, + { + "epoch": 1.83, + "learning_rate": 3.85667872454448e-05, + "loss": 0.1934, + "step": 6832 + }, + { + "epoch": 1.83, + "learning_rate": 3.8565112540192925e-05, + "loss": 0.1335, + "step": 6833 + }, + { + "epoch": 1.83, + "learning_rate": 3.856343783494105e-05, + "loss": 0.1461, + "step": 6834 + }, + { + "epoch": 1.83, + "learning_rate": 3.856176312968918e-05, + "loss": 0.151, + "step": 6835 + }, + { + "epoch": 1.83, + "learning_rate": 3.8560088424437304e-05, + "loss": 0.2663, + "step": 6836 + }, + { + "epoch": 1.83, + "learning_rate": 3.855841371918543e-05, + "loss": 0.1055, + "step": 6837 + }, + { + "epoch": 1.83, + "learning_rate": 3.855673901393355e-05, + "loss": 0.115, + "step": 6838 + }, + { + "epoch": 1.83, + "learning_rate": 3.855506430868168e-05, + "loss": 0.1359, + "step": 6839 + }, + { + "epoch": 1.83, + "learning_rate": 3.8553389603429794e-05, + "loss": 0.2057, + "step": 6840 + }, + { + "epoch": 1.83, + "learning_rate": 3.855171489817792e-05, + "loss": 0.106, + "step": 6841 + }, + { + "epoch": 1.83, + "learning_rate": 3.855004019292604e-05, + "loss": 0.1255, + "step": 6842 + }, + { + "epoch": 1.83, + "learning_rate": 3.8548365487674174e-05, + "loss": 0.1352, + "step": 6843 + }, + { + "epoch": 1.83, + "learning_rate": 3.85466907824223e-05, + "loss": 0.2238, + "step": 6844 + }, + { + "epoch": 1.83, + "learning_rate": 3.854501607717042e-05, + "loss": 0.157, + "step": 6845 + }, + { + "epoch": 1.83, + "learning_rate": 3.8543341371918546e-05, + "loss": 0.1816, + "step": 6846 + }, + { + "epoch": 1.83, + "learning_rate": 3.854166666666667e-05, + "loss": 0.2299, + "step": 6847 + }, + { + "epoch": 1.83, + "learning_rate": 3.8539991961414795e-05, + "loss": 0.1202, + "step": 6848 + }, + { + "epoch": 1.84, + "learning_rate": 3.853831725616291e-05, + "loss": 0.13, + "step": 6849 + }, + { + "epoch": 1.84, + "learning_rate": 3.8536642550911036e-05, + "loss": 0.1465, + "step": 6850 + }, + { + "epoch": 1.84, + "learning_rate": 3.853496784565917e-05, + "loss": 0.1948, + "step": 6851 + }, + { + "epoch": 1.84, + "learning_rate": 3.853329314040729e-05, + "loss": 0.1728, + "step": 6852 + }, + { + "epoch": 1.84, + "learning_rate": 3.8531618435155416e-05, + "loss": 0.1749, + "step": 6853 + }, + { + "epoch": 1.84, + "learning_rate": 3.852994372990354e-05, + "loss": 0.1902, + "step": 6854 + }, + { + "epoch": 1.84, + "learning_rate": 3.8528269024651664e-05, + "loss": 0.1849, + "step": 6855 + }, + { + "epoch": 1.84, + "learning_rate": 3.852659431939979e-05, + "loss": 0.1348, + "step": 6856 + }, + { + "epoch": 1.84, + "learning_rate": 3.852491961414791e-05, + "loss": 0.1393, + "step": 6857 + }, + { + "epoch": 1.84, + "learning_rate": 3.852324490889603e-05, + "loss": 0.1374, + "step": 6858 + }, + { + "epoch": 1.84, + "learning_rate": 3.852157020364416e-05, + "loss": 0.1741, + "step": 6859 + }, + { + "epoch": 1.84, + "learning_rate": 3.8519895498392285e-05, + "loss": 0.1153, + "step": 6860 + }, + { + "epoch": 1.84, + "learning_rate": 3.851822079314041e-05, + "loss": 0.1394, + "step": 6861 + }, + { + "epoch": 1.84, + "learning_rate": 3.8516546087888534e-05, + "loss": 0.1598, + "step": 6862 + }, + { + "epoch": 1.84, + "learning_rate": 3.851487138263666e-05, + "loss": 0.1847, + "step": 6863 + }, + { + "epoch": 1.84, + "learning_rate": 3.851319667738478e-05, + "loss": 0.1304, + "step": 6864 + }, + { + "epoch": 1.84, + "learning_rate": 3.8511521972132906e-05, + "loss": 0.1622, + "step": 6865 + }, + { + "epoch": 1.84, + "learning_rate": 3.850984726688103e-05, + "loss": 0.1054, + "step": 6866 + }, + { + "epoch": 1.84, + "learning_rate": 3.8508172561629155e-05, + "loss": 0.1993, + "step": 6867 + }, + { + "epoch": 1.84, + "learning_rate": 3.850649785637728e-05, + "loss": 0.2463, + "step": 6868 + }, + { + "epoch": 1.84, + "learning_rate": 3.85048231511254e-05, + "loss": 0.1281, + "step": 6869 + }, + { + "epoch": 1.84, + "learning_rate": 3.850314844587353e-05, + "loss": 0.1996, + "step": 6870 + }, + { + "epoch": 1.84, + "learning_rate": 3.850147374062165e-05, + "loss": 0.1619, + "step": 6871 + }, + { + "epoch": 1.84, + "learning_rate": 3.8499799035369776e-05, + "loss": 0.1743, + "step": 6872 + }, + { + "epoch": 1.84, + "learning_rate": 3.84981243301179e-05, + "loss": 0.1267, + "step": 6873 + }, + { + "epoch": 1.84, + "learning_rate": 3.8496449624866024e-05, + "loss": 0.1334, + "step": 6874 + }, + { + "epoch": 1.84, + "learning_rate": 3.849477491961415e-05, + "loss": 0.1624, + "step": 6875 + }, + { + "epoch": 1.84, + "learning_rate": 3.849310021436228e-05, + "loss": 0.1144, + "step": 6876 + }, + { + "epoch": 1.84, + "learning_rate": 3.84914255091104e-05, + "loss": 0.1322, + "step": 6877 + }, + { + "epoch": 1.84, + "learning_rate": 3.848975080385852e-05, + "loss": 0.1151, + "step": 6878 + }, + { + "epoch": 1.84, + "learning_rate": 3.8488076098606645e-05, + "loss": 0.5238, + "step": 6879 + }, + { + "epoch": 1.84, + "learning_rate": 3.848640139335477e-05, + "loss": 0.1978, + "step": 6880 + }, + { + "epoch": 1.84, + "learning_rate": 3.8484726688102894e-05, + "loss": 0.1475, + "step": 6881 + }, + { + "epoch": 1.84, + "learning_rate": 3.848305198285102e-05, + "loss": 0.4141, + "step": 6882 + }, + { + "epoch": 1.84, + "learning_rate": 3.848137727759914e-05, + "loss": 0.1426, + "step": 6883 + }, + { + "epoch": 1.84, + "learning_rate": 3.847970257234727e-05, + "loss": 0.1924, + "step": 6884 + }, + { + "epoch": 1.84, + "learning_rate": 3.84780278670954e-05, + "loss": 0.1131, + "step": 6885 + }, + { + "epoch": 1.85, + "learning_rate": 3.8476353161843515e-05, + "loss": 0.112, + "step": 6886 + }, + { + "epoch": 1.85, + "learning_rate": 3.847467845659164e-05, + "loss": 0.1922, + "step": 6887 + }, + { + "epoch": 1.85, + "learning_rate": 3.847300375133976e-05, + "loss": 0.1432, + "step": 6888 + }, + { + "epoch": 1.85, + "learning_rate": 3.847132904608789e-05, + "loss": 0.1326, + "step": 6889 + }, + { + "epoch": 1.85, + "learning_rate": 3.846965434083601e-05, + "loss": 0.1892, + "step": 6890 + }, + { + "epoch": 1.85, + "learning_rate": 3.846797963558414e-05, + "loss": 0.1318, + "step": 6891 + }, + { + "epoch": 1.85, + "learning_rate": 3.846630493033227e-05, + "loss": 0.1227, + "step": 6892 + }, + { + "epoch": 1.85, + "learning_rate": 3.846463022508039e-05, + "loss": 0.2645, + "step": 6893 + }, + { + "epoch": 1.85, + "learning_rate": 3.8462955519828515e-05, + "loss": 0.1504, + "step": 6894 + }, + { + "epoch": 1.85, + "learning_rate": 3.846128081457664e-05, + "loss": 0.1707, + "step": 6895 + }, + { + "epoch": 1.85, + "learning_rate": 3.845960610932476e-05, + "loss": 0.1143, + "step": 6896 + }, + { + "epoch": 1.85, + "learning_rate": 3.845793140407288e-05, + "loss": 0.2199, + "step": 6897 + }, + { + "epoch": 1.85, + "learning_rate": 3.8456256698821005e-05, + "loss": 0.1198, + "step": 6898 + }, + { + "epoch": 1.85, + "learning_rate": 3.8454581993569136e-05, + "loss": 0.1798, + "step": 6899 + }, + { + "epoch": 1.85, + "learning_rate": 3.845290728831726e-05, + "loss": 0.1322, + "step": 6900 + }, + { + "epoch": 1.85, + "learning_rate": 3.8451232583065385e-05, + "loss": 0.2006, + "step": 6901 + }, + { + "epoch": 1.85, + "learning_rate": 3.844955787781351e-05, + "loss": 0.1387, + "step": 6902 + }, + { + "epoch": 1.85, + "learning_rate": 3.844788317256163e-05, + "loss": 0.1188, + "step": 6903 + }, + { + "epoch": 1.85, + "learning_rate": 3.844620846730976e-05, + "loss": 0.2335, + "step": 6904 + }, + { + "epoch": 1.85, + "learning_rate": 3.8444533762057875e-05, + "loss": 0.1273, + "step": 6905 + }, + { + "epoch": 1.85, + "learning_rate": 3.8442859056806e-05, + "loss": 0.3146, + "step": 6906 + }, + { + "epoch": 1.85, + "learning_rate": 3.844118435155413e-05, + "loss": 0.1079, + "step": 6907 + }, + { + "epoch": 1.85, + "learning_rate": 3.8439509646302254e-05, + "loss": 0.1958, + "step": 6908 + }, + { + "epoch": 1.85, + "learning_rate": 3.843783494105038e-05, + "loss": 0.1512, + "step": 6909 + }, + { + "epoch": 1.85, + "learning_rate": 3.84361602357985e-05, + "loss": 0.108, + "step": 6910 + }, + { + "epoch": 1.85, + "learning_rate": 3.843448553054663e-05, + "loss": 0.1809, + "step": 6911 + }, + { + "epoch": 1.85, + "learning_rate": 3.843281082529475e-05, + "loss": 0.2237, + "step": 6912 + }, + { + "epoch": 1.85, + "learning_rate": 3.8431136120042875e-05, + "loss": 0.1374, + "step": 6913 + }, + { + "epoch": 1.85, + "learning_rate": 3.842946141479099e-05, + "loss": 0.1334, + "step": 6914 + }, + { + "epoch": 1.85, + "learning_rate": 3.8427786709539124e-05, + "loss": 0.3518, + "step": 6915 + }, + { + "epoch": 1.85, + "learning_rate": 3.842611200428725e-05, + "loss": 0.1133, + "step": 6916 + }, + { + "epoch": 1.85, + "learning_rate": 3.842443729903537e-05, + "loss": 0.1682, + "step": 6917 + }, + { + "epoch": 1.85, + "learning_rate": 3.8422762593783496e-05, + "loss": 0.1422, + "step": 6918 + }, + { + "epoch": 1.85, + "learning_rate": 3.842108788853162e-05, + "loss": 0.123, + "step": 6919 + }, + { + "epoch": 1.85, + "learning_rate": 3.8419413183279745e-05, + "loss": 0.1705, + "step": 6920 + }, + { + "epoch": 1.85, + "learning_rate": 3.841773847802787e-05, + "loss": 0.1271, + "step": 6921 + }, + { + "epoch": 1.85, + "learning_rate": 3.841606377277599e-05, + "loss": 0.1146, + "step": 6922 + }, + { + "epoch": 1.86, + "learning_rate": 3.841438906752412e-05, + "loss": 0.1114, + "step": 6923 + }, + { + "epoch": 1.86, + "learning_rate": 3.841271436227224e-05, + "loss": 0.1259, + "step": 6924 + }, + { + "epoch": 1.86, + "learning_rate": 3.8411039657020366e-05, + "loss": 0.1252, + "step": 6925 + }, + { + "epoch": 1.86, + "learning_rate": 3.840936495176849e-05, + "loss": 0.1306, + "step": 6926 + }, + { + "epoch": 1.86, + "learning_rate": 3.8407690246516614e-05, + "loss": 0.1293, + "step": 6927 + }, + { + "epoch": 1.86, + "learning_rate": 3.840601554126474e-05, + "loss": 0.1062, + "step": 6928 + }, + { + "epoch": 1.86, + "learning_rate": 3.840434083601286e-05, + "loss": 0.1391, + "step": 6929 + }, + { + "epoch": 1.86, + "learning_rate": 3.840266613076099e-05, + "loss": 0.1842, + "step": 6930 + }, + { + "epoch": 1.86, + "learning_rate": 3.840099142550911e-05, + "loss": 0.3182, + "step": 6931 + }, + { + "epoch": 1.86, + "learning_rate": 3.839931672025724e-05, + "loss": 0.1295, + "step": 6932 + }, + { + "epoch": 1.86, + "learning_rate": 3.839764201500536e-05, + "loss": 0.1188, + "step": 6933 + }, + { + "epoch": 1.86, + "learning_rate": 3.8395967309753484e-05, + "loss": 0.2038, + "step": 6934 + }, + { + "epoch": 1.86, + "learning_rate": 3.839429260450161e-05, + "loss": 0.1205, + "step": 6935 + }, + { + "epoch": 1.86, + "learning_rate": 3.839261789924973e-05, + "loss": 0.1316, + "step": 6936 + }, + { + "epoch": 1.86, + "learning_rate": 3.8390943193997856e-05, + "loss": 0.1144, + "step": 6937 + }, + { + "epoch": 1.86, + "learning_rate": 3.838926848874598e-05, + "loss": 0.1658, + "step": 6938 + }, + { + "epoch": 1.86, + "learning_rate": 3.8387593783494105e-05, + "loss": 0.1394, + "step": 6939 + }, + { + "epoch": 1.86, + "learning_rate": 3.8385919078242236e-05, + "loss": 0.1532, + "step": 6940 + }, + { + "epoch": 1.86, + "learning_rate": 3.838424437299036e-05, + "loss": 0.1268, + "step": 6941 + }, + { + "epoch": 1.86, + "learning_rate": 3.838256966773848e-05, + "loss": 0.118, + "step": 6942 + }, + { + "epoch": 1.86, + "learning_rate": 3.83808949624866e-05, + "loss": 0.1249, + "step": 6943 + }, + { + "epoch": 1.86, + "learning_rate": 3.8379220257234726e-05, + "loss": 0.216, + "step": 6944 + }, + { + "epoch": 1.86, + "learning_rate": 3.837754555198285e-05, + "loss": 0.1577, + "step": 6945 + }, + { + "epoch": 1.86, + "learning_rate": 3.8375870846730974e-05, + "loss": 0.1887, + "step": 6946 + }, + { + "epoch": 1.86, + "learning_rate": 3.8374196141479105e-05, + "loss": 0.155, + "step": 6947 + }, + { + "epoch": 1.86, + "learning_rate": 3.837252143622723e-05, + "loss": 0.1871, + "step": 6948 + }, + { + "epoch": 1.86, + "learning_rate": 3.8370846730975354e-05, + "loss": 0.1666, + "step": 6949 + }, + { + "epoch": 1.86, + "learning_rate": 3.836917202572348e-05, + "loss": 0.2672, + "step": 6950 + }, + { + "epoch": 1.86, + "learning_rate": 3.83674973204716e-05, + "loss": 0.1064, + "step": 6951 + }, + { + "epoch": 1.86, + "learning_rate": 3.836582261521972e-05, + "loss": 0.1229, + "step": 6952 + }, + { + "epoch": 1.86, + "learning_rate": 3.8364147909967844e-05, + "loss": 0.1337, + "step": 6953 + }, + { + "epoch": 1.86, + "learning_rate": 3.836247320471597e-05, + "loss": 0.1354, + "step": 6954 + }, + { + "epoch": 1.86, + "learning_rate": 3.83607984994641e-05, + "loss": 0.1288, + "step": 6955 + }, + { + "epoch": 1.86, + "learning_rate": 3.835912379421222e-05, + "loss": 0.1605, + "step": 6956 + }, + { + "epoch": 1.86, + "learning_rate": 3.835744908896035e-05, + "loss": 0.2898, + "step": 6957 + }, + { + "epoch": 1.86, + "learning_rate": 3.835577438370847e-05, + "loss": 0.2552, + "step": 6958 + }, + { + "epoch": 1.86, + "learning_rate": 3.8354099678456596e-05, + "loss": 0.1358, + "step": 6959 + }, + { + "epoch": 1.86, + "learning_rate": 3.835242497320472e-05, + "loss": 0.1176, + "step": 6960 + }, + { + "epoch": 1.87, + "learning_rate": 3.835075026795284e-05, + "loss": 0.2008, + "step": 6961 + }, + { + "epoch": 1.87, + "learning_rate": 3.834907556270096e-05, + "loss": 0.1388, + "step": 6962 + }, + { + "epoch": 1.87, + "learning_rate": 3.834740085744909e-05, + "loss": 0.3222, + "step": 6963 + }, + { + "epoch": 1.87, + "learning_rate": 3.834572615219722e-05, + "loss": 0.2425, + "step": 6964 + }, + { + "epoch": 1.87, + "learning_rate": 3.834405144694534e-05, + "loss": 0.1178, + "step": 6965 + }, + { + "epoch": 1.87, + "learning_rate": 3.8342376741693465e-05, + "loss": 0.1325, + "step": 6966 + }, + { + "epoch": 1.87, + "learning_rate": 3.834070203644159e-05, + "loss": 0.1256, + "step": 6967 + }, + { + "epoch": 1.87, + "learning_rate": 3.8339027331189714e-05, + "loss": 0.1488, + "step": 6968 + }, + { + "epoch": 1.87, + "learning_rate": 3.833735262593784e-05, + "loss": 0.2962, + "step": 6969 + }, + { + "epoch": 1.87, + "learning_rate": 3.8335677920685955e-05, + "loss": 0.1921, + "step": 6970 + }, + { + "epoch": 1.87, + "learning_rate": 3.8334003215434086e-05, + "loss": 0.2008, + "step": 6971 + }, + { + "epoch": 1.87, + "learning_rate": 3.833232851018221e-05, + "loss": 0.1538, + "step": 6972 + }, + { + "epoch": 1.87, + "learning_rate": 3.8330653804930335e-05, + "loss": 0.1749, + "step": 6973 + }, + { + "epoch": 1.87, + "learning_rate": 3.832897909967846e-05, + "loss": 0.1257, + "step": 6974 + }, + { + "epoch": 1.87, + "learning_rate": 3.832730439442658e-05, + "loss": 0.1358, + "step": 6975 + }, + { + "epoch": 1.87, + "learning_rate": 3.832562968917471e-05, + "loss": 0.1692, + "step": 6976 + }, + { + "epoch": 1.87, + "learning_rate": 3.832395498392283e-05, + "loss": 0.135, + "step": 6977 + }, + { + "epoch": 1.87, + "learning_rate": 3.8322280278670956e-05, + "loss": 0.1194, + "step": 6978 + }, + { + "epoch": 1.87, + "learning_rate": 3.832060557341908e-05, + "loss": 0.1374, + "step": 6979 + }, + { + "epoch": 1.87, + "learning_rate": 3.8318930868167204e-05, + "loss": 0.2084, + "step": 6980 + }, + { + "epoch": 1.87, + "learning_rate": 3.831725616291533e-05, + "loss": 0.1267, + "step": 6981 + }, + { + "epoch": 1.87, + "learning_rate": 3.831558145766345e-05, + "loss": 0.1181, + "step": 6982 + }, + { + "epoch": 1.87, + "learning_rate": 3.831390675241158e-05, + "loss": 0.1295, + "step": 6983 + }, + { + "epoch": 1.87, + "learning_rate": 3.83122320471597e-05, + "loss": 0.224, + "step": 6984 + }, + { + "epoch": 1.87, + "learning_rate": 3.8310557341907825e-05, + "loss": 0.2585, + "step": 6985 + }, + { + "epoch": 1.87, + "learning_rate": 3.830888263665595e-05, + "loss": 0.168, + "step": 6986 + }, + { + "epoch": 1.87, + "learning_rate": 3.8307207931404074e-05, + "loss": 0.299, + "step": 6987 + }, + { + "epoch": 1.87, + "learning_rate": 3.8305533226152205e-05, + "loss": 0.129, + "step": 6988 + }, + { + "epoch": 1.87, + "learning_rate": 3.830385852090032e-05, + "loss": 0.2849, + "step": 6989 + }, + { + "epoch": 1.87, + "learning_rate": 3.8302183815648446e-05, + "loss": 0.2157, + "step": 6990 + }, + { + "epoch": 1.87, + "learning_rate": 3.830050911039657e-05, + "loss": 0.1132, + "step": 6991 + }, + { + "epoch": 1.87, + "learning_rate": 3.8298834405144695e-05, + "loss": 0.171, + "step": 6992 + }, + { + "epoch": 1.87, + "learning_rate": 3.829715969989282e-05, + "loss": 0.1192, + "step": 6993 + }, + { + "epoch": 1.87, + "learning_rate": 3.829548499464094e-05, + "loss": 0.1077, + "step": 6994 + }, + { + "epoch": 1.87, + "learning_rate": 3.829381028938907e-05, + "loss": 0.2547, + "step": 6995 + }, + { + "epoch": 1.87, + "learning_rate": 3.82921355841372e-05, + "loss": 0.2108, + "step": 6996 + }, + { + "epoch": 1.87, + "learning_rate": 3.829046087888532e-05, + "loss": 0.1222, + "step": 6997 + }, + { + "epoch": 1.88, + "learning_rate": 3.828878617363344e-05, + "loss": 0.1986, + "step": 6998 + }, + { + "epoch": 1.88, + "learning_rate": 3.8287111468381564e-05, + "loss": 0.1228, + "step": 6999 + }, + { + "epoch": 1.88, + "learning_rate": 3.828543676312969e-05, + "loss": 0.1095, + "step": 7000 + }, + { + "epoch": 1.88, + "learning_rate": 3.828376205787781e-05, + "loss": 0.1503, + "step": 7001 + }, + { + "epoch": 1.88, + "learning_rate": 3.828208735262594e-05, + "loss": 0.1561, + "step": 7002 + }, + { + "epoch": 1.88, + "learning_rate": 3.828041264737407e-05, + "loss": 0.2461, + "step": 7003 + }, + { + "epoch": 1.88, + "learning_rate": 3.827873794212219e-05, + "loss": 0.1339, + "step": 7004 + }, + { + "epoch": 1.88, + "learning_rate": 3.8277063236870316e-05, + "loss": 0.1591, + "step": 7005 + }, + { + "epoch": 1.88, + "learning_rate": 3.827538853161844e-05, + "loss": 0.1586, + "step": 7006 + }, + { + "epoch": 1.88, + "learning_rate": 3.8273713826366564e-05, + "loss": 0.1085, + "step": 7007 + }, + { + "epoch": 1.88, + "learning_rate": 3.827203912111468e-05, + "loss": 0.1808, + "step": 7008 + }, + { + "epoch": 1.88, + "learning_rate": 3.8270364415862806e-05, + "loss": 0.109, + "step": 7009 + }, + { + "epoch": 1.88, + "learning_rate": 3.826868971061093e-05, + "loss": 0.1762, + "step": 7010 + }, + { + "epoch": 1.88, + "learning_rate": 3.826701500535906e-05, + "loss": 0.1119, + "step": 7011 + }, + { + "epoch": 1.88, + "learning_rate": 3.8265340300107186e-05, + "loss": 0.1145, + "step": 7012 + }, + { + "epoch": 1.88, + "learning_rate": 3.826366559485531e-05, + "loss": 0.2194, + "step": 7013 + }, + { + "epoch": 1.88, + "learning_rate": 3.8261990889603434e-05, + "loss": 0.1485, + "step": 7014 + }, + { + "epoch": 1.88, + "learning_rate": 3.826031618435156e-05, + "loss": 0.1021, + "step": 7015 + }, + { + "epoch": 1.88, + "learning_rate": 3.825864147909968e-05, + "loss": 0.2794, + "step": 7016 + }, + { + "epoch": 1.88, + "learning_rate": 3.82569667738478e-05, + "loss": 0.2168, + "step": 7017 + }, + { + "epoch": 1.88, + "learning_rate": 3.8255292068595924e-05, + "loss": 0.1337, + "step": 7018 + }, + { + "epoch": 1.88, + "learning_rate": 3.8253617363344055e-05, + "loss": 0.1875, + "step": 7019 + }, + { + "epoch": 1.88, + "learning_rate": 3.825194265809218e-05, + "loss": 0.1252, + "step": 7020 + }, + { + "epoch": 1.88, + "learning_rate": 3.8250267952840303e-05, + "loss": 0.1474, + "step": 7021 + }, + { + "epoch": 1.88, + "learning_rate": 3.824859324758843e-05, + "loss": 0.1336, + "step": 7022 + }, + { + "epoch": 1.88, + "learning_rate": 3.824691854233655e-05, + "loss": 0.1531, + "step": 7023 + }, + { + "epoch": 1.88, + "learning_rate": 3.8245243837084676e-05, + "loss": 0.1252, + "step": 7024 + }, + { + "epoch": 1.88, + "learning_rate": 3.82435691318328e-05, + "loss": 0.4135, + "step": 7025 + }, + { + "epoch": 1.88, + "learning_rate": 3.824189442658092e-05, + "loss": 0.1545, + "step": 7026 + }, + { + "epoch": 1.88, + "learning_rate": 3.824021972132905e-05, + "loss": 0.1234, + "step": 7027 + }, + { + "epoch": 1.88, + "learning_rate": 3.823854501607717e-05, + "loss": 0.347, + "step": 7028 + }, + { + "epoch": 1.88, + "learning_rate": 3.82368703108253e-05, + "loss": 0.1005, + "step": 7029 + }, + { + "epoch": 1.88, + "learning_rate": 3.823519560557342e-05, + "loss": 0.1765, + "step": 7030 + }, + { + "epoch": 1.88, + "learning_rate": 3.8233520900321546e-05, + "loss": 0.1302, + "step": 7031 + }, + { + "epoch": 1.88, + "learning_rate": 3.823184619506967e-05, + "loss": 0.1569, + "step": 7032 + }, + { + "epoch": 1.88, + "learning_rate": 3.8230171489817794e-05, + "loss": 0.1237, + "step": 7033 + }, + { + "epoch": 1.88, + "learning_rate": 3.822849678456592e-05, + "loss": 0.2534, + "step": 7034 + }, + { + "epoch": 1.89, + "learning_rate": 3.822682207931404e-05, + "loss": 0.203, + "step": 7035 + }, + { + "epoch": 1.89, + "learning_rate": 3.8225147374062167e-05, + "loss": 0.1901, + "step": 7036 + }, + { + "epoch": 1.89, + "learning_rate": 3.822347266881029e-05, + "loss": 0.1382, + "step": 7037 + }, + { + "epoch": 1.89, + "learning_rate": 3.8221797963558415e-05, + "loss": 0.1175, + "step": 7038 + }, + { + "epoch": 1.89, + "learning_rate": 3.822012325830654e-05, + "loss": 0.1881, + "step": 7039 + }, + { + "epoch": 1.89, + "learning_rate": 3.8218448553054663e-05, + "loss": 0.1066, + "step": 7040 + }, + { + "epoch": 1.89, + "learning_rate": 3.821677384780279e-05, + "loss": 0.21, + "step": 7041 + }, + { + "epoch": 1.89, + "learning_rate": 3.821509914255091e-05, + "loss": 0.1246, + "step": 7042 + }, + { + "epoch": 1.89, + "learning_rate": 3.8213424437299036e-05, + "loss": 0.1316, + "step": 7043 + }, + { + "epoch": 1.89, + "learning_rate": 3.821174973204717e-05, + "loss": 0.2288, + "step": 7044 + }, + { + "epoch": 1.89, + "learning_rate": 3.8210075026795284e-05, + "loss": 0.1921, + "step": 7045 + }, + { + "epoch": 1.89, + "learning_rate": 3.820840032154341e-05, + "loss": 0.1339, + "step": 7046 + }, + { + "epoch": 1.89, + "learning_rate": 3.820672561629153e-05, + "loss": 0.1191, + "step": 7047 + }, + { + "epoch": 1.89, + "learning_rate": 3.820505091103966e-05, + "loss": 0.143, + "step": 7048 + }, + { + "epoch": 1.89, + "learning_rate": 3.820337620578778e-05, + "loss": 0.1222, + "step": 7049 + }, + { + "epoch": 1.89, + "learning_rate": 3.8201701500535905e-05, + "loss": 0.1034, + "step": 7050 + }, + { + "epoch": 1.89, + "learning_rate": 3.820002679528403e-05, + "loss": 0.131, + "step": 7051 + }, + { + "epoch": 1.89, + "learning_rate": 3.819835209003216e-05, + "loss": 0.1072, + "step": 7052 + }, + { + "epoch": 1.89, + "learning_rate": 3.8196677384780285e-05, + "loss": 0.1482, + "step": 7053 + }, + { + "epoch": 1.89, + "learning_rate": 3.81950026795284e-05, + "loss": 0.1265, + "step": 7054 + }, + { + "epoch": 1.89, + "learning_rate": 3.8193327974276527e-05, + "loss": 0.1128, + "step": 7055 + }, + { + "epoch": 1.89, + "learning_rate": 3.819165326902465e-05, + "loss": 0.1326, + "step": 7056 + }, + { + "epoch": 1.89, + "learning_rate": 3.8189978563772775e-05, + "loss": 0.1161, + "step": 7057 + }, + { + "epoch": 1.89, + "learning_rate": 3.81883038585209e-05, + "loss": 0.2386, + "step": 7058 + }, + { + "epoch": 1.89, + "learning_rate": 3.818662915326903e-05, + "loss": 0.117, + "step": 7059 + }, + { + "epoch": 1.89, + "learning_rate": 3.8184954448017154e-05, + "loss": 0.1876, + "step": 7060 + }, + { + "epoch": 1.89, + "learning_rate": 3.818327974276528e-05, + "loss": 0.1825, + "step": 7061 + }, + { + "epoch": 1.89, + "learning_rate": 3.81816050375134e-05, + "loss": 0.1614, + "step": 7062 + }, + { + "epoch": 1.89, + "learning_rate": 3.817993033226153e-05, + "loss": 0.1236, + "step": 7063 + }, + { + "epoch": 1.89, + "learning_rate": 3.8178255627009644e-05, + "loss": 0.2648, + "step": 7064 + }, + { + "epoch": 1.89, + "learning_rate": 3.817658092175777e-05, + "loss": 0.116, + "step": 7065 + }, + { + "epoch": 1.89, + "learning_rate": 3.817490621650589e-05, + "loss": 0.191, + "step": 7066 + }, + { + "epoch": 1.89, + "learning_rate": 3.8173231511254024e-05, + "loss": 0.1484, + "step": 7067 + }, + { + "epoch": 1.89, + "learning_rate": 3.817155680600215e-05, + "loss": 0.2641, + "step": 7068 + }, + { + "epoch": 1.89, + "learning_rate": 3.816988210075027e-05, + "loss": 0.1973, + "step": 7069 + }, + { + "epoch": 1.89, + "learning_rate": 3.8168207395498396e-05, + "loss": 0.2244, + "step": 7070 + }, + { + "epoch": 1.89, + "learning_rate": 3.816653269024652e-05, + "loss": 0.2488, + "step": 7071 + }, + { + "epoch": 1.89, + "learning_rate": 3.8164857984994645e-05, + "loss": 0.1703, + "step": 7072 + }, + { + "epoch": 1.9, + "learning_rate": 3.816318327974276e-05, + "loss": 0.1263, + "step": 7073 + }, + { + "epoch": 1.9, + "learning_rate": 3.8161508574490887e-05, + "loss": 0.1113, + "step": 7074 + }, + { + "epoch": 1.9, + "learning_rate": 3.815983386923902e-05, + "loss": 0.1904, + "step": 7075 + }, + { + "epoch": 1.9, + "learning_rate": 3.815815916398714e-05, + "loss": 0.1236, + "step": 7076 + }, + { + "epoch": 1.9, + "learning_rate": 3.8156484458735266e-05, + "loss": 0.1057, + "step": 7077 + }, + { + "epoch": 1.9, + "learning_rate": 3.815480975348339e-05, + "loss": 0.1589, + "step": 7078 + }, + { + "epoch": 1.9, + "learning_rate": 3.8153135048231514e-05, + "loss": 0.1051, + "step": 7079 + }, + { + "epoch": 1.9, + "learning_rate": 3.815146034297964e-05, + "loss": 0.1523, + "step": 7080 + }, + { + "epoch": 1.9, + "learning_rate": 3.814978563772776e-05, + "loss": 0.1346, + "step": 7081 + }, + { + "epoch": 1.9, + "learning_rate": 3.814811093247588e-05, + "loss": 0.1056, + "step": 7082 + }, + { + "epoch": 1.9, + "learning_rate": 3.814643622722401e-05, + "loss": 0.1136, + "step": 7083 + }, + { + "epoch": 1.9, + "learning_rate": 3.8144761521972135e-05, + "loss": 0.1484, + "step": 7084 + }, + { + "epoch": 1.9, + "learning_rate": 3.814308681672026e-05, + "loss": 0.196, + "step": 7085 + }, + { + "epoch": 1.9, + "learning_rate": 3.8141412111468384e-05, + "loss": 0.3396, + "step": 7086 + }, + { + "epoch": 1.9, + "learning_rate": 3.813973740621651e-05, + "loss": 0.1282, + "step": 7087 + }, + { + "epoch": 1.9, + "learning_rate": 3.813806270096463e-05, + "loss": 0.1097, + "step": 7088 + }, + { + "epoch": 1.9, + "learning_rate": 3.8136387995712756e-05, + "loss": 0.1298, + "step": 7089 + }, + { + "epoch": 1.9, + "learning_rate": 3.813471329046088e-05, + "loss": 0.1749, + "step": 7090 + }, + { + "epoch": 1.9, + "learning_rate": 3.8133038585209005e-05, + "loss": 0.2806, + "step": 7091 + }, + { + "epoch": 1.9, + "learning_rate": 3.813136387995713e-05, + "loss": 0.1088, + "step": 7092 + }, + { + "epoch": 1.9, + "learning_rate": 3.812968917470525e-05, + "loss": 0.4112, + "step": 7093 + }, + { + "epoch": 1.9, + "learning_rate": 3.812801446945338e-05, + "loss": 0.1213, + "step": 7094 + }, + { + "epoch": 1.9, + "learning_rate": 3.81263397642015e-05, + "loss": 0.1204, + "step": 7095 + }, + { + "epoch": 1.9, + "learning_rate": 3.8124665058949626e-05, + "loss": 0.3456, + "step": 7096 + }, + { + "epoch": 1.9, + "learning_rate": 3.812299035369775e-05, + "loss": 0.1159, + "step": 7097 + }, + { + "epoch": 1.9, + "learning_rate": 3.8121315648445874e-05, + "loss": 0.1139, + "step": 7098 + }, + { + "epoch": 1.9, + "learning_rate": 3.8119640943194e-05, + "loss": 0.216, + "step": 7099 + }, + { + "epoch": 1.9, + "learning_rate": 3.811796623794213e-05, + "loss": 0.1237, + "step": 7100 + }, + { + "epoch": 1.9, + "learning_rate": 3.811629153269025e-05, + "loss": 0.1383, + "step": 7101 + }, + { + "epoch": 1.9, + "learning_rate": 3.811461682743837e-05, + "loss": 0.1233, + "step": 7102 + }, + { + "epoch": 1.9, + "learning_rate": 3.8112942122186495e-05, + "loss": 0.4944, + "step": 7103 + }, + { + "epoch": 1.9, + "learning_rate": 3.811126741693462e-05, + "loss": 0.1294, + "step": 7104 + }, + { + "epoch": 1.9, + "learning_rate": 3.8109592711682744e-05, + "loss": 0.1309, + "step": 7105 + }, + { + "epoch": 1.9, + "learning_rate": 3.810791800643087e-05, + "loss": 0.1375, + "step": 7106 + }, + { + "epoch": 1.9, + "learning_rate": 3.810624330117899e-05, + "loss": 0.1733, + "step": 7107 + }, + { + "epoch": 1.9, + "learning_rate": 3.810456859592712e-05, + "loss": 0.237, + "step": 7108 + }, + { + "epoch": 1.9, + "learning_rate": 3.810289389067525e-05, + "loss": 0.2209, + "step": 7109 + }, + { + "epoch": 1.91, + "learning_rate": 3.8101219185423365e-05, + "loss": 0.1183, + "step": 7110 + }, + { + "epoch": 1.91, + "learning_rate": 3.809954448017149e-05, + "loss": 0.1339, + "step": 7111 + }, + { + "epoch": 1.91, + "learning_rate": 3.809786977491961e-05, + "loss": 0.1499, + "step": 7112 + }, + { + "epoch": 1.91, + "learning_rate": 3.809619506966774e-05, + "loss": 0.1384, + "step": 7113 + }, + { + "epoch": 1.91, + "learning_rate": 3.809452036441586e-05, + "loss": 0.1682, + "step": 7114 + }, + { + "epoch": 1.91, + "learning_rate": 3.809284565916399e-05, + "loss": 0.1159, + "step": 7115 + }, + { + "epoch": 1.91, + "learning_rate": 3.809117095391212e-05, + "loss": 0.2983, + "step": 7116 + }, + { + "epoch": 1.91, + "learning_rate": 3.808949624866024e-05, + "loss": 0.1586, + "step": 7117 + }, + { + "epoch": 1.91, + "learning_rate": 3.8087821543408365e-05, + "loss": 0.2446, + "step": 7118 + }, + { + "epoch": 1.91, + "learning_rate": 3.808614683815649e-05, + "loss": 0.1658, + "step": 7119 + }, + { + "epoch": 1.91, + "learning_rate": 3.808447213290461e-05, + "loss": 0.2023, + "step": 7120 + }, + { + "epoch": 1.91, + "learning_rate": 3.808279742765273e-05, + "loss": 0.1162, + "step": 7121 + }, + { + "epoch": 1.91, + "learning_rate": 3.8081122722400855e-05, + "loss": 0.1998, + "step": 7122 + }, + { + "epoch": 1.91, + "learning_rate": 3.8079448017148986e-05, + "loss": 0.2183, + "step": 7123 + }, + { + "epoch": 1.91, + "learning_rate": 3.807777331189711e-05, + "loss": 0.1071, + "step": 7124 + }, + { + "epoch": 1.91, + "learning_rate": 3.8076098606645235e-05, + "loss": 0.1285, + "step": 7125 + }, + { + "epoch": 1.91, + "learning_rate": 3.807442390139336e-05, + "loss": 0.2777, + "step": 7126 + }, + { + "epoch": 1.91, + "learning_rate": 3.807274919614148e-05, + "loss": 0.2517, + "step": 7127 + }, + { + "epoch": 1.91, + "learning_rate": 3.807107449088961e-05, + "loss": 0.2691, + "step": 7128 + }, + { + "epoch": 1.91, + "learning_rate": 3.8069399785637725e-05, + "loss": 0.1276, + "step": 7129 + }, + { + "epoch": 1.91, + "learning_rate": 3.806772508038585e-05, + "loss": 0.1584, + "step": 7130 + }, + { + "epoch": 1.91, + "learning_rate": 3.806605037513398e-05, + "loss": 0.1745, + "step": 7131 + }, + { + "epoch": 1.91, + "learning_rate": 3.8064375669882104e-05, + "loss": 0.12, + "step": 7132 + }, + { + "epoch": 1.91, + "learning_rate": 3.806270096463023e-05, + "loss": 0.1147, + "step": 7133 + }, + { + "epoch": 1.91, + "learning_rate": 3.806102625937835e-05, + "loss": 0.4049, + "step": 7134 + }, + { + "epoch": 1.91, + "learning_rate": 3.805935155412648e-05, + "loss": 0.1301, + "step": 7135 + }, + { + "epoch": 1.91, + "learning_rate": 3.80576768488746e-05, + "loss": 0.127, + "step": 7136 + }, + { + "epoch": 1.91, + "learning_rate": 3.8056002143622725e-05, + "loss": 0.1278, + "step": 7137 + }, + { + "epoch": 1.91, + "learning_rate": 3.805432743837084e-05, + "loss": 0.1515, + "step": 7138 + }, + { + "epoch": 1.91, + "learning_rate": 3.8052652733118974e-05, + "loss": 0.2631, + "step": 7139 + }, + { + "epoch": 1.91, + "learning_rate": 3.80509780278671e-05, + "loss": 0.1604, + "step": 7140 + }, + { + "epoch": 1.91, + "learning_rate": 3.804930332261522e-05, + "loss": 0.2155, + "step": 7141 + }, + { + "epoch": 1.91, + "learning_rate": 3.8047628617363346e-05, + "loss": 0.1817, + "step": 7142 + }, + { + "epoch": 1.91, + "learning_rate": 3.804595391211147e-05, + "loss": 0.1351, + "step": 7143 + }, + { + "epoch": 1.91, + "learning_rate": 3.8044279206859595e-05, + "loss": 0.1322, + "step": 7144 + }, + { + "epoch": 1.91, + "learning_rate": 3.804260450160772e-05, + "loss": 0.164, + "step": 7145 + }, + { + "epoch": 1.91, + "learning_rate": 3.804092979635584e-05, + "loss": 0.1224, + "step": 7146 + }, + { + "epoch": 1.92, + "learning_rate": 3.803925509110397e-05, + "loss": 0.1154, + "step": 7147 + }, + { + "epoch": 1.92, + "learning_rate": 3.803758038585209e-05, + "loss": 0.2232, + "step": 7148 + }, + { + "epoch": 1.92, + "learning_rate": 3.8035905680600216e-05, + "loss": 0.1245, + "step": 7149 + }, + { + "epoch": 1.92, + "learning_rate": 3.803423097534834e-05, + "loss": 0.2544, + "step": 7150 + }, + { + "epoch": 1.92, + "learning_rate": 3.8032556270096464e-05, + "loss": 0.1233, + "step": 7151 + }, + { + "epoch": 1.92, + "learning_rate": 3.803088156484459e-05, + "loss": 0.1823, + "step": 7152 + }, + { + "epoch": 1.92, + "learning_rate": 3.802920685959271e-05, + "loss": 0.1357, + "step": 7153 + }, + { + "epoch": 1.92, + "learning_rate": 3.802753215434084e-05, + "loss": 0.131, + "step": 7154 + }, + { + "epoch": 1.92, + "learning_rate": 3.802585744908896e-05, + "loss": 0.1119, + "step": 7155 + }, + { + "epoch": 1.92, + "learning_rate": 3.802418274383709e-05, + "loss": 0.1143, + "step": 7156 + }, + { + "epoch": 1.92, + "learning_rate": 3.802250803858521e-05, + "loss": 0.17, + "step": 7157 + }, + { + "epoch": 1.92, + "learning_rate": 3.8020833333333334e-05, + "loss": 0.2981, + "step": 7158 + }, + { + "epoch": 1.92, + "learning_rate": 3.801915862808146e-05, + "loss": 0.2607, + "step": 7159 + }, + { + "epoch": 1.92, + "learning_rate": 3.801748392282958e-05, + "loss": 0.1449, + "step": 7160 + }, + { + "epoch": 1.92, + "learning_rate": 3.8015809217577706e-05, + "loss": 0.1529, + "step": 7161 + }, + { + "epoch": 1.92, + "learning_rate": 3.801413451232583e-05, + "loss": 0.1432, + "step": 7162 + }, + { + "epoch": 1.92, + "learning_rate": 3.8012459807073955e-05, + "loss": 0.2514, + "step": 7163 + }, + { + "epoch": 1.92, + "learning_rate": 3.8010785101822086e-05, + "loss": 0.1163, + "step": 7164 + }, + { + "epoch": 1.92, + "learning_rate": 3.800911039657021e-05, + "loss": 0.2765, + "step": 7165 + }, + { + "epoch": 1.92, + "learning_rate": 3.800743569131833e-05, + "loss": 0.1429, + "step": 7166 + }, + { + "epoch": 1.92, + "learning_rate": 3.800576098606645e-05, + "loss": 0.1192, + "step": 7167 + }, + { + "epoch": 1.92, + "learning_rate": 3.8004086280814576e-05, + "loss": 0.2364, + "step": 7168 + }, + { + "epoch": 1.92, + "learning_rate": 3.80024115755627e-05, + "loss": 0.29, + "step": 7169 + }, + { + "epoch": 1.92, + "learning_rate": 3.8000736870310824e-05, + "loss": 0.125, + "step": 7170 + }, + { + "epoch": 1.92, + "learning_rate": 3.799906216505895e-05, + "loss": 0.1066, + "step": 7171 + }, + { + "epoch": 1.92, + "learning_rate": 3.799738745980708e-05, + "loss": 0.1375, + "step": 7172 + }, + { + "epoch": 1.92, + "learning_rate": 3.7995712754555204e-05, + "loss": 0.1277, + "step": 7173 + }, + { + "epoch": 1.92, + "learning_rate": 3.799403804930333e-05, + "loss": 0.1749, + "step": 7174 + }, + { + "epoch": 1.92, + "learning_rate": 3.799236334405145e-05, + "loss": 0.1771, + "step": 7175 + }, + { + "epoch": 1.92, + "learning_rate": 3.799068863879957e-05, + "loss": 0.1727, + "step": 7176 + }, + { + "epoch": 1.92, + "learning_rate": 3.7989013933547694e-05, + "loss": 0.1267, + "step": 7177 + }, + { + "epoch": 1.92, + "learning_rate": 3.798733922829582e-05, + "loss": 0.1107, + "step": 7178 + }, + { + "epoch": 1.92, + "learning_rate": 3.798566452304395e-05, + "loss": 0.1395, + "step": 7179 + }, + { + "epoch": 1.92, + "learning_rate": 3.798398981779207e-05, + "loss": 0.2563, + "step": 7180 + }, + { + "epoch": 1.92, + "learning_rate": 3.79823151125402e-05, + "loss": 0.1362, + "step": 7181 + }, + { + "epoch": 1.92, + "learning_rate": 3.798064040728832e-05, + "loss": 0.2551, + "step": 7182 + }, + { + "epoch": 1.92, + "learning_rate": 3.7978965702036446e-05, + "loss": 0.2124, + "step": 7183 + }, + { + "epoch": 1.92, + "learning_rate": 3.797729099678457e-05, + "loss": 0.171, + "step": 7184 + }, + { + "epoch": 1.93, + "learning_rate": 3.797561629153269e-05, + "loss": 0.1301, + "step": 7185 + }, + { + "epoch": 1.93, + "learning_rate": 3.797394158628081e-05, + "loss": 0.1055, + "step": 7186 + }, + { + "epoch": 1.93, + "learning_rate": 3.797226688102894e-05, + "loss": 0.2064, + "step": 7187 + }, + { + "epoch": 1.93, + "learning_rate": 3.797059217577707e-05, + "loss": 0.1384, + "step": 7188 + }, + { + "epoch": 1.93, + "learning_rate": 3.796891747052519e-05, + "loss": 0.1106, + "step": 7189 + }, + { + "epoch": 1.93, + "learning_rate": 3.7967242765273315e-05, + "loss": 0.2848, + "step": 7190 + }, + { + "epoch": 1.93, + "learning_rate": 3.796556806002144e-05, + "loss": 0.1442, + "step": 7191 + }, + { + "epoch": 1.93, + "learning_rate": 3.7963893354769564e-05, + "loss": 0.1246, + "step": 7192 + }, + { + "epoch": 1.93, + "learning_rate": 3.796221864951769e-05, + "loss": 0.2028, + "step": 7193 + }, + { + "epoch": 1.93, + "learning_rate": 3.7960543944265805e-05, + "loss": 0.1397, + "step": 7194 + }, + { + "epoch": 1.93, + "learning_rate": 3.7958869239013936e-05, + "loss": 0.1155, + "step": 7195 + }, + { + "epoch": 1.93, + "learning_rate": 3.795719453376206e-05, + "loss": 0.161, + "step": 7196 + }, + { + "epoch": 1.93, + "learning_rate": 3.7955519828510185e-05, + "loss": 0.0957, + "step": 7197 + }, + { + "epoch": 1.93, + "learning_rate": 3.795384512325831e-05, + "loss": 0.1273, + "step": 7198 + }, + { + "epoch": 1.93, + "learning_rate": 3.795217041800643e-05, + "loss": 0.1664, + "step": 7199 + }, + { + "epoch": 1.93, + "learning_rate": 3.795049571275456e-05, + "loss": 0.1851, + "step": 7200 + }, + { + "epoch": 1.93, + "learning_rate": 3.794882100750268e-05, + "loss": 0.1566, + "step": 7201 + }, + { + "epoch": 1.93, + "learning_rate": 3.7947146302250806e-05, + "loss": 0.2805, + "step": 7202 + }, + { + "epoch": 1.93, + "learning_rate": 3.794547159699893e-05, + "loss": 0.1246, + "step": 7203 + }, + { + "epoch": 1.93, + "learning_rate": 3.7943796891747054e-05, + "loss": 0.2821, + "step": 7204 + }, + { + "epoch": 1.93, + "learning_rate": 3.794212218649518e-05, + "loss": 0.1269, + "step": 7205 + }, + { + "epoch": 1.93, + "learning_rate": 3.79404474812433e-05, + "loss": 0.1134, + "step": 7206 + }, + { + "epoch": 1.93, + "learning_rate": 3.793877277599143e-05, + "loss": 0.259, + "step": 7207 + }, + { + "epoch": 1.93, + "learning_rate": 3.793709807073955e-05, + "loss": 0.1301, + "step": 7208 + }, + { + "epoch": 1.93, + "learning_rate": 3.7935423365487675e-05, + "loss": 0.1548, + "step": 7209 + }, + { + "epoch": 1.93, + "learning_rate": 3.79337486602358e-05, + "loss": 0.1174, + "step": 7210 + }, + { + "epoch": 1.93, + "learning_rate": 3.7932073954983924e-05, + "loss": 0.1576, + "step": 7211 + }, + { + "epoch": 1.93, + "learning_rate": 3.7930399249732055e-05, + "loss": 0.2417, + "step": 7212 + }, + { + "epoch": 1.93, + "learning_rate": 3.792872454448017e-05, + "loss": 0.125, + "step": 7213 + }, + { + "epoch": 1.93, + "learning_rate": 3.7927049839228296e-05, + "loss": 0.1683, + "step": 7214 + }, + { + "epoch": 1.93, + "learning_rate": 3.792537513397642e-05, + "loss": 0.1057, + "step": 7215 + }, + { + "epoch": 1.93, + "learning_rate": 3.7923700428724545e-05, + "loss": 0.1485, + "step": 7216 + }, + { + "epoch": 1.93, + "learning_rate": 3.792202572347267e-05, + "loss": 0.1465, + "step": 7217 + }, + { + "epoch": 1.93, + "learning_rate": 3.792035101822079e-05, + "loss": 0.1726, + "step": 7218 + }, + { + "epoch": 1.93, + "learning_rate": 3.791867631296892e-05, + "loss": 0.1225, + "step": 7219 + }, + { + "epoch": 1.93, + "learning_rate": 3.791700160771705e-05, + "loss": 0.1141, + "step": 7220 + }, + { + "epoch": 1.93, + "learning_rate": 3.791532690246517e-05, + "loss": 0.1295, + "step": 7221 + }, + { + "epoch": 1.94, + "learning_rate": 3.791365219721329e-05, + "loss": 0.107, + "step": 7222 + }, + { + "epoch": 1.94, + "learning_rate": 3.7911977491961414e-05, + "loss": 0.1754, + "step": 7223 + }, + { + "epoch": 1.94, + "learning_rate": 3.791030278670954e-05, + "loss": 0.1806, + "step": 7224 + }, + { + "epoch": 1.94, + "learning_rate": 3.790862808145766e-05, + "loss": 0.2145, + "step": 7225 + }, + { + "epoch": 1.94, + "learning_rate": 3.790695337620579e-05, + "loss": 0.119, + "step": 7226 + }, + { + "epoch": 1.94, + "learning_rate": 3.790527867095391e-05, + "loss": 0.1981, + "step": 7227 + }, + { + "epoch": 1.94, + "learning_rate": 3.790360396570204e-05, + "loss": 0.1082, + "step": 7228 + }, + { + "epoch": 1.94, + "learning_rate": 3.7901929260450166e-05, + "loss": 0.123, + "step": 7229 + }, + { + "epoch": 1.94, + "learning_rate": 3.790025455519829e-05, + "loss": 0.1669, + "step": 7230 + }, + { + "epoch": 1.94, + "learning_rate": 3.7898579849946415e-05, + "loss": 0.175, + "step": 7231 + }, + { + "epoch": 1.94, + "learning_rate": 3.789690514469453e-05, + "loss": 0.1899, + "step": 7232 + }, + { + "epoch": 1.94, + "learning_rate": 3.7895230439442656e-05, + "loss": 0.1669, + "step": 7233 + }, + { + "epoch": 1.94, + "learning_rate": 3.789355573419078e-05, + "loss": 0.1936, + "step": 7234 + }, + { + "epoch": 1.94, + "learning_rate": 3.789188102893891e-05, + "loss": 0.1056, + "step": 7235 + }, + { + "epoch": 1.94, + "learning_rate": 3.7890206323687036e-05, + "loss": 0.1206, + "step": 7236 + }, + { + "epoch": 1.94, + "learning_rate": 3.788853161843516e-05, + "loss": 0.3967, + "step": 7237 + }, + { + "epoch": 1.94, + "learning_rate": 3.7886856913183284e-05, + "loss": 0.1542, + "step": 7238 + }, + { + "epoch": 1.94, + "learning_rate": 3.788518220793141e-05, + "loss": 0.1402, + "step": 7239 + }, + { + "epoch": 1.94, + "learning_rate": 3.788350750267953e-05, + "loss": 0.2218, + "step": 7240 + }, + { + "epoch": 1.94, + "learning_rate": 3.788183279742765e-05, + "loss": 0.1466, + "step": 7241 + }, + { + "epoch": 1.94, + "learning_rate": 3.7880158092175774e-05, + "loss": 0.2347, + "step": 7242 + }, + { + "epoch": 1.94, + "learning_rate": 3.7878483386923905e-05, + "loss": 0.1205, + "step": 7243 + }, + { + "epoch": 1.94, + "learning_rate": 3.787680868167203e-05, + "loss": 0.1403, + "step": 7244 + }, + { + "epoch": 1.94, + "learning_rate": 3.7875133976420153e-05, + "loss": 0.1458, + "step": 7245 + }, + { + "epoch": 1.94, + "learning_rate": 3.787345927116828e-05, + "loss": 0.3263, + "step": 7246 + }, + { + "epoch": 1.94, + "learning_rate": 3.78717845659164e-05, + "loss": 0.1874, + "step": 7247 + }, + { + "epoch": 1.94, + "learning_rate": 3.7870109860664526e-05, + "loss": 0.1406, + "step": 7248 + }, + { + "epoch": 1.94, + "learning_rate": 3.786843515541265e-05, + "loss": 0.1727, + "step": 7249 + }, + { + "epoch": 1.94, + "learning_rate": 3.786676045016077e-05, + "loss": 0.1897, + "step": 7250 + }, + { + "epoch": 1.94, + "learning_rate": 3.78650857449089e-05, + "loss": 0.3629, + "step": 7251 + }, + { + "epoch": 1.94, + "learning_rate": 3.786341103965702e-05, + "loss": 0.1486, + "step": 7252 + }, + { + "epoch": 1.94, + "learning_rate": 3.786173633440515e-05, + "loss": 0.1208, + "step": 7253 + }, + { + "epoch": 1.94, + "learning_rate": 3.786006162915327e-05, + "loss": 0.1294, + "step": 7254 + }, + { + "epoch": 1.94, + "learning_rate": 3.7858386923901396e-05, + "loss": 0.4132, + "step": 7255 + }, + { + "epoch": 1.94, + "learning_rate": 3.785671221864952e-05, + "loss": 0.161, + "step": 7256 + }, + { + "epoch": 1.94, + "learning_rate": 3.7855037513397644e-05, + "loss": 0.1107, + "step": 7257 + }, + { + "epoch": 1.94, + "learning_rate": 3.785336280814577e-05, + "loss": 0.1172, + "step": 7258 + }, + { + "epoch": 1.95, + "learning_rate": 3.785168810289389e-05, + "loss": 0.1158, + "step": 7259 + }, + { + "epoch": 1.95, + "learning_rate": 3.7850013397642017e-05, + "loss": 0.1295, + "step": 7260 + }, + { + "epoch": 1.95, + "learning_rate": 3.784833869239014e-05, + "loss": 0.2409, + "step": 7261 + }, + { + "epoch": 1.95, + "learning_rate": 3.7846663987138265e-05, + "loss": 0.1333, + "step": 7262 + }, + { + "epoch": 1.95, + "learning_rate": 3.784498928188639e-05, + "loss": 0.484, + "step": 7263 + }, + { + "epoch": 1.95, + "learning_rate": 3.7843314576634513e-05, + "loss": 0.1397, + "step": 7264 + }, + { + "epoch": 1.95, + "learning_rate": 3.784163987138264e-05, + "loss": 0.1027, + "step": 7265 + }, + { + "epoch": 1.95, + "learning_rate": 3.783996516613076e-05, + "loss": 0.3021, + "step": 7266 + }, + { + "epoch": 1.95, + "learning_rate": 3.7838290460878886e-05, + "loss": 0.2582, + "step": 7267 + }, + { + "epoch": 1.95, + "learning_rate": 3.783661575562702e-05, + "loss": 0.1279, + "step": 7268 + }, + { + "epoch": 1.95, + "learning_rate": 3.7834941050375134e-05, + "loss": 0.1215, + "step": 7269 + }, + { + "epoch": 1.95, + "learning_rate": 3.783326634512326e-05, + "loss": 0.2166, + "step": 7270 + }, + { + "epoch": 1.95, + "learning_rate": 3.783159163987138e-05, + "loss": 0.3586, + "step": 7271 + }, + { + "epoch": 1.95, + "learning_rate": 3.782991693461951e-05, + "loss": 0.2858, + "step": 7272 + }, + { + "epoch": 1.95, + "learning_rate": 3.782824222936763e-05, + "loss": 0.2347, + "step": 7273 + }, + { + "epoch": 1.95, + "learning_rate": 3.7826567524115756e-05, + "loss": 0.135, + "step": 7274 + }, + { + "epoch": 1.95, + "learning_rate": 3.782489281886388e-05, + "loss": 0.1121, + "step": 7275 + }, + { + "epoch": 1.95, + "learning_rate": 3.782321811361201e-05, + "loss": 0.113, + "step": 7276 + }, + { + "epoch": 1.95, + "learning_rate": 3.7821543408360135e-05, + "loss": 0.4462, + "step": 7277 + }, + { + "epoch": 1.95, + "learning_rate": 3.781986870310825e-05, + "loss": 0.1214, + "step": 7278 + }, + { + "epoch": 1.95, + "learning_rate": 3.7818193997856377e-05, + "loss": 0.1234, + "step": 7279 + }, + { + "epoch": 1.95, + "learning_rate": 3.78165192926045e-05, + "loss": 0.2488, + "step": 7280 + }, + { + "epoch": 1.95, + "learning_rate": 3.7814844587352625e-05, + "loss": 0.1631, + "step": 7281 + }, + { + "epoch": 1.95, + "learning_rate": 3.781316988210075e-05, + "loss": 0.1162, + "step": 7282 + }, + { + "epoch": 1.95, + "learning_rate": 3.7811495176848873e-05, + "loss": 0.1592, + "step": 7283 + }, + { + "epoch": 1.95, + "learning_rate": 3.7809820471597004e-05, + "loss": 0.1353, + "step": 7284 + }, + { + "epoch": 1.95, + "learning_rate": 3.780814576634513e-05, + "loss": 0.1292, + "step": 7285 + }, + { + "epoch": 1.95, + "learning_rate": 3.780647106109325e-05, + "loss": 0.3385, + "step": 7286 + }, + { + "epoch": 1.95, + "learning_rate": 3.780479635584138e-05, + "loss": 0.1268, + "step": 7287 + }, + { + "epoch": 1.95, + "learning_rate": 3.7803121650589494e-05, + "loss": 0.1237, + "step": 7288 + }, + { + "epoch": 1.95, + "learning_rate": 3.780144694533762e-05, + "loss": 0.1135, + "step": 7289 + }, + { + "epoch": 1.95, + "learning_rate": 3.779977224008574e-05, + "loss": 0.1224, + "step": 7290 + }, + { + "epoch": 1.95, + "learning_rate": 3.7798097534833874e-05, + "loss": 0.1221, + "step": 7291 + }, + { + "epoch": 1.95, + "learning_rate": 3.7796422829582e-05, + "loss": 0.1253, + "step": 7292 + }, + { + "epoch": 1.95, + "learning_rate": 3.779474812433012e-05, + "loss": 0.1117, + "step": 7293 + }, + { + "epoch": 1.95, + "learning_rate": 3.7793073419078246e-05, + "loss": 0.1912, + "step": 7294 + }, + { + "epoch": 1.95, + "learning_rate": 3.779139871382637e-05, + "loss": 0.2859, + "step": 7295 + }, + { + "epoch": 1.95, + "learning_rate": 3.7789724008574495e-05, + "loss": 0.1322, + "step": 7296 + }, + { + "epoch": 1.96, + "learning_rate": 3.778804930332261e-05, + "loss": 0.1992, + "step": 7297 + }, + { + "epoch": 1.96, + "learning_rate": 3.7786374598070737e-05, + "loss": 0.1716, + "step": 7298 + }, + { + "epoch": 1.96, + "learning_rate": 3.778469989281887e-05, + "loss": 0.3024, + "step": 7299 + }, + { + "epoch": 1.96, + "learning_rate": 3.778302518756699e-05, + "loss": 0.2939, + "step": 7300 + }, + { + "epoch": 1.96, + "learning_rate": 3.7781350482315116e-05, + "loss": 0.1091, + "step": 7301 + }, + { + "epoch": 1.96, + "learning_rate": 3.777967577706324e-05, + "loss": 0.1273, + "step": 7302 + }, + { + "epoch": 1.96, + "learning_rate": 3.7778001071811364e-05, + "loss": 0.2799, + "step": 7303 + }, + { + "epoch": 1.96, + "learning_rate": 3.777632636655949e-05, + "loss": 0.1588, + "step": 7304 + }, + { + "epoch": 1.96, + "learning_rate": 3.777465166130761e-05, + "loss": 0.1219, + "step": 7305 + }, + { + "epoch": 1.96, + "learning_rate": 3.777297695605573e-05, + "loss": 0.1244, + "step": 7306 + }, + { + "epoch": 1.96, + "learning_rate": 3.777130225080386e-05, + "loss": 0.2476, + "step": 7307 + }, + { + "epoch": 1.96, + "learning_rate": 3.7769627545551985e-05, + "loss": 0.4459, + "step": 7308 + }, + { + "epoch": 1.96, + "learning_rate": 3.776795284030011e-05, + "loss": 0.1284, + "step": 7309 + }, + { + "epoch": 1.96, + "learning_rate": 3.7766278135048234e-05, + "loss": 0.1323, + "step": 7310 + }, + { + "epoch": 1.96, + "learning_rate": 3.776460342979636e-05, + "loss": 0.439, + "step": 7311 + }, + { + "epoch": 1.96, + "learning_rate": 3.776292872454448e-05, + "loss": 0.1332, + "step": 7312 + }, + { + "epoch": 1.96, + "learning_rate": 3.7761254019292606e-05, + "loss": 0.2207, + "step": 7313 + }, + { + "epoch": 1.96, + "learning_rate": 3.775957931404073e-05, + "loss": 0.2966, + "step": 7314 + }, + { + "epoch": 1.96, + "learning_rate": 3.7757904608788855e-05, + "loss": 0.1928, + "step": 7315 + }, + { + "epoch": 1.96, + "learning_rate": 3.775622990353698e-05, + "loss": 0.1664, + "step": 7316 + }, + { + "epoch": 1.96, + "learning_rate": 3.77545551982851e-05, + "loss": 0.1124, + "step": 7317 + }, + { + "epoch": 1.96, + "learning_rate": 3.775288049303323e-05, + "loss": 0.125, + "step": 7318 + }, + { + "epoch": 1.96, + "learning_rate": 3.775120578778135e-05, + "loss": 0.1257, + "step": 7319 + }, + { + "epoch": 1.96, + "learning_rate": 3.7749531082529476e-05, + "loss": 0.1288, + "step": 7320 + }, + { + "epoch": 1.96, + "learning_rate": 3.77478563772776e-05, + "loss": 0.1852, + "step": 7321 + }, + { + "epoch": 1.96, + "learning_rate": 3.7746181672025724e-05, + "loss": 0.208, + "step": 7322 + }, + { + "epoch": 1.96, + "learning_rate": 3.774450696677385e-05, + "loss": 0.1148, + "step": 7323 + }, + { + "epoch": 1.96, + "learning_rate": 3.774283226152198e-05, + "loss": 0.1193, + "step": 7324 + }, + { + "epoch": 1.96, + "learning_rate": 3.77411575562701e-05, + "loss": 0.1223, + "step": 7325 + }, + { + "epoch": 1.96, + "learning_rate": 3.773948285101822e-05, + "loss": 0.1473, + "step": 7326 + }, + { + "epoch": 1.96, + "learning_rate": 3.7737808145766345e-05, + "loss": 0.1687, + "step": 7327 + }, + { + "epoch": 1.96, + "learning_rate": 3.773613344051447e-05, + "loss": 0.1751, + "step": 7328 + }, + { + "epoch": 1.96, + "learning_rate": 3.7734458735262594e-05, + "loss": 0.1035, + "step": 7329 + }, + { + "epoch": 1.96, + "learning_rate": 3.773278403001072e-05, + "loss": 0.1152, + "step": 7330 + }, + { + "epoch": 1.96, + "learning_rate": 3.773110932475884e-05, + "loss": 0.1707, + "step": 7331 + }, + { + "epoch": 1.96, + "learning_rate": 3.772943461950697e-05, + "loss": 0.1065, + "step": 7332 + }, + { + "epoch": 1.96, + "learning_rate": 3.77277599142551e-05, + "loss": 0.1284, + "step": 7333 + }, + { + "epoch": 1.97, + "learning_rate": 3.7726085209003215e-05, + "loss": 0.1252, + "step": 7334 + }, + { + "epoch": 1.97, + "learning_rate": 3.772441050375134e-05, + "loss": 0.1578, + "step": 7335 + }, + { + "epoch": 1.97, + "learning_rate": 3.772273579849946e-05, + "loss": 0.1823, + "step": 7336 + }, + { + "epoch": 1.97, + "learning_rate": 3.772106109324759e-05, + "loss": 0.1465, + "step": 7337 + }, + { + "epoch": 1.97, + "learning_rate": 3.771938638799571e-05, + "loss": 0.1195, + "step": 7338 + }, + { + "epoch": 1.97, + "learning_rate": 3.7717711682743836e-05, + "loss": 0.1521, + "step": 7339 + }, + { + "epoch": 1.97, + "learning_rate": 3.771603697749197e-05, + "loss": 0.2083, + "step": 7340 + }, + { + "epoch": 1.97, + "learning_rate": 3.771436227224009e-05, + "loss": 0.1337, + "step": 7341 + }, + { + "epoch": 1.97, + "learning_rate": 3.7712687566988215e-05, + "loss": 0.1438, + "step": 7342 + }, + { + "epoch": 1.97, + "learning_rate": 3.771101286173634e-05, + "loss": 0.1764, + "step": 7343 + }, + { + "epoch": 1.97, + "learning_rate": 3.770933815648446e-05, + "loss": 0.1515, + "step": 7344 + }, + { + "epoch": 1.97, + "learning_rate": 3.770766345123258e-05, + "loss": 0.1552, + "step": 7345 + }, + { + "epoch": 1.97, + "learning_rate": 3.7705988745980705e-05, + "loss": 0.1705, + "step": 7346 + }, + { + "epoch": 1.97, + "learning_rate": 3.7704314040728836e-05, + "loss": 0.1167, + "step": 7347 + }, + { + "epoch": 1.97, + "learning_rate": 3.770263933547696e-05, + "loss": 0.1188, + "step": 7348 + }, + { + "epoch": 1.97, + "learning_rate": 3.7700964630225085e-05, + "loss": 0.1479, + "step": 7349 + }, + { + "epoch": 1.97, + "learning_rate": 3.769928992497321e-05, + "loss": 0.1935, + "step": 7350 + }, + { + "epoch": 1.97, + "learning_rate": 3.769761521972133e-05, + "loss": 0.1774, + "step": 7351 + }, + { + "epoch": 1.97, + "learning_rate": 3.769594051446946e-05, + "loss": 0.1937, + "step": 7352 + }, + { + "epoch": 1.97, + "learning_rate": 3.7694265809217575e-05, + "loss": 0.2043, + "step": 7353 + }, + { + "epoch": 1.97, + "learning_rate": 3.76925911039657e-05, + "loss": 0.2521, + "step": 7354 + }, + { + "epoch": 1.97, + "learning_rate": 3.769091639871383e-05, + "loss": 0.1274, + "step": 7355 + }, + { + "epoch": 1.97, + "learning_rate": 3.7689241693461954e-05, + "loss": 0.1265, + "step": 7356 + }, + { + "epoch": 1.97, + "learning_rate": 3.768756698821008e-05, + "loss": 0.1407, + "step": 7357 + }, + { + "epoch": 1.97, + "learning_rate": 3.76858922829582e-05, + "loss": 0.2373, + "step": 7358 + }, + { + "epoch": 1.97, + "learning_rate": 3.768421757770633e-05, + "loss": 0.1268, + "step": 7359 + }, + { + "epoch": 1.97, + "learning_rate": 3.768254287245445e-05, + "loss": 0.1114, + "step": 7360 + }, + { + "epoch": 1.97, + "learning_rate": 3.7680868167202575e-05, + "loss": 0.0987, + "step": 7361 + }, + { + "epoch": 1.97, + "learning_rate": 3.767919346195069e-05, + "loss": 0.1289, + "step": 7362 + }, + { + "epoch": 1.97, + "learning_rate": 3.7677518756698824e-05, + "loss": 0.1567, + "step": 7363 + }, + { + "epoch": 1.97, + "learning_rate": 3.767584405144695e-05, + "loss": 0.1759, + "step": 7364 + }, + { + "epoch": 1.97, + "learning_rate": 3.767416934619507e-05, + "loss": 0.1602, + "step": 7365 + }, + { + "epoch": 1.97, + "learning_rate": 3.7672494640943196e-05, + "loss": 0.3384, + "step": 7366 + }, + { + "epoch": 1.97, + "learning_rate": 3.767081993569132e-05, + "loss": 0.1218, + "step": 7367 + }, + { + "epoch": 1.97, + "learning_rate": 3.7669145230439445e-05, + "loss": 0.1165, + "step": 7368 + }, + { + "epoch": 1.97, + "learning_rate": 3.766747052518757e-05, + "loss": 0.1312, + "step": 7369 + }, + { + "epoch": 1.97, + "learning_rate": 3.766579581993569e-05, + "loss": 0.1424, + "step": 7370 + }, + { + "epoch": 1.98, + "learning_rate": 3.766412111468382e-05, + "loss": 0.1184, + "step": 7371 + }, + { + "epoch": 1.98, + "learning_rate": 3.766244640943194e-05, + "loss": 0.2108, + "step": 7372 + }, + { + "epoch": 1.98, + "learning_rate": 3.7660771704180066e-05, + "loss": 0.111, + "step": 7373 + }, + { + "epoch": 1.98, + "learning_rate": 3.765909699892819e-05, + "loss": 0.118, + "step": 7374 + }, + { + "epoch": 1.98, + "learning_rate": 3.7657422293676314e-05, + "loss": 0.1012, + "step": 7375 + }, + { + "epoch": 1.98, + "learning_rate": 3.765574758842444e-05, + "loss": 0.1508, + "step": 7376 + }, + { + "epoch": 1.98, + "learning_rate": 3.765407288317256e-05, + "loss": 0.1236, + "step": 7377 + }, + { + "epoch": 1.98, + "learning_rate": 3.765239817792069e-05, + "loss": 0.1532, + "step": 7378 + }, + { + "epoch": 1.98, + "learning_rate": 3.765072347266881e-05, + "loss": 0.1002, + "step": 7379 + }, + { + "epoch": 1.98, + "learning_rate": 3.764904876741694e-05, + "loss": 0.1615, + "step": 7380 + }, + { + "epoch": 1.98, + "learning_rate": 3.764737406216506e-05, + "loss": 0.2006, + "step": 7381 + }, + { + "epoch": 1.98, + "learning_rate": 3.7645699356913184e-05, + "loss": 0.1957, + "step": 7382 + }, + { + "epoch": 1.98, + "learning_rate": 3.764402465166131e-05, + "loss": 0.1911, + "step": 7383 + }, + { + "epoch": 1.98, + "learning_rate": 3.764234994640943e-05, + "loss": 0.1399, + "step": 7384 + }, + { + "epoch": 1.98, + "learning_rate": 3.7640675241157556e-05, + "loss": 0.2585, + "step": 7385 + }, + { + "epoch": 1.98, + "learning_rate": 3.763900053590568e-05, + "loss": 0.1572, + "step": 7386 + }, + { + "epoch": 1.98, + "learning_rate": 3.7637325830653805e-05, + "loss": 0.196, + "step": 7387 + }, + { + "epoch": 1.98, + "learning_rate": 3.7635651125401936e-05, + "loss": 0.1115, + "step": 7388 + }, + { + "epoch": 1.98, + "learning_rate": 3.763397642015006e-05, + "loss": 0.2101, + "step": 7389 + }, + { + "epoch": 1.98, + "learning_rate": 3.763230171489818e-05, + "loss": 0.1049, + "step": 7390 + }, + { + "epoch": 1.98, + "learning_rate": 3.76306270096463e-05, + "loss": 0.1402, + "step": 7391 + }, + { + "epoch": 1.98, + "learning_rate": 3.7628952304394426e-05, + "loss": 0.2111, + "step": 7392 + }, + { + "epoch": 1.98, + "learning_rate": 3.762727759914255e-05, + "loss": 0.19, + "step": 7393 + }, + { + "epoch": 1.98, + "learning_rate": 3.7625602893890674e-05, + "loss": 0.2742, + "step": 7394 + }, + { + "epoch": 1.98, + "learning_rate": 3.76239281886388e-05, + "loss": 0.139, + "step": 7395 + }, + { + "epoch": 1.98, + "learning_rate": 3.762225348338693e-05, + "loss": 0.3125, + "step": 7396 + }, + { + "epoch": 1.98, + "learning_rate": 3.7620578778135054e-05, + "loss": 0.2036, + "step": 7397 + }, + { + "epoch": 1.98, + "learning_rate": 3.761890407288318e-05, + "loss": 0.1447, + "step": 7398 + }, + { + "epoch": 1.98, + "learning_rate": 3.76172293676313e-05, + "loss": 0.1126, + "step": 7399 + }, + { + "epoch": 1.98, + "learning_rate": 3.761555466237942e-05, + "loss": 0.1555, + "step": 7400 + }, + { + "epoch": 1.98, + "learning_rate": 3.7613879957127544e-05, + "loss": 0.1238, + "step": 7401 + }, + { + "epoch": 1.98, + "learning_rate": 3.761220525187567e-05, + "loss": 0.1192, + "step": 7402 + }, + { + "epoch": 1.98, + "learning_rate": 3.76105305466238e-05, + "loss": 0.1362, + "step": 7403 + }, + { + "epoch": 1.98, + "learning_rate": 3.760885584137192e-05, + "loss": 0.185, + "step": 7404 + }, + { + "epoch": 1.98, + "learning_rate": 3.760718113612005e-05, + "loss": 0.1522, + "step": 7405 + }, + { + "epoch": 1.98, + "learning_rate": 3.760550643086817e-05, + "loss": 0.1315, + "step": 7406 + }, + { + "epoch": 1.98, + "learning_rate": 3.7603831725616296e-05, + "loss": 0.1632, + "step": 7407 + }, + { + "epoch": 1.98, + "learning_rate": 3.760215702036442e-05, + "loss": 0.205, + "step": 7408 + }, + { + "epoch": 1.99, + "learning_rate": 3.760048231511254e-05, + "loss": 0.15, + "step": 7409 + }, + { + "epoch": 1.99, + "learning_rate": 3.759880760986066e-05, + "loss": 0.1446, + "step": 7410 + }, + { + "epoch": 1.99, + "learning_rate": 3.759713290460879e-05, + "loss": 0.1511, + "step": 7411 + }, + { + "epoch": 1.99, + "learning_rate": 3.759545819935692e-05, + "loss": 0.1107, + "step": 7412 + }, + { + "epoch": 1.99, + "learning_rate": 3.759378349410504e-05, + "loss": 0.1415, + "step": 7413 + }, + { + "epoch": 1.99, + "learning_rate": 3.7592108788853165e-05, + "loss": 0.2684, + "step": 7414 + }, + { + "epoch": 1.99, + "learning_rate": 3.759043408360129e-05, + "loss": 0.1297, + "step": 7415 + }, + { + "epoch": 1.99, + "learning_rate": 3.7588759378349414e-05, + "loss": 0.1029, + "step": 7416 + }, + { + "epoch": 1.99, + "learning_rate": 3.758708467309754e-05, + "loss": 0.2872, + "step": 7417 + }, + { + "epoch": 1.99, + "learning_rate": 3.7585409967845655e-05, + "loss": 0.3036, + "step": 7418 + }, + { + "epoch": 1.99, + "learning_rate": 3.7583735262593786e-05, + "loss": 0.1717, + "step": 7419 + }, + { + "epoch": 1.99, + "learning_rate": 3.758206055734191e-05, + "loss": 0.2128, + "step": 7420 + }, + { + "epoch": 1.99, + "learning_rate": 3.7580385852090035e-05, + "loss": 0.0931, + "step": 7421 + }, + { + "epoch": 1.99, + "learning_rate": 3.757871114683816e-05, + "loss": 0.1881, + "step": 7422 + }, + { + "epoch": 1.99, + "learning_rate": 3.757703644158628e-05, + "loss": 0.3287, + "step": 7423 + }, + { + "epoch": 1.99, + "learning_rate": 3.757536173633441e-05, + "loss": 0.1275, + "step": 7424 + }, + { + "epoch": 1.99, + "learning_rate": 3.757368703108253e-05, + "loss": 0.1306, + "step": 7425 + }, + { + "epoch": 1.99, + "learning_rate": 3.7572012325830656e-05, + "loss": 0.1498, + "step": 7426 + }, + { + "epoch": 1.99, + "learning_rate": 3.757033762057878e-05, + "loss": 0.3004, + "step": 7427 + }, + { + "epoch": 1.99, + "learning_rate": 3.7568662915326904e-05, + "loss": 0.1218, + "step": 7428 + }, + { + "epoch": 1.99, + "learning_rate": 3.756698821007503e-05, + "loss": 0.1815, + "step": 7429 + }, + { + "epoch": 1.99, + "learning_rate": 3.756531350482315e-05, + "loss": 0.1345, + "step": 7430 + }, + { + "epoch": 1.99, + "learning_rate": 3.756363879957128e-05, + "loss": 0.2036, + "step": 7431 + }, + { + "epoch": 1.99, + "learning_rate": 3.75619640943194e-05, + "loss": 0.1236, + "step": 7432 + }, + { + "epoch": 1.99, + "learning_rate": 3.7560289389067525e-05, + "loss": 0.1272, + "step": 7433 + }, + { + "epoch": 1.99, + "learning_rate": 3.755861468381565e-05, + "loss": 0.0949, + "step": 7434 + }, + { + "epoch": 1.99, + "learning_rate": 3.7556939978563774e-05, + "loss": 0.1411, + "step": 7435 + }, + { + "epoch": 1.99, + "learning_rate": 3.7555265273311905e-05, + "loss": 0.1783, + "step": 7436 + }, + { + "epoch": 1.99, + "learning_rate": 3.755359056806002e-05, + "loss": 0.2319, + "step": 7437 + }, + { + "epoch": 1.99, + "learning_rate": 3.7551915862808146e-05, + "loss": 0.1292, + "step": 7438 + }, + { + "epoch": 1.99, + "learning_rate": 3.755024115755627e-05, + "loss": 0.1726, + "step": 7439 + }, + { + "epoch": 1.99, + "learning_rate": 3.7548566452304395e-05, + "loss": 0.1174, + "step": 7440 + }, + { + "epoch": 1.99, + "learning_rate": 3.754689174705252e-05, + "loss": 0.2393, + "step": 7441 + }, + { + "epoch": 1.99, + "learning_rate": 3.754521704180064e-05, + "loss": 0.2189, + "step": 7442 + }, + { + "epoch": 1.99, + "learning_rate": 3.754354233654877e-05, + "loss": 0.13, + "step": 7443 + }, + { + "epoch": 1.99, + "learning_rate": 3.75418676312969e-05, + "loss": 0.2216, + "step": 7444 + }, + { + "epoch": 1.99, + "learning_rate": 3.754019292604502e-05, + "loss": 0.2238, + "step": 7445 + }, + { + "epoch": 2.0, + "learning_rate": 3.753851822079314e-05, + "loss": 0.1804, + "step": 7446 + }, + { + "epoch": 2.0, + "learning_rate": 3.7536843515541264e-05, + "loss": 0.1475, + "step": 7447 + }, + { + "epoch": 2.0, + "learning_rate": 3.753516881028939e-05, + "loss": 0.1363, + "step": 7448 + }, + { + "epoch": 2.0, + "learning_rate": 3.753349410503751e-05, + "loss": 0.1216, + "step": 7449 + }, + { + "epoch": 2.0, + "learning_rate": 3.753181939978564e-05, + "loss": 0.2236, + "step": 7450 + }, + { + "epoch": 2.0, + "learning_rate": 3.753014469453376e-05, + "loss": 0.1053, + "step": 7451 + }, + { + "epoch": 2.0, + "learning_rate": 3.752846998928189e-05, + "loss": 0.2348, + "step": 7452 + }, + { + "epoch": 2.0, + "learning_rate": 3.7526795284030016e-05, + "loss": 0.1116, + "step": 7453 + }, + { + "epoch": 2.0, + "learning_rate": 3.752512057877814e-05, + "loss": 0.1189, + "step": 7454 + }, + { + "epoch": 2.0, + "learning_rate": 3.7523445873526265e-05, + "loss": 0.2103, + "step": 7455 + }, + { + "epoch": 2.0, + "learning_rate": 3.752177116827438e-05, + "loss": 0.2607, + "step": 7456 + }, + { + "epoch": 2.0, + "learning_rate": 3.7520096463022506e-05, + "loss": 0.1265, + "step": 7457 + }, + { + "epoch": 2.0, + "learning_rate": 3.751842175777063e-05, + "loss": 0.2276, + "step": 7458 + }, + { + "epoch": 2.0, + "learning_rate": 3.751674705251876e-05, + "loss": 0.167, + "step": 7459 + }, + { + "epoch": 2.0, + "learning_rate": 3.7515072347266886e-05, + "loss": 0.244, + "step": 7460 + }, + { + "epoch": 2.0, + "learning_rate": 3.751339764201501e-05, + "loss": 0.1692, + "step": 7461 + }, + { + "epoch": 2.0, + "learning_rate": 3.7511722936763134e-05, + "loss": 0.1542, + "step": 7462 + }, + { + "epoch": 2.0, + "learning_rate": 3.751004823151126e-05, + "loss": 0.1157, + "step": 7463 + }, + { + "epoch": 2.0, + "learning_rate": 3.750837352625938e-05, + "loss": 0.1436, + "step": 7464 + } + ], + "logging_steps": 1, + "max_steps": 29856, + "num_train_epochs": 8, + "save_steps": 32, + "total_flos": 2.5819203154427904e+18, + "trial_name": null, + "trial_params": null +}